<a href="https://colab.research.google.com/github/neel26desai/deep_learing_with_low_code_libraries/blob/main/keras_nlp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q --upgrade keras-nlp
!pip install -q --upgrade keras  # Upgrade to Keras 3.

In [2]:
import os

# Select the Keras backend. Keras reads this variable when it is first imported,
# so it has to be set before the keras / keras_nlp imports below.
os.environ["KERAS_BACKEND"] = "tensorflow"  # alternatives: "jax" or "torch"

import keras_nlp
import keras
import pandas as pd
import numpy as np
# Use mixed precision to speed up training: computations run in float16 while
# variables are kept in float32.
keras.mixed_precision.set_global_policy("mixed_float16")

# Data:  Using Threads App review data from Kaggle
https://www.kaggle.com/datasets/shuvammandal121/37000-reviews-of-thread-app-dataset

In [3]:
# Read the Threads review CSV from the mounted Google Drive folder
file_path = '/content/drive/MyDrive/data/37000_reviews_of_thread_app.csv'
reviews_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows
reviews_df.head(5)


Unnamed: 0.1,Unnamed: 0,source,review_id,user_name,review_title,review_description,rating,thumbs_up,review_date,developer_response,developer_response_date,appVersion,laguage_code,country_code
0,0,Google Play,7cd90e5b-4829-43b9-9fb4-c8c6d1e339c1,Eddie Clark Jr.,,Good,5,0.0,2023-08-07 19:14:36,,,294.0.0.27.110,en,us
1,1,Google Play,6deb8265-2bac-4524-bcb6-f90829fa4e69,Rasa RT,,Weak copy of Twitter,1,0.0,2023-08-07 19:07:04,,,,en,us
2,2,Google Play,91ef61ce-0f05-4f3b-b3d3-5d19cd408ab8,SITI NUR HAFIZA BINTI AZIZ,,i wish threads have a save button for images a...,3,0.0,2023-08-07 18:57:07,,,294.0.0.27.110,en,us
3,3,Google Play,b7721b78-6b77-4f8c-a1d3-a854af4c1f0f,Asap Khalifah,,Love it,5,0.0,2023-08-07 18:37:16,,,,en,us
4,4,Google Play,c89ef522-c94c-4171-878f-1d672dce7f11,Syed Hussein,,Very god,5,0.0,2023-08-07 18:14:15,,,,en,us


In [4]:
# Code generated using ChatGPT
# Preprocessing steps

# 1. Keep only the review text and its star rating
reviews_df = reviews_df[['review_description', 'rating']]

# 2. Drop rows with a missing review text or a missing rating.
#    A NaN rating compares False against `<= 2`, so it would otherwise be
#    silently labelled 'Positive' by rating_to_sentiment.
reviews_df = reviews_df.dropna(subset=['review_description', 'rating'])

# 3. Convert ratings to sentiment labels (1-2: Negative, 3-5: Positive)
def rating_to_sentiment(rating, negative_max=2):
    """Map a numeric star rating to a binary sentiment label.

    Args:
        rating: Numeric rating of a review.
        negative_max: Highest rating still counted as negative. Defaults to 2,
            so ratings 1-2 are 'Negative' and everything above is 'Positive'.

    Returns:
        'Negative' when ``rating <= negative_max``, otherwise 'Positive'.
        A NaN rating fails the comparison and therefore yields 'Positive'.
    """
    return 'Negative' if rating <= negative_max else 'Positive'

# Label every review. assign() returns a new frame instead of writing a column
# into a frame derived from an earlier selection (avoids SettingWithCopyWarning).
reviews_df = reviews_df.assign(
    sentiment=reviews_df['rating'].apply(rating_to_sentiment)
)

# 4. Keep only the 'review_description' and 'sentiment' columns for sentiment analysis.
#    The explicit copy makes final_df an independent frame, so columns added to it
#    later do not write into a selection of reviews_df.
final_df = reviews_df[['review_description', 'sentiment']].copy()

# Display the first few rows of the preprocessed dataset
final_df.head()


Unnamed: 0,review_description,sentiment
0,Good,Positive
1,Weak copy of Twitter,Negative
2,i wish threads have a save button for images a...,Positive
3,Love it,Positive
4,Very god,Positive


In [5]:
# Encode the sentiment strings as integer class ids (0 = Negative, 1 = Positive).
label_mapping = {'Negative': 0, 'Positive': 1}
# assign() builds a new frame rather than setting a column on a frame that was
# selected out of reviews_df, which would raise SettingWithCopyWarning.
final_df = final_df.assign(sentiment_label=final_df['sentiment'].map(label_mapping))

In [8]:
# Count missing values per column; every count should be zero before training
final_df.isnull().sum()

review_description    0
sentiment             0
sentiment_label       0
dtype: int64

In [6]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
# Hold out 20% of the reviews for testing; the fixed seed keeps the split reproducible.
review_texts = final_df['review_description']
sentiment_ids = final_df['sentiment_label']
X_train, X_test, y_train, y_test = train_test_split(
    review_texts, sentiment_ids, test_size=0.2, random_state=42
)
# Labels stay as integer class ids: the classifier outputs one score per class,
# so no one-hot encoding (to_categorical) is applied for this binary task.

# Using a Pretrained Classifier

In [7]:
# Sanity-check the pretrained sentiment classifier bert_tiny_en_uncased_sst2
# (4.39M parameters) on one clearly positive and one clearly negative sentence.
pos_text = 'I am happy'
neg_text = 'I am sad'
classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased_sst2"
)
# Note: predict() expects batched input, so the strings are wrapped in a list.
predctions = classifier.predict(x=[pos_text, neg_text])

Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased_sst2/3/download/config.json...
100%|██████████| 2.14k/2.14k [00:00<00:00, 1.35MB/s]
Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased_sst2/3/download/assets/tokenizer/vocabulary.txt...
100%|██████████| 226k/226k [00:00<00:00, 302kB/s]
Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased_sst2/3/download/model.weights.h5...
100%|██████████| 16.8M/16.8M [00:02<00:00, 7.01MB/s]
  trackable.load_own_variables(weights_store.get(inner_path))
  trackable.load_own_variables(weights_store.get(inner_path))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


In [8]:
# Pick, for each sentence, the index of the class with the highest score
class_labels = np.asarray(predctions).argmax(axis=1)

In [9]:
# Display the predicted class index of each sample sentence
class_labels

array([1, 0])

The first sentence is positive and the second one is negative

In [10]:
# Baseline: score the unmodified pretrained classifier on the held-out reviews
res = classifier.evaluate(x=X_test, y=y_test)

[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 0.8002 - sparse_categorical_accuracy: 0.6874


The pretrained model has a sparse categorical accuracy of 68.7% on our test data.

Now let us see if we can improve it by fine tuning a pre trained backbone

# Fine Tuning a Pretrained Backbone

Using a BertClassifier

In [12]:
# Load a BertClassifier (4.39M parameters) with a two-class head for fine-tuning.
# NOTE(review): the surrounding text talks about fine-tuning the
# "bert_tiny_en_uncased" backbone, but the preset loaded here is the
# SST-2 classifier "bert_tiny_en_uncased_sst2" - confirm which one is intended.
classifier_backbone = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased_sst2", num_classes=2
)

# Fine-tune on the review data for three epochs, validating on the held-out split
history = classifier_backbone.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=3,
)

  trackable.load_own_variables(weights_store.get(inner_path))
  trackable.load_own_variables(weights_store.get(inner_path))


Epoch 1/3
[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 51ms/step - loss: 0.4557 - sparse_categorical_accuracy: 0.7916 - val_loss: 0.3694 - val_sparse_categorical_accuracy: 0.8394
Epoch 2/3
[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 87ms/step - loss: 0.3655 - sparse_categorical_accuracy: 0.8430 - val_loss: 0.3535 - val_sparse_categorical_accuracy: 0.8483
Epoch 3/3
[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 71ms/step - loss: 0.3353 - sparse_categorical_accuracy: 0.8579 - val_loss: 0.3527 - val_sparse_categorical_accuracy: 0.8525


In [16]:
# Score the fine-tuned classifier on the held-out reviews
res = classifier_backbone.evaluate(x=X_test, y=y_test)

[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.3555 - sparse_categorical_accuracy: 0.8514


After fine-tuning the pretrained backbone for 3 epochs, we were able to achieve a sparse_categorical_accuracy of 85.14%, which is higher than that of the pretrained classifier.

# Fine Tuning with User-Controlled Preprocessing

In [14]:
import tensorflow as tf

# Build a BERT preprocessor with explicit settings that are applied to all data:
# sequences are padded/truncated to 512 tokens using round-robin truncation.
preprocessor = keras_nlp.models.BertPreprocessor.from_preset(
    "bert_tiny_en_uncased_sst2", sequence_length=512, truncate="round_robin"
)

# Load the classifier and attach the custom preprocessor to it
classifier_with_preprocessor = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased_sst2",
    num_classes=2,
    preprocessor=preprocessor,
)

# Fine-tune with the user-controlled preprocessing for three epochs
history = classifier_with_preprocessor.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=3,
)

Downloading from https://www.kaggle.com/api/v1/models/keras/bert/keras/bert_tiny_en_uncased_sst2/3/download/tokenizer.json...
100%|██████████| 547/547 [00:00<00:00, 344kB/s]
  trackable.load_own_variables(weights_store.get(inner_path))
  trackable.load_own_variables(weights_store.get(inner_path))


Epoch 1/3
[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 47ms/step - loss: 0.4520 - sparse_categorical_accuracy: 0.7919 - val_loss: 0.3728 - val_sparse_categorical_accuracy: 0.8353
Epoch 2/3
[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 26ms/step - loss: 0.3632 - sparse_categorical_accuracy: 0.8442 - val_loss: 0.3558 - val_sparse_categorical_accuracy: 0.8476
Epoch 3/3
[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 66ms/step - loss: 0.3356 - sparse_categorical_accuracy: 0.8581 - val_loss: 0.3494 - val_sparse_categorical_accuracy: 0.8521


In [15]:
# Score the classifier trained with the custom preprocessor on the held-out reviews
res = classifier_with_preprocessor.evaluate(x=X_test, y=y_test)

[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.3533 - sparse_categorical_accuracy: 0.8510


After fine-tuning a pretrained backbone with user-controlled preprocessing we obtained a sparse_categorical_accuracy of 85.1%, which is slightly lower than what we achieved previously.

In [17]:
# Run the preprocessor on a single sentence to inspect what it produces.
# The input is wrapped in a list because a batch of strings is passed.
txt = "Hello it is me, I was wondering after all these years"
preprocessor([txt])

{'token_ids': <tf.Tensor: shape=(1, 512), dtype=int32, numpy=
 array([[ 101, 7592, 2009, 2003, 2033, 1010, 1045, 2001, 6603, 2044, 2035,
         2122, 2086,  102,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,

The preprocessor generates token ids, segment ids and padding masks, which the model then uses for prediction and training.

## Fine Tuning a Custom Model

In our previous fine-tuning runs we simply updated the weights of the whole model. In this part we freeze the backbone, i.e. we do not update any of its weights; instead we add two more layers on top of it and train only those.

Note: we do not expect the performance to be better than bert_tiny_en_uncased_sst2, since that model is explicitly trained on sentiment analysis data. In this section we use the bert_tiny_en_uncased backbone, which is trained on Wikipedia and book corpus data, and adapt it for sentiment analysis.

In [54]:
# Preprocessor matching the "bert_tiny_en_uncased" preset. This rebinds the name
# `preprocessor`, replacing the one created earlier from the SST-2 preset.
preprocessor = keras_nlp.models.BertPreprocessor.from_preset("bert_tiny_en_uncased")

In [38]:
# Load the bare BERT backbone (no classification head) from the
# "bert_tiny_en_uncased" preset.
backbone = keras_nlp.models.BertBackbone.from_preset("bert_tiny_en_uncased")


In [39]:
# Freeze the backbone: none of its weights are updated during training
backbone.trainable = False
# Symbolic inputs of the backbone; reused as the inputs of the custom model
inputs = backbone.input


In [40]:
# Display the input tensors the backbone expects
inputs

{'token_ids': <KerasTensor shape=(None, None), dtype=int32, sparse=None, name=token_ids>,
 'segment_ids': <KerasTensor shape=(None, None), dtype=int32, sparse=None, name=segment_ids>,
 'padding_mask': <KerasTensor shape=(None, None), dtype=int32, sparse=None, name=padding_mask>}

In [41]:
# Inspect the backbone outputs when it is called on its own symbolic inputs
backbone(inputs)  # two outputs: sequence_output and pooled_output; only sequence_output is used below


{'sequence_output': <KerasTensor shape=(None, None, 128), dtype=float16, sparse=False, name=keras_tensor_95>,
 'pooled_output': <KerasTensor shape=(None, 128), dtype=float16, sparse=False, name=keras_tensor_94>}

In [48]:
# Take the backbone's "sequence_output" tensor; the new layers are stacked on top of it
sequence = backbone(inputs)["sequence_output"]


In [49]:
n_layers = 2
# Start from the backbone output under a fresh name, so re-running this cell
# rebuilds the same head instead of stacking extra encoders onto `sequence`.
encoded = sequence
for _ in range(n_layers):
    # adding a transformer encoder layer on top of the previous output
    encoded = keras_nlp.layers.TransformerEncoder(
        num_heads=2,
        intermediate_dim=512,
        dropout=0.1,
    )(encoded)
# Output layer: two logits computed from the hidden state at the backbone's
# CLS token position.
outputs = keras.layers.Dense(2)(encoded[:, backbone.cls_token_index, :])

In [50]:
# Wrap the backbone inputs and the new classification head into a single model
my_model = keras.Model(inputs=inputs, outputs=outputs)

In [64]:
# The head emits raw logits, hence from_logits=True; XLA compilation is disabled.
my_model.compile(
    optimizer=keras.optimizers.AdamW(learning_rate=5e-5),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    jit_compile=False,
)

In [65]:
# Print the layer-by-layer architecture of the custom model
my_model.summary()

In [35]:
# Print the architecture of the bare backbone for comparison
backbone.summary()

From the above two cells we can see how the architectures of the backbone and our model differ.

In [62]:
# Convert the raw review strings of both splits into the input format the
# backbone expects (train first, then test).
X_train_processed, X_test_processed = (
    preprocessor(split) for split in (X_train, X_test)
)

In [59]:
# Make every tf.function run eagerly (op by op) instead of as a traced graph.
# NOTE(review): this is normally a debugging switch and is a likely cause of the
# much slower steps in the training run below - confirm whether it is still needed.
tf.config.run_functions_eagerly(True)

In [68]:
# Train only the newly added layers (the backbone is frozen) for three epochs
history_2 = my_model.fit(
    x=X_train_processed,
    y=y_train,
    batch_size=32,
    epochs=3,
    validation_data=(X_test_processed, y_test),
)



Epoch 1/3
[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m431s[0m 466ms/step - loss: 0.5549 - sparse_categorical_accuracy: 0.7126 - val_loss: 0.4676 - val_sparse_categorical_accuracy: 0.7810
Epoch 2/3
[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m456s[0m 481ms/step - loss: 0.4862 - sparse_categorical_accuracy: 0.7641 - val_loss: 0.4402 - val_sparse_categorical_accuracy: 0.7955
Epoch 3/3
[1m924/924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m409s[0m 443ms/step - loss: 0.4650 - sparse_categorical_accuracy: 0.7826 - val_loss: 0.4284 - val_sparse_categorical_accuracy: 0.8050


In [69]:
# Score the custom model on the preprocessed held-out reviews
res = my_model.evaluate(x=X_test_processed, y=y_test)

[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 83ms/step - loss: 0.4260 - sparse_categorical_accuracy: 0.8079


After fine-tuning, our custom model has a sparse categorical accuracy of 81%, which is in line with what we expected.

 Reference:
 1. https://colab.research.google.com/github/keras-team/keras-io/blob/master/guides/ipynb/keras_nlp/getting_started.ipynb#scrollTo=3XAfOjQoFGh9