### Loading Data from Kaggle

In [None]:
import kagglehub

# Fetch the emotions dataset through kagglehub and report the local directory it resolved to.
dataset_handle = 'nelgiriyewithana/emotions'
path = kagglehub.dataset_download(dataset_handle)
print("Dataset path:", path)

Using Colab cache for faster access to the 'emotions' dataset.
Dataset path: /kaggle/input/emotions


In [None]:
import pandas as pd

# Read the dataset's CSV from the downloaded directory and preview the first rows.
csv_file = path + '/text.csv'
data = pd.read_csv(csv_file)
data.head()

Unnamed: 0.1,Unnamed: 0,text,label
0,0,i just feel really helpless and heavy hearted,4
1,1,ive enjoyed being able to slouch about relax a...,0
2,2,i gave up my internship with the dmrg and am f...,4
3,3,i dont know i feel so lost,0
4,4,i am a kindergarten teacher and i am thoroughl...,4


Each entry in this dataset consists of a text segment representing a Twitter message and a corresponding label indicating the predominant emotion conveyed. The emotions are classified into six categories: sadness (0), joy (1), love (2), anger (3), fear (4), and surprise (5).

In [None]:
# Print column names, non-null counts, dtypes and memory usage of the loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416809 entries, 0 to 416808
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   Unnamed: 0  416809 non-null  int64 
 1   text        416809 non-null  object
 2   label       416809 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 9.5+ MB


In [None]:
# (rows, columns) of the loaded frame.
data.shape

(416809, 3)

In [None]:
# Mapping from the integer label stored in the CSV to a readable emotion name.
class_labels = {0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}

# Compute the absolute and relative class frequencies once, instead of
# re-scanning the whole label column twice on every loop iteration.
label_counts = data["label"].value_counts()
label_fractions = data["label"].value_counts(normalize=True)

print("Emotions Distribution")
for label_id, emotion in class_labels.items():
    # .get(...) keeps the table printable even if a class is absent from the data.
    count = label_counts.get(label_id, 0)
    fraction = label_fractions.get(label_id, 0.0)
    print(f"{emotion:<10}: {count:<7} {fraction:0.02f}")

Emotions Distribution
sadness   : 121187  0.29
joy       : 141067  0.34
love      : 34554   0.08
anger     : 57317   0.14
fear      : 47712   0.11
surprise  : 14972   0.04


In [None]:
from sklearn.model_selection import train_test_split

# Inputs are the raw tweet texts; targets are the integer emotion ids.
X, y = data["text"], data["label"]

# using only 10% data because system is crashing for large data
# (stratified so the subsample keeps the original class proportions)
X_small, _, y_small, _ = train_test_split(
    X, y,
    test_size=0.90,
    random_state=42,
    stratify=y,
)


## ANN

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, Flatten

#### One-Hot

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Binary bag-of-words ("one-hot") features over the 5000 most frequent terms:
# an entry is 1 when the term occurs in the tweet, regardless of how often.
one_hot = CountVectorizer(max_features=5000, binary=True)

# NOTE(review): the vectorizer is fitted on all of X_small before the train/test
# split, so the vocabulary is selected using rows that later become test data.
# Consider fitting on the training portion only.
# NOTE(review): .toarray() materialises a dense (n_samples, 5000) matrix, which is
# the main memory cost of this cell.
X_onehot = one_hot.fit_transform(X_small).toarray()

# Label corrected: these are one-hot features, not TF-IDF.
print("One-hot shape:", X_onehot.shape)

In [None]:
# Hold out 20% for testing, then take 10% of the remainder for validation.
# Both splits are stratified on the label and use the same seed.
X_tr, X_test_oh, y_tr, y_test = train_test_split(
    X_onehot, y_small,
    test_size=0.2,
    random_state=42,
    stratify=y_small,
)
X_train_oh, X_val_oh, y_train, y_val = train_test_split(
    X_tr, y_tr,
    test_size=0.1,
    random_state=42,
    stratify=y_tr,
)

In [None]:
from tensorflow.keras.layers import Input

# Feed-forward classifier on the one-hot features:
# 128 -> dropout -> 64 -> 6-way softmax (one unit per emotion class).
# An explicit Input layer replaces `input_dim=` on the first Dense layer, which
# newer Keras versions warn about ("prefer using an Input(shape) object").
model_oh = Sequential([
    Input(shape=(X_train_oh.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(6, activation='softmax'),
])

# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
model_oh.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model_oh.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train for 5 epochs with mini-batches of 32, scoring the validation split after each epoch.
# NOTE(review): in the saved log val_loss rises after epoch 2 while training loss keeps
# falling, which looks like overfitting — consider fewer epochs or early stopping.
model_oh.fit(
    x=X_train_oh,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_val_oh, y_val),
)

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.6224 - loss: 1.0278 - val_accuracy: 0.8894 - val_loss: 0.2771
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9321 - loss: 0.1855 - val_accuracy: 0.8945 - val_loss: 0.2712
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9606 - loss: 0.1071 - val_accuracy: 0.8924 - val_loss: 0.3165
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9739 - loss: 0.0732 - val_accuracy: 0.8900 - val_loss: 0.3267
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9795 - loss: 0.0548 - val_accuracy: 0.8897 - val_loss: 0.3750


<keras.src.callbacks.history.History at 0x7b59b398ce60>

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the highest softmax output for each test row.
class_probs = model_oh.predict(X_test_oh)
y_preds = np.argmax(class_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, y_preds)
print(report)

[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
              precision    recall  f1-score   support

           0       0.92      0.93      0.92      2424
           1       0.91      0.91      0.91      2821
           2       0.77      0.78      0.77       691
           3       0.88      0.90      0.89      1146
           4       0.86      0.81      0.83       954
           5       0.81      0.73      0.77       300

    accuracy                           0.89      8336
   macro avg       0.86      0.84      0.85      8336
weighted avg       0.89      0.89      0.89      8336



#### Count Vectorization

In [None]:
# Term-count features over the 5000 most frequent terms (binary=False keeps
# the actual number of occurrences per tweet).
cv = CountVectorizer(max_features=5000, binary=False)

# Fixed: the original called `one_hot.fit_transform`, so this section silently
# re-used the binary one-hot features instead of the count vectorizer above.
# NOTE(review): as with the other vectorizers, this is fitted before the
# train/test split; consider fitting on the training portion only.
X_cv = cv.fit_transform(X_small).toarray()

# Fixed: report the shape of the count matrix with the matching label.
print("Count-vector shape:", X_cv.shape)

TF-IDF shape: (41680, 5000)


In [None]:
# Same stratified 80/20 then 90/10 split scheme, applied to the count features.
X_tr, X_test_cv, y_tr, y_test = train_test_split(
    X_cv, y_small,
    test_size=0.2,
    random_state=42,
    stratify=y_small,
)
X_train_cv, X_val_cv, y_train, y_val = train_test_split(
    X_tr, y_tr,
    test_size=0.1,
    random_state=42,
    stratify=y_tr,
)

In [None]:
from tensorflow.keras.layers import Input

# Feed-forward classifier on the count features:
# 128 -> dropout -> 64 -> 6-way softmax (one unit per emotion class).
# An explicit Input layer replaces `input_dim=` on the first Dense layer, which
# newer Keras versions warn about ("prefer using an Input(shape) object").
model_cv = Sequential([
    Input(shape=(X_train_cv.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(6, activation='softmax'),
])

# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
model_cv.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model_cv.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train for 5 epochs with mini-batches of 32, scoring the validation split after each epoch.
model_cv.fit(
    x=X_train_cv,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_val_cv, y_val),
)

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.6144 - loss: 1.0404 - val_accuracy: 0.8921 - val_loss: 0.2767
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9309 - loss: 0.1838 - val_accuracy: 0.8930 - val_loss: 0.2840
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9612 - loss: 0.1062 - val_accuracy: 0.8936 - val_loss: 0.2901
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9710 - loss: 0.0791 - val_accuracy: 0.8933 - val_loss: 0.3292
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9811 - loss: 0.0562 - val_accuracy: 0.8915 - val_loss: 0.3647


<keras.src.callbacks.history.History at 0x7b59b4748cb0>

In [None]:
# Predicted class = index of the highest softmax output for each test row.
class_probs = model_cv.predict(X_test_cv)
y_preds = np.argmax(class_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, y_preds)
print(report)

[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step
              precision    recall  f1-score   support

           0       0.94      0.91      0.92      2424
           1       0.91      0.91      0.91      2821
           2       0.75      0.83      0.79       691
           3       0.88      0.90      0.89      1146
           4       0.83      0.84      0.84       954
           5       0.77      0.79      0.78       300

    accuracy                           0.89      8336
   macro avg       0.85      0.86      0.86      8336
weighted avg       0.89      0.89      0.89      8336



#### Tf-Idf

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF weighted bag-of-words over the 5000 most frequent terms.
# NOTE(review): fitted on all of X_small before the split, same as the other vectorizers.
vectorizer = TfidfVectorizer(max_features=5000)
tfidf_sparse = vectorizer.fit_transform(X_small)

# Dense copy, since the model below is fed a plain array.
X_tfIdf = tfidf_sparse.toarray()
print("TF-IDF shape:", X_tfIdf.shape)

TF-IDF shape: (41680, 5000)


In [None]:
# Same stratified 80/20 then 90/10 split scheme, applied to the TF-IDF features.
X_tr, X_test_tf, y_tr, y_test = train_test_split(
    X_tfIdf, y_small,
    test_size=0.2,
    random_state=42,
    stratify=y_small,
)
X_train_tf, X_val_tf, y_train, y_val = train_test_split(
    X_tr, y_tr,
    test_size=0.1,
    random_state=42,
    stratify=y_tr,
)

In [None]:
from tensorflow.keras.layers import Input

# Feed-forward classifier on the TF-IDF features:
# 128 -> dropout -> 64 -> 6-way softmax (one unit per emotion class).
# An explicit Input layer replaces `input_dim=` on the first Dense layer, which
# newer Keras versions warn about ("prefer using an Input(shape) object").
model_tfIdf = Sequential([
    Input(shape=(X_train_tf.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(6, activation='softmax'),
])

# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
model_tfIdf.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model_tfIdf.summary()

In [None]:
# Train for 5 epochs with mini-batches of 32, scoring the validation split after each epoch.
model_tfIdf.fit(
    x=X_train_tf,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_val_tf, y_val),
)

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.5861 - loss: 1.0927 - val_accuracy: 0.8834 - val_loss: 0.3049
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9294 - loss: 0.1979 - val_accuracy: 0.8843 - val_loss: 0.2947
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9627 - loss: 0.1113 - val_accuracy: 0.8762 - val_loss: 0.3208
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9733 - loss: 0.0791 - val_accuracy: 0.8750 - val_loss: 0.3694
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9819 - loss: 0.0567 - val_accuracy: 0.8765 - val_loss: 0.4030


<keras.src.callbacks.history.History at 0x7d14600512b0>

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the highest softmax output for each test row.
class_probs = model_tfIdf.predict(X_test_tf)
y_preds = np.argmax(class_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, y_preds)
print(report)

[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
              precision    recall  f1-score   support

           0       0.92      0.93      0.93      2424
           1       0.87      0.94      0.90      2821
           2       0.79      0.69      0.74       691
           3       0.91      0.86      0.89      1146
           4       0.86      0.82      0.84       954
           5       0.80      0.74      0.77       300

    accuracy                           0.88      8336
   macro avg       0.86      0.83      0.84      8336
weighted avg       0.88      0.88      0.88      8336



#### Embedding Layer

In [None]:
# Stratified 80/20 then 90/10 split of the raw texts.
# Note this uses the full X / y, not the 10% subsample used for the sparse-feature models.
X_tr, X_test_emb, y_tr, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train_emb, X_val_emb, y_train, y_val = train_test_split(
    X_tr, y_tr,
    test_size=0.1,
    random_state=42,
    stratify=y_tr,
)

##### 100 dimension

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Build the word index from the training texts only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train_emb)
vocab_size = 1 + len(tokenizer.word_index)  # +1 for the reserved padding index 0
print(f'Vocab Size : {vocab_size}')

# Map every split to integer id sequences using that index.
X_train_seq, X_val_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts)
    for texts in (X_train_emb, X_val_emb, X_test_emb)
)

# Pad / truncate every sequence to a fixed length of 100 tokens.
maxlen = 100
X_train_padded, X_val_padded, X_test_padded = (
    pad_sequences(seqs, maxlen=maxlen)
    for seqs in (X_train_seq, X_val_seq, X_test_seq)
)

Vocab Size : 64646


In [None]:
from tensorflow.keras.layers import Input

# Learned 100-dimensional word embeddings, flattened and fed to a small dense head
# ending in a 6-way softmax (one unit per emotion class).
# The explicit Input layer replaces Embedding's `input_length=` argument, which is
# deprecated in current Keras; declaring the input shape up front means the model
# is built before summary() is called.
model_emb = Sequential([
    Input(shape=(maxlen,)),
    Embedding(vocab_size, 100),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(6, activation='softmax'),
])
# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
model_emb.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model_emb.summary()



In [None]:
# Train for 5 epochs with large mini-batches of 2000, validating after each epoch.
model_emb.fit(
    x=X_train_padded,
    y=y_train,
    batch_size=2000,
    epochs=5,
    validation_data=(X_val_padded, y_val),
)

Epoch 1/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 598ms/step - accuracy: 0.4934 - loss: 1.3131 - val_accuracy: 0.8900 - val_loss: 0.3097
Epoch 2/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 562ms/step - accuracy: 0.8984 - loss: 0.2758 - val_accuracy: 0.8972 - val_loss: 0.2407
Epoch 3/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 574ms/step - accuracy: 0.9198 - loss: 0.1944 - val_accuracy: 0.8946 - val_loss: 0.2323
Epoch 4/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 562ms/step - accuracy: 0.9282 - loss: 0.1632 - val_accuracy: 0.8934 - val_loss: 0.2551
Epoch 5/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 569ms/step - accuracy: 0.9319 - loss: 0.1499 - val_accuracy: 0.8908 - val_loss: 0.2426


<keras.src.callbacks.history.History at 0x7fa411288e60>

In [None]:
# Display the layer table again; this cell sits after the fit cell.
model_emb.summary()

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the highest softmax output for each test row.
class_probs = model_emb.predict(X_test_padded)
y_preds = np.argmax(class_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, y_preds)
print(report)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 8ms/step
              precision    recall  f1-score   support

           0       0.94      0.94      0.94     24238
           1       0.91      0.92      0.91     28214
           2       0.76      0.75      0.75      6911
           3       0.90      0.89      0.89     11463
           4       0.85      0.81      0.83      9542
           5       0.71      0.72      0.71      2994

    accuracy                           0.89     83362
   macro avg       0.84      0.84      0.84     83362
weighted avg       0.89      0.89      0.89     83362



##### 150 dimension

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Build the word index from the training texts only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train_emb)
vocab_size = 1 + len(tokenizer.word_index)  # +1 for the reserved padding index 0
print(f'Vocab Size : {vocab_size}')

# Map every split to integer id sequences using that index.
X_train_seq, X_val_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts)
    for texts in (X_train_emb, X_val_emb, X_test_emb)
)

# Pad to the length of the longest training sequence, so no training text is truncated.
maxlen = max(map(len, X_train_seq))

X_train_padded, X_val_padded, X_test_padded = (
    pad_sequences(seqs, maxlen=maxlen)
    for seqs in (X_train_seq, X_val_seq, X_test_seq)
)

Vocab Size : 64646


In [None]:
# Shapes of the padded train / test / validation matrices, in that order.
tuple(split.shape for split in (X_train_padded, X_test_padded, X_val_padded))

((300102, 178), (83362, 178), (33345, 178))

In [None]:
from tensorflow.keras.layers import Input

# Same architecture as the 100-dimensional variant, but with 150-dimensional embeddings
# and inputs padded to the width of X_train_padded.
# The explicit Input layer replaces Embedding's `input_length=` argument, which is
# deprecated in current Keras; declaring the input shape up front means the model
# is built before summary() is called.
model_emb2 = Sequential([
    Input(shape=(X_train_padded.shape[1],)),
    Embedding(vocab_size, 150),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(6, activation='softmax'),
])
# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
model_emb2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model_emb2.summary()



In [None]:
# Train for 10 epochs with large mini-batches of 2000, validating after each epoch.
model_emb2.fit(
    x=X_train_padded,
    y=y_train,
    batch_size=2000,
    epochs=10,
    validation_data=(X_val_padded, y_val),
)

Epoch 1/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 35ms/step - accuracy: 0.7317 - loss: 0.7884 - val_accuracy: 0.8016 - val_loss: 0.6376
Epoch 2/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 36ms/step - accuracy: 0.7892 - loss: 0.6508 - val_accuracy: 0.8210 - val_loss: 0.5654
Epoch 3/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 35ms/step - accuracy: 0.8285 - loss: 0.5346 - val_accuracy: 0.8811 - val_loss: 0.3495
Epoch 4/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 35ms/step - accuracy: 0.9016 - loss: 0.2828 - val_accuracy: 0.8932 - val_loss: 0.2775
Epoch 5/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 36ms/step - accuracy: 0.9194 - loss: 0.2036 - val_accuracy: 0.8951 - val_loss: 0.2585
Epoch 6/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 36ms/step - accuracy: 0.9273 - loss: 0.1696 - val_accuracy: 0.8916 - val_loss: 0.2628
Epoch 7/10
[1m151/151

<keras.src.callbacks.history.History at 0x7b9f1fa742f0>

In [None]:
# Display the layer table again; this cell sits after the fit cell.
model_emb2.summary()

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the highest softmax output for each test row.
class_probs = model_emb2.predict(X_test_padded)
y_preds = np.argmax(class_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, y_preds)
print(report)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step
              precision    recall  f1-score   support

           0       0.93      0.94      0.94     24238
           1       0.91      0.91      0.91     28214
           2       0.73      0.75      0.74      6911
           3       0.91      0.87      0.89     11463
           4       0.84      0.83      0.83      9542
           5       0.69      0.74      0.72      2994

    accuracy                           0.89     83362
   macro avg       0.84      0.84      0.84     83362
weighted avg       0.89      0.89      0.89     83362



## RNN

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout, Bidirectional

In [None]:
# Stratified 80/20 then 90/10 split of the full raw texts for the recurrent models.
X_tr, X_test_rnn, y_tr, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train_rnn, X_val_rnn, y_train, y_val = train_test_split(
    X_tr, y_tr,
    test_size=0.1,
    random_state=42,
    stratify=y_tr,
)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Build the word index from the training texts only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train_rnn)
vocab_size = 1 + len(tokenizer.word_index)  # +1 for the reserved padding index 0
print(f'Vocab Size : {vocab_size}')

# Map every split to integer id sequences using that index.
X_train_seq, X_val_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts)
    for texts in (X_train_rnn, X_val_rnn, X_test_rnn)
)

# Pad / truncate every sequence to a fixed length of 100 tokens.
maxlen = 100
X_train_padded, X_val_padded, X_test_padded = (
    pad_sequences(seqs, maxlen=maxlen)
    for seqs in (X_train_seq, X_val_seq, X_test_seq)
)

Vocab Size : 64646


#### SimpleRNN

In [None]:
from tensorflow.keras.layers import Input

# Single-layer vanilla RNN: 100-d embeddings -> SimpleRNN(128) -> dropout -> 6-way softmax.
# The explicit Input layer replaces Embedding's `input_length=` argument, which is
# deprecated in current Keras; declaring the input shape up front means the model
# is built before summary() is called.
rnn_model = Sequential([
    Input(shape=(maxlen,)),
    Embedding(vocab_size, 100),
    SimpleRNN(128, activation='tanh'),
    Dropout(0.3),
    Dense(6, activation='softmax'),
])

# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
rnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

rnn_model.summary()




In [None]:
# Train for 5 epochs with large mini-batches of 2000, validating after each epoch.
rnn_model.fit(
    x=X_train_padded,
    y=y_train,
    batch_size=2000,
    epochs=5,
    validation_data=(X_val_padded, y_val),
)

Epoch 1/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 1s/step - accuracy: 0.4439 - loss: 1.4012 - val_accuracy: 0.8798 - val_loss: 0.3815
Epoch 2/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 1s/step - accuracy: 0.8940 - loss: 0.3174 - val_accuracy: 0.8897 - val_loss: 0.3122
Epoch 3/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m215s[0m 1s/step - accuracy: 0.9178 - loss: 0.2241 - val_accuracy: 0.9054 - val_loss: 0.2521
Epoch 4/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 1s/step - accuracy: 0.9270 - loss: 0.1855 - val_accuracy: 0.9120 - val_loss: 0.2196
Epoch 5/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 1s/step - accuracy: 0.9320 - loss: 0.1617 - val_accuracy: 0.8977 - val_loss: 0.2520


<keras.src.callbacks.history.History at 0x788614227a70>

In [None]:
# Display the layer table again; this cell sits after the fit cell.
rnn_model.summary()

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the highest softmax output for each test row.
class_probs = rnn_model.predict(X_test_padded)
y_preds = np.argmax(class_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, y_preds)
print(report)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 14ms/step
              precision    recall  f1-score   support

           0       0.98      0.89      0.93     24238
           1       0.90      0.92      0.91     28214
           2       0.75      0.83      0.79      6911
           3       0.88      0.93      0.90     11463
           4       0.89      0.83      0.86      9542
           5       0.70      0.89      0.79      2994

    accuracy                           0.89     83362
   macro avg       0.85      0.88      0.86     83362
weighted avg       0.90      0.89      0.90     83362



#### Bi RNN

In [None]:
from tensorflow.keras.layers import Input

# Bidirectional vanilla RNN: the SimpleRNN(128) is run over the sequence in both
# directions before the dropout and 6-way softmax head.
# The explicit Input layer replaces Embedding's `input_length=` argument, which is
# deprecated in current Keras; declaring the input shape up front means the model
# is built before summary() is called.
bi_rnn_model = Sequential([
    Input(shape=(maxlen,)),
    Embedding(vocab_size, 100),
    Bidirectional(SimpleRNN(128, activation='tanh')),
    Dropout(0.3),
    Dense(6, activation='softmax'),
])

# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
bi_rnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

bi_rnn_model.summary()




In [None]:
# Train for 5 epochs with large mini-batches of 2000, validating after each epoch.
bi_rnn_model.fit(
    x=X_train_padded,
    y=y_train,
    batch_size=2000,
    epochs=5,
    validation_data=(X_val_padded, y_val),
)

Epoch 1/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m409s[0m 3s/step - accuracy: 0.4297 - loss: 1.4524 - val_accuracy: 0.8428 - val_loss: 0.4833
Epoch 2/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m456s[0m 3s/step - accuracy: 0.8668 - loss: 0.4044 - val_accuracy: 0.8860 - val_loss: 0.3138
Epoch 3/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m444s[0m 3s/step - accuracy: 0.9095 - loss: 0.2469 - val_accuracy: 0.9019 - val_loss: 0.2645
Epoch 4/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m398s[0m 3s/step - accuracy: 0.9227 - loss: 0.1994 - val_accuracy: 0.9075 - val_loss: 0.2513
Epoch 5/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m385s[0m 3s/step - accuracy: 0.9293 - loss: 0.1750 - val_accuracy: 0.9123 - val_loss: 0.2199


<keras.src.callbacks.history.History at 0x78861fd047d0>

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the highest softmax output for each test row.
class_probs = bi_rnn_model.predict(X_test_padded)
y_preds = np.argmax(class_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, y_preds)
print(report)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 20ms/step
              precision    recall  f1-score   support

           0       0.95      0.96      0.96     24238
           1       0.96      0.90      0.93     28214
           2       0.73      0.91      0.81      6911
           3       0.91      0.91      0.91     11463
           4       0.86      0.85      0.86      9542
           5       0.75      0.75      0.75      2994

    accuracy                           0.91     83362
   macro avg       0.86      0.88      0.87     83362
weighted avg       0.91      0.91      0.91     83362



#### Deep RNN

In [None]:
from tensorflow.keras.layers import Input

# Two stacked vanilla RNN layers. The first returns the full sequence
# (return_sequences=True) so the second recurrent layer has a sequence to consume.
# The explicit Input layer replaces Embedding's `input_length=` argument, which is
# deprecated in current Keras; declaring the input shape up front means the model
# is built before summary() is called.
deep_rnn_model = Sequential([
    Input(shape=(maxlen,)),
    Embedding(vocab_size, 100),
    SimpleRNN(128, return_sequences=True, activation='tanh'),
    Dropout(0.3),
    SimpleRNN(64, activation='tanh'),
    Dropout(0.3),
    Dense(6, activation='softmax'),
])

# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
deep_rnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

deep_rnn_model.summary()



In [None]:
# Train for 5 epochs with large mini-batches of 2000, validating after each epoch.
deep_rnn_model.fit(
    x=X_train_padded,
    y=y_train,
    batch_size=2000,
    epochs=5,
    validation_data=(X_val_padded, y_val),
)

Epoch 1/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m403s[0m 3s/step - accuracy: 0.3690 - loss: 1.5680 - val_accuracy: 0.8230 - val_loss: 0.5553
Epoch 2/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m451s[0m 3s/step - accuracy: 0.8207 - loss: 0.5570 - val_accuracy: 0.8880 - val_loss: 0.3455
Epoch 3/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m449s[0m 3s/step - accuracy: 0.8993 - loss: 0.3099 - val_accuracy: 0.8920 - val_loss: 0.2918
Epoch 4/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m406s[0m 3s/step - accuracy: 0.9121 - loss: 0.2472 - val_accuracy: 0.8978 - val_loss: 0.2595
Epoch 5/5
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m402s[0m 3s/step - accuracy: 0.9195 - loss: 0.2159 - val_accuracy: 0.8980 - val_loss: 0.2527


<keras.src.callbacks.history.History at 0x78861fd04170>

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the highest softmax output for each test row.
class_probs = deep_rnn_model.predict(X_test_padded)
y_preds = np.argmax(class_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, y_preds)
print(report)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 20ms/step
              precision    recall  f1-score   support

           0       0.95      0.95      0.95     24238
           1       0.90      0.93      0.92     28214
           2       0.76      0.77      0.77      6911
           3       0.95      0.86      0.90     11463
           4       0.87      0.82      0.84      9542
           5       0.66      0.82      0.73      2994

    accuracy                           0.90     83362
   macro avg       0.85      0.86      0.85     83362
weighted avg       0.90      0.90      0.90     83362



## LSTM

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from sklearn.model_selection import train_test_split

In [None]:
# Stratified 80/20 then 90/10 split of the full raw texts for the LSTM models.
X_tr, X_test_rnn, y_tr, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train_rnn, X_val_rnn, y_train, y_val = train_test_split(
    X_tr, y_tr,
    test_size=0.1,
    random_state=42,
    stratify=y_tr,
)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenize: build the word index from the training texts only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train_rnn)
vocab_size = len(tokenizer.word_index) + 1  # +1 for the reserved padding index 0
print(f'Vocab Size : {vocab_size}')

# Convert each split to integer id sequences.
X_train_seq = tokenizer.texts_to_sequences(X_train_rnn)
X_val_seq = tokenizer.texts_to_sequences(X_val_rnn)
X_test_seq = tokenizer.texts_to_sequences(X_test_rnn)

# Pad to the longest training sequence. This was previously hard-coded as 178,
# the value an earlier cell's output shows for this same split; deriving it keeps
# the cell correct if the data, split or tokenizer settings change.
maxlen = max(len(seq) for seq in X_train_seq)
X_train_padded = pad_sequences(X_train_seq, maxlen=maxlen)
X_val_padded = pad_sequences(X_val_seq, maxlen=maxlen)
X_test_padded = pad_sequences(X_test_seq, maxlen=maxlen)

Vocab Size : 64646


#### Simple LSTM

In [None]:
from tensorflow.keras.layers import Input

# Single-layer LSTM: 100-d embeddings -> LSTM(128) -> dropout -> 6-way softmax.
# The explicit Input layer replaces Embedding's `input_length=` argument, which is
# deprecated in current Keras; declaring the input shape up front means the model
# is built before summary() is called.
lstm_model = Sequential([
    Input(shape=(maxlen,)),
    Embedding(vocab_size, 100),
    LSTM(128, activation='tanh'),
    Dropout(0.3),
    Dense(6, activation='softmax'),
])

# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
lstm_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

lstm_model.summary()



In [None]:
# Train for 10 epochs with large mini-batches of 2000, validating after each epoch.
lstm_model.fit(
    x=X_train_padded,
    y=y_train,
    batch_size=2000,
    epochs=10,
    validation_data=(X_val_padded, y_val),
)

Epoch 1/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 86ms/step - accuracy: 0.4719 - loss: 1.3551 - val_accuracy: 0.8881 - val_loss: 0.2850
Epoch 2/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 86ms/step - accuracy: 0.9112 - loss: 0.2243 - val_accuracy: 0.9263 - val_loss: 0.1470
Epoch 3/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 88ms/step - accuracy: 0.9380 - loss: 0.1275 - val_accuracy: 0.9286 - val_loss: 0.1225
Epoch 4/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 85ms/step - accuracy: 0.9413 - loss: 0.1083 - val_accuracy: 0.9269 - val_loss: 0.1164
Epoch 5/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 84ms/step - accuracy: 0.9432 - loss: 0.1000 - val_accuracy: 0.9250 - val_loss: 0.1149
Epoch 6/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 84ms/step - accuracy: 0.9452 - loss: 0.0943 - val_accuracy: 0.9262 - val_loss: 0.1133
Epoch 7/10
[1m1

<keras.src.callbacks.history.History at 0x7b9edffb18e0>

In [None]:
# Display the layer table again; this cell sits after the fit cell.
lstm_model.summary()

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the highest softmax output for each test row.
class_probs = lstm_model.predict(X_test_padded)
y_preds = np.argmax(class_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, y_preds)
print(report)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step
              precision    recall  f1-score   support

           0       0.97      0.96      0.97     24238
           1       0.94      0.94      0.94     28214
           2       0.79      0.80      0.80      6911
           3       0.91      0.95      0.93     11463
           4       0.88      0.87      0.87      9542
           5       0.75      0.77      0.76      2994

    accuracy                           0.92     83362
   macro avg       0.87      0.88      0.88     83362
weighted avg       0.92      0.92      0.92     83362



#### Bi LSTM

In [None]:
from tensorflow.keras.layers import Input

# Bidirectional LSTM: the LSTM(128) is run over the sequence in both directions
# before the dropout and 6-way softmax head.
# The explicit Input layer replaces Embedding's `input_length=` argument, which is
# deprecated in current Keras; declaring the input shape up front means the model
# is built before summary() is called.
biLstm_model = Sequential([
    Input(shape=(maxlen,)),
    Embedding(vocab_size, 100),
    Bidirectional(LSTM(128, activation='tanh')),
    Dropout(0.3),
    Dense(6, activation='softmax'),
])

# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
biLstm_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

biLstm_model.summary()

In [None]:
# Train for 10 epochs with large mini-batches of 2000, validating after each epoch.
# NOTE(review): the saved log already shows ~0.95 training accuracy in epoch 1, which
# suggests this cell was re-run on an already-fitted model. Re-run the model-definition
# cell first if a from-scratch training curve is wanted.
biLstm_model.fit(
    x=X_train_padded,
    y=y_train,
    batch_size=2000,
    epochs=10,
    validation_data=(X_val_padded, y_val),
)

Epoch 1/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 168ms/step - accuracy: 0.9455 - loss: 0.0932 - val_accuracy: 0.9233 - val_loss: 0.1187
Epoch 2/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 167ms/step - accuracy: 0.9468 - loss: 0.0902 - val_accuracy: 0.9244 - val_loss: 0.1180
Epoch 3/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 168ms/step - accuracy: 0.9466 - loss: 0.0896 - val_accuracy: 0.9229 - val_loss: 0.1208
Epoch 4/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 168ms/step - accuracy: 0.9479 - loss: 0.0859 - val_accuracy: 0.9243 - val_loss: 0.1180
Epoch 5/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 168ms/step - accuracy: 0.9490 - loss: 0.0834 - val_accuracy: 0.9219 - val_loss: 0.1228
Epoch 6/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 167ms/step - accuracy: 0.9496 - loss: 0.0832 - val_accuracy: 0.9230 - val_loss: 0.1390
Epoch 7/10

<keras.src.callbacks.history.History at 0x7b9e900fce90>

In [None]:
# Display the layer table again; this cell sits after the fit cell.
biLstm_model.summary()

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the highest softmax output for each test row.
class_probs = biLstm_model.predict(X_test_padded)
y_preds = np.argmax(class_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
report = classification_report(y_test, y_preds)
print(report)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step
              precision    recall  f1-score   support

           0       0.97      0.96      0.97     24238
           1       0.95      0.92      0.94     28214
           2       0.76      0.85      0.80      6911
           3       0.91      0.94      0.92     11463
           4       0.88      0.85      0.86      9542
           5       0.73      0.77      0.75      2994

    accuracy                           0.92     83362
   macro avg       0.87      0.88      0.87     83362
weighted avg       0.92      0.92      0.92     83362



#### Deep LSTM

In [None]:
from tensorflow.keras.layers import Input

# Two stacked LSTM layers. The first returns the full sequence
# (return_sequences=True) so the second recurrent layer has a sequence to consume.
# The explicit Input layer replaces Embedding's `input_length=` argument, which is
# deprecated in current Keras; declaring the input shape up front means the model
# is built before summary() is called.
deep_lstm_model = Sequential([
    Input(shape=(maxlen,)),
    Embedding(vocab_size, 100),
    LSTM(128, return_sequences=True, activation='tanh'),
    Dropout(0.3),
    LSTM(64, activation='tanh'),
    Dropout(0.3),
    Dense(6, activation='softmax'),
])

# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
deep_lstm_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

deep_lstm_model.summary()



In [None]:
# Train for 10 epochs with large mini-batches of 2000, validating after each epoch.
# NOTE(review): the saved log already shows ~0.94 training accuracy in epoch 1, which
# suggests this cell was re-run on an already-fitted model. Re-run the model-definition
# cell first if a from-scratch training curve is wanted.
deep_lstm_model.fit(
    x=X_train_padded,
    y=y_train,
    batch_size=2000,
    epochs=10,
    validation_data=(X_val_padded, y_val),
)

Epoch 1/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 126ms/step - accuracy: 0.9443 - loss: 0.0976 - val_accuracy: 0.9314 - val_loss: 0.1126
Epoch 2/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 126ms/step - accuracy: 0.9444 - loss: 0.0938 - val_accuracy: 0.9284 - val_loss: 0.1154
Epoch 3/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 126ms/step - accuracy: 0.9457 - loss: 0.0898 - val_accuracy: 0.9286 - val_loss: 0.1156
Epoch 4/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 126ms/step - accuracy: 0.9464 - loss: 0.0875 - val_accuracy: 0.9283 - val_loss: 0.1153
Epoch 5/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 126ms/step - accuracy: 0.9461 - loss: 0.0872 - val_accuracy: 0.9297 - val_loss: 0.1202
Epoch 6/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 126ms/step - accuracy: 0.9474 - loss: 0.0840 - val_accuracy: 0.9291 - val_loss: 0.1235
Epoch 7/10

<keras.src.callbacks.history.History at 0x7b9e901175c0>

In [None]:
# Display the layer table again; this cell sits after the fit cell.
deep_lstm_model.summary()

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the largest softmax output in each row.
deep_lstm_probs = deep_lstm_model.predict(X_test_padded)
y_preds = np.argmax(deep_lstm_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
print(classification_report(y_test, y_preds))

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step
              precision    recall  f1-score   support

           0       0.96      0.98      0.97     24238
           1       0.94      0.95      0.94     28214
           2       0.82      0.79      0.80      6911
           3       0.94      0.92      0.93     11463
           4       0.91      0.84      0.87      9542
           5       0.73      0.86      0.79      2994

    accuracy                           0.92     83362
   macro avg       0.88      0.89      0.88     83362
weighted avg       0.92      0.92      0.92     83362



#### Bidirectional + Deep LSTM

In [None]:
# Two stacked bidirectional LSTM layers; the first one returns its full
# output sequence so the second recurrent layer can read it.
lstm_model2 = Sequential()
lstm_model2.add(Embedding(vocab_size, 100, input_length=maxlen))
lstm_model2.add(Bidirectional(LSTM(128, return_sequences=True, activation='tanh')))
lstm_model2.add(Dropout(0.3))
lstm_model2.add(Bidirectional(LSTM(64, activation='tanh')))
lstm_model2.add(Dropout(0.3))
# Six output units, one per emotion label.
lstm_model2.add(Dense(6, activation='softmax'))

# Labels are integer class ids, hence the sparse cross-entropy loss.
lstm_model2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

lstm_model2.summary()



In [None]:
# Train for 10 epochs in batches of 2000, reporting validation metrics each epoch.
lstm_model2.fit(
    x=X_train_padded,
    y=y_train,
    validation_data=(X_val_padded, y_val),
    batch_size=2000,
    epochs=10,
)

Epoch 1/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 519ms/step - accuracy: 0.4436 - loss: 1.4043 - val_accuracy: 0.8814 - val_loss: 0.3493
Epoch 2/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 515ms/step - accuracy: 0.9006 - loss: 0.2718 - val_accuracy: 0.9192 - val_loss: 0.1699
Epoch 3/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 513ms/step - accuracy: 0.9296 - loss: 0.1548 - val_accuracy: 0.9240 - val_loss: 0.1472
Epoch 4/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 514ms/step - accuracy: 0.9377 - loss: 0.1246 - val_accuracy: 0.9254 - val_loss: 0.1344
Epoch 5/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 514ms/step - accuracy: 0.9414 - loss: 0.1091 - val_accuracy: 0.9254 - val_loss: 0.1310
Epoch 6/10
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 514ms/step - accuracy: 0.9436 - loss: 0.1018 - val_accuracy: 0.9257 - val_loss: 0.1295
Epoch 7/

<keras.src.callbacks.history.History at 0x7b8416e87e90>

In [None]:
# Show the layer table and parameter counts of the bidirectional deep LSTM model.
lstm_model2.summary()

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the largest softmax output in each row.
lstm2_probs = lstm_model2.predict(X_test_padded)
y_preds = np.argmax(lstm2_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
print(classification_report(y_test, y_preds))

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 12ms/step
              precision    recall  f1-score   support

           0       0.96      0.97      0.97     24238
           1       0.93      0.96      0.95     28214
           2       0.88      0.73      0.80      6911
           3       0.95      0.91      0.93     11463
           4       0.85      0.90      0.87      9542
           5       0.76      0.75      0.75      2994

    accuracy                           0.92     83362
   macro avg       0.89      0.87      0.88     83362
weighted avg       0.92      0.92      0.92     83362



## GRU

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout, Bidirectional
from sklearn.model_selection import train_test_split

In [None]:
# Features are the raw text column, targets are the integer emotion labels.
X, y = data["text"], data["label"]

In [None]:
# Hold out 20% of the full dataset for testing (stratified by label).
X_tr, X_test_rnn, y_tr, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
# Carve a further 10% of the remaining rows out as the validation split.
X_train_rnn, X_val_rnn, y_train, y_val = train_test_split(
    X_tr, y_tr,
    test_size=0.1,
    stratify=y_tr,
    random_state=42,
)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fit the word index on the training split only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train_rnn)
vocab_size = 1 + len(tokenizer.word_index)
print(f'Vocab Size : {vocab_size}')

# Turn every split into lists of integer word ids.
X_train_seq, X_val_seq, X_test_seq = (
    tokenizer.texts_to_sequences(split)
    for split in (X_train_rnn, X_val_rnn, X_test_rnn)
)

# The padded length is set by the longest *training* sequence.
maxlen = max(map(len, X_train_seq))
print(f'Max Length : {maxlen}')

# Pad all three splits to that common length.
X_train_padded, X_val_padded, X_test_padded = (
    pad_sequences(seqs, maxlen=maxlen)
    for seqs in (X_train_seq, X_val_seq, X_test_seq)
)

Vocab Size : 64646
Max Length : 178


In [None]:
# Two stacked bidirectional GRU layers; the first one returns its full
# output sequence so the second recurrent layer can read it.
gru_model = Sequential()
gru_model.add(Embedding(vocab_size, 100, input_length=maxlen))
gru_model.add(Bidirectional(GRU(128, return_sequences=True, activation='tanh')))
gru_model.add(Dropout(0.3))
gru_model.add(Bidirectional(GRU(64, activation='tanh')))
gru_model.add(Dropout(0.3))
# Six output units, one per emotion label.
gru_model.add(Dense(6, activation='softmax'))

# Labels are integer class ids, hence the sparse cross-entropy loss.
gru_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 5 epochs in batches of 64, reporting validation metrics each epoch.
gru_model.fit(
    x=X_train_padded,
    y=y_train,
    validation_data=(X_val_padded, y_val),
    batch_size=64,
    epochs=5,
)

Epoch 1/5
[1m4690/4690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 48ms/step - accuracy: 0.8473 - loss: 0.3720 - val_accuracy: 0.9414 - val_loss: 0.0997
Epoch 2/5
[1m4690/4690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 39ms/step - accuracy: 0.9398 - loss: 0.1056 - val_accuracy: 0.9401 - val_loss: 0.0952
Epoch 3/5
[1m4690/4690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 39ms/step - accuracy: 0.9432 - loss: 0.0962 - val_accuracy: 0.9402 - val_loss: 0.0961
Epoch 4/5
[1m4690/4690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 39ms/step - accuracy: 0.9426 - loss: 0.0953 - val_accuracy: 0.9385 - val_loss: 0.0948
Epoch 5/5
[1m4690/4690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 39ms/step - accuracy: 0.9451 - loss: 0.0922 - val_accuracy: 0.9391 - val_loss: 0.0956


<keras.src.callbacks.history.History at 0x7ef7cb36a600>

In [None]:
# Show the layer table and parameter counts of the bidirectional GRU model.
gru_model.summary()

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Predicted class = index of the largest softmax output in each row.
gru_probs = gru_model.predict(X_test_padded)
y_preds = np.argmax(gru_probs, axis=1)

# Per-class precision / recall / F1 on the held-out test split.
print(classification_report(y_test, y_preds))

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 11ms/step
              precision    recall  f1-score   support

           0       0.98      0.97      0.97     24238
           1       0.93      0.99      0.96     28214
           2       0.97      0.72      0.82      6911
           3       0.91      0.99      0.94     11463
           4       0.91      0.88      0.89      9542
           5       0.86      0.74      0.79      2994

    accuracy                           0.94     83362
   macro avg       0.93      0.88      0.90     83362
weighted avg       0.94      0.94      0.94     83362



# Transfer Learning

## BERT-base

In [None]:
%pip install transformers --quiet

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from transformers import BertTokenizer, TFBertModel, TFBertForSequenceClassification

In [None]:
from sklearn.model_selection import train_test_split

X, y = data["text"], data["label"]

# Keep a stratified 20% sample of the data; the full dataset crashed the runtime.
X_small, _, y_small, _ = train_test_split(
    X, y,
    test_size=0.80,
    stratify=y,
    random_state=42,
)

# From that sample: 20% test, then 10% of the remainder for validation.
X_tr, X_test, y_tr, y_test = train_test_split(
    X_small, y_small,
    test_size=0.2,
    stratify=y_small,
    random_state=42,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_tr, y_tr,
    test_size=0.1,
    stratify=y_tr,
    random_state=42,
)

In [None]:
# Load the pretrained tokenizer belonging to the 'bert-base-uncased' checkpoint.
# Note: this rebinds the notebook-level name `tokenizer` used by the RNN sections.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
# Fixed token length used for every BERT input (rebinds the notebook-level `maxlen`).
maxlen = 100

def encode_texts(texts):
    """Tokenize an iterable of strings into fixed-length TensorFlow tensors.

    Every text is padded or truncated to `maxlen` tokens using the
    notebook-level `tokenizer`; the tokenizer's output object is returned.
    """
    batch = list(texts)
    encoded = tokenizer(
        batch,
        truncation=True,
        padding='max_length',
        max_length=maxlen,
        return_tensors='tf',
    )
    return encoded

# Encode the three splits in train / validation / test order.
train_encodings, val_encodings, test_encodings = (
    encode_texts(split) for split in (X_train, X_val, X_test)
)


TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


### using CLS token

In [None]:

# Load the pretrained encoder, converting the PyTorch checkpoint to TensorFlow.
bert_base = TFBertModel.from_pretrained('bert-base-uncased', from_pt=True)

# Freeze every encoder layer except the last two, which stay trainable.
encoder_layers = bert_base.bert.encoder.layer
first_trainable = len(encoder_layers) - 2
for position, layer in enumerate(encoder_layers):
    layer.trainable = position >= first_trainable

# ---- Subclassed BERT model: pooler output -> dense head ----
class BertClassifier(tf.keras.Model):
    """BERT encoder followed by a small dense classification head.

    The encoder's `pooler_output` is passed through
    dropout -> Dense(256, relu) -> dropout -> Dense(num_classes, softmax).
    """

    def __init__(self, bert, num_classes):
        """Store the encoder `bert` and create the head layers."""
        super().__init__()
        self.bert = bert
        self.dropout1 = tf.keras.layers.Dropout(0.3)
        self.fc1 = tf.keras.layers.Dense(256, activation='relu')
        self.dropout2 = tf.keras.layers.Dropout(0.3)
        self.fc2 = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        """Return class probabilities for the encoded `inputs`."""
        sentence_vector = self.bert(inputs, training=training).pooler_output
        hidden = self.fc1(self.dropout1(sentence_vector, training=training))
        return self.fc2(self.dropout2(hidden, training=training))

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Al

In [None]:
num_classes = 6
model = BertClassifier(bert_base, num_classes)

# The last two encoder layers are trainable in this variant, so the pretrained
# weights are updated during fit(). Adam's default learning rate (1e-3) is far
# too large for that and tends to wreck the pretrained representation; use a
# learning rate in the usual BERT fine-tuning range instead.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Only the token ids and the attention mask are fed to the model.
model_input_keys = ('input_ids', 'attention_mask')
train_inputs = {key: train_encodings[key] for key in model_input_keys}
val_inputs = {key: val_encodings[key] for key in model_input_keys}

# Train for 5 epochs in batches of 64, validating after each epoch.
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    batch_size=64,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Show the layer table plus trainable / non-trainable parameter counts.
model.summary()

Model: "bert_classifier"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 tf_bert_model (TFBertModel  multiple                  109482240 
 )                                                               
                                                                 
 dropout_37 (Dropout)        multiple                  0         
                                                                 
 dense (Dense)               multiple                  196864    
                                                                 
 dropout_38 (Dropout)        multiple                  0         
                                                                 
 dense_1 (Dense)             multiple                  1542      
                                                                 
Total params: 109680646 (418.40 MB)
Trainable params: 38801926 (148.02 MB)
Non-trainable params: 70878720 (270.38 MB)

In [None]:
# Feed the test token ids and attention mask through the classifier.
test_inputs = {
    'input_ids': test_encodings['input_ids'],
    'attention_mask': test_encodings['attention_mask'],
}
y_pred_probs = model.predict(test_inputs)

# Predicted class = index of the largest probability in each row.
y_pred = y_pred_probs.argmax(axis=1)

print("\n=== Transfer Learning Classification Report ===")
print(classification_report(y_test, y_pred, target_names=class_labels.values()))



=== Transfer Learning Classification Report ===
              precision    recall  f1-score   support

     sadness       0.79      0.95      0.86      4848
         joy       0.76      0.95      0.84      5643
        love       0.31      0.06      0.10      1382
       anger       0.86      0.74      0.79      2293
        fear       0.70      0.51      0.59      1908
    surprise       0.71      0.19      0.29       599

    accuracy                           0.77     16673
   macro avg       0.69      0.56      0.58     16673
weighted avg       0.74      0.77      0.73     16673



### using mean of tokens

In [None]:
# Reload a fresh pretrained encoder (PyTorch checkpoint converted to TensorFlow).
bert_base = TFBertModel.from_pretrained('bert-base-uncased', from_pt=True)

# Freeze all encoder layers for the feature-extraction phase; the last two
# are unfrozen again further down, in the fine-tuning step.
encoder_layers = bert_base.bert.encoder.layer
for position in range(len(encoder_layers)):
    encoder_layers[position].trainable = False

# ---- Subclassed BERT model: masked mean pooling -> dense head ----
class BertClassifier(tf.keras.Model):
    """BERT encoder with mean-pooled token embeddings and a dense head.

    The encoder's `last_hidden_state` is averaged over the sequence axis and
    passed through dropout -> Dense(256, relu) -> dropout ->
    Dense(num_classes, softmax).

    Args:
        bert: encoder model whose output exposes `last_hidden_state`.
        num_classes: number of output classes.
    """

    def __init__(self, bert, num_classes):
        super(BertClassifier, self).__init__()
        self.bert = bert
        self.dropout1 = tf.keras.layers.Dropout(0.3)
        self.fc1 = tf.keras.layers.Dense(256, activation='relu')
        self.dropout2 = tf.keras.layers.Dropout(0.3)
        self.fc2 = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        """Return class probabilities for `inputs`.

        When `inputs` is a dict containing 'attention_mask', padding positions
        are excluded from the average; otherwise a plain mean over all
        positions is used (the previous behaviour).
        """
        # BERT forward pass
        outputs = self.bert(inputs, training=training)

        # Token embeddings: (batch_size, seq_len, hidden_dim)
        token_embeddings = outputs.last_hidden_state

        attention_mask = inputs.get('attention_mask') if isinstance(inputs, dict) else None
        if attention_mask is None:
            mean_embeddings = tf.reduce_mean(token_embeddings, axis=1)
        else:
            # Inputs are padded to a fixed length, so an unmasked mean would be
            # dominated by padding positions for short texts. Weight by the mask.
            mask = tf.cast(tf.expand_dims(attention_mask, axis=-1), token_embeddings.dtype)
            summed = tf.reduce_sum(token_embeddings * mask, axis=1)   # (batch, hidden_dim)
            # Guard against division by zero for an all-padding row.
            token_counts = tf.maximum(tf.reduce_sum(mask, axis=1), 1.0)  # (batch, 1)
            mean_embeddings = summed / token_counts

        # Classifier head
        x = self.dropout1(mean_embeddings, training=training)
        x = self.fc1(x)
        x = self.dropout2(x, training=training)
        return self.fc2(x)


pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Al

In [None]:
# Wrap the encoder in the mean-pooling classifier defined above.
num_classes = 6
model = BertClassifier(bert_base, num_classes)

# Labels are integer class ids, hence the sparse cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Only the token ids and the attention mask are fed to the model.
model_input_keys = ('input_ids', 'attention_mask')
train_inputs = {key: train_encodings[key] for key in model_input_keys}
val_inputs = {key: val_encodings[key] for key in model_input_keys}

# Feature-extraction phase: 5 epochs, batches of 64, validating after each epoch.
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    batch_size=64,
    epochs=5,
)

Epoch 1/5




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Show the layer table plus trainable / non-trainable parameter counts.
model.summary()

Model: "bert_classifier"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 tf_bert_model (TFBertModel  multiple                  109482240 
 )                                                               
                                                                 
 dropout_37 (Dropout)        multiple                  0         
                                                                 
 dense (Dense)               multiple                  196864    
                                                                 
 dropout_38 (Dropout)        multiple                  0         
                                                                 
 dense_1 (Dense)             multiple                  1542      
                                                                 
Total params: 109680646 (418.40 MB)
Trainable params: 24626182 (93.94 MB)
Non-trainable params: 85054464 (324.46 MB)


In [None]:
# Feed the test token ids and attention mask through the classifier.
test_inputs = {
    'input_ids': test_encodings['input_ids'],
    'attention_mask': test_encodings['attention_mask'],
}
y_pred_probs = model.predict(test_inputs)

# Predicted class = index of the largest probability in each row.
y_pred = y_pred_probs.argmax(axis=1)

print("\n=== Transfer Learning Classification Report ===")
print(classification_report(y_test, y_pred, target_names=class_labels.values()))



=== Transfer Learning Classification Report ===
              precision    recall  f1-score   support

     sadness       0.97      0.96      0.96      4848
         joy       0.95      0.93      0.94      5643
        love       0.79      0.88      0.83      1382
       anger       0.90      0.95      0.92      2293
        fear       0.92      0.83      0.87      1908
    surprise       0.74      0.87      0.80       599

    accuracy                           0.92     16673
   macro avg       0.88      0.90      0.89     16673
weighted avg       0.92      0.92      0.92     16673



### Fine-tuning (unfreezing the last two encoder layers)

In [None]:
# Unfreeze the last two encoder layers for fine-tuning.
for layer in bert_base.bert.encoder.layer[-2:]:
    layer.trainable = True

# Changing `trainable` after compile() has no effect on the training step
# that fit() already built, so the model must be recompiled. A small learning
# rate is used so the pretrained weights are only adjusted gently.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Only the token ids and the attention mask are fed to the model.
model_input_keys = ('input_ids', 'attention_mask')
train_inputs = {key: train_encodings[key] for key in model_input_keys}
val_inputs = {key: val_encodings[key] for key in model_input_keys}

# Continue training the same model for 5 more epochs in batches of 64.
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    batch_size=64,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Show the layer table plus trainable / non-trainable parameter counts after unfreezing.
model.summary()

Model: "bert_classifier"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 tf_bert_model (TFBertModel  multiple                  109482240 
 )                                                               
                                                                 
 dropout_37 (Dropout)        multiple                  0         
                                                                 
 dense (Dense)               multiple                  196864    
                                                                 
 dropout_38 (Dropout)        multiple                  0         
                                                                 
 dense_1 (Dense)             multiple                  1542      
                                                                 
Total params: 109680646 (418.40 MB)
Trainable params: 38801926 (148.02 MB)
Non-trainable params: 70878720 (270.38 MB)

In [None]:
# Feed the test token ids and attention mask through the classifier.
test_inputs = {
    'input_ids': test_encodings['input_ids'],
    'attention_mask': test_encodings['attention_mask'],
}
y_pred_probs = model.predict(test_inputs)

# Predicted class = index of the largest probability in each row.
y_pred = y_pred_probs.argmax(axis=1)

print("\n=== Transfer Learning Classification Report ===")
print(classification_report(y_test, y_pred, target_names=class_labels.values()))



=== Transfer Learning Classification Report ===
              precision    recall  f1-score   support

     sadness       0.96      0.96      0.96      4848
         joy       0.93      0.95      0.94      5643
        love       0.82      0.81      0.81      1382
       anger       0.95      0.90      0.92      2293
        fear       0.83      0.94      0.88      1908
    surprise       0.93      0.61      0.74       599

    accuracy                           0.92     16673
   macro avg       0.90      0.86      0.88     16673
weighted avg       0.92      0.92      0.92     16673



## distilBERT

In [None]:
%pip install transformers --quiet

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import DistilBertTokenizerFast, TFDistilBertModel
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
from sklearn.model_selection import train_test_split

X, y = data["text"], data["label"]

# Keep a stratified 20% sample of the data; the full dataset crashed the runtime.
X_small, _, y_small, _ = train_test_split(
    X, y,
    test_size=0.80,
    stratify=y,
    random_state=42,
)

# From that sample: 20% test, then 10% of the remainder for validation.
X_tr, X_test, y_tr, y_test = train_test_split(
    X_small, y_small,
    test_size=0.2,
    stratify=y_small,
    random_state=42,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_tr, y_tr,
    test_size=0.1,
    stratify=y_tr,
    random_state=42,
)

In [None]:
# Checkpoint name shared by the tokenizer here and the model loaded further down.
model_name = "distilbert-base-uncased"
# Load the matching pretrained tokenizer (rebinds the notebook-level `tokenizer`).
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [None]:
# Fixed token length used for every DistilBERT input.
maxlen = 100

def encode_texts(texts):
    """Tokenize an iterable of strings into fixed-length TensorFlow tensors.

    Every text is padded or truncated to `maxlen` tokens using the
    notebook-level `tokenizer`; the tokenizer's output object is returned.
    """
    batch = list(texts)
    encoded = tokenizer(
        batch,
        truncation=True,
        padding="max_length",
        max_length=maxlen,
        return_tensors="tf",
    )
    return encoded

# Encode the three splits in train / validation / test order.
train_enc, val_enc, test_enc = (
    encode_texts(split) for split in (X_train, X_val, X_test)
)

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


In [None]:
# Base DistilBERT model (no classification pooler), converted from the PyTorch checkpoint.
distil = TFDistilBertModel.from_pretrained(model_name, from_pt=True)

# Transfer-learning phase: freeze every transformer block.
# NOTE(review): only the blocks in `transformer.layer` are frozen here; the
# embedding layer is not touched by this loop — confirm that is intended.
transformer_blocks = distil.distilbert.transformer.layer
for layers in transformer_blocks:
    layers.trainable = False

class DistilClassifier(tf.keras.Model):
    """DistilBERT encoder with masked mean pooling and a dense head.

    The encoder's token outputs are averaged over the positions marked in the
    attention mask, then passed through
    dropout -> Dense(hidden_units, relu) -> dropout -> Dense(num_classes, softmax).
    """

    def __init__(self, distil_model, num_classes, dropout_rate=0.3, hidden_units=256):
        """Store the encoder and create the head layers."""
        super().__init__()
        self.distil = distil_model
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        self.dense = tf.keras.layers.Dense(hidden_units, activation='relu')
        self.dropout2 = tf.keras.layers.Dropout(dropout_rate)
        self.out = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        """Return class probabilities for a dict with 'input_ids' and 'attention_mask'."""
        # Token-level encoder output: (batch, seq_len, hidden_dim)
        hidden_states = self.distil(inputs, training=training).last_hidden_state

        # Broadcastable mask: (batch, seq_len, 1), same dtype as the hidden states.
        token_mask = tf.expand_dims(inputs['attention_mask'], axis=-1)
        token_mask = tf.cast(token_mask, dtype=hidden_states.dtype)

        # Masked mean = sum over kept positions / number of kept positions.
        masked_total = tf.reduce_sum(hidden_states * token_mask, axis=1)
        kept_count = tf.maximum(tf.reduce_sum(token_mask, axis=1), 1e-9)
        pooled = masked_total / kept_count

        # Classification head.
        features = self.dense(self.dropout1(pooled, training=training))
        return self.out(self.dropout2(features, training=training))

# Six target classes; the remaining constructor arguments keep their defaults.
num_classes = 6
model = DistilClassifier(distil, num_classes=num_classes)


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


In [None]:
# Labels are integer class ids, hence the sparse cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Only the token ids and the attention mask are fed to the model.
distil_input_keys = ('input_ids', 'attention_mask')
train_inputs = {key: train_enc[key] for key in distil_input_keys}
val_inputs = {key: val_enc[key] for key in distil_input_keys}

# First training run: 5 epochs in batches of 64.
model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    epochs=5,
    batch_size=64,
)

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 218ms/step - accuracy: 0.5395 - loss: 1.2270 - val_accuracy: 0.6422 - val_loss: 0.9712
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 199ms/step - accuracy: 0.5989 - loss: 1.0554 - val_accuracy: 0.6461 - val_loss: 0.9349
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 199ms/step - accuracy: 0.6103 - loss: 1.0240 - val_accuracy: 0.6577 - val_loss: 0.9162
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 201ms/step - accuracy: 0.6167 - loss: 1.0040 - val_accuracy: 0.6598 - val_loss: 0.8925
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 208ms/step - accuracy: 0.6191 - loss: 0.9938 - val_accuracy: 0.6643 - val_loss: 0.8899


<keras.src.callbacks.history.History at 0x7b8418e54d40>

In [None]:
# Only the token ids and the attention mask are fed to the model.
distil_input_keys = ('input_ids', 'attention_mask')
train_inputs = {key: train_enc[key] for key in distil_input_keys}
val_inputs = {key: val_enc[key] for key in distil_input_keys}

# Second training run on the same model: another 5 epochs in batches of 64.
model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    epochs=5,
    batch_size=64,
)

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 207ms/step - accuracy: 0.6251 - loss: 0.9884 - val_accuracy: 0.6665 - val_loss: 0.8922
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 204ms/step - accuracy: 0.6266 - loss: 0.9734 - val_accuracy: 0.6707 - val_loss: 0.8730
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 199ms/step - accuracy: 0.6311 - loss: 0.9729 - val_accuracy: 0.6713 - val_loss: 0.8733
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 199ms/step - accuracy: 0.6343 - loss: 0.9595 - val_accuracy: 0.6758 - val_loss: 0.8583
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 199ms/step - accuracy: 0.6361 - loss: 0.9501 - val_accuracy: 0.6770 - val_loss: 0.8447


<keras.src.callbacks.history.History at 0x7b835ad53590>

In [None]:
# Show the layer table and parameter counts of the DistilBERT classifier.
model.summary()

In [None]:
# Feed the test token ids and attention mask through the classifier.
test_inputs = {
    'input_ids': test_enc['input_ids'],
    'attention_mask': test_enc['attention_mask'],
}
preds = model.predict(test_inputs)

# Predicted class = index of the largest probability in each row.
y_pred = preds.argmax(axis=1)

print("\n=== Transfer Learning (Distil frozen) Classification Report ===")
print(classification_report(y_test, y_pred, target_names=class_labels.values()))

[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 79ms/step

=== Transfer Learning (Distil frozen) Classification Report ===
              precision    recall  f1-score   support

     sadness       0.62      0.82      0.71      4848
         joy       0.70      0.85      0.77      5643
        love       0.70      0.27      0.39      1382
       anger       0.73      0.45      0.55      2293
        fear       0.66      0.47      0.55      1908
    surprise       0.65      0.27      0.38       599

    accuracy                           0.67     16673
   macro avg       0.68      0.52      0.56     16673
weighted avg       0.68      0.67      0.65     16673



In [None]:
# Unfreeze the last 2 transformer blocks for fine-tuning.
for layers in distil.distilbert.transformer.layer[-2:]:
  layers.trainable = True

# Changing `trainable` after compile() has no effect on the training step
# that fit() already built, so the model must be recompiled. A small learning
# rate is used so the pretrained weights are only adjusted gently.
model.compile(optimizer=Adam(learning_rate=2e-5),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Only the token ids and the attention mask are fed to the model.
distil_input_keys = ('input_ids', 'attention_mask')
train_inputs = {key: train_enc[key] for key in distil_input_keys}
val_inputs = {key: val_enc[key] for key in distil_input_keys}

# Training run after the unfreeze step: 5 epochs in batches of 64.
model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    epochs=5,
    batch_size=64,
)

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 199ms/step - accuracy: 0.6367 - loss: 0.9454 - val_accuracy: 0.6773 - val_loss: 0.8543
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 199ms/step - accuracy: 0.6427 - loss: 0.9392 - val_accuracy: 0.6916 - val_loss: 0.8441
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 198ms/step - accuracy: 0.6472 - loss: 0.9238 - val_accuracy: 0.6871 - val_loss: 0.8345
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 198ms/step - accuracy: 0.6435 - loss: 0.9273 - val_accuracy: 0.6896 - val_loss: 0.8244
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 198ms/step - accuracy: 0.6503 - loss: 0.9217 - val_accuracy: 0.6889 - val_loss: 0.8218


<keras.src.callbacks.history.History at 0x7b835aa910d0>

In [None]:
# Show the layer table and parameter counts after the unfreeze step.
model.summary()

In [None]:
# Evaluate the model after the last two transformer blocks were unfrozen.
preds = model.predict({'input_ids': test_enc['input_ids'], 'attention_mask': test_enc['attention_mask']})
# Predicted class = index of the largest probability in each row.
y_pred = np.argmax(preds, axis=1)
# The title previously still said "(Distil frozen)", mislabelling this report.
print("\n=== Fine-Tuning (Distil last 2 layers unfrozen) Classification Report ===")
print(classification_report(y_test, y_pred, target_names=class_labels.values()))

[1m522/522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 78ms/step

=== Transfer Learning (Distil frozen) Classification Report ===
              precision    recall  f1-score   support

     sadness       0.65      0.79      0.71      4848
         joy       0.69      0.87      0.77      5643
        love       0.70      0.27      0.39      1382
       anger       0.75      0.45      0.56      2293
        fear       0.66      0.50      0.57      1908
    surprise       0.66      0.31      0.42       599

    accuracy                           0.68     16673
   macro avg       0.68      0.53      0.57     16673
weighted avg       0.68      0.68      0.66     16673

