### Imports and Setup


In [1]:
import sys
sys.path.append("..")

import os
from utils.preprocess import load_data, get_label_encoder
from utils.fcnn import train_fcnn_model
from utils.rnn import train_rnn_model
from utils.transformer import train_transformer_model


import warnings
# NOTE(review): this suppresses every warning category for the rest of the
# session, including deprecation and numerical warnings emitted during training.
warnings.filterwarnings("ignore")

# Directory that the training cells below write their model artifacts into.
# The path is relative to the notebook's working directory. It is created up
# front so the later save calls do not depend on the directory already existing
# (e.g. on a fresh checkout).
MODELS = "../models/"
os.makedirs(MODELS, exist_ok=True)




In [2]:
# Load the train and test splits.
train_df = load_data("../data/train.txt")
test_df = load_data("../data/test.txt")

# Build the label <-> integer id mappings from the training labels only, then
# attach an integer `label_id` column to both splits.
label2id, id2label = get_label_encoder(train_df['label'])
train_df['label_id'] = train_df['label'].map(label2id)
test_df['label_id'] = test_df['label'].map(label2id)

# `Series.map` yields NaN for any label that has no entry in `label2id`
# (for example a class that only occurs in the test split). Fail fast here
# instead of passing NaN targets on to the training functions.
for split_name, split_df in (("train", train_df), ("test", test_df)):
    unmapped = split_df.loc[split_df['label_id'].isna(), 'label'].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"{split_name} split contains labels missing from label2id: {list(unmapped)}"
        )

print("Training data sample:")
print(train_df.head())
print("Label2ID mapping:", label2id)


Training datga sample:
                                                text    label  label_id
0                            i didnt feel humiliated  sadness         4
1  i can go from feeling so hopeless to so damned...  sadness         4
2   im grabbing a minute to post i feel greedy wrong    anger         0
3  i am ever feeling nostalgic about the fireplac...     love         3
4                               i am feeling grouchy    anger         0
Label2ID mapping: {'anger': 0, 'fear': 1, 'joy': 2, 'love': 3, 'sadness': 4, 'surprise': 5}


### Training FCNN

In [6]:
# Train the FCNN model for 10 epochs on the encoded splits, then write it to
# the MODELS directory in the `.keras` format.
fcnn_model = train_fcnn_model(train_df, test_df, label2id, epochs=10)
fcnn_path = os.path.join(MODELS, "fcnn_model.keras")
fcnn_model.save(fcnn_path)
print("FCNN model saved.")


Epoch 1/10
500/500 - 9s - loss: 0.8890 - accuracy: 0.6700 - 9s/epoch - 18ms/step
Epoch 2/10
500/500 - 8s - loss: 0.2457 - accuracy: 0.9176 - 8s/epoch - 17ms/step
Epoch 3/10
500/500 - 7s - loss: 0.1059 - accuracy: 0.9659 - 7s/epoch - 15ms/step
Epoch 4/10
500/500 - 7s - loss: 0.0585 - accuracy: 0.9806 - 7s/epoch - 14ms/step
Epoch 5/10
500/500 - 6s - loss: 0.0348 - accuracy: 0.9886 - 6s/epoch - 13ms/step
Epoch 6/10
500/500 - 6s - loss: 0.0327 - accuracy: 0.9901 - 6s/epoch - 13ms/step
Epoch 7/10
500/500 - 6s - loss: 0.0279 - accuracy: 0.9923 - 6s/epoch - 13ms/step
Epoch 8/10
500/500 - 7s - loss: 0.0211 - accuracy: 0.9934 - 7s/epoch - 13ms/step
Epoch 9/10
500/500 - 6s - loss: 0.0186 - accuracy: 0.9933 - 6s/epoch - 13ms/step
Epoch 10/10
500/500 - 6s - loss: 0.0196 - accuracy: 0.9931 - 6s/epoch - 13ms/step

FCNN Classification Report:

              precision    recall  f1-score   support

       anger       0.87      0.82      0.84       275
        fear       0.80      0.83      0.82       

### Training RNN with LSTM

In [5]:
# Train the RNN (LSTM) model for 20 epochs on the encoded splits, then write it
# to the MODELS directory in the `.keras` format.
rnn_model = train_rnn_model(train_df, test_df, label2id, epochs=20)
rnn_path = os.path.join(MODELS, "rnn_model.keras")
rnn_model.save(rnn_path)
print("RNN with LSTM model saved.")


Epoch 1/20
500/500 - 9s - loss: 1.5950 - accuracy: 0.3241 - 9s/epoch - 18ms/step
Epoch 2/20
500/500 - 8s - loss: 1.5815 - accuracy: 0.3318 - 8s/epoch - 16ms/step
Epoch 3/20
500/500 - 7s - loss: 1.5184 - accuracy: 0.3464 - 7s/epoch - 15ms/step
Epoch 4/20
500/500 - 7s - loss: 1.5059 - accuracy: 0.3469 - 7s/epoch - 15ms/step
Epoch 5/20
500/500 - 7s - loss: 1.5795 - accuracy: 0.3346 - 7s/epoch - 15ms/step
Epoch 6/20
500/500 - 7s - loss: 1.5775 - accuracy: 0.3347 - 7s/epoch - 15ms/step
Epoch 7/20
500/500 - 7s - loss: 1.5569 - accuracy: 0.3431 - 7s/epoch - 15ms/step
Epoch 8/20
500/500 - 7s - loss: 1.5828 - accuracy: 0.3313 - 7s/epoch - 15ms/step
Epoch 9/20
500/500 - 7s - loss: 1.5830 - accuracy: 0.3326 - 7s/epoch - 15ms/step
Epoch 10/20
500/500 - 8s - loss: 1.5794 - accuracy: 0.3351 - 8s/epoch - 15ms/step
Epoch 11/20
500/500 - 8s - loss: 1.5638 - accuracy: 0.3374 - 8s/epoch - 15ms/step
Epoch 12/20
500/500 - 7s - loss: 1.2896 - accuracy: 0.3986 - 7s/epoch - 15ms/step
Epoch 13/20
500/500 - 8s 

### Training BERT Transformer

In [3]:
# Train the transformer model for 3 epochs; the helper returns both the model
# and the tokenizer it was trained with.
transformer_model, tokenizer = train_transformer_model(
    train_df, test_df, label2id, epochs=3
)

# Save the model and its tokenizer into the same directory under MODELS so
# they are stored together.
save_path = os.path.join(MODELS, "bert_model")
transformer_model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

# Plain string literal: the message has no placeholders, so no f-string is needed.
print("BERT model saved.")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainer is using device: cuda:0


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1567,0.172648,0.9325
2,0.0928,0.152107,0.935
3,0.0864,0.172425,0.93



BERT Classification Report:

              precision    recall  f1-score   support

       anger       0.92      0.91      0.92       275
        fear       0.86      0.92      0.89       224
         joy       0.95      0.96      0.96       695
        love       0.88      0.80      0.84       159
     sadness       0.97      0.97      0.97       581
    surprise       0.78      0.64      0.70        66

    accuracy                           0.93      2000
   macro avg       0.89      0.87      0.88      2000
weighted avg       0.93      0.93      0.93      2000

BERT model saved.
