In [1]:
import random
import numpy as np
import torch

seed_val = 17
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [2]:
from utils import process_csv
import training_params
from dataset import SentimentDataset
from torch.utils.data import DataLoader
from transformers import BertModel, AdamW, get_linear_schedule_with_warmup
from transformers import BertForSequenceClassification
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, classification_report
import random

train_texts, train_targets = process_csv(training_params.TRAIN_DATA)
valid_texts, valid_targets = process_csv(training_params.VALID_DATA)

# subset

indices_train = random.sample(range(len(train_texts)), 1000)
indices_valid = random.sample(range(len(valid_texts)), 500)

train_texts = [t for i, t in enumerate(train_texts) if i in indices_train]
train_targets = [t for i, t in enumerate(train_targets) if i in indices_train]
valid_texts = [t for i, t in enumerate(valid_texts) if i in indices_valid]
valid_targets = [t for i, t in enumerate(valid_targets) if i in indices_valid]

train_dataset = SentimentDataset(texts=train_texts, targets=train_targets)
valid_dataset = SentimentDataset(texts=valid_texts, targets=valid_targets)

# d = train_dataset.__getitem__(0)
# print(d.keys())
# print(d['input_ids'].shape)
# print(d['attention_mask'].shape)
# print(d['targets'].shape)

train_data_loader = DataLoader(train_dataset, batch_size=training_params.BATCH_SIZE, num_workers=4)
valid_data_loader = DataLoader(valid_dataset, batch_size=training_params.BATCH_SIZE, num_workers=4)

# data = next(iter(train_data_loader))
# print('-'*50)
# print(data.keys())
# print(data['input_ids'].shape)
# print(data['attention_mask'].shape)
# print(data['targets'].shape)


In [3]:
from collections import Counter

In [4]:
Counter(train_targets)

Counter({2: 350, 1: 410, 0: 240})

In [5]:
Counter(valid_targets)

Counter({1: 205, 2: 174, 0: 121})

In [6]:
import torch
from tqdm import tqdm

In [7]:
model = BertForSequenceClassification.from_pretrained(
                                                        training_params.PRE_TRAINED_MODEL_NAME,
                                                        num_labels=len(training_params.LABEL_DICT),
                                                        output_attentions=False,
                                                        output_hidden_states=False)


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [8]:
if training_params.FULL_FINETUNING:
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.0}
    ]
else:
    param_optimizer = list(model.classifier.named_parameters())
    optimizer_grouped_parameters = [{"params": [p for n, p in param_optimizer]}]

In [9]:
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr = training_params.LEARNING_RATE,
    eps = 1e-8
)

In [10]:
next(iter(train_dataset))

{'input_ids': tensor([  101,   138, 10732,  6758,  3317,   102,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0]),
 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'targets': tensor(2)}

In [11]:
total_steps = len(train_data_loader) * training_params.EPOCHS

In [12]:
total_steps

1000

In [13]:
epochs = training_params.EPOCHS

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps = total_steps
)

In [14]:
model = model.to(training_params.DEVICE)

In [15]:
checkpoint_folder = './checkpoints'

In [16]:
# full finetuning

In [17]:
starting_epoch = 0


loss_values, validation_loss_values = [], []

train_step_count = 0
for epoch in range(starting_epoch, training_params.EPOCHS):

    model.train()
    total_loss = 0

    # Training loop
    tk0 = tqdm(train_data_loader, total=int(len(train_data_loader)), unit='batch')
    tk0.set_description(f'Epoch {epoch + 1}')

    for step, batch in enumerate(tk0):
        #  add batch to gpu
        for k, v in batch.items():
            batch[k] = v.to(training_params.DEVICE)

        b_input_ids, b_input_mask, b_labels = batch['input_ids'], batch['attention_mask'], batch['targets']

        model.zero_grad()

        outputs = model(b_input_ids, token_type_ids=None,
                        attention_mask=b_input_mask, labels=b_labels)
        
        loss = outputs['loss']
        loss.backward()
        total_loss += loss.item()

        # loss for step
#         writer.add_scalar("Training Loss- Step", loss.sum(), train_step_count)
#         wandb.log({'Training Loss - Step': loss.sum()})  
        train_step_count += 1

        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=training_params.MAX_GRAD_NORM)

        optimizer.step()

        scheduler.step()

    # Calculate the average loss over the training data.
    avg_train_loss = total_loss / len(train_data_loader)
    print("Average train loss: {}".format(avg_train_loss))
#     writer.add_scalar("Training Loss", avg_train_loss, epoch)
#     wandb.log({'Training loss': avg_train_loss, 'epoch': epoch})

    state = {'epoch': epoch,
             'state_dict': model.state_dict(),
             'optimizer': optimizer.state_dict()}
    torch.save(state, checkpoint_folder + '/checkpoint_last.pt')
    # Store the loss value for plotting the learning curve.
    loss_values.append(avg_train_loss)

    model.eval()
    # Reset the validation loss for this epoch.
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predictions, true_labels = [], []

    best_val_loss = np.inf

    for batch in tqdm(valid_data_loader, total=int(len(valid_data_loader)), unit='batch', leave=True):
        for k, v in batch.items():
            batch[k] = v.to(training_params.DEVICE)
        b_input_ids, b_input_mask, b_labels = batch['input_ids'], batch['attention_mask'], batch['targets']

        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask, labels=b_labels)
        logits = outputs['logits'].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Calculate the accuracy for this batch of test sentences.
        eval_loss += outputs['loss'].item()
        
        preds = np.argmax(logits, axis=1)
        predictions.extend(preds)
        true_labels.extend(label_ids)
        
#         print(predictions)
#         print(true_labels)

    eval_loss = eval_loss / len(valid_data_loader)

    if eval_loss < best_val_loss:
        state = {'epoch': epoch,
                 'state_dict': model.state_dict(),
                 'optimizer': optimizer.state_dict()}
        torch.save(state, checkpoint_folder + '/checkpoint_best.pt')
        best_val_loss = eval_loss

    validation_loss_values.append(eval_loss)
    print("Validation loss: {}".format(eval_loss))
#     writer.add_scalar("Validation Loss", eval_loss, epoch)

#     wandb.log({'Validation loss': eval_loss})

#     pred_tags = [tag_values[p_i] for p, l in zip(predictions, true_labels) for p_i, l_i in zip(p, l) if
#                  tag_values[l_i] != "PAD"]
#     valid_tags = [tag_values[l_i] for l in true_labels for l_i in l if tag_values[l_i] != "PAD"]

    val_accuracy = accuracy_score(true_labels, predictions)
    val_f1_score = f1_score(true_labels, predictions, average='macro')
    print("Validation Accuracy: {}".format(val_accuracy))
    print("Validation F1-Score: {}".format(val_f1_score))
    
    true_labels_names = [training_params.LABEL_DICT[str(tl)] for tl in true_labels]
    predictions_names = [training_params.LABEL_DICT[str(pr)] for pr in predictions]
    
    print("Classification Report: {}".format(classification_report(true_labels_names, predictions_names,
                                                                   labels=np.unique(predictions_names))))


Epoch 1: 100%|█████████████████████████████| 100/100 [00:25<00:00,  3.86batch/s]


Average train loss: 0.9072126896679401


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.13batch/s]


Validation loss: 0.8043718355894088
Validation Accuracy: 0.652
Validation F1-Score: 0.5097151848937845
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       1.00      0.03      0.06       121
     NEUTRAL       0.59      0.80      0.68       174
    POSITIVE       0.70      0.89      0.79       205

    accuracy                           0.65       500
   macro avg       0.76      0.57      0.51       500
weighted avg       0.74      0.65      0.57       500



Epoch 2: 100%|█████████████████████████████| 100/100 [00:25<00:00,  3.95batch/s]


Average train loss: 0.5210530643165111


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.12batch/s]


Validation loss: 0.4701364353299141
Validation Accuracy: 0.82
Validation F1-Score: 0.8043441186502475
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.93      0.61      0.74       121
     NEUTRAL       0.74      0.86      0.79       174
    POSITIVE       0.86      0.91      0.88       205

    accuracy                           0.82       500
   macro avg       0.84      0.79      0.80       500
weighted avg       0.83      0.82      0.82       500



Epoch 3: 100%|█████████████████████████████| 100/100 [00:26<00:00,  3.81batch/s]


Average train loss: 0.2816709213145077


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 13.52batch/s]


Validation loss: 0.5009071134403348
Validation Accuracy: 0.864
Validation F1-Score: 0.8609600967557173
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.85      0.86      0.85       121
     NEUTRAL       0.82      0.84      0.83       174
    POSITIVE       0.92      0.89      0.90       205

    accuracy                           0.86       500
   macro avg       0.86      0.86      0.86       500
weighted avg       0.87      0.86      0.86       500



Epoch 4: 100%|█████████████████████████████| 100/100 [00:26<00:00,  3.76batch/s]


Average train loss: 0.18368448690278455


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 13.48batch/s]


Validation loss: 0.5322721938369795
Validation Accuracy: 0.87
Validation F1-Score: 0.8672395342501066
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.87      0.85      0.86       121
     NEUTRAL       0.81      0.86      0.84       174
    POSITIVE       0.93      0.89      0.91       205

    accuracy                           0.87       500
   macro avg       0.87      0.87      0.87       500
weighted avg       0.87      0.87      0.87       500



Epoch 5: 100%|█████████████████████████████| 100/100 [00:26<00:00,  3.75batch/s]


Average train loss: 0.10557754631270655


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 13.45batch/s]


Validation loss: 0.5936229682108387
Validation Accuracy: 0.874
Validation F1-Score: 0.8714507264506471
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.88      0.85      0.87       121
     NEUTRAL       0.81      0.86      0.84       174
    POSITIVE       0.93      0.90      0.91       205

    accuracy                           0.87       500
   macro avg       0.87      0.87      0.87       500
weighted avg       0.88      0.87      0.87       500



Epoch 6: 100%|█████████████████████████████| 100/100 [00:26<00:00,  3.81batch/s]


Average train loss: 0.03189895458112005


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.09batch/s]


Validation loss: 0.6205426702403929
Validation Accuracy: 0.884
Validation F1-Score: 0.8824965080777741
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.89      0.88      0.88       121
     NEUTRAL       0.83      0.87      0.85       174
    POSITIVE       0.93      0.90      0.92       205

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.89      0.88      0.88       500



Epoch 7: 100%|█████████████████████████████| 100/100 [00:25<00:00,  3.94batch/s]


Average train loss: 0.009528700941009447


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.14batch/s]


Validation loss: 0.6824684699816862
Validation Accuracy: 0.882
Validation F1-Score: 0.8802351485789027
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.90      0.86      0.88       121
     NEUTRAL       0.82      0.88      0.85       174
    POSITIVE       0.93      0.90      0.92       205

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.88      0.88      0.88       500



Epoch 8: 100%|█████████████████████████████| 100/100 [00:25<00:00,  3.94batch/s]


Average train loss: 0.010369259992730804


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.14batch/s]


Validation loss: 0.7207766271947185
Validation Accuracy: 0.878
Validation F1-Score: 0.8748117539026629
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.87      0.87      0.87       121
     NEUTRAL       0.84      0.84      0.84       174
    POSITIVE       0.92      0.91      0.92       205

    accuracy                           0.88       500
   macro avg       0.87      0.87      0.87       500
weighted avg       0.88      0.88      0.88       500



Epoch 9: 100%|█████████████████████████████| 100/100 [00:25<00:00,  3.94batch/s]


Average train loss: 0.006325750190590043


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.13batch/s]


Validation loss: 0.7385632276820252
Validation Accuracy: 0.88
Validation F1-Score: 0.8771671756655307
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.87      0.88      0.87       121
     NEUTRAL       0.84      0.84      0.84       174
    POSITIVE       0.92      0.91      0.92       205

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.88      0.88      0.88       500



Epoch 10: 100%|████████████████████████████| 100/100 [00:25<00:00,  3.94batch/s]


Average train loss: 0.0078029017985682


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.14batch/s]


Validation loss: 0.7472997263655998
Validation Accuracy: 0.88
Validation F1-Score: 0.8771671756655307
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.87      0.88      0.87       121
     NEUTRAL       0.84      0.84      0.84       174
    POSITIVE       0.92      0.91      0.92       205

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.88      0.88      0.88       500



In [15]:
starting_epoch = 0


loss_values, validation_loss_values = [], []

train_step_count = 0
for epoch in range(starting_epoch, training_params.EPOCHS):

    model.train()
    total_loss = 0

    # Training loop
    tk0 = tqdm(train_data_loader, total=int(len(train_data_loader)), unit='batch')
    tk0.set_description(f'Epoch {epoch + 1}')

    for step, batch in enumerate(tk0):
        #  add batch to gpu
        for k, v in batch.items():
            batch[k] = v.to(training_params.DEVICE)

        b_input_ids, b_input_mask, b_labels = batch['input_ids'], batch['attention_mask'], batch['targets']

        model.zero_grad()

        outputs = model(b_input_ids, token_type_ids=None,
                        attention_mask=b_input_mask, labels=b_labels)
        
        loss = outputs['loss']
        loss.backward()
        total_loss += loss.item()

        # loss for step
#         writer.add_scalar("Training Loss- Step", loss.sum(), train_step_count)
#         wandb.log({'Training Loss - Step': loss.sum()})  
        train_step_count += 1

        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=training_params.MAX_GRAD_NORM)

        optimizer.step()

        scheduler.step()

    # Calculate the average loss over the training data.
    avg_train_loss = total_loss / len(train_data_loader)
    print("Average train loss: {}".format(avg_train_loss))
#     writer.add_scalar("Training Loss", avg_train_loss, epoch)
#     wandb.log({'Training loss': avg_train_loss, 'epoch': epoch})

    state = {'epoch': epoch,
             'state_dict': model.state_dict(),
             'optimizer': optimizer.state_dict()}
    torch.save(state, checkpoint_folder + '/checkpoint_last.pt')
    # Store the loss value for plotting the learning curve.
    loss_values.append(avg_train_loss)

    model.eval()
    # Reset the validation loss for this epoch.
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predictions, true_labels = [], []

    best_val_loss = np.inf

    for batch in tqdm(valid_data_loader, total=int(len(valid_data_loader)), unit='batch', leave=True):
        for k, v in batch.items():
            batch[k] = v.to(training_params.DEVICE)
        b_input_ids, b_input_mask, b_labels = batch['input_ids'], batch['attention_mask'], batch['targets']

        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask, labels=b_labels)
        logits = outputs['logits'].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Calculate the accuracy for this batch of test sentences.
        eval_loss += outputs['loss'].item()
        
        preds = np.argmax(logits, axis=1)
        predictions.extend(preds)
        true_labels.extend(label_ids)
        
#         print(predictions)
#         print(true_labels)

    eval_loss = eval_loss / len(valid_data_loader)

    if eval_loss < best_val_loss:
        state = {'epoch': epoch,
                 'state_dict': model.state_dict(),
                 'optimizer': optimizer.state_dict()}
        torch.save(state, checkpoint_folder + '/checkpoint_best.pt')
        best_val_loss = eval_loss

    validation_loss_values.append(eval_loss)
    print("Validation loss: {}".format(eval_loss))
#     writer.add_scalar("Validation Loss", eval_loss, epoch)

#     wandb.log({'Validation loss': eval_loss})

#     pred_tags = [tag_values[p_i] for p, l in zip(predictions, true_labels) for p_i, l_i in zip(p, l) if
#                  tag_values[l_i] != "PAD"]
#     valid_tags = [tag_values[l_i] for l in true_labels for l_i in l if tag_values[l_i] != "PAD"]

    val_accuracy = accuracy_score(true_labels, predictions)
    val_f1_score = f1_score(true_labels, predictions, average='macro')
    print("Validation Accuracy: {}".format(val_accuracy))
    print("Validation F1-Score: {}".format(val_f1_score))
    
    true_labels_names = [training_params.LABEL_DICT[str(tl)] for tl in true_labels]
    predictions_names = [training_params.LABEL_DICT[str(pr)] for pr in predictions]
    
    print("Classification Report: {}".format(classification_report(true_labels_names, predictions_names,
                                                                   labels=np.unique(predictions_names))))


Epoch 1: 100%|█████████████████████████████| 100/100 [00:25<00:00,  3.96batch/s]


Average train loss: 0.9072126896679401


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.19batch/s]


Validation loss: 0.8043718355894088
Validation Accuracy: 0.652
Validation F1-Score: 0.5097151848937845
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       1.00      0.03      0.06       121
     NEUTRAL       0.59      0.80      0.68       174
    POSITIVE       0.70      0.89      0.79       205

    accuracy                           0.65       500
   macro avg       0.76      0.57      0.51       500
weighted avg       0.74      0.65      0.57       500



Epoch 2: 100%|█████████████████████████████| 100/100 [00:25<00:00,  3.91batch/s]


Average train loss: 0.5210530643165111


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.06batch/s]


Validation loss: 0.4701364353299141
Validation Accuracy: 0.82
Validation F1-Score: 0.8043441186502475
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.93      0.61      0.74       121
     NEUTRAL       0.74      0.86      0.79       174
    POSITIVE       0.86      0.91      0.88       205

    accuracy                           0.82       500
   macro avg       0.84      0.79      0.80       500
weighted avg       0.83      0.82      0.82       500



Epoch 3: 100%|█████████████████████████████| 100/100 [00:26<00:00,  3.83batch/s]


Average train loss: 0.2816709213145077


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 13.58batch/s]


Validation loss: 0.5009071134403348
Validation Accuracy: 0.864
Validation F1-Score: 0.8609600967557173
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.85      0.86      0.85       121
     NEUTRAL       0.82      0.84      0.83       174
    POSITIVE       0.92      0.89      0.90       205

    accuracy                           0.86       500
   macro avg       0.86      0.86      0.86       500
weighted avg       0.87      0.86      0.86       500



Epoch 4: 100%|█████████████████████████████| 100/100 [00:26<00:00,  3.78batch/s]


Average train loss: 0.18368448690278455


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 13.46batch/s]


Validation loss: 0.5322721938369795
Validation Accuracy: 0.87
Validation F1-Score: 0.8672395342501066
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.87      0.85      0.86       121
     NEUTRAL       0.81      0.86      0.84       174
    POSITIVE       0.93      0.89      0.91       205

    accuracy                           0.87       500
   macro avg       0.87      0.87      0.87       500
weighted avg       0.87      0.87      0.87       500



Epoch 5: 100%|█████████████████████████████| 100/100 [00:27<00:00,  3.70batch/s]


Average train loss: 0.10557754631270655


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 13.36batch/s]


Validation loss: 0.5936229682108387
Validation Accuracy: 0.874
Validation F1-Score: 0.8714507264506471
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.88      0.85      0.87       121
     NEUTRAL       0.81      0.86      0.84       174
    POSITIVE       0.93      0.90      0.91       205

    accuracy                           0.87       500
   macro avg       0.87      0.87      0.87       500
weighted avg       0.88      0.87      0.87       500



Epoch 6: 100%|█████████████████████████████| 100/100 [00:26<00:00,  3.84batch/s]


Average train loss: 0.03189895458112005


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.15batch/s]


Validation loss: 0.6205426702403929
Validation Accuracy: 0.884
Validation F1-Score: 0.8824965080777741
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.89      0.88      0.88       121
     NEUTRAL       0.83      0.87      0.85       174
    POSITIVE       0.93      0.90      0.92       205

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.89      0.88      0.88       500



Epoch 7: 100%|█████████████████████████████| 100/100 [00:25<00:00,  3.91batch/s]


Average train loss: 0.009528700941009447


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.17batch/s]


Validation loss: 0.6824684699816862
Validation Accuracy: 0.882
Validation F1-Score: 0.8802351485789027
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.90      0.86      0.88       121
     NEUTRAL       0.82      0.88      0.85       174
    POSITIVE       0.93      0.90      0.92       205

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.88      0.88      0.88       500



Epoch 8: 100%|█████████████████████████████| 100/100 [00:25<00:00,  3.94batch/s]


Average train loss: 0.010369259992730804


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.19batch/s]


Validation loss: 0.7207766271947185
Validation Accuracy: 0.878
Validation F1-Score: 0.8748117539026629
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.87      0.87      0.87       121
     NEUTRAL       0.84      0.84      0.84       174
    POSITIVE       0.92      0.91      0.92       205

    accuracy                           0.88       500
   macro avg       0.87      0.87      0.87       500
weighted avg       0.88      0.88      0.88       500



Epoch 9: 100%|█████████████████████████████| 100/100 [00:25<00:00,  3.93batch/s]


Average train loss: 0.006325750190590043


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.21batch/s]


Validation loss: 0.7385632276820252
Validation Accuracy: 0.88
Validation F1-Score: 0.8771671756655307
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.87      0.88      0.87       121
     NEUTRAL       0.84      0.84      0.84       174
    POSITIVE       0.92      0.91      0.92       205

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.88      0.88      0.88       500



Epoch 10: 100%|████████████████████████████| 100/100 [00:26<00:00,  3.80batch/s]


Average train loss: 0.0078029017985682


100%|████████████████████████████████████████| 50/50 [00:03<00:00, 14.09batch/s]


Validation loss: 0.7472997263655998
Validation Accuracy: 0.88
Validation F1-Score: 0.8771671756655307
Classification Report:               precision    recall  f1-score   support

    NEGATIVE       0.87      0.88      0.87       121
     NEUTRAL       0.84      0.84      0.84       174
    POSITIVE       0.92      0.91      0.92       205

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.88      0.88      0.88       500



# Metrics

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score, classification_report

In [8]:
true_labels_names = ['pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'neu', 'neu']
predictions_names = ['pos', 'pos', 'neg', 'neg', 'neg', 'pos', 'neu', 'neu']

report = classification_report(true_labels_names, 
                                        predictions_names, output_dict=True,
                                        labels=np.unique(predictions_names))

In [9]:
print(report)

{'neg': {'precision': 0.6666666666666666, 'recall': 0.6666666666666666, 'f1-score': 0.6666666666666666, 'support': 3}, 'neu': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 2}, 'pos': {'precision': 0.6666666666666666, 'recall': 0.6666666666666666, 'f1-score': 0.6666666666666666, 'support': 3}, 'accuracy': 0.75, 'macro avg': {'precision': 0.7777777777777777, 'recall': 0.7777777777777777, 'f1-score': 0.7777777777777777, 'support': 8}, 'weighted avg': {'precision': 0.75, 'recall': 0.75, 'f1-score': 0.75, 'support': 8}}


In [10]:
df_report = pd.DataFrame(report)

In [12]:
df_report.transpose()

Unnamed: 0,precision,recall,f1-score,support
neg,0.666667,0.666667,0.666667,3.0
neu,1.0,1.0,1.0,2.0
pos,0.666667,0.666667,0.666667,3.0
accuracy,0.75,0.75,0.75,0.75
macro avg,0.777778,0.777778,0.777778,8.0
weighted avg,0.75,0.75,0.75,8.0


In [14]:
df_report.index

Index(['precision', 'recall', 'f1-score', 'support'], dtype='object')

In [15]:
    df_report = pd.DataFrame(report).transpose() 
    df_report['categories'] = list(df_report.index) 
    df_report = df_report[ ['categories'] + [ col for col in df_report.columns if col != 'categories' ] ] 

In [16]:
df_report

Unnamed: 0,categories,precision,recall,f1-score,support
neg,neg,0.666667,0.666667,0.666667,3.0
neu,neu,1.0,1.0,1.0,2.0
pos,pos,0.666667,0.666667,0.666667,3.0
accuracy,accuracy,0.75,0.75,0.75,0.75
macro avg,macro avg,0.777778,0.777778,0.777778,8.0
weighted avg,weighted avg,0.75,0.75,0.75,8.0
