In [43]:
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModel
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from transformers import AdamW, get_linear_schedule_with_warmup, get_constant_schedule_with_warmup

from datetime import datetime
from tqdm import tqdm
import pandas as pd
import numpy as np
import statistics
import json
import os
import re

In [44]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Hyper-params

In [45]:
MAX_LEN = 128
BATCH_SIZE = 32
TEST_BATCH_SIZE = 128
TRAIN_VAL_SIZE = 0.2
VAL_TEST_SIZE = 0.5
EPOCH = 1

# Dataset Preparation

In [34]:
bbc_df = pd.read_csv("../datasets/bbc/bbc-text.csv")
bbc_df["category"] = pd.Categorical(bbc_df['category']).codes
labels = bbc_df["category"].unique()

In [5]:
def clean_text(text):
    text=re.sub('<br \/>','',text) 
    pattern=r'[^a-zA-z0-9\s]'
    text=re.sub(pattern,'',text) 
    text = re.sub('\[[^]]*\]', '', text)
    return text

In [34]:
movies_df = pd.read_csv("../datasets/IMDB_Dataset.csv")
movies_df["review"] = movies_df["review"].apply(lambda x: clean_text(x))
movies_df = movies_df.rename(columns={"review": "text"})
movies_df

Unnamed: 0,text,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production The filming tech...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically theres a family where a little boy J...,negative
4,Petter Matteis Love in the Time of Money is a ...,positive
...,...,...
49995,I thought this movie did a down right good job...,positive
49996,Bad plot bad dialogue bad acting idiotic direc...,negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,Im going to have to disagree with the previous...,negative


In [35]:
sampled_imdb = movies_df.sample(frac=0.1, ignore_index= True)
sampled_imdb.to_csv("../datasets/movies/sampled.csv", index= False)

In [36]:
movies_df = pd.read_csv("../datasets/movies/sampled.csv")
labels = movies_df["sentiment"].unique()
movies_df["sentiment"] = pd.Categorical(movies_df['sentiment']).codes


In [37]:
movies_df

Unnamed: 0,text,sentiment
0,A female vampire kills young women and paints ...,0
1,Personally I think this show looks pretty chea...,0
2,I grew up watching Inspector Gadget It was and...,0
3,This movie is awful Im SORRY I bought this to ...,0
4,This is a great example of a good dumb movie N...,1
...,...,...
4995,After watching this on the MST3K episode I hav...,0
4996,Upon completing this infernal piece of trash a...,0
4997,Maybe Im biased because the F16 is my favorite...,1
4998,The Best Movie of the 90s The Welsh Trainspott...,0


In [60]:
twitter_df = pd.read_csv("../datasets/twitter_sampled.csv")
labels = twitter_df["label"].unique()
twitter_df["label"] = twitter_df["label"].apply(lambda x: 1 if x==4 else 0)
twitter_df

Unnamed: 0,label,text
0,1,why and she screaming ahaha this song is funny
1,0,the_trini_bajan work as usual
2,0,desi_f pack me in your luggage I wanna go
3,1,elm8 Thanks I enjoy talking to you too
4,1,watchin the season finale of The Office lets h...
...,...,...
4795,0,So sleepy this morning
4796,0,bakespace do you archive your newsletters some...
4797,1,santyadh hope that will soon change though bo...
4798,0,I think I should do my homework


In [46]:
class TextDataset(Dataset):
  def __init__(self, ids, texts, targets, tokenizer, max_len):
    self.ids = ids
    self.texts = texts
    self.targets = targets
    self.tokenizer = tokenizer
    self.max_len = max_len

  def __getitem__(self, idx):
    id = self.ids[idx]
    text = self.texts[idx]
    label = self.targets[idx]

    encoding = self.tokenizer.encode_plus(
      text,
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      padding='max_length',
      return_attention_mask=True,
      truncation=True,
      return_tensors='pt',
    )

    return {
      'id': torch.tensor(id, dtype=torch.long),
      'text': text,
      'input_ids': encoding['input_ids'].flatten(),
      'attention_mask': encoding['attention_mask'],
      'label': torch.tensor(label, dtype=torch.int)
    }

  def __len__(self):
    return len(self.texts)

# BERT Model training

In [47]:
bert_model = AutoModel.from_pretrained("bert-base-uncased")
bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [58]:
class BERTClassifier(nn.Module):
  def __init__(self, n_classes, bert_model, dropout=0.3):
    super(BERTClassifier, self).__init__()
    self.bert = bert_model
    self.drop = nn.Dropout(dropout)
    self.out = nn.Linear(bert_model.config.hidden_size, n_classes)

  def forward(self, input_ids, attention_mask):
    pooled_output = self.bert(
      input_ids=input_ids,
      attention_mask = attention_mask
    )
    
    output = self.drop(pooled_output[0][:, 0, :])
    return self.out(output)

  def save_pretrained(self, path):
    self.bert.save_pretrained(path)

In [49]:
def get_data_loaders(train_X,train_Y, val_X, val_Y, test_X, test_Y):
    train_dataset = TextDataset(
    texts=train_X.text.to_numpy(),
    targets=train_Y.to_numpy(),
    ids=train_X.index.to_numpy(),
    tokenizer=bert_tokenizer,
    max_len=MAX_LEN
    )

    validation_dataset = TextDataset(
    texts=val_X.text.to_numpy(),
    targets=val_Y.to_numpy(),
    ids=val_X.index.to_numpy(),
    tokenizer=bert_tokenizer,
    max_len=MAX_LEN
    )

    test_dataset = TextDataset(
    texts=test_X.text.to_numpy(),
    targets=test_Y.to_numpy(),
    ids=test_X.index.to_numpy(),
    tokenizer=bert_tokenizer,
    max_len=MAX_LEN
    )

    train_dataloader =  DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=0, shuffle=True)
    val_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, num_workers=0, shuffle=True)
    test_dataloader =  DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE, num_workers=0, shuffle=True)

    return train_dataloader, val_dataloader, test_dataloader

In [50]:

def eval_model(model, data_loader, loss_fn, device):
  model = model.eval()
  losses = []
  predictions = []
  all_predictions , true_labels, ids = [], [], []
  correct_predictions = 0

  with torch.no_grad():
    
    for d in data_loader:
      input_ids = d["input_ids"].to(device)
      labels = d["label"].to(device)
      attention_mask = d["attention_mask"].to(device)
      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
      )
      _, preds = torch.max(outputs, dim=1)
      loss = loss_fn(outputs, labels.long())
      losses.append(loss.item())
      correct_predictions += torch.sum(preds == labels)
      all_predictions.append(preds.cpu().data)
      true_labels.append(labels.cpu().data) 
      ids.append(d["id"].cpu().data)
  
  all_predictions = np.concatenate(all_predictions, axis=0)
  true_labels = np.concatenate(true_labels, axis=0)
  predictions = {"id":ids,"preds":all_predictions,"exact":true_labels}

  
  f1 = f1_score(true_labels, all_predictions, average="macro")
  precision = precision_score(true_labels, all_predictions, average="macro")
  recall = recall_score(true_labels, all_predictions, average="macro")
  accuracy = accuracy_score(true_labels,all_predictions)

  
  
  return accuracy,precision, recall, f1

In [61]:

def train(
  model,
  epochs,
  train_data_loader,
  val_data_loader,
  loss_fn,
  optimizer,
  device,
  scheduler,
  output_dir):

  model = model.train()
  results = []
  for e in range(epochs):
    all_predictions , true_labels = [], []
    correct_predictions = 0
    losses = []

    for d in tqdm(train_data_loader):
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)
      labels = d["label"].to(device)
      

      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
      )
      _, preds = torch.max(outputs, dim=1)
      loss = loss_fn(outputs, labels.long())

      all_predictions.append(preds.cpu().data)
      true_labels.append(labels.cpu().data) 

      correct_predictions += torch.sum(preds == labels)
      losses.append(loss.item())
      loss.backward()
        
      nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
      
      optimizer.step()
      scheduler.step()
      optimizer.zero_grad()

    all_predictions = np.concatenate(all_predictions, axis=0)
    true_labels = np.concatenate(true_labels, axis=0)
    train_f1 = f1_score(true_labels, all_predictions,average="macro")
    train_precision = precision_score(true_labels, all_predictions ,average="macro")
    train_recall = recall_score(true_labels, all_predictions, average="macro")
    train_accuracy = accuracy_score(true_labels,all_predictions)
    print(f"Epoch: {e + 1} Train Accuracy: {train_accuracy} Train Precision: {train_precision} Train Recall: {train_recall} Train F1: {train_f1}" )

    val_acc, val_precision, val_recall, val_f1 = eval_model(model, val_data_loader, loss_fn, device)
    print(f"Validation Accuracy: {val_acc} Validation Precision: {val_precision} Validation Recall: {val_recall} Validation F1: {val_f1}" )

    results.append({
        "epoch": e,
        "train_loss": losses,
        "train_f1": val_f1,
        "train_accuracy": val_precision,
        "train_recall": val_recall,
        "val_accuracy": val_acc,
        "val_f1": val_f1
        })
    
    with open(os.path.join(output_dir,"results.json"), "w") as f:
      json.dump(results, f)

  model.save_pretrained(output_dir)

  return val_acc, val_precision, val_recall, val_f1

# Model Training with BBC

In [46]:
precisions = []
recalls = []
f1s = []
accuracies = []
for i in range(5):
  train_X, val_X, train_Y, val_Y = train_test_split(bbc_df, bbc_df["category"], test_size = TRAIN_VAL_SIZE)
  val_X, test_X, val_Y, test_Y = train_test_split(val_X, val_Y, test_size = VAL_TEST_SIZE)
  train_dataloader, val_dataloader, test_dataloader = get_data_loaders(train_X,train_Y, val_X, val_Y, test_X, test_Y)
  model = BERTClassifier(len(labels), bert_model, 0.2) 
  model = model.to(device)

  optimizer = AdamW(model.parameters(), lr=2e-5)
  total_steps = len(train_dataloader) * EPOCH
  warmup_step = int(len(train_dataloader)/10) 

  scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_step,
    num_training_steps=total_steps
  )

  loss_fn = nn.CrossEntropyLoss().to(device)

  ts = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
  output_dir = os.path.join("../model_outputs", ts)
  os.mkdir(output_dir)
  start_time = datetime.now()
  acuuracy, precision, recall, f1 = train(model=model, epochs=EPOCH, train_data_loader=train_dataloader, val_data_loader=val_dataloader, 
      loss_fn=loss_fn, optimizer=optimizer, device=device, scheduler=scheduler, output_dir=output_dir)
  end_time = datetime.now()
  training_time = (end_time - start_time).total_seconds()
  print('Training time: {:.2f}s'.format(training_time))
  precisions.append(precision)
  recalls.append(recall)
  f1s.append(f1)
  accuracies.append(acuuracy)

  del model


100%|██████████| 56/56 [03:59<00:00,  4.28s/it]


Epoch: 1 Train Accuracy: 0.9123595505617977 Train Precision: 0.9115121535571087 Train Recall: 0.9098170516937341 Train F1: 0.9092758628771238
Validation Accuracy: 0.990990990990991 Validation Precision: 0.9927272727272728 Validation Recall: 0.9908902691511386 Validation F1: 0.9916888555442771


100%|██████████| 56/56 [02:59<00:00,  3.20s/it]


Epoch: 2 Train Accuracy: 0.9932584269662922 Train Precision: 0.9933964384005618 Train Recall: 0.9929822341309359 Train F1: 0.9931822550559796
Validation Accuracy: 0.990990990990991 Validation Precision: 0.9927272727272728 Validation Recall: 0.9908902691511386 Validation F1: 0.9916888555442771


100%|██████████| 56/56 [02:57<00:00,  3.18s/it]


Epoch: 3 Train Accuracy: 0.998314606741573 Train Precision: 0.9981218850424307 Train Recall: 0.9983792289535799 Train F1: 0.9982463511713282
Validation Accuracy: 0.990990990990991 Validation Precision: 0.9927272727272728 Validation Recall: 0.9908902691511386 Validation F1: 0.9916888555442771




Training time: 621.72s


100%|██████████| 56/56 [04:06<00:00,  4.41s/it]


Epoch: 1 Train Accuracy: 0.9179775280898876 Train Precision: 0.9241977329589266 Train Recall: 0.9197134229426929 Train F1: 0.9213108706798018
Validation Accuracy: 0.990990990990991 Validation Precision: 0.992080745341615 Validation Recall: 0.992080745341615 Validation F1: 0.992080745341615


100%|██████████| 56/56 [02:58<00:00,  3.19s/it]


Epoch: 2 Train Accuracy: 0.9971910112359551 Train Precision: 0.9971450576039078 Train Recall: 0.9969811576722624 Train F1: 0.9970622830804325
Validation Accuracy: 0.9954954954954955 Validation Precision: 0.9957446808510639 Validation Recall: 0.9964285714285716 Validation F1: 0.9960476605637896


100%|██████████| 56/56 [02:57<00:00,  3.16s/it]


Epoch: 3 Train Accuracy: 0.998314606741573 Train Precision: 0.9981776288227902 Train Recall: 0.9983850622148163 Train F1: 0.9982773861090708
Validation Accuracy: 0.9954954954954955 Validation Precision: 0.9957446808510639 Validation Recall: 0.9964285714285716 Validation F1: 0.9960476605637896




Training time: 627.64s


100%|██████████| 56/56 [03:56<00:00,  4.22s/it]


Epoch: 1 Train Accuracy: 0.9174157303370787 Train Precision: 0.9155562190259767 Train Recall: 0.9184504679172549 Train F1: 0.9166165477132685
Validation Accuracy: 1.0 Validation Precision: 1.0 Validation Recall: 1.0 Validation F1: 1.0


100%|██████████| 56/56 [02:59<00:00,  3.20s/it]


Epoch: 2 Train Accuracy: 0.9977528089887641 Train Precision: 0.9977077797531431 Train Recall: 0.9979593044765076 Train F1: 0.9978306580306017
Validation Accuracy: 0.9954954954954955 Validation Precision: 0.9953488372093023 Validation Recall: 0.9959183673469388 Validation F1: 0.9955852031534264


100%|██████████| 56/56 [03:04<00:00,  3.30s/it]


Epoch: 3 Train Accuracy: 0.998876404494382 Train Precision: 0.9987450903377848 Train Recall: 0.9989004809470957 Train F1: 0.9988210251559921
Validation Accuracy: 1.0 Validation Precision: 1.0 Validation Recall: 1.0 Validation F1: 1.0




Training time: 625.78s


100%|██████████| 56/56 [03:55<00:00,  4.21s/it]


Epoch: 1 Train Accuracy: 0.9219101123595506 Train Precision: 0.9229862496851121 Train Recall: 0.9209235032645402 Train F1: 0.9218024708036847
Validation Accuracy: 1.0 Validation Precision: 1.0 Validation Recall: 1.0 Validation F1: 1.0


100%|██████████| 56/56 [03:07<00:00,  3.34s/it]


Epoch: 2 Train Accuracy: 0.998314606741573 Train Precision: 0.9984015188633023 Train Recall: 0.998244230164347 Train F1: 0.998320921779986
Validation Accuracy: 1.0 Validation Precision: 1.0 Validation Recall: 1.0 Validation F1: 1.0


100%|██████████| 56/56 [03:02<00:00,  3.26s/it]


Epoch: 3 Train Accuracy: 1.0 Train Precision: 1.0 Train Recall: 1.0 Train F1: 1.0
Validation Accuracy: 1.0 Validation Precision: 1.0 Validation Recall: 1.0 Validation F1: 1.0




Training time: 629.77s


100%|██████████| 56/56 [03:37<00:00,  3.89s/it]


Epoch: 1 Train Accuracy: 0.9393258426966292 Train Precision: 0.9386318510746371 Train Recall: 0.9405485565114781 Train F1: 0.9391583724860861
Validation Accuracy: 1.0 Validation Precision: 1.0 Validation Recall: 1.0 Validation F1: 1.0


100%|██████████| 56/56 [02:54<00:00,  3.12s/it]


Epoch: 2 Train Accuracy: 0.999438202247191 Train Precision: 0.9993975903614458 Train Recall: 0.9993527508090615 Train F1: 0.9993741917662214
Validation Accuracy: 1.0 Validation Precision: 1.0 Validation Recall: 1.0 Validation F1: 1.0


100%|██████████| 56/56 [02:58<00:00,  3.18s/it]


Epoch: 3 Train Accuracy: 1.0 Train Precision: 1.0 Train Recall: 1.0 Train F1: 1.0
Validation Accuracy: 1.0 Validation Precision: 1.0 Validation Recall: 1.0 Validation F1: 1.0
Training time: 595.62s


In [48]:
print("Precision values:", precisions)
print("Precision avg: %0.4f (+/- %0.4f)" % (statistics.mean(precisions), statistics.stdev(precisions) * 2))
print("Recall values:", recalls)
print("Recall avg: %0.4f (+/- %0.4f)" % (statistics.mean(recalls), statistics.stdev(recalls) * 2))
print("F1 values:", f1s)
print("F1 avg: %0.4f (+/- %0.4f)" % (statistics.mean(f1s), statistics.stdev(f1s) * 2))


Precision values: [0.9927272727272728, 0.9957446808510639, 1.0, 1.0, 1.0]
Precision avg: 0.9977 (+/- 0.0067)
Recall values: [0.9908902691511386, 0.9964285714285716, 1.0, 1.0, 1.0]
Recall avg: 0.9975 (+/- 0.0080)
F1 values: [0.9916888555442771, 0.9960476605637896, 1.0, 1.0, 1.0]
F1 avg: 0.9975 (+/- 0.0074)


# Model Training with Movies

In [40]:
precisions = []
recalls = []
f1s = []
accuracies = []
for i in range(5):
  train_X, val_X, train_Y, val_Y = train_test_split(movies_df, movies_df["sentiment"], test_size = TRAIN_VAL_SIZE)
  val_X, test_X, val_Y, test_Y = train_test_split(val_X, val_Y, test_size = VAL_TEST_SIZE)
  train_dataloader, val_dataloader, test_dataloader = get_data_loaders(train_X,train_Y, val_X, val_Y, test_X, test_Y)
  model = BERTClassifier(len(labels), bert_model, 0.2) 
  model = model.to(device)

  optimizer = AdamW(model.parameters(), lr=2e-5)
  total_steps = len(train_dataloader) * EPOCH
  warmup_step = int(len(train_dataloader)/10) 

  scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_step,
    num_training_steps=total_steps
  )

  loss_fn = nn.CrossEntropyLoss().to(device)

  ts = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
  output_dir = os.path.join("../model_outputs", ts)
  os.mkdir(output_dir)
  start_time = datetime.now()
  acuuracy, precision, recall, f1 = train(model=model, epochs=EPOCH, train_data_loader=train_dataloader, val_data_loader=val_dataloader, 
      loss_fn=loss_fn, optimizer=optimizer, device=device, scheduler=scheduler, output_dir=output_dir)
  end_time = datetime.now()
  training_time = (end_time - start_time).total_seconds()
  print('Training time: {:.2f}s'.format(training_time))
  precisions.append(precision)
  recalls.append(recall)
  f1s.append(f1)
  accuracies.append(acuuracy)

  del model


100%|██████████| 125/125 [17:19<00:00,  8.31s/it]


Epoch: 1 Train Accuracy: 0.84775 Train Precision: 0.8478362887323115 Train Recall: 0.8476622662266227 Train F1: 0.8477046826246235
Validation Accuracy: 0.866 Validation Precision: 0.8657540172062994 Validation Recall: 0.8658653846153846 Validation F1: 0.8658062241877271




Training time: 1079.34s


100%|██████████| 125/125 [16:47<00:00,  8.06s/it]


Epoch: 1 Train Accuracy: 0.86675 Train Precision: 0.8668663099945957 Train Recall: 0.8667411167411168 Train F1: 0.8667373317175964
Validation Accuracy: 0.882 Validation Precision: 0.8819658448895737 Validation Recall: 0.8815065967713493 Validation F1: 0.8817042606516291




Training time: 1046.45s


100%|██████████| 125/125 [16:52<00:00,  8.10s/it]


Epoch: 1 Train Accuracy: 0.8815 Train Precision: 0.8815024703756176 Train Recall: 0.8815368086969849 Train F1: 0.8814976003264067
Validation Accuracy: 0.934 Validation Precision: 0.9345510259742343 Validation Recall: 0.933625768442623 Validation F1: 0.9339045581820148




Training time: 1059.33s


100%|██████████| 125/125 [15:04<00:00,  7.23s/it]


Epoch: 1 Train Accuracy: 0.91275 Train Precision: 0.9128655971222697 Train Recall: 0.9126949720536306 Train F1: 0.9127322791984447
Validation Accuracy: 0.96 Validation Precision: 0.9594247560349256 Validation Recall: 0.9603705609881626 Validation F1: 0.9598354861512757




Training time: 925.19s


100%|██████████| 125/125 [10:10<00:00,  4.88s/it]


Epoch: 1 Train Accuracy: 0.93 Train Precision: 0.9311090920541913 Train Recall: 0.9296343446513025 Train F1: 0.9298987738294098
Validation Accuracy: 0.952 Validation Precision: 0.9516967092180384 Validation Recall: 0.9520737031330252 Validation F1: 0.9518698560908697
Training time: 631.43s


In [41]:
print("Precision values:", precisions)
print("Precision avg: %0.4f (+/- %0.4f)" % (statistics.mean(precisions), statistics.stdev(precisions) * 2))
print("Recall values:", recalls)
print("Recall avg: %0.4f (+/- %0.4f)" % (statistics.mean(recalls), statistics.stdev(recalls) * 2))
print("F1 values:", f1s)
print("F1 avg: %0.4f (+/- %0.4f)" % (statistics.mean(f1s), statistics.stdev(f1s) * 2))


Precision values: [0.8657540172062994, 0.8819658448895737, 0.9345510259742343, 0.9594247560349256, 0.9516967092180384]
Precision avg: 0.9187 (+/- 0.0846)
Recall values: [0.8658653846153846, 0.8815065967713493, 0.933625768442623, 0.9603705609881626, 0.9520737031330252]
Recall avg: 0.9187 (+/- 0.0851)
F1 values: [0.8658062241877271, 0.8817042606516291, 0.9339045581820148, 0.9598354861512757, 0.9518698560908697]
F1 avg: 0.9186 (+/- 0.0848)


# Model Training with Twitter data

In [62]:
precisions = []
recalls = []
f1s = []
accuracies = []
for i in range(5):
  train_X, val_X, train_Y, val_Y = train_test_split(twitter_df, twitter_df["label"], test_size = TRAIN_VAL_SIZE)
  val_X, test_X, val_Y, test_Y = train_test_split(val_X, val_Y, test_size = VAL_TEST_SIZE)
  train_dataloader, val_dataloader, test_dataloader = get_data_loaders(train_X,train_Y, val_X, val_Y, test_X, test_Y)
  model = BERTClassifier(len(labels), bert_model, 0.2) 
  model = model.to(device)

  optimizer = AdamW(model.parameters(), lr=2e-5)
  total_steps = len(train_dataloader) * EPOCH
  warmup_step = int(len(train_dataloader)/10) 

  scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_step,
    num_training_steps=total_steps
  )

  loss_fn = nn.CrossEntropyLoss().to(device)

  ts = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
  output_dir = os.path.join("../model_outputs", ts)
  os.mkdir(output_dir)
  start_time = datetime.now()
  acuuracy, precision, recall, f1 = train(model=model, epochs=EPOCH, train_data_loader=train_dataloader, val_data_loader=val_dataloader, 
      loss_fn=loss_fn, optimizer=optimizer, device=device, scheduler=scheduler, output_dir=output_dir)
  end_time = datetime.now()
  training_time = (end_time - start_time).total_seconds()
  print('Training time: {:.2f}s'.format(training_time))
  precisions.append(precision)
  recalls.append(recall)
  f1s.append(f1)
  accuracies.append(acuuracy)

  del model


100%|██████████| 120/120 [10:09<00:00,  5.08s/it]


Epoch: 1 Train Accuracy: 0.6755208333333333 Train Precision: 0.6755344546934346 Train Recall: 0.6755155984327239 Train F1: 0.6755101824902564
Validation Accuracy: 0.775 Validation Precision: 0.7782281311693076 Validation Recall: 0.7768737286809576 Validation F1: 0.7749023013460704




Training time: 629.95s


100%|██████████| 120/120 [10:12<00:00,  5.11s/it]


Epoch: 1 Train Accuracy: 0.775 Train Precision: 0.775012200411735 Train Recall: 0.7750145877228748 Train F1: 0.7749999389648272
Validation Accuracy: 0.8125 Validation Precision: 0.8131123618928497 Validation Recall: 0.8132647385984426 Validation F1: 0.8124967447351517




Training time: 632.67s


100%|██████████| 120/120 [10:14<00:00,  5.12s/it]


Epoch: 1 Train Accuracy: 0.7919270833333333 Train Precision: 0.7921214543937709 Train Recall: 0.7918146859549533 Train F1: 0.7918389802433063
Validation Accuracy: 0.8229166666666666 Validation Precision: 0.8229166666666667 Validation Recall: 0.8235964439186485 Validation F1: 0.822823618306489




Training time: 635.88s


100%|██████████| 120/120 [10:15<00:00,  5.13s/it]


Epoch: 1 Train Accuracy: 0.81953125 Train Precision: 0.8195568946489438 Train Recall: 0.8195288952715102 Train F1: 0.8195268316776689
Validation Accuracy: 0.8791666666666667 Validation Precision: 0.8791666666666667 Validation Recall: 0.8791666666666667 Validation F1: 0.8791666666666667




Training time: 635.22s


100%|██████████| 120/120 [10:02<00:00,  5.02s/it]


Epoch: 1 Train Accuracy: 0.846875 Train Precision: 0.8469124334952705 Train Recall: 0.8469033990722404 Train F1: 0.846874833848561
Validation Accuracy: 0.9020833333333333 Validation Precision: 0.9020833333333333 Validation Recall: 0.9022579244463742 Validation F1: 0.9020727075420509
Training time: 623.03s


In [63]:
print("Precision values:", precisions)
print("Precision avg: %0.4f (+/- %0.4f)" % (statistics.mean(precisions), statistics.stdev(precisions) * 2))
print("Recall values:", recalls)
print("Recall avg: %0.4f (+/- %0.4f)" % (statistics.mean(recalls), statistics.stdev(recalls) * 2))
print("F1 values:", f1s)
print("F1 avg: %0.4f (+/- %0.4f)" % (statistics.mean(f1s), statistics.stdev(f1s) * 2))


Precision values: [0.7782281311693076, 0.8131123618928497, 0.8229166666666667, 0.8791666666666667, 0.9020833333333333]
Precision avg: 0.8391 (+/- 0.1011)
Recall values: [0.7768737286809576, 0.8132647385984426, 0.8235964439186485, 0.8791666666666667, 0.9022579244463742]
Recall avg: 0.8390 (+/- 0.1019)
F1 values: [0.7749023013460704, 0.8124967447351517, 0.822823618306489, 0.8791666666666667, 0.9020727075420509]
F1 avg: 0.8383 (+/- 0.1033)
