In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import AutoModel, BertTokenizerFast, ElectraTokenizer, ElectraModel
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import random
from transformers import AdamW
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score
import time

In [2]:
# Seeting seed for PyTorch, Python i Numpy
torch.manual_seed(42)
random.seed(42)
np.random.seed(42)

In [3]:
def transform_sentiment(value):
    if value.startswith('+'):
        return 1
    elif value.startswith('-'):
        return 0
    else:
        return value

In [4]:
df = pd.read_csv("D:\Faks\Master\Web mining\Projekat\podaci_1.csv", encoding="cp1252")
df['label'] = df['label'].apply(transform_sentiment)

In [5]:
df_test = pd.read_csv("D:\Faks\Master\Web mining\Projekat\podaci_2.csv")
df_test = df_test[['1', 'comment']]
df_test = df_test.rename(columns = {'1' : 'label', 'comment' : 'text'})
df_test['label'] = df_test['label'].apply(transform_sentiment)

In [6]:
# split train dataset into train, validation and test sets
train_text, val_text, train_labels, val_labels = train_test_split(df['text'], df['label'], 
                                                                    random_state=2018, 
                                                                    test_size=0.3, 
                                                                    stratify=df['label'])

# split test dataset into test_text and test_labels
test_text = df_test['text']
test_labels = df_test['label']                                                              

In [7]:
# import BERT-base pretrained model
bert = AutoModel.from_pretrained('bert-base-multilingual-cased')
# Load the BERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained('bert-base-multilingual-cased')

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [8]:
# freeze all the parameters
for param in bert.parameters():
    param.requires_grad = False

In [8]:
# freeze the parameters of first 8 layers
for name, param in bert.named_parameters():
    if 'encoder.layer' in name:
        layer_index = int(name.split('.')[2])
        if layer_index < 3:
            param.requires_grad = False

In [9]:
tokens_train = tokenizer.batch_encode_plus(
    train_text.tolist(),
    max_length = 50,
    padding = 'longest',
    truncation=True
)

# tokenize and encode sequences in the validation set
tokens_val = tokenizer.batch_encode_plus(
    val_text.tolist(),
    max_length = 50,
    padding = 'longest',
    truncation=True
)

# tokenize and encode sequences in the test set
tokens_test = tokenizer.batch_encode_plus(
    test_text.tolist(),
    max_length = 50,
    padding = 'longest',
    truncation=True
)

## convert lists to tensors
train_seq = torch.tensor(tokens_train['input_ids'])
train_mask = torch.tensor(tokens_train['attention_mask'])
train_y = torch.tensor(train_labels.to_list())

val_seq = torch.tensor(tokens_val['input_ids'])
val_mask = torch.tensor(tokens_val['attention_mask'])
val_y = torch.tensor(val_labels.tolist())

test_seq = torch.tensor(tokens_test['input_ids'])
test_mask = torch.tensor(tokens_test['attention_mask'])
test_y = torch.tensor(test_labels.tolist())

In [10]:
class BERT_Arch(nn.Module):

    def __init__(self, bert):
      
      super(BERT_Arch, self).__init__()

      self.bert = bert 
      
      # dropout layer
      self.dropout = nn.Dropout(0.1)
      
      # relu activation function
      self.relu =  nn.ReLU()

      # dense layer 1
      self.fc1 = nn.Linear(768,512)
      
      # dense layer 2 (Output layer)
      #self.fc2 = nn.Linear(512,3)
      self.fc2 = nn.Linear(512,2)

      #softmax activation function
      self.softmax = nn.LogSoftmax(dim=1)

    #define the forward pass
    def forward(self, sent_id, mask):

      #pass the inputs to the model  
      _, cls_hs = self.bert(sent_id, attention_mask=mask, return_dict=False)
      
      x = self.fc1(cls_hs)

      x = self.relu(x)

      x = self.dropout(x)

      # output layer
      x = self.fc2(x)
      
      # apply softmax activation
      x = self.softmax(x)

      return x

In [11]:
# function to train the model
def train(model, train_dataloader, device, cross_entropy, optimizer):
  
  model.train()

  total_loss, total_accuracy = 0, 0
  
  # empty list to save model predictions
  total_preds=[]
  
  # iterate over batches
  for step,batch in enumerate(train_dataloader):
    
    # progress update after every 50 batches.
    if step % 50 == 0 and not step == 0:
      print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_dataloader)))

    # push the batch to gpu
    batch = [r.to(device) for r in batch]
 
    sent_id, mask, labels = batch

    # clear previously calculated gradients 
    model.zero_grad()        

    # get model predictions for the current batch
    preds = model(sent_id, mask)

    # compute the loss between actual and predicted values
    loss = cross_entropy(preds, labels)

    # add on to the total loss
    total_loss = total_loss + loss.item()

    # backward pass to calculate the gradients
    loss.backward()

    # clip the the gradients to 1.0. It helps in preventing the exploding gradient problem
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

    # update parameters
    optimizer.step()

    # model predictions are stored on GPU. So, push it to CPU
    preds=preds.detach().cpu().numpy()

    # append the model predictions
    total_preds.append(preds)

  # compute the training loss of the epoch
  avg_loss = total_loss / len(train_dataloader)
  
  # predictions are in the form of (no. of batches, size of batch, no. of classes).
  # reshape the predictions in form of (number of samples, no. of classes)
  total_preds  = np.concatenate(total_preds, axis=0)

  #returns the loss and predictions
  return avg_loss, total_preds

In [20]:
# function for evaluating the model
def evaluate(model, val_dataloader, device, cross_entropy):
  
  print("\nEvaluating...")
  
  # deactivate dropout layers
  model.eval()

  total_loss, total_accuracy = 0, 0
  
  # empty list to save the model predictions
  total_preds = []

  # iterate over batches
  for step,batch in enumerate(val_dataloader):
    
    # Progress update every 50 batches.
    # if step % 50 == 0 and not step == 0:
      
      # Calculate elapsed time in minutes.
      # elapsed = format_time(time.time() - t0)
            
      # Report progress.
      # print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(val_dataloader)))

    # push the batch to gpu
    batch = [t.to(device) for t in batch]

    sent_id, mask, labels = batch

    # deactivate autograd
    with torch.no_grad():
      
      # model predictions
      preds = model(sent_id, mask)

      # compute the validation loss between actual and predicted values
      loss = cross_entropy(preds,labels)

      total_loss = total_loss + loss.item()

      preds = preds.detach().cpu().numpy()

      total_preds.append(preds)

  # compute the validation loss of the epoch
  avg_loss = total_loss / len(val_dataloader) 

  # reshape the predictions in form of (number of samples, no. of classes)
  total_preds  = np.concatenate(total_preds, axis=0)

  return avg_loss, total_preds

In [13]:
def train_and_evaluate(batch_size, device, learning_rate, epochs, optimizer_name, model_name):
    
    # prepare train and validation dataset
    train_data = TensorDataset(train_seq, train_mask, train_y)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
    
    val_data = TensorDataset(val_seq, val_mask, val_y)
    val_sampler = SequentialSampler(val_data)
    val_dataloader = DataLoader(val_data, sampler = val_sampler, batch_size=batch_size)
    
    # specify device
    device = torch.device(device)
    model = BERT_Arch(bert)
    model = model.to(device)
    
    # spcecify learning_rate and optimizer
    if optimizer_name == 'adamw':
        optimizer = AdamW(model.parameters(), lr = learning_rate)
    
    # copmute and set class_weights
    class_weights = compute_class_weight(class_weight = "balanced", classes = np.unique(train_labels), y = train_labels)
    weights= torch.tensor(class_weights,dtype=torch.float)
    weights = weights.to(device)
    # cross_entropy  = nn.NLLLoss(weight=weights)
    cross_entropy  = nn.NLLLoss()

    # set initial loss to infinite
    best_valid_loss = float('inf')
    # empty lists to store training and validation loss and accuracy of each epoch
    train_losses=[]
    valid_losses=[]
    train_accuracies = []  # lista za čuvanje tačnosti tokom treninga
    valid_accuracies = []  # lista za čuvanje tačnosti tokom evaluacije

    #for each epoch
    for epoch in range(epochs):

        print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))
        
        #train and evaluate model
        train_loss, _ = train(model, train_dataloader, device, cross_entropy, optimizer)
        valid_loss, _ = evaluate(model, val_dataloader, device, cross_entropy)

        #save the best model
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), model_name)

        # append training and validation loss and accuracy
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
    

        print(f'\nTraining Loss: {train_loss:.3f}')
        print(f'Validation Loss: {valid_loss:.3f}')

In [14]:
def test(path, batch_size, device):

    # pass the pre-trained BERT to our define architecture
    model_test = BERT_Arch(bert)
    #load weights of best model
    model_test.load_state_dict(torch.load(path))
    model_test.to(device)

    num_rows = test_seq.shape[0]
    preds = []

    with torch.no_grad():
        for i in range(0, num_rows, batch_size):
            start_idx = i
            end_idx = min(i + batch_size, num_rows)
            batch_seq = test_seq[start_idx:end_idx].to(device)
            batch_mask = test_mask[start_idx:end_idx].to(device)

            batch_preds = model_test(batch_seq.to(device), batch_mask.to(device))
            batch_preds = batch_preds.detach().cpu().numpy()
            preds.append(batch_preds)

    preds = np.concatenate(preds, axis=0)

    pred_labels = np.argmax(preds, axis=1)

    # Print classification report
    print(classification_report(test_y, pred_labels))

    # Compute confusion matrix
    confusion_mat = confusion_matrix(test_y, pred_labels)
    print("Confusion Matrix:")
    print(confusion_mat)

    # Compute ROC curve and AUC score
    fpr, tpr, thresholds = roc_curve(test_y, preds[:, 1])
    auc_score = roc_auc_score(test_y, preds[:, 1])
    print("AUC Score:", auc_score)

In [5]:
train_and_evaluate(32, "cpu", 2e-5, 4, "adamw", "bert_4_epochs.pt")

NameError: name 'TensorDataset' is not defined

In [17]:
test("bert_4_epochs.pt", 10, "cpu")

              precision    recall  f1-score   support

           0       0.68      0.79      0.73       191
           1       0.84      0.74      0.79       273

    accuracy                           0.76       464
   macro avg       0.76      0.77      0.76       464
weighted avg       0.77      0.76      0.76       464

Confusion Matrix:
[[151  40]
 [ 70 203]]
AUC Score: 0.8371018161594077


In [22]:
train_and_evaluate(32, "cuda", 2e-5, 4, "adamw", "bert_4_epochs_freezed.pt")




 Epoch 1 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.674
Validation Loss: 0.670

 Epoch 2 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.665
Validation Loss: 0.668

 Epoch 3 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.661
Validation Loss: 0.668

 Epoch 4 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.664
Validation Loss: 0.666


In [23]:
test("bert_4_epochs_freezed.pt", 10, "cuda")

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       191
           1       0.59      1.00      0.74       273

    accuracy                           0.59       464
   macro avg       0.29      0.50      0.37       464
weighted avg       0.35      0.59      0.44       464

Confusion Matrix:
[[  0 191]
 [  0 273]]
AUC Score: 0.5500834244289741


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [30]:
train_and_evaluate(32, "cpu", 2e-5, 2, "adamw", "bert_3_epochs.pt")


 Epoch 1 / 2
  Batch    50  of     77.

Evaluating...

Training Loss: 0.678
Validation Loss: 0.670

 Epoch 2 / 2
  Batch    50  of     77.

Evaluating...

Training Loss: 0.667
Validation Loss: 0.669


In [15]:
start_time = time.time()

train_and_evaluate(32, "cpu", 2e-5, 3, "adamw", "bert_3_epochs.pt")


end_time = time.time()
execution_time = (end_time - start_time) / 60 
print(f"Execution time: {execution_time:.2f} minutes")




 Epoch 1 / 3
  Batch    50  of     77.

Evaluating...

Training Loss: 0.641
Validation Loss: 0.607

 Epoch 2 / 3
  Batch    50  of     77.

Evaluating...

Training Loss: 0.534
Validation Loss: 0.544

 Epoch 3 / 3
  Batch    50  of     77.

Evaluating...

Training Loss: 0.396
Validation Loss: 0.537
Execution time: 52.49 minutes


In [16]:
test("bert_3_epochs.pt", 10, "cpu")

              precision    recall  f1-score   support

           0       0.68      0.79      0.73       191
           1       0.83      0.74      0.78       273

    accuracy                           0.76       464
   macro avg       0.76      0.77      0.76       464
weighted avg       0.77      0.76      0.76       464

Confusion Matrix:
[[151  40]
 [ 71 202]]
AUC Score: 0.8342826458009702


In [15]:
start_time = time.time()

train_and_evaluate(32, "cuda", 2e-5, 20, "adamw", "bert_20_epochs_freezed.pt")


end_time = time.time()
execution_time = (end_time - start_time) / 60 
print(f"Execution time: {execution_time:.2f} minutes")




 Epoch 1 / 20
  Batch    50  of     77.

Evaluating...

Training Loss: 0.671
Validation Loss: 0.669

 Epoch 2 / 20
  Batch    50  of     77.

Evaluating...

Training Loss: 0.665
Validation Loss: 0.669

 Epoch 3 / 20
  Batch    50  of     77.

Evaluating...

Training Loss: 0.665
Validation Loss: 0.667

 Epoch 4 / 20
  Batch    50  of     77.

Evaluating...

Training Loss: 0.663
Validation Loss: 0.663

 Epoch 5 / 20
  Batch    50  of     77.

Evaluating...

Training Loss: 0.661
Validation Loss: 0.664

 Epoch 6 / 20
  Batch    50  of     77.

Evaluating...

Training Loss: 0.661
Validation Loss: 0.659

 Epoch 7 / 20
  Batch    50  of     77.

Evaluating...

Training Loss: 0.657
Validation Loss: 0.659

 Epoch 8 / 20
  Batch    50  of     77.

Evaluating...

Training Loss: 0.656
Validation Loss: 0.656

 Epoch 9 / 20
  Batch    50  of     77.

Evaluating...

Training Loss: 0.656
Validation Loss: 0.655

 Epoch 10 / 20
  Batch    50  of     77.

Evaluating...

Training Loss: 0.654
Validation L

In [16]:
test("bert_20_epochs_freezed.pt", 10, "cpu")

              precision    recall  f1-score   support

           0       0.83      0.05      0.10       191
           1       0.60      0.99      0.75       273

    accuracy                           0.61       464
   macro avg       0.72      0.52      0.42       464
weighted avg       0.70      0.61      0.48       464

Confusion Matrix:
[[ 10 181]
 [  2 271]]
AUC Score: 0.6766008860249697


In [16]:
start_time = time.time()

train_and_evaluate(32, "cuda", 2e-5, 4, "adamw", "bert_4_epochs_10_freezed.pt")


end_time = time.time()
execution_time = (end_time - start_time) / 60 
print(f"Execution time: {execution_time:.2f} minutes")




 Epoch 1 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.660
Validation Loss: 0.649

 Epoch 2 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.622
Validation Loss: 0.621

 Epoch 3 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.585
Validation Loss: 0.616

 Epoch 4 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.548
Validation Loss: 0.609
Execution time: 2.89 minutes


In [17]:
test("bert_4_epochs_10_freezed.pt", 10, "cpu")

              precision    recall  f1-score   support

           0       0.62      0.55      0.58       191
           1       0.71      0.77      0.74       273

    accuracy                           0.68       464
   macro avg       0.67      0.66      0.66       464
weighted avg       0.67      0.68      0.68       464

Confusion Matrix:
[[105  86]
 [ 63 210]]
AUC Score: 0.7519705425464587


In [15]:
start_time = time.time()

train_and_evaluate(32, "cuda", 2e-5, 30, "adamw", "bert_30_epochs_10_freezed.pt")

end_time = time.time()
execution_time = (end_time - start_time) / 60 
print(f"Execution time: {execution_time:.2f} minutes")




 Epoch 1 / 30
  Batch    50  of     77.

Evaluating...

Training Loss: 0.660
Validation Loss: 0.649

 Epoch 2 / 30
  Batch    50  of     77.

Evaluating...

Training Loss: 0.622
Validation Loss: 0.621

 Epoch 3 / 30
  Batch    50  of     77.

Evaluating...

Training Loss: 0.585
Validation Loss: 0.616

 Epoch 4 / 30
  Batch    50  of     77.

Evaluating...

Training Loss: 0.548
Validation Loss: 0.609

 Epoch 5 / 30
  Batch    50  of     77.

Evaluating...

Training Loss: 0.521
Validation Loss: 0.610

 Epoch 6 / 30
  Batch    50  of     77.

Evaluating...

Training Loss: 0.492
Validation Loss: 0.607

 Epoch 7 / 30
  Batch    50  of     77.

Evaluating...

Training Loss: 0.439
Validation Loss: 0.699

 Epoch 8 / 30
  Batch    50  of     77.

Evaluating...

Training Loss: 0.424
Validation Loss: 0.734

 Epoch 9 / 30
  Batch    50  of     77.

Evaluating...

Training Loss: 0.379
Validation Loss: 0.683

 Epoch 10 / 30
  Batch    50  of     77.

Evaluating...

Training Loss: 0.354
Validation L

In [16]:
test("bert_30_epochs_10_freezed.pt", 10, "cpu")

              precision    recall  f1-score   support

           0       0.58      0.74      0.65       191
           1       0.78      0.63      0.69       273

    accuracy                           0.67       464
   macro avg       0.68      0.68      0.67       464
weighted avg       0.70      0.67      0.68       464

Confusion Matrix:
[[142  49]
 [102 171]]
AUC Score: 0.7736033599907945


In [15]:
start_time = time.time()

train_and_evaluate(32, "cuda", 2e-5, 6, "adamw", "bert_6_epochs_10_freezed.pt")

end_time = time.time()
execution_time = (end_time - start_time) / 60 
print(f"Execution time: {execution_time:.2f} minutes")




 Epoch 1 / 6
  Batch    50  of     77.

Evaluating...

Training Loss: 0.660
Validation Loss: 0.649

 Epoch 2 / 6
  Batch    50  of     77.

Evaluating...

Training Loss: 0.622
Validation Loss: 0.621

 Epoch 3 / 6
  Batch    50  of     77.

Evaluating...

Training Loss: 0.585
Validation Loss: 0.616

 Epoch 4 / 6
  Batch    50  of     77.

Evaluating...

Training Loss: 0.548
Validation Loss: 0.609

 Epoch 5 / 6
  Batch    50  of     77.

Evaluating...

Training Loss: 0.521
Validation Loss: 0.610

 Epoch 6 / 6
  Batch    50  of     77.

Evaluating...

Training Loss: 0.492
Validation Loss: 0.607
Execution time: 4.24 minutes


In [16]:
test("bert_6_epochs_10_freezed.pt", 10, "cpu")

              precision    recall  f1-score   support

           0       0.58      0.74      0.65       191
           1       0.78      0.63      0.70       273

    accuracy                           0.68       464
   macro avg       0.68      0.69      0.68       464
weighted avg       0.70      0.68      0.68       464

Confusion Matrix:
[[142  49]
 [101 172]]
AUC Score: 0.7736033599907945


In [15]:
start_time = time.time()

train_and_evaluate(32, "cpu", 2e-5, 4, "adamw", "bert_4_epochs_6_freezed.pt")

end_time = time.time()
execution_time = (end_time - start_time) / 60 
print(f"Execution time: {execution_time:.2f} minutes")




 Epoch 1 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.649
Validation Loss: 0.629

 Epoch 2 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.593
Validation Loss: 0.604

 Epoch 3 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.499
Validation Loss: 0.599

 Epoch 4 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.420
Validation Loss: 0.646
Execution time: 64.63 minutes


In [16]:
test("bert_4_epochs_6_freezed.pt", 10, "cpu")

              precision    recall  f1-score   support

           0       0.63      0.72      0.67       191
           1       0.78      0.71      0.74       273

    accuracy                           0.71       464
   macro avg       0.71      0.71      0.71       464
weighted avg       0.72      0.71      0.72       464

Confusion Matrix:
[[137  54]
 [ 79 194]]
AUC Score: 0.7807375870203096


In [15]:
start_time = time.time()

train_and_evaluate(32, "cpu", 2e-5, 4, "adamw", "bert_4_epochs_3_freezed.pt")

end_time = time.time()
execution_time = (end_time - start_time) / 60 
print(f"Execution time: {execution_time:.2f} minutes")




 Epoch 1 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.647
Validation Loss: 0.626

 Epoch 2 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.563
Validation Loss: 0.590

 Epoch 3 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.443
Validation Loss: 0.588

 Epoch 4 / 4
  Batch    50  of     77.

Evaluating...

Training Loss: 0.345
Validation Loss: 0.614
Execution time: 66.81 minutes


In [16]:
test("bert_4_epochs_3_freezed.pt", 10, "cpu")

              precision    recall  f1-score   support

           0       0.66      0.84      0.74       191
           1       0.86      0.69      0.77       273

    accuracy                           0.75       464
   macro avg       0.76      0.77      0.75       464
weighted avg       0.78      0.75      0.76       464

Confusion Matrix:
[[161  30]
 [ 84 189]]
AUC Score: 0.8307538883455114


In [21]:
batch_sizes = [8, 16]
for batch_size in batch_sizes:
    model_name = "bert_4_epochs_unfreezed_" + str(batch_size) +".pt"
    train_and_evaluate(batch_size, "cpu", 2e-5, 4, "adamw", model_name)


 Epoch 1 / 4
  Batch    50  of    306.
  Batch   100  of    306.
  Batch   150  of    306.
  Batch   200  of    306.
  Batch   250  of    306.
  Batch   300  of    306.

Evaluating...

Training Loss: 0.399
Validation Loss: 1.008

 Epoch 2 / 4
  Batch    50  of    306.
  Batch   100  of    306.
  Batch   150  of    306.
  Batch   200  of    306.
  Batch   250  of    306.
  Batch   300  of    306.

Evaluating...

Training Loss: 0.306
Validation Loss: 1.112

 Epoch 3 / 4
  Batch    50  of    306.
  Batch   100  of    306.
  Batch   150  of    306.
  Batch   200  of    306.
  Batch   250  of    306.
  Batch   300  of    306.

Evaluating...

Training Loss: 0.269
Validation Loss: 1.249

 Epoch 4 / 4
  Batch    50  of    306.
  Batch   100  of    306.
  Batch   150  of    306.
  Batch   200  of    306.
  Batch   250  of    306.
  Batch   300  of    306.

Evaluating...

Training Loss: 0.164
Validation Loss: 1.295

 Epoch 1 / 4
  Batch    50  of    153.
  Batch   100  of    153.
  Batch   150 

In [22]:
test("bert_4_epochs_unfreezed_8.pt", 10, "cpu")

              precision    recall  f1-score   support

           0       0.71      0.70      0.71       191
           1       0.79      0.80      0.80       273

    accuracy                           0.76       464
   macro avg       0.75      0.75      0.75       464
weighted avg       0.76      0.76      0.76       464

Confusion Matrix:
[[134  57]
 [ 54 219]]
AUC Score: 0.8347812745718505


In [23]:
test("bert_4_epochs_unfreezed_16.pt", 10, "cpu")

              precision    recall  f1-score   support

           0       0.68      0.77      0.72       191
           1       0.83      0.74      0.78       273

    accuracy                           0.76       464
   macro avg       0.75      0.76      0.75       464
weighted avg       0.76      0.76      0.76       464

Confusion Matrix:
[[148  43]
 [ 70 203]]
AUC Score: 0.7900197533705386
