In [56]:
"""
Set the following values before running the rest of the notebook.

single_sentences_dataset_path : Path of the CSV file containing the single-sentence dataset (commonsense_single_sent.csv)
saved_model_output_directory : Directory in which the trained model and its tokenizer are saved
pretrained_model : Name of the pretrained checkpoint to fine-tune; one of {bert-large-uncased, albert-xxlarge-v2, roberta-large, xlnet-large-cased}
epochs : Number of epochs, i.e. full passes over the training set (not individual optimizer steps)

Note: albert-xxlarge-v2 takes around 1hr 40mins for one epoch, so choose `epochs` accordingly.

"""
single_sentences_dataset_path = "/content/drive/MyDrive/IIT_Bombay/CS 626/commonsense_single_sent.csv"
saved_model_output_directory = "/content/drive/MyDrive/IIT_Bombay/CS 626/Single_Models/BERT/"
pretrained_model = "bert-large-uncased"
epochs = 4

In [57]:
# Optional, Google Colab only: the commented-out lines below mount Google Drive at
# /content/drive, which is the prefix of the dataset and output paths configured earlier.
# NOTE(review): the bare string is this cell's only expression, so the notebook echoes
# it back as the cell output.
"""
Uncomment the code below if you want to load the files from drive
"""
# from google.colab import drive
# drive.mount('/content/drive')

'\nUncomment the code below if you want to load the files from drive\n'

In [58]:
#installing all the packages and importing the required libraries
!pip install transformers -q

import torch
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup
import time
import datetime
import random
import os
from sklearn.metrics import confusion_matrix, classification_report

In [59]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cpu":
    # Only the CPU fallback is announced; the GPU path stays silent.
    print('Using CPU')

In [60]:
# Load the sequence-classification model and its matching tokenizer.
# Change `pretrained_model` in the configuration cell to try out different models.
model_name = pretrained_model
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Do not force do_lower_case=True here: the list of supported checkpoints includes a
# cased one (xlnet-large-cased), and each checkpoint's own tokenizer configuration
# already carries the right lowercasing setting (uncased checkpoints lowercase by default).
tokenizer = AutoTokenizer.from_pretrained(model_name)

Downloading:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.25G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint a

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

In [61]:
# Load the single-sentence training data from the configured CSV path.
df = pd.read_csv(single_sentences_dataset_path)

# Pull the text column and the label column out as arrays.
sentences, labels = df["sentences"].values, df["label"].values

In [62]:
# Tokenize the sentences and bring every sequence to exactly MAX_LEN tokens.
MAX_LEN = 32

# A single tokenizer call takes care of special tokens, truncation, padding and the
# attention mask. This matters because the pad token id is model-specific (it is not 0
# for every supported checkpoint), so padding with a literal 0 and deriving the mask
# from `token > 0` is only valid for some models. Truncating inside the tokenizer also
# keeps the closing special token on sentences longer than MAX_LEN.
encoded = tokenizer(
    list(sentences),
    add_special_tokens=True,
    padding="max_length",
    truncation=True,
    max_length=MAX_LEN,
    return_attention_mask=True,
)

# Token ids as an integer array with one row of MAX_LEN ids per sentence.
input_ids = np.asarray(encoded["input_ids"], dtype=np.int64)

# Attention masks: 1 for real tokens, 0 for padding, one list per sentence.
attention_masks = encoded["attention_mask"]

In [63]:
# Split the data into train (90%) and validation (10%) sets.
# Sentences, token ids, attention masks and labels all go through one call, so they
# share the same shuffle and stay aligned row by row.
(
    train_sentences, validation_sentences,
    train_inputs, validation_inputs,
    train_masks, validation_masks,
    train_labels, validation_labels,
) = train_test_split(
    sentences, input_ids, attention_masks, labels,
    random_state=42, test_size=0.1,
)

In [64]:
# Turn each train/validation pair into torch tensors.
train_inputs, validation_inputs = torch.tensor(train_inputs), torch.tensor(validation_inputs)
train_labels, validation_labels = torch.tensor(train_labels), torch.tensor(validation_labels)
train_masks, validation_masks = torch.tensor(train_masks), torch.tensor(validation_masks)

In [65]:
# Number of examples per batch, shared by both loaders.
batch_size = 32

# Training loader: every item bundles (token ids, attention mask, label);
# examples are drawn in random order.
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    sampler=train_sampler,
)

# Validation loader: same item layout, examples visited in their stored order.
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(
    dataset=validation_data,
    batch_size=batch_size,
    sampler=validation_sampler,
)


In [66]:
# Move the model to the device selected earlier instead of calling model.cuda()
# unconditionally, so the CPU fallback keeps working on machines without a GPU.
# The training and validation batches are moved to this same `device`.
model.to(device)
optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)

In [67]:
# The scheduler is stepped once per training batch, so the schedule spans
# (batches per epoch) x (number of epochs) steps. No warmup steps are used.
total_steps = epochs * len(train_dataloader)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

In [68]:
# Accuracy of predicted class scores against the reference labels.
def flat_accuracy(preds, labels):
    """Return the fraction of examples whose highest-scoring class equals its label.

    preds  : 2-D array of per-class scores, one row per example.
    labels : numpy array of class ids; it is flattened before the comparison.
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    expected_classes = labels.flatten()
    n_correct = np.sum(predicted_classes == expected_classes)
    return n_correct / len(expected_classes)

In [69]:
def format_time(time_elapsed):
    """Format a duration in seconds as the string form of a datetime.timedelta.

    The duration is rounded to whole seconds first, e.g. 52.6 -> '0:00:53'.
    """
    whole_seconds = int(round(time_elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [None]:
# Train the model for `epochs` epochs, running a validation pass after each one.
seed_val = 42

# Seed every random number generator in use so that runs are repeatable.
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# Average training loss of each epoch.
loss_values = []


for epoch_i in range(epochs):

    # ---------------- Training ----------------

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Measure how long the training epoch takes.
    t0 = time.time()

    # Reset the total loss for this epoch.
    total_loss = 0

    model.train()

    # For each batch of training data...
    for step, batch in enumerate(train_dataloader):

        # Progress update every 40 batches (skipping the very first one).
        if step % 40 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        # Each batch holds (token ids, attention mask, labels); move them to the device.
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # Clear gradients left over from the previous step.
        model.zero_grad()

        # Labels are passed in, so the first element of the output is the loss.
        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask,
                        labels=b_labels)

        loss = outputs[0]
        total_loss += loss.item()
        loss.backward()

        # Clip the gradient norm to 1.0 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # The learning-rate schedule advances once per batch.
        scheduler.step()

    avg_train_loss = total_loss / len(train_dataloader)
    loss_values.append(avg_train_loss)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(format_time(time.time() - t0)))

    # ---------------- Validation ----------------

    print("")
    print("Running Validation...")

    t0 = time.time()
    model.eval()

    # eval_accuracy accumulates the number of correct predictions and
    # nb_eval_examples the number of examples seen. Dividing the two gives the
    # accuracy over the whole validation set; averaging per-batch accuracies
    # instead would over-weight a smaller final batch.
    eval_accuracy = 0
    nb_eval_examples = 0

    for batch in validation_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        # No gradients are needed for evaluation.
        with torch.no_grad():
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask)

        # No labels were passed, so the first element of the output is the logits.
        logits = outputs[0]
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # flat_accuracy is the fraction correct within this batch; weight it by
        # the batch size to recover the number of correct predictions.
        batch_examples = len(label_ids)
        eval_accuracy += flat_accuracy(logits, label_ids) * batch_examples
        nb_eval_examples += batch_examples

    print("  Accuracy: {0:.2f}".format(eval_accuracy / nb_eval_examples))
    print("  Validation took: {:}".format(format_time(time.time() - t0)))

print("")
print("Training complete!")


Training...
  Batch    40  of    789.    Elapsed: 0:00:53.
  Batch    80  of    789.    Elapsed: 0:01:45.
  Batch   120  of    789.    Elapsed: 0:02:38.
  Batch   160  of    789.    Elapsed: 0:03:30.
  Batch   200  of    789.    Elapsed: 0:04:22.
  Batch   240  of    789.    Elapsed: 0:05:15.
  Batch   280  of    789.    Elapsed: 0:06:07.
  Batch   320  of    789.    Elapsed: 0:06:59.
  Batch   360  of    789.    Elapsed: 0:07:52.
  Batch   400  of    789.    Elapsed: 0:08:44.
  Batch   440  of    789.    Elapsed: 0:09:36.
  Batch   480  of    789.    Elapsed: 0:10:29.
  Batch   520  of    789.    Elapsed: 0:11:21.
  Batch   560  of    789.    Elapsed: 0:12:14.
  Batch   600  of    789.    Elapsed: 0:13:06.
  Batch   640  of    789.    Elapsed: 0:13:58.
  Batch   680  of    789.    Elapsed: 0:14:51.
  Batch   720  of    789.    Elapsed: 0:15:43.
  Batch   760  of    789.    Elapsed: 0:16:36.

  Average training loss: 0.56
  Training epcoh took: 0:17:13

Running Validation...
  Accurac

In [None]:
# Create the output directory (and any missing parents) if needed; exist_ok=True makes
# this a no-op when the directory is already there, without a separate existence check.
os.makedirs(saved_model_output_directory, exist_ok=True)

# Save the model and its tokenizer side by side in the same directory.
model.save_pretrained(saved_model_output_directory)
tokenizer.save_pretrained(saved_model_output_directory)

In [None]:
print("Predicting...")
model.eval()

# Per-batch logits and reference labels, collected as numpy arrays.
batch_logits, batch_labels = [], []

for batch in validation_dataloader:
    # Each batch holds (token ids, attention mask, labels); move them to the device.
    b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)

    # Forward pass only; no gradients are needed for prediction.
    with torch.no_grad():
        outputs = model(b_input_ids, token_type_ids=None,
                        attention_mask=b_input_mask)

    # No labels were passed, so the first element of the output is the logits.
    batch_logits.append(outputs[0].detach().cpu().numpy())
    batch_labels.append(b_labels.to('cpu').numpy())

# Stack the batches and take the highest-scoring class of every example.
predictions = np.concatenate(batch_logits, axis=0).argmax(axis=1)
# Reference labels as one flat list, in the same order as the predictions.
true_labels = list(np.concatenate(batch_labels))
print("DONE")

In [None]:
# Confusion matrix over the two classes; the reference labels are passed first and the
# predicted labels second, with rows and columns both labelled by class id.
class_ids = [0, 1]
create_confusion_matrix = confusion_matrix(true_labels, list(predictions), labels=class_ids)
confusion_matrix_df = pd.DataFrame(
    create_confusion_matrix,
    index=class_ids,
    columns=class_ids,
)
display(confusion_matrix_df)

In [None]:
# Evaluation metrics for the validation predictions, taken from sklearn's
# classification_report and returned as a dictionary.
eval_metrics = classification_report(
    true_labels,
    list(predictions),
    target_names=[0,1],
    output_dict=True,
)
print("---------------------------------Evaluation Metrics------------------------------------")
# One row per report entry; the last column (support) is dropped before display.
eval_metrics_df = pd.DataFrame(eval_metrics).T.iloc[:, :-1]
display(eval_metrics_df)