## Travel Domain Question Classification
The following program classifies travel-domain questions using BERT.

In [None]:
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer 
from nltk import pos_tag
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import sqlite3
import pickle
%matplotlib inline
import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

print('----- Importing dataset -----')
# Open the CSV inside a context manager so the handle is closed as soon as
# pandas has finished parsing it (the original left the file open).
with open('5000TravelQuestionsDataset.csv', encoding="latin-1") as d_file:
    # The file has no header row; column names are assigned just below.
    df = pd.read_csv(d_file, header=None)
df.columns = ['text', 'class1', 'class2']

print ('Training Data : Imported Rows, Columns - ', df.shape)
print ('Data Preview :')
df.head()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
----- Importing dataset -----
Training Data : Imported Rows, Columns -  (5000, 3)
Data Preview :


Unnamed: 0,text,class1,class2
0,What are the special things we (husband and me...,TTD,TTDSIG
1,What are the companies which organize shark fe...,TTD,TTDOTH
2,Is it safe for female traveller to go alone to...,TGU,TGUHEA
3,What are the best places around Cape Town for ...,TTD,TTDSIG
4,What are the best places to stay for a family ...,ACM,ACMOTH


Preprocessing steps

In [None]:
# Text normalization steps
import string

# Function to remove stop words
def remove_stopwords(text):
    """Return *text* with every stop-word token removed.

    The text is tokenized with ``nltk.word_tokenize`` and the surviving
    tokens are re-joined with single spaces.

    The stop-word vocabulary is read once and cached on the function as a
    frozenset. The original called ``stopwords.words()`` for every token,
    re-reading the corpus and scanning a list each time. The call is still
    made without a language argument, so the vocabulary is unchanged.
    """
    vocabulary = getattr(remove_stopwords, "_vocabulary", None)
    if vocabulary is None:
        vocabulary = frozenset(stopwords.words())
        remove_stopwords._vocabulary = vocabulary

    kept_tokens = [
        token for token in nltk.word_tokenize(text) if token not in vocabulary
    ]
    return " ".join(kept_tokens)

# Build the normalised text step by step: lower-case, drop punctuation,
# trim surrounding whitespace, then remove stop words.
translator = str.maketrans('', '', string.punctuation)
normalized = df['text'].str.lower()
normalized = normalized.str.translate(translator)
normalized = normalized.str.strip()
df['processed_text'] = normalized
df['processed_text'] = df['processed_text'].apply(remove_stopwords)

# Trim surrounding whitespace from both label columns.
for label_column in ('class1', 'class2'):
    df[label_column] = df[label_column].str.strip()

df.head()


Unnamed: 0,text,class1,class2,processed_text
0,What are the special things we (husband and me...,TTD,TTDSIG,special things husband 5 day stay cape town
1,What are the companies which organize shark fe...,TTD,TTDOTH,companies organize shark feeding events scuba ...
2,Is it safe for female traveller to go alone to...,TGU,TGUHEA,safe female traveller go alone cape town
3,What are the best places around Cape Town for ...,TTD,TTDSIG,best places around cape town safari
4,What are the best places to stay for a family ...,ACM,ACMOTH,best places stay family stay away nightlife


In [None]:
# Distinct labels of each level, in order of first appearance in the frame.
possible_labels_1 = df['class1'].unique()
possible_labels_2 = df['class2'].unique()

# Coarse label -> integer id (position in possible_labels_1).
label_dict_1 = {
    label: position for position, label in enumerate(possible_labels_1)
}
label_dict_1

{'ACM': 2, 'ENT': 6, 'FOD': 5, 'TGU': 1, 'TRS': 3, 'TTD': 0, 'WTH': 4}

In [None]:
# Fine-grained label -> integer id (position in possible_labels_2).
label_dict_2 = {
    label: position for position, label in enumerate(possible_labels_2)
}
label_dict_2

{'ACMAPA': 9,
 'ACMBEA': 40,
 'ACMBUN': 15,
 'ACMCAR': 52,
 'ACMHOT': 28,
 'ACMOTH': 3,
 'ACMRES': 14,
 'ENTCLB': 34,
 'ENTFES': 49,
 'ENTMUS': 57,
 'ENTOTH': 62,
 'ENTSHW': 54,
 'ENTSPO': 58,
 'FODAUT': 22,
 'FODBAK': 30,
 'FODBAR': 47,
 'FODBRE': 32,
 'FODCAT': 46,
 'FODCOT': 41,
 'FODFCA': 61,
 'FODFMA': 53,
 'FODOTH': 20,
 'TGUAPT': 42,
 'TGUATT': 33,
 'TGUAVE': 35,
 'TGUBAN': 5,
 'TGUCIG': 37,
 'TGUHEA': 2,
 'TGUHOL': 48,
 'TGULAU': 44,
 'TGULUG': 29,
 'TGUNEI': 7,
 'TGUOTH': 13,
 'TGUPLN': 24,
 'TGURES': 51,
 'TGURUL': 50,
 'TGUTEL': 36,
 'TGUTOP': 19,
 'TGUVIS': 21,
 'TGUWEB': 59,
 'TRSAIR': 11,
 'TRSBUS': 27,
 'TRSCRS': 16,
 'TRSDRV': 60,
 'TRSGAS': 55,
 'TRSLIC': 43,
 'TRSOTH': 10,
 'TRSRNT': 18,
 'TRSROU': 12,
 'TRSTAX': 25,
 'TRSTCD': 39,
 'TRSTRN': 4,
 'TTDGYM': 45,
 'TTDOTH': 1,
 'TTDSHP': 38,
 'TTDSIG': 0,
 'TTDSPA': 31,
 'TTDSPO': 8,
 'TTDTRI': 23,
 'WTHDRC': 17,
 'WTHOTH': 26,
 'WTHSNW': 56,
 'WTHTMP': 6}

In [None]:
# Encode the string labels as integer ids.
# Series.map does a plain dictionary lookup per value and yields an integer
# column directly. Series.replace with a dict relies on implicit downcasting
# of the object column, which newer pandas versions deprecate.
# Both dictionaries were built from the unique values of these same columns,
# so every label has an entry.
df['class1_enc'] = df['class1'].map(label_dict_1)
df['class2_enc'] = df['class2'].map(label_dict_2)
df.head(10)

Unnamed: 0,text,class1,class2,processed_text,class1_enc,class2_enc
0,What are the special things we (husband and me...,TTD,TTDSIG,special things husband 5 day stay cape town,0,0
1,What are the companies which organize shark fe...,TTD,TTDOTH,companies organize shark feeding events scuba ...,0,1
2,Is it safe for female traveller to go alone to...,TGU,TGUHEA,safe female traveller go alone cape town,1,2
3,What are the best places around Cape Town for ...,TTD,TTDSIG,best places around cape town safari,0,0
4,What are the best places to stay for a family ...,ACM,ACMOTH,best places stay family stay away nightlife,2,3
5,What are the train services that travels from ...,TRS,TRSTRN,train services travels cape town oudtshoorn,3,4
6,What are the best places to spend about 2 week...,ACM,ACMOTH,best places spend 2 weeks relaxing honeymoon s...,2,3
7,Can we use travellers cheques and credit cards...,TGU,TGUBAN,use travellers cheques credit cards cape town,1,5
8,Is it warm enough to swim in early September i...,WTH,WTHTMP,warm enough swim early september cape town,4,6
9,What are the best beaches for shelling in Cape...,TTD,TTDSIG,best beaches shelling capetown,0,0


In [None]:

from sklearn.model_selection import train_test_split

# A single split is shared by both classification tasks.
#
# The original made two independent splits (stratified on class1, then on
# class2) and wrote both into the same `data_type` column. The second write
# replaced the first, so only the class2 split was ever used and
# X_train / y_train no longer matched the rows marked 'train'.
# Only the class2-stratified split is performed here, which leaves
# `data_type` exactly as before.
X_train_1, X_val_1, y_train_1, y_val_1 = train_test_split(
    df.index.values,
    df.class2_enc.values,
    test_size=0.15,
    random_state=42,
    stratify=df.class2_enc.values,
)

# Keep the class1 names, now referring to the same rows as the shared split.
X_train, X_val = X_train_1, X_val_1
y_train = df.loc[X_train, 'class1_enc'].values
y_val = df.loc[X_val, 'class1_enc'].values

df['data_type'] = 'not_set'
df.loc[X_train_1, 'data_type'] = 'train'
df.loc[X_val_1, 'data_type'] = 'val'

df.groupby(['class1', 'class1_enc', 'data_type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,text,class2,processed_text,class2_enc
class1,class1_enc,data_type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ACM,2,train,612,612,612,612
ACM,2,val,108,108,108,108
ENT,6,train,184,184,184,184
ENT,6,val,32,32,32,32
FOD,5,train,442,442,442,442
FOD,5,val,79,79,79,79
TGU,1,train,1038,1038,1038,1038
TGU,1,val,182,182,182,182
TRS,3,train,860,860,860,860
TRS,3,val,151,151,151,151


In [None]:
# Row counts per fine-grained label and split.
fine_split_keys = ['class2', 'class2_enc', 'data_type']
df.groupby(fine_split_keys).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,text,class1,processed_text,class1_enc
class2,class2_enc,data_type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ACMAPA,9,train,25,25,25,25
ACMAPA,9,val,4,4,4,4
ACMBEA,40,train,4,4,4,4
ACMBEA,40,val,1,1,1,1
ACMBUN,15,train,45,45,45,45
...,...,...,...,...,...,...
WTHOTH,26,val,20,20,20,20
WTHSNW,56,train,6,6,6,6
WTHSNW,56,val,1,1,1,1
WTHTMP,6,train,17,17,17,17


In [None]:
!pip install transformers

import torch
from tqdm.notebook import tqdm
from transformers import BertTokenizer
from torch.utils.data import TensorDataset

from transformers import BertForSequenceClassification



In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                          do_lower_case=True)

# Every question is padded / truncated to this many tokens.
MAX_SEQ_LENGTH = 256


def _encode_split(split_name):
    """Tokenize the raw `text` of all rows whose data_type == split_name.

    Uses `padding='max_length'` in place of the deprecated
    `pad_to_max_length=True`, and requests truncation explicitly. The run
    above only truncated through a fallback that emits a warning.
    """
    texts = df.loc[df.data_type == split_name, 'text'].tolist()
    return tokenizer.batch_encode_plus(
        texts,
        add_special_tokens=True,
        return_attention_mask=True,
        padding='max_length',
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
        return_tensors='pt',
    )


encoded_data_train = _encode_split('train')
encoded_data_val = _encode_split('val')

# Training tensors: the inputs are shared, with one label tensor per task.
input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(df[df.data_type=='train'].class1_enc.values)
labels_train_1 = torch.tensor(df[df.data_type=='train'].class2_enc.values)

# Validation tensors, same layout.
input_ids_val = encoded_data_val['input_ids']
attention_masks_val = encoded_data_val['attention_mask']
labels_val = torch.tensor(df[df.data_type=='val'].class1_enc.values)
labels_val_1 = torch.tensor(df[df.data_type=='val'].class2_enc.values)

# dataset_* carry class1 labels; dataset_*_1 carry class2 labels.
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)
dataset_train_1 = TensorDataset(input_ids_train, attention_masks_train, labels_train_1)
dataset_val_1 = TensorDataset(input_ids_val, attention_masks_val, labels_val_1)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


# Classification for Class 1

# Model

In [None]:

# Pre-trained BERT with a classification head, one output per coarse label.
num_coarse_labels = len(label_dict_1)
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=num_coarse_labels,
    output_attentions=False,
    output_hidden_states=False,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

batch_size = 3

# Training batches are drawn in random order.
train_sampler = RandomSampler(dataset_train)
dataloader_train = DataLoader(
    dataset_train,
    sampler=train_sampler,
    batch_size=batch_size,
)

# Validation batches are read in dataset order.
validation_sampler = SequentialSampler(dataset_val)
dataloader_validation = DataLoader(
    dataset_val,
    sampler=validation_sampler,
    batch_size=batch_size,
)

In [None]:
from transformers import AdamW, get_linear_schedule_with_warmup

epochs = 5

optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)

# Linear schedule with no warm-up, spanning every optimisation step
# (batches per epoch * epochs).
total_training_steps = len(dataloader_train) * epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_training_steps,
)

In [None]:
from sklearn.metrics import f1_score

def f1_score_func(preds, labels):
    """Return the weighted F1 score of the arg-max predictions.

    `preds` is reduced with argmax over axis 1 to get one predicted id per
    row; `labels` is flattened and used as the ground truth.
    """
    predicted_ids = np.argmax(preds, axis=1).flatten()
    return f1_score(labels.flatten(), predicted_ids, average='weighted')

def accuracy_per_class(preds, labels, dict):
    """Print, for each label id present in `labels`, correct/total counts.

    `preds` is reduced with argmax over axis 1. `dict` maps label name to
    label id and is inverted here to print names instead of ids.
    """
    id_to_name = {label_id: name for name, label_id in dict.items()}

    predicted = np.argmax(preds, axis=1).flatten()
    actual = labels.flatten()

    for class_id in np.unique(actual):
        in_class = actual == class_id
        correct = int(np.sum(predicted[in_class] == class_id))
        total = int(np.sum(in_class))
        print(f'Class: {id_to_name[class_id]}')
        print(f'Accuracy: {correct}/{total}\n')

        

In [None]:
import random
import torch
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Use the first GPU when one is available and fall back to the CPU otherwise.
# With a hard-coded "cuda:0", the later `.to(device)` calls fail on a host
# without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Seed every random number generator used in this notebook.
seed_val = 17
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

def evaluate(dataloader_val):
    """Run the global `model` over `dataloader_val` without gradients.

    Each batch is read as (input_ids, attention_mask, labels) and moved to
    the global `device`.

    Returns:
        (average loss per batch, stacked logits, stacked label ids), the
        last two as numpy arrays concatenated along axis 0.
    """
    model.eval()

    running_loss = 0
    logit_chunks = []
    label_chunks = []

    for batch in dataloader_val:
        batch = tuple(tensor.to(device) for tensor in batch)
        label_tensor = batch[2]

        with torch.no_grad():
            outputs = model(input_ids=batch[0],
                            attention_mask=batch[1],
                            labels=label_tensor)

        # With labels supplied, element 0 is read as the loss and element 1
        # as the logits.
        running_loss += outputs[0].item()
        logit_chunks.append(outputs[1].detach().cpu().numpy())
        label_chunks.append(label_tensor.cpu().numpy())

    loss_val_avg = running_loss / len(dataloader_val)
    predictions = np.concatenate(logit_chunks, axis=0)
    true_vals = np.concatenate(label_chunks, axis=0)

    return loss_val_avg, predictions, true_vals
    
# Fine-tuning loop for the class-1 model.
#
# Changes from the previous version:
# * It built a new AdamW on every batch. That discarded Adam's running
#   moments each step and left `scheduler`, which is bound to the optimizer
#   created earlier, adjusting an optimizer that never stepped. The earlier
#   `optimizer` is now used for every step.
# * The model is moved to the device once, not once per batch.
# * The progress bar shows the batch loss itself. The model already returns
#   one loss value per batch, and the old divisor `len(batch)` was the number
#   of tensors in the batch tuple, not the number of examples.
model.to(device)

for epoch in tqdm(range(1, epochs+1)):

    model.train()

    loss_train_total = 0

    progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
    for batch in progress_bar:

        model.zero_grad()

        batch = tuple(b.to(device) for b in batch)

        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }

        outputs = model(**inputs)

        loss = outputs[0]
        loss_train_total += loss.item()
        loss.backward()

        # Clip the gradient norm to 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item())})

    # One checkpoint per epoch.
    torch.save(model.state_dict(), f'/content/finetuned_BERT_epoch_{epoch}.model')

    tqdm.write(f'\nEpoch {epoch}')

    loss_train_avg = loss_train_total/len(dataloader_train)
    tqdm.write(f'Training loss: {loss_train_avg}')

    val_loss, predictions, true_vals = evaluate(dataloader_validation)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (Weighted): {val_f1}')

    


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='Epoch 1', max=1417.0, style=ProgressStyle(description_wid…




Epoch 1
Training loss: 0.9269771339444427
Validation loss: 0.6598270829487592
F1 Score (Weighted): 0.8566984623236935


HBox(children=(FloatProgress(value=0.0, description='Epoch 2', max=1417.0, style=ProgressStyle(description_wid…


Epoch 2
Training loss: 0.6871350321173625
Validation loss: 1.1365950887036051
F1 Score (Weighted): 0.8699634572187745


HBox(children=(FloatProgress(value=0.0, description='Epoch 3', max=1417.0, style=ProgressStyle(description_wid…


Epoch 3
Training loss: 0.581191987285328
Validation loss: 1.3034056997667103
F1 Score (Weighted): 0.8788538323997638


HBox(children=(FloatProgress(value=0.0, description='Epoch 4', max=1417.0, style=ProgressStyle(description_wid…


Epoch 4
Training loss: 0.4170581857905518
Validation loss: 1.3431915329833504
F1 Score (Weighted): 0.882996677378062


HBox(children=(FloatProgress(value=0.0, description='Epoch 5', max=1417.0, style=ProgressStyle(description_wid…


Epoch 5
Training loss: 0.21693877407075787
Validation loss: 1.402910351255022
F1 Score (Weighted): 0.8920816270486349



In [None]:
# Rebuild the class-1 architecture and load a saved checkpoint into it.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased",
                                                      num_labels=len(label_dict_1),
                                                      output_attentions=False,
                                                      output_hidden_states=False)

model.to(device)

# NOTE(review): this loads the epoch-1 checkpoint, but the training log shows
# the weighted F1 still rising through epoch 5 - confirm which one is wanted.
# map_location follows `device` so no particular GPU is hard-coded here.
model.load_state_dict(torch.load('/content/finetuned_BERT_epoch_1.model', map_location=device))

_, predictions, true_vals = evaluate(dataloader_validation)

# `predictions` holds one row of logits per example, so argmax over axis 1
# gives the predicted class id.
fine_pred = np.argmax(predictions, axis=1).tolist()
# `true_vals` already holds the integer class ids. Taking np.argmax of each
# scalar always returned 0, which made every ground-truth label class 0.
fine_gt = true_vals.tolist()

val_acc = accuracy_score(fine_gt, fine_pred)*100
# sklearn metrics take (y_true, y_pred) in that order.
f1 = f1_score(fine_gt, fine_pred, average='weighted')

print(classification_report(fine_gt,fine_pred))
print('Accuracy : %.3f' % val_acc)
print('F1 Score: %.3f' % f1)
cm = confusion_matrix(fine_gt, fine_pred)
print("Confusion Matrix: \n{}".format(cm))


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

              precision    recall  f1-score   support

           0       1.00      0.25      0.41       750
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0

    accuracy                           0.25       750
   macro avg       0.14      0.04      0.06       750
weighted avg       1.00      0.25      0.41       750

Accuracy : 25.467
F1 Score: 0.103
Confusion Matrix: 
[[191 157 118 150  24  87  23]
 [  0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))


Class 2 Classification

In [None]:
# Pre-trained BERT with a classification head, one output per fine label.
num_fine_labels = len(label_dict_2)
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=num_fine_labels,
    output_attentions=False,
    output_hidden_states=False,
)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

batch_size = 3

# Training batches (class-2 labels) are drawn in random order.
train_sampler = RandomSampler(dataset_train_1)
dataloader_train = DataLoader(
    dataset_train_1,
    sampler=train_sampler,
    batch_size=batch_size,
)

# Validation batches (class-2 labels) are read in dataset order.
validation_sampler = SequentialSampler(dataset_val_1)
dataloader_validation = DataLoader(
    dataset_val_1,
    sampler=validation_sampler,
    batch_size=batch_size,
)

In [None]:
from transformers import AdamW, get_linear_schedule_with_warmup

epochs = 5

optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)

# Linear schedule with no warm-up, spanning every optimisation step
# (batches per epoch * epochs).
total_training_steps = len(dataloader_train) * epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_training_steps,
)

In [None]:
import random
import torch
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report


# Use the first GPU when one is available and fall back to the CPU otherwise.
# With a hard-coded "cuda:0", the later `.to(device)` calls fail on a host
# without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Seed every random number generator used in this notebook.
seed_val = 17
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

def evaluate(dataloader_val):
    """Score the global `model` on `dataloader_val` with gradients disabled.

    Each batch is read as (input_ids, attention_mask, labels) and moved to
    the global `device`.

    Returns:
        (average loss per batch, stacked logits, stacked label ids), the
        last two as numpy arrays concatenated along axis 0.
    """
    model.eval()

    summed_loss = 0
    collected_logits = []
    collected_labels = []

    for batch in dataloader_val:
        batch = tuple(tensor.to(device) for tensor in batch)
        label_tensor = batch[2]

        with torch.no_grad():
            outputs = model(input_ids=batch[0],
                            attention_mask=batch[1],
                            labels=label_tensor)

        # With labels supplied, element 0 is read as the loss and element 1
        # as the logits.
        summed_loss += outputs[0].item()
        collected_logits.append(outputs[1].detach().cpu().numpy())
        collected_labels.append(label_tensor.cpu().numpy())

    loss_val_avg = summed_loss / len(dataloader_val)
    predictions = np.concatenate(collected_logits, axis=0)
    true_vals = np.concatenate(collected_labels, axis=0)

    return loss_val_avg, predictions, true_vals
    
# Fine-tuning loop for the class-2 model.
#
# Changes from the previous version:
# * It built a new AdamW on every batch. That discarded Adam's running
#   moments each step and left `scheduler`, which is bound to the optimizer
#   created earlier, adjusting an optimizer that never stepped. The earlier
#   `optimizer` is now used for every step.
# * The model is moved to the device once, not once per batch.
# * The progress bar shows the batch loss itself. The model already returns
#   one loss value per batch, and the old divisor `len(batch)` was the number
#   of tensors in the batch tuple, not the number of examples.
#
# NOTE(review): the checkpoint paths below are the same ones the class-1 loop
# writes, so running this cell overwrites the class-1 checkpoints.
model.to(device)

for epoch in tqdm(range(1, epochs+1)):

    model.train()

    loss_train_total = 0

    progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
    for batch in progress_bar:

        model.zero_grad()

        batch = tuple(b.to(device) for b in batch)

        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }

        outputs = model(**inputs)

        loss = outputs[0]
        loss_train_total += loss.item()
        loss.backward()

        # Clip the gradient norm to 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item())})

    # One checkpoint per epoch.
    torch.save(model.state_dict(), f'/content/finetuned_BERT_epoch_{epoch}.model')

    tqdm.write(f'\nEpoch {epoch}')

    loss_train_avg = loss_train_total/len(dataloader_train)
    tqdm.write(f'Training loss: {loss_train_avg}')

    val_loss, predictions, true_vals = evaluate(dataloader_validation)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (Weighted): {val_f1}')


In [None]:
# Rebuild the class-2 architecture and load a saved checkpoint into it.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased",
                                                      num_labels=len(label_dict_2),
                                                      output_attentions=False,
                                                      output_hidden_states=False)

model.to(device)

# NOTE(review): this loads the epoch-1 checkpoint - confirm that is the
# intended epoch rather than the last or best one.
# map_location follows `device` so no particular GPU is hard-coded here.
model.load_state_dict(torch.load('/content/finetuned_BERT_epoch_1.model', map_location=device))

_, predictions, true_vals = evaluate(dataloader_validation)

# `predictions` holds one row of logits per example, so argmax over axis 1
# gives the predicted class id.
fine_pred = np.argmax(predictions, axis=1).tolist()
# `true_vals` already holds the integer class ids. Taking np.argmax of each
# scalar always returns 0, which would make every ground-truth label class 0.
fine_gt = true_vals.tolist()

val_acc = accuracy_score(fine_gt, fine_pred)*100
# The previous call used `y_test` / `y_pred`, which are not defined anywhere
# in this notebook and raised NameError. sklearn takes (y_true, y_pred).
f1 = f1_score(fine_gt, fine_pred, average='weighted')

print(classification_report(fine_gt,fine_pred))
print('Accuracy : %.3f' % val_acc)
print('F1 Score: %.3f' % f1)
cm = confusion_matrix(fine_gt, fine_pred)
print("Confusion Matrix: \n{}".format(cm))