# Sentiment fine-grained classifications-8 project

In [None]:
# Libraries we need to install - If it is already installed you can skip this cell
!pip install sentencepiece
!pip install transformers

In [2]:
# Libraries 
#pretrained model
import torch
from transformers import TrainingArguments, Trainer
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import AlbertTokenizer, AlbertForSequenceClassification
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from transformers import EarlyStoppingCallback

# base model
from torchtext.legacy.data import Field,LabelField,BucketIterator,TabularDataset
from torchtext import vocab
from tqdm import tqdm
import torch.nn.functional as F

#preprocessing and evaluation
import pandas as pd
import numpy as np
import os
import json
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

2021-10-10 22:40:11.388391: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-10-10 22:40:11.388450: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [43]:
# Read dataset
def read_dataset(file_path):
    """ Read the labelled dataset and add an integer-encoded label column
    Input:
        file_path - string the path of the Excel file; the sheet 'Sheet1' is
                    read and must contain a 'label' column
    Returns:
        train dataframe with normalised 'label' values and a new
        'label_encoded' column
    Side effect:
        the fitted label classes are saved to 'classes.npy' in the current
        working directory so encoded predictions can be mapped back to names
    """
    train_data = pd.read_excel(file_path, 'Sheet1')

    # Merge spelling variants of the same class:
    #   'should/must statement', 'Should/must statement' -> 'Should/Must statement'
    #   'personalizing'                                  -> 'Personalizing'
    # The column selector is required: without it .loc assigns the string to
    # EVERY column of the matching rows and wipes out the sentence text too.
    labels = train_data['label']
    should_must_variants = (labels == 'should/must statement') | (labels == 'Should/must statement')
    train_data.loc[should_must_variants, 'label'] = 'Should/Must statement'
    train_data.loc[train_data['label'] == 'personalizing', 'label'] = 'Personalizing'

    # Label encoding
    le = LabelEncoder()
    train_data["label_encoded"] = le.fit_transform(train_data["label"])
    np.save('classes.npy', le.classes_)
    return train_data

In [5]:
#Utils function 
def split_dataset_base(dataframe):
    """ Split dataset into train, val and test and save the three parts as CSV
    Input:
        dataframe - dataframe dataset, must contain a 'label_encoded' column
    Returns:
        train_df dataframe train dataframe
        val_df dataframe val dataframe
        test_df dataframe test dataframe

    Uses the notebook-level `seed` for reproducible splits and `dataset_path`
    as the folder the CSV files are written to.
    """
    # 20% is held out for test, stratified on the encoded label
    df, test_df = train_test_split(dataframe, random_state=seed, test_size=0.2,
                                   stratify=dataframe["label_encoded"])

    # the remaining 80% is split again 80/20 into train and validation
    train_df, val_df = train_test_split(df, random_state=seed, test_size=0.2,
                                        stratify=df["label_encoded"])

    # save_files expects the output folder as its first argument
    save_files(dataset_path, train_df, val_df, test_df)

    return train_df, val_df, test_df

def split_dataset_pretrained(dataframe):
    """ Split sentences and encoded labels into train, val and test lists
    Input:
        dataframe - dataframe dataset with 'sentences' and 'label_encoded' columns
    Returns:
        X_train list train sentences
        y_train list label of train dataset
        X_val list val sentences
        y_val list label of val dataset
        X_test list test sentences
        y_test list label of test dataset

    80% of the rows go to train; the held-out 20% is divided 60/40 into
    validation and test. Both splits are stratified on the label and use the
    notebook-level `seed`.
    """
    sentences = list(dataframe["sentences"].values)
    labels = list(dataframe["label_encoded"].values)

    X_train, temp_text, y_train, temp_labels = train_test_split(
        sentences, labels, random_state=seed, test_size=0.2, stratify=labels)

    X_val, X_test, y_val, y_test = train_test_split(
        temp_text, temp_labels, random_state=seed, test_size=0.4, stratify=temp_labels)

    return X_train, y_train, X_val, y_val, X_test, y_test

def save_files(out_path, train_df, val_df, test_df):
    """ Save splitted dataset into folder as train.csv, val.csv and test.csv
    Input:
        out_path string folder for saving the files (with or without a
                 trailing path separator); created if it does not exist
        train_df dataframe train dataframe
        val_df dataframe val dataframe
        test_df dataframe test dataframe
    """
    # An empty out_path means "current directory"; makedirs('') would raise.
    if out_path:
        os.makedirs(out_path, exist_ok=True)

    named_frames = (('train.csv', train_df), ('val.csv', val_df), ('test.csv', test_df))
    for file_name, frame in named_frames:
        # os.path.join keeps "data/" and "data" equivalent
        frame.to_csv(os.path.join(out_path, file_name), index=False)
   
def tokenize(s):
    """ Split text on single space characters
    Input:
        s string text to split
    Returns:
        list of strings, the pieces between spaces (consecutive spaces
        produce empty strings)
    """
    pieces = s.split(' ')
    return pieces

In [6]:
def compute_metrics(p):
    """Compute metrics for evaluation
    Input:
        p pair (raw per-class scores, gold labels); the predicted class is
          the argmax over axis 1 of the scores
    Returns:
        eval_scores dictionary with accuracy and micro-averaged precision,
        recall and f1
    """
    scores, gold = p
    predicted = np.argmax(scores, axis=1)

    # NOTE(review): micro-averaged precision/recall/F1 on single-label
    # multi-class data are expected to equal accuracy - confirm this is the
    # intended averaging.
    return {
        "accuracy": accuracy_score(y_true=gold, y_pred=predicted),
        "precision": precision_score(y_true=gold, y_pred=predicted, average='micro'),
        "recall": recall_score(y_true=gold, y_pred=predicted, average='micro'),
        "f1": f1_score(y_true=gold, y_pred=predicted, average='micro'),
    }

In [7]:
# Build the tokenizer and model for the chosen pretrained checkpoint
def bert_model(output_label):
    """ Build the "bert-base-uncased" tokenizer and classification model
    Input:
        output_label - int the number of classes in the dataset
    Returns:
        tokenizer
        model
    """
    checkpoint = "bert-base-uncased"
    return (
        BertTokenizer.from_pretrained(checkpoint),
        BertForSequenceClassification.from_pretrained(checkpoint, num_labels=output_label),
    )

def distilbert_model(output_label):
    """ Build the "distilbert-base-uncased" tokenizer and classification model
    Input:
        output_label - int the number of classes in the dataset
    Returns:
        tokenizer
        model
    """
    checkpoint = "distilbert-base-uncased"
    return (
        DistilBertTokenizer.from_pretrained(checkpoint),
        DistilBertForSequenceClassification.from_pretrained(checkpoint, num_labels=output_label),
    )


def alberta_model(output_label):
    """ Build the "albert-base-v2" tokenizer and classification model
    Input:
        output_label - int the number of classes in the dataset
    Returns:
        tokenizer
        model
    """
    checkpoint = "albert-base-v2"
    return (
        AlbertTokenizer.from_pretrained(checkpoint),
        AlbertForSequenceClassification.from_pretrained(checkpoint, num_labels=output_label),
    )

def gpt2_model(output_label):
    """ Define GPT2 pretrained tokenizer and model
    Input:
        output_label - int the number of classes in the dataset
    Returns:
        tokenizer (with the end-of-sequence token registered as pad token)
        model (with config.pad_token_id set to that token)
    """
    model_name = "gpt2"
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2ForSequenceClassification.from_pretrained(model_name, num_labels=output_label)

    # GPT-2 ships without a padding token. The notebook tokenizes with
    # padding=True and classifies batches, so both the tokenizer and the
    # model config need one; reusing EOS is the usual choice.
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id

    return tokenizer, model


In [45]:
# Load saved model based on your pretrained model
def test_bert_model(model_path, dataset):
    """ Run a saved BERT classifier (8 labels) over a dataset
    Input:
        model_path - path of saved pretrained model
        dataset - dataset handed to Trainer.predict
    Returns:
        raw_pred raw predictions for the dataset (first element of the
        Trainer.predict result)
    """
    classifier = BertForSequenceClassification.from_pretrained(model_path, num_labels=8)
    prediction_output = Trainer(classifier).predict(dataset)
    raw_pred = prediction_output[0]

    return raw_pred

def test_bert_model_one_sentence(model_path,sentence,classes):
    """ Classify a single sentence with a saved BERT model
    Input:
        model_path - path of saved pretrained model
        sentence - string (or list of strings) to classify
        classes - array of label names indexed by encoded label
    Returns:
        array with the label name predicted for each input sentence

    Reads the notebook-level `max_length` for truncation.
    """
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = BertForSequenceClassification.from_pretrained(model_path, num_labels=8)
    # No Trainer is built here: it was unused, and the inputs below stay on
    # the CPU, so the model must stay there as well.

    inputs = tokenizer(sentence, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
    with torch.no_grad():  # inference only, no autograd graph needed
        outputs = model(**inputs)
    probs = outputs[0].softmax(1)
    pred = np.argmax(probs.numpy(), axis=1)

    return classes[pred]
    

def test_distilbert_model(model_path, dataset):
    """ Run a saved DistilBERT classifier (8 labels) over a dataset
    Input:
        model_path - path of saved pretrained model
        dataset - dataset handed to Trainer.predict
    Returns:
        raw_pred raw predictions for the dataset (first element of the
        Trainer.predict result)
    """
    classifier = DistilBertForSequenceClassification.from_pretrained(model_path, num_labels=8)
    prediction_output = Trainer(classifier).predict(dataset)
    raw_pred = prediction_output[0]

    return raw_pred

def test_distilbert_model_one_sentence(model_path, sentence, classes):
    """ Classify a single sentence with a saved DistilBERT model
    Input:
        model_path - path of saved pretrained model
        sentence - string (or list of strings) to classify
        classes - array of label names indexed by encoded label
    Returns:
        array with the label name predicted for each input sentence

    Reads the notebook-level `max_length` for truncation.
    """
    # The tokenizer is loaded from the base checkpoint; the notebook-level
    # `model_name` holds a short key such as 'distilbert', not a checkpoint id.
    tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
    model = DistilBertForSequenceClassification.from_pretrained(model_path, num_labels=8)

    inputs = tokenizer(sentence, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
    with torch.no_grad():  # inference only
        outputs = model(**inputs)
    probs = outputs[0].softmax(1)
    pred = np.argmax(probs.numpy(), axis=1)

    return classes[pred]


# The definition was originally spelled with a capital "S" (and no parameter
# list); keep that spelling available next to the name the notebook calls.
test_distilbert_model_one_Sentence = test_distilbert_model_one_sentence


def test_alberta_model(model, dataset):
    """ Test with alberta pretrained model
    Input:
        model - path of saved pretrained model (the parameter keeps its
                original name for compatibility; it is a path, not a model)
        dataset - dataset handed to Trainer.predict
    Returns:
        raw_pred list predictions of test dataset
    """
    # Use the argument instead of the notebook-level `model_path`, which the
    # original body read by accident.
    model_path = model
    albert = AlbertForSequenceClassification.from_pretrained(model_path, num_labels=8)
    test_trainer = Trainer(albert)
    raw_pred, _, _ = test_trainer.predict(dataset)

    return raw_pred

def test_alberta_model_one_sentence(model_path, sentence, classes):
    """ Classify a single sentence with a saved ALBERT model
    Input:
        model_path - path of saved pretrained model
        sentence - string (or list of strings) to classify
        classes - array of label names indexed by encoded label
    Returns:
        array with the label name predicted for each input sentence

    Reads the notebook-level `max_length` for truncation.
    """
    # The tokenizer is loaded from the base checkpoint; the notebook-level
    # `model_name` holds a short key such as 'alberta', not a checkpoint id.
    tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")
    model = AlbertForSequenceClassification.from_pretrained(model_path, num_labels=8)

    inputs = tokenizer(sentence, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
    with torch.no_grad():  # inference only
        outputs = model(**inputs)
    probs = outputs[0].softmax(1)
    pred = np.argmax(probs.numpy(), axis=1)

    return classes[pred]


# The definition was originally spelled with a capital "S" (and no parameter
# list); keep that spelling available next to the name the notebook calls.
test_alberta_model_one_Sentence = test_alberta_model_one_sentence


def test_gpt2_model(model_path, dataset):
    """ Run a saved GPT2 classifier (8 labels) over a dataset
    Input:
        model_path - path of saved pretrained model
        dataset - dataset handed to Trainer.predict
    Returns:
        raw_pred raw predictions for the dataset (first element of the
        Trainer.predict result)
    """
    classifier = GPT2ForSequenceClassification.from_pretrained(model_path, num_labels=8)
    prediction_output = Trainer(classifier).predict(dataset)
    raw_pred = prediction_output[0]

    return raw_pred

def test_gpt2_model_one_sentence(model_path, sentence, classes):
    """ Classify a single sentence with a saved GPT2 model
    Input:
        model_path - path of saved pretrained model
        sentence - string (or list of strings) to classify
        classes - array of label names indexed by encoded label
    Returns:
        array with the label name predicted for each input sentence

    Reads the notebook-level `max_length` for truncation.
    """
    # The tokenizer is loaded from the base checkpoint; the notebook-level
    # `model_name` holds a short key, not necessarily a checkpoint id.
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2ForSequenceClassification.from_pretrained(model_path, num_labels=8)

    # GPT-2 has no padding token by default; padding=True needs one.
    tokenizer.pad_token = tokenizer.eos_token
    if model.config.pad_token_id is None:
        model.config.pad_token_id = tokenizer.pad_token_id

    inputs = tokenizer(sentence, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
    with torch.no_grad():  # inference only
        outputs = model(**inputs)
    probs = outputs[0].softmax(1)
    pred = np.argmax(probs.numpy(), axis=1)

    return classes[pred]


# The definition was originally spelled with a capital "S" (and no parameter
# list); keep that spelling available next to the name the notebook calls.
test_gpt2_model_one_Sentence = test_gpt2_model_one_sentence

In [9]:
#Function split dataset into train test and val and convert to torch.data.Dataset
def prepare_dataset_pretrained(tokenizer, dataset):
    """ Split the dataframe, tokenize each part and wrap it in a torch Dataset
    Input:
        tokenizer - pretrained tokenizer
        dataset - dataframe
    Returns:
        train_dataset - torch.utils.data.Dataset train Dataset
        val_dataset - torch.utils.data.Dataset val Dataset
        test_dataset - torch.utils.data.Dataset test Dataset
        y_test - list gold labels for the test data
    """
    X_train, y_train, X_val, y_val, X_test, y_test = split_dataset_pretrained(dataset)

    def encode(texts):
        # pad to the longest sentence of the split, truncate at 512 tokens
        return tokenizer(texts, padding=True, truncation=True, max_length=512)

    train_enc, val_enc, test_enc = [encode(texts) for texts in (X_train, X_val, X_test)]

    return (
        Dataset(train_enc, y_train),
        Dataset(val_enc, y_val),
        Dataset(test_enc, y_test),
        y_test,
    )

In [10]:
# Dataset wrapper for the pretrained models
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over tokenizer output, optionally with labels.

    encodings - mapping from field name (must include "input_ids") to a
                per-example sequence of values
    labels    - optional per-example labels (list or array); when given, each
                item also carries a "labels" tensor
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # Explicit None/empty check: plain truthiness raises for array-like
        # labels with more than one element.
        if self.labels is not None and len(self.labels) > 0:
            item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])

In [49]:
# Parameters

# Seed NumPy and PyTorch (CPU and CUDA) so splits and training runs are repeatable.
seed = 1234
np.random.seed(seed) 
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True  # ask cuDNN for deterministic algorithms
os.environ['PYTHONHASHSEED'] = str(seed)

batch_size = 32 # per-device batch size of the Trainer (the base-model iterators use their own fixed sizes)
lr = 1e-4 # learning rate of the Adam optimizer of the base models (it is not passed to TrainingArguments)
num_epochs = 500 # epochs for both the Trainer and the base-model training loop
model_name = 'lstm' # model name: 'bert', 'alberta', 'distilbert' or 'gpt2' for pretrained; 'lstm', 'rnn' or 'bilstm' for base models
output_path = "output-bert" # folder for Trainer output; the base-model checkpoint is saved to output_path + '/model.pt'
model_path = "lstm" # where saved models are loaded from (base model: model_path + '/model.pt')
embedding_path = "embeddings/glove.6B.50d.txt" # GloVe vectors file used by the base models
max_length = 512 # truncation length used by the one-sentence test helpers
dataset_base = True # True: read the existing train/val/test CSV files from dataset_path; False: split the dataframe and save them first
dataset_path = "data/" # folder holding train.csv, val.csv and test.csv for the base models (also passed as cache folder to vocab.Vectors)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # use 'cuda' if available else 'cpu'

In [44]:
# Load the labelled data; read_dataset normalises the label names and adds 'label_encoded'.
# The train/val/test split itself happens later, inside the prepare_dataset_* helpers.
dataset = read_dataset('L2400.xlsx')

num_output = len(set(dataset["label_encoded"])) # number of distinct classes in the dataset

In [17]:
# select model for pretrained models
pretrained_selected = True
if model_name == 'bert':
    tokenizer, model = bert_model(num_output)
elif model_name == 'alberta':
    tokenizer, model = alberta_model(num_output)
elif model_name == 'distilbert':
    tokenizer, model = distilbert_model(num_output)
elif model_name == 'gpt2':
    tokenizer, model = gpt2_model(num_output)
else:
    pretrained_selected = False
    print('model is not defined')

# Only tokenize when a pretrained model was actually selected; otherwise
# `tokenizer` is undefined (or left over from an earlier run) and the call
# below would fail or silently use the wrong tokenizer.
if pretrained_selected:
    train_dataset, val_dataset, test_dataset, y_test = prepare_dataset_pretrained(tokenizer, dataset)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
# Train pretrained model
# Output goes to output_path; the validation set is evaluated every 500 steps and
# load_best_model_at_end is enabled. The notebook-level `lr` is not passed here,
# so the Trainer uses its own default learning rate.
args = TrainingArguments(
output_dir = output_path,
evaluation_strategy = 'steps',
eval_steps = 500,
per_device_train_batch_size = batch_size,
per_device_eval_batch_size = batch_size,
num_train_epochs = num_epochs,
seed = seed,
load_best_model_at_end = True,)   

# compute_metrics reports accuracy/precision/recall/f1 at every evaluation;
# early stopping is configured with a patience of 3.
trainer = Trainer(
model=model,
args=args,
train_dataset=train_dataset,
eval_dataset=val_dataset,
compute_metrics=compute_metrics,
callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],)

trainer.train()

In [None]:
# Test model pretrained models: raw per-class scores for the held-out test split
raw_pred, _, _ = trainer.predict(test_dataset)

# Preprocess raw predictions
# y_pred (predicted class ids) is kept for inspection; compute_metrics takes the
# raw scores and applies the argmax itself.
y_pred = np.argmax(raw_pred, axis=1)
y = (raw_pred, y_test)
compute_metrics(y) 

In [None]:
# Only test your data on trained model without training phase - pretrained models
# Requires `tokenizer` from the model-selection cell and the `Dataset` class.
model_path = "path where you stored your model"
test_data_file = "path of the file"
test_df = read_dataset(test_data_file)
# NOTE(review): read_dataset fits a fresh LabelEncoder on this file (and rewrites
# classes.npy); the encoding only matches training if the file contains the same
# set of labels - confirm.

# Trainer.predict needs tokenized examples, not the raw dataframe
test_encodings = tokenizer(list(test_df["sentences"].values), padding=True, truncation=True, max_length=max_length)
y_true = list(test_df["label_encoded"].values)  # gold labels of the test file itself
test_dataset = Dataset(test_encodings, y_true)

predictions = None
if model_name == 'bert':
    predictions = test_bert_model(model_path, test_dataset)
elif model_name == 'alberta':
    predictions = test_alberta_model(model_path, test_dataset)
elif model_name == 'distilbert':
    predictions = test_distilbert_model(model_path, test_dataset)
elif model_name == 'gpt2':
    predictions = test_gpt2_model(model_path, test_dataset)
else:
    print('model is not defined')

# Skip scoring when no model was selected instead of failing on `predictions`
test_scores = compute_metrics((predictions, y_true)) if predictions is not None else None
test_scores

In [48]:
# Classify a single sentence with a saved pretrained model

# Restore the label names that read_dataset saved to classes.npy
le = LabelEncoder()
le.classes_ = np.load('classes.npy', allow_pickle=True)

sentence = "it is a school"

# Dispatch on the configured model name; each helper returns the predicted label name(s)
if model_name == 'bert':
    label_sentence = test_bert_model_one_sentence(model_path, sentence, le.classes_)
elif model_name == 'alberta':
    label_sentence = test_alberta_model_one_sentence(model_path, sentence, le.classes_)
elif model_name == 'distilbert':
    label_sentence = test_distilbert_model_one_sentence(model_path, sentence, le.classes_)
elif model_name == 'gpt2':
    label_sentence = test_gpt2_model_one_sentence(model_path, sentence, le.classes_)
else:
    print('model is not defined')


loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /home/necva/.cache/huggingface/transformers/45c3f7a79a80e1cf0a489e5c62b43f173c15db47864303a55d623bb3c96f72a5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99
loading file https://huggingface.co/bert-base-uncased/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/bert-base-uncased/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/bert-base-uncased/resolve/main/tokenizer_config.json from cache at /home/necva/.cache/huggingface/transformers/c1d7f0a763fb63861cc08553866f1fc3e5a6f4f07621be277452d26d71303b7e.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79
loading file https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json from cache at /home/necva/.cache/huggingface/transformers/534479488c54aeaf9c3406f647aa2ec13648c06771ffe269edabebd4c412da1d.7f2721073f19841be16f41b0a70b600ca6b880c

['Labeling']


In [57]:
# Network for Base models
class Network(torch.nn.Module):
    '''
    Recurrent sentence classifier: embedding -> dropout -> RNN/LSTM/BiLSTM -> linear layer.
    The final hidden state of the last recurrent layer is fed to the linear layer.
    '''

    def __init__(self,in_neuron,embedding_dim=50,hidden_size=256,out_neuron=8,m_type='lstm',drop=0.2,**kwargs):
        '''
        Constructor of the class which will instantiate the layers while initialisation.

        Input:
            in_neuron: vocabulary size, i.e. number of rows of the embedding table {int}
            embedding_dim: size of each embedding vector {int} default=50
            hidden_size: neurons of the hidden recurrent layer {int} default=256
            out_neuron: number of outputs (classes) {int} default=8
            m_type: 'bilstm', 'lstm'; any other value builds a plain RNN {string} default='lstm'
            drop: dropout probability applied to the embeddings {float 0-1} default=0.2
            **kwargs: extra torch.nn.RNN / torch.nn.LSTM arguments (e.g. num_layers) {dict}
        '''
        super(Network,self).__init__()
        self.m_type = m_type

        self.embedding = torch.nn.Embedding(in_neuron,embedding_dim) # embedding layer is always the first layer
        # Exactly one recurrent layer is created; the attribute names are part
        # of the saved state_dict keys and must not change.
        if self.m_type == "bilstm":
            self.bilstm = torch.nn.LSTM(embedding_dim,hidden_size,bidirectional=True, **kwargs)
        elif self.m_type == 'lstm':
            self.lstm = torch.nn.LSTM(embedding_dim,hidden_size,**kwargs)
        else:
            self.rnn = torch.nn.RNN(embedding_dim,hidden_size,**kwargs)

        self.dropout = torch.nn.Dropout(drop) # randomly zeroes part of the embedding values
        if self.m_type == "bilstm":
            # forward and backward final states are concatenated
            self.dense = torch.nn.Linear(hidden_size*2,out_neuron) # last fully connected layer
        else:
            self.dense = torch.nn.Linear(hidden_size,out_neuron) # last fully connected layer

    def forward(self,t):
        '''
        Forward pass of one batch.
        Input:
            t: batch of token indices {torch.tensor}
               (assumed (seq_len, batch) unless batch_first is passed via kwargs - confirm with caller)
        Returns:
            tensor of shape (batch, out_neuron) with the unnormalised class scores
        '''
        embedding_t = self.embedding(t)

        drop_emb = self.dropout(embedding_t)

        # The hidden state is laid out as (num_layers * num_directions, batch,
        # hidden_size). Index from the end so the LAST layer is used whatever
        # num_layers is, and so the batch dimension is never squeezed away
        # (squeeze(0) dropped it for a bilstm batch of size 1).
        if self.m_type == "bilstm":
            _, (hidden_state,_) = self.bilstm(drop_emb)
            final_hidden = torch.cat((hidden_state[-2],hidden_state[-1]), dim=1)
        elif self.m_type == 'lstm':
            _, (hidden_state,_) = self.lstm(drop_emb)
            final_hidden = hidden_state[-1]
        else:
            _, hidden_state = self.rnn(drop_emb)
            final_hidden = hidden_state[-1]

        return self.dense(final_hidden)

In [None]:
def prepare_dataset_base(dataset, train):
    """ Prepare torchtext iterators for the base (RNN/LSTM) models
    Input:
        dataset - dataframe, only used when the CSV split files still have
                  to be created (train is True and dataset_base is False)
        train - bool, True to build train/val/test iterators, False to build
                only the test iterator
    Returns:
        when train is True:
            train_iter - BucketIterator train batches
            val_iter - BucketIterator val batches
            test_iter - BucketIterator test batches
            input_size int vocabulary size, the input size of the model
        when train is False:
            test_iter - BucketIterator test batches

    Reads the notebook-level embedding_path, dataset_path, dataset_base and device.
    """

    text_field = Field(tokenize=tokenize)
    label_field = LabelField(dtype=torch.float)
    # useful for label string to LabelEncoding. Not useful here but doesn't hurt either

    fields = [('sentences',text_field),('label_encoded',label_field)]
    # (column name,field object to use on that column) pair for the dictionary

    glove = vocab.Vectors(embedding_path, dataset_path)
    if train: # prepare train, val and test dataset

        if not dataset_base: # the split CSV files are not saved yet
            split_dataset_base(dataset)

        train_dataset, val_dataset, test_dataset = TabularDataset.splits(path=dataset_path, train='train.csv',validation='val.csv',test='test.csv',
                                                 format='csv',skip_header=True,fields=fields)

        # vocabularies are built from the training split only
        text_field.build_vocab(train_dataset,max_size=100000,vectors=glove,unk_init=torch.Tensor.zero_)
        label_field.build_vocab(train_dataset)
        input_size = len(text_field.vocab)
        train_iter, val_iter, test_iter = BucketIterator.splits((train_dataset, val_dataset, test_dataset), batch_sizes=(32,128,128),
                                                      sort_key=lambda x: len(x.sentences),
                                                      sort_within_batch=False,
                                                      device=device) # use the cuda device if available
        return train_iter, val_iter, test_iter, input_size
    else: # prepare dataset for test only
        test_dataset = TabularDataset(path=dataset_path+'test.csv',
                                             format='csv',skip_header=True,fields=fields)

        # NOTE(review): the vocabularies are rebuilt from the test file here, so
        # token and label indices presumably differ from the ones the saved
        # model was trained with - confirm and persist the training vocab if so.
        text_field.build_vocab(test_dataset,max_size=100000,vectors=glove,unk_init=torch.Tensor.zero_)
        label_field.build_vocab(test_dataset)

        test_iter = BucketIterator(test_dataset, batch_size=32,
                                                      sort_key=lambda x: len(x.sentences),
                                                      sort=False,
                                                      sort_within_batch=False,
                                                      device=device) # use the cuda device if available

        # Return the batch iterator: evaluate_network reads batch.sentences /
        # batch.label_encoded tensors, which the raw dataset does not provide.
        return test_iter

In [55]:
# save and load models for base models
def save_checkpoint(save_path, model_name, optimizer, valid_loss, in_neuron):
    """ Save a base-model checkpoint
    Input:
        save_path - file path of the checkpoint; nothing is saved when None
        model_name - the network to save (the parameter keeps its original
                     name; the notebook passes the Network instance here)
        optimizer - optimizer whose state is stored next to the weights
        valid_loss - validation loss recorded in the checkpoint
        in_neuron - int input (vocabulary) size, stored as 'input_size' so
                    the network can be rebuilt when loading
    """
    if save_path is None:
        return

    # Make sure the target folder exists before torch.save opens the file.
    if isinstance(save_path, (str, os.PathLike)):
        target_dir = os.path.dirname(save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

    # Use the network passed in, not whatever global `model` happens to exist.
    state_dict = {'model_state_dict': model_name.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  'valid_loss': valid_loss,
                  'input_size': in_neuron}

    torch.save(state_dict, save_path)
    print(f'Model saved to ==> {save_path}')


def load_checkpoint(load_path, model_name):
    """ Rebuild a base model and its optimizer from a checkpoint
    Input:
        load_path - file path of the checkpoint; returns None when None
        model_name - model type passed to Network as m_type
    Returns:
        model - Network with the saved weights, placed on `device`
        optimizer - Adam optimizer with the saved state

    Reads the notebook-level `device` and `lr`. The network is rebuilt with
    Network's default sizes plus the 'input_size' stored in the checkpoint.
    """
    if load_path is None:
        return

    state_dict = torch.load(load_path, map_location=device)
    print(f'Model loaded from <== {load_path}')

    model = Network(state_dict['input_size'], m_type=model_name)
    # Move the network to the device the data iterators use before the
    # optimizer is created, so its state ends up on the same device.
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.load_state_dict(state_dict['model_state_dict'])
    optimizer.load_state_dict(state_dict['optimizer_state_dict'])

    return model, optimizer

In [None]:
#train function for base models
def train_network(network,train_iter,optimizer,loss_fn,epoch_num):
    '''
    Run one training epoch.
    Input:
        network: the neural network to train
        train_iter: iterator of training batches exposing .sentences and .label_encoded
        optimizer: optimizer that updates the network parameters
        loss_fn: loss function taking (predictions, long labels)
        epoch_num: epoch number shown in the tqdm bar
    Returns:
        a tuple (average_loss, average_accuracy), both averaged over the batches
    '''
    loss_sum = 0
    acc_sum = 0
    network.train()

    progress = tqdm(train_iter,f"Epoch: {epoch_num}")
    for batch in progress:
        optimizer.zero_grad()
        logits = network(batch.sentences).squeeze(1)
        targets = batch.label_encoded
        loss = loss_fn(logits,targets.to(torch.long))

        # predicted class = most probable class of each example
        predicted = torch.argmax(F.softmax(logits, dim=1), dim=1)
        hits = (predicted == targets).float()
        batch_acc = hits.sum()/len(hits)

        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        acc_sum += batch_acc.item()

    n_batches = len(train_iter)
    return loss_sum/n_batches, acc_sum/n_batches

In [None]:
#evaluation function for base models
def evaluate_network(network,val_test_iter,optimizer,loss_fn):
    '''
    Evaluate the network on validation or test batches without updating it.
    args:
        network: the neural network to evaluate
        val_test_iter: iterator of validation/test batches exposing .sentences and .label_encoded
        optimizer: not used during evaluation
        loss_fn: loss function taking (predictions, long labels)
    out:
        a tuple (average_loss, average_accuracy), both averaged over the batches
    '''
    loss_sum = 0
    acc_sum = 0
    network.eval()

    with torch.no_grad():
        for batch in val_test_iter:
            logits = network(batch.sentences).squeeze(1)
            targets = batch.label_encoded
            batch_loss = loss_fn(logits,targets.to(torch.long))

            hits = (torch.argmax(logits, dim=1) == targets).float()
            batch_acc = hits.sum()/len(hits)

            loss_sum += batch_loss.item()
            acc_sum += batch_acc.item()

        n_batches = len(val_test_iter)
        return loss_sum/n_batches, acc_sum/n_batches

In [None]:
# Train base model
# Build the iterators; in_neuron is the vocabulary size used as the network input size.
train_iter, val_iter, test_iter, in_neuron  = prepare_dataset_base(dataset, True)

network = Network(in_neuron, m_type=model_name) 
if torch.cuda.is_available():
    network.cuda() 

optimizer = torch.optim.Adam(network.parameters(),lr=lr) 
loss_fn = torch.nn.CrossEntropyLoss()

# One pass over the training data and one validation pass per epoch.
for epoch in range(num_epochs):
        train_loss, train_acc = train_network(network,train_iter,optimizer,loss_fn,epoch+1)
        val_loss,val_acc = evaluate_network(network,val_iter,optimizer,loss_fn)
        tqdm.write(f'''End of Epoch: {epoch+1}  |  Train Loss: {train_loss:.3f}  |  Val Loss: {val_loss:.3f}  |  Train Acc: {train_acc*100:.2f}%  |  Val Acc: {val_acc*100:.2f}%''')
        
# Final test scores are stored in test_loss / test_acc (they are not printed here).
test_loss,test_acc = evaluate_network(network,test_iter,optimizer,loss_fn)
# The checkpoint records the validation loss of the last epoch and the vocabulary size.
# NOTE(review): this saves under output_path while the test cell loads from model_path - confirm the two agree.
save_checkpoint(output_path + '/model.pt', network, optimizer, val_loss, in_neuron)

In [None]:
# Test base model without training
dataset = read_dataset('L2400.xlsx')
test_data_file = "path of the file"
#dataset = read_dataset(test_data_file)

test_iter  = prepare_dataset_base(dataset, False)


model, optimizer = load_checkpoint(model_path + '/model.pt', model_name)

# The loss function is otherwise only created in the training cell; define it
# here so this cell also works on a fresh kernel without training first.
loss_fn = torch.nn.CrossEntropyLoss()

#network.load_state_dict(torch.load(model_path), strict=False)
test_loss,test_acc = evaluate_network(model,test_iter,optimizer,loss_fn)