In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import torch
from tqdm import tqdm

SEED = 1111
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [3]:
from transformers import AutoTokenizer
model_name='bert-base-uncased'

tokenizer = AutoTokenizer.from_pretrained(model_name)

In [4]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [5]:
init_token = tokenizer.cls_token
eos_token = tokenizer.sep_token
pad_token = tokenizer.pad_token
unk_token = tokenizer.unk_token
additional_tokens = []
print(init_token, eos_token, pad_token, unk_token)

[CLS] [SEP] [PAD] [UNK]


In [6]:
init_token_idx = tokenizer.cls_token_id
eos_token_idx = tokenizer.sep_token_id
pad_token_idx = tokenizer.pad_token_id
unk_token_idx = tokenizer.unk_token_id

print(init_token_idx, eos_token_idx, pad_token_idx, unk_token_idx)

101 102 0 100


In [7]:
max_input_length = 63

In [8]:
def tokenize_bert(sentence):
    tokens = tokenizer.tokenize(sentence) 
    return tokens

In [9]:
def split_and_cut(sentence):
    tokens = sentence.strip().split(" ")
    tokens = tokens[:max_input_length-1]
    return tokens

In [10]:
def trim_sentence(sent):
    try:
        sent = sent.split()
        sent = sent[:32]
        return " ".join(sent)
    except:
        return sent

In [11]:
!wget https://stanfordnlp.github.io/contract-nli/resources/contract-nli.zip

--2023-05-15 05:32:03--  https://stanfordnlp.github.io/contract-nli/resources/contract-nli.zip
Resolving stanfordnlp.github.io (stanfordnlp.github.io)... 185.199.108.153, 185.199.110.153, 185.199.111.153, ...
Connecting to stanfordnlp.github.io (stanfordnlp.github.io)|185.199.108.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 65362913 (62M) [application/zip]
Saving to: ‘contract-nli.zip.3’


2023-05-15 05:32:04 (331 MB/s) - ‘contract-nli.zip.3’ saved [65362913/65362913]



In [12]:
from zipfile import ZipFile
  
# specifying the zip file name
file_name = "contract-nli.zip"
  
# opening the zip file in READ mode
with ZipFile(file_name, 'r') as zip:
    # printing all the contents of the zip file
    zip.printdir()
  
    # extracting all the files
    print('Extracting all the files now...')
    zip.extractall()
    print('Done!')

File Name                                             Modified             Size
contract-nli/                                  2021-10-06 01:32:08            0
contract-nli/LICENSE                           2021-10-04 17:59:52        18656
contract-nli/TERMS                             2021-10-04 19:39:12         3664
contract-nli/test.json                         2021-10-06 01:32:02      2260203
contract-nli/dev.json                          2021-10-06 01:32:08      1183107
contract-nli/README.md                         2021-10-05 20:05:32         5561
contract-nli/train.json                        2021-10-06 01:32:06      7608211
contract-nli/raw/                              2021-10-04 17:58:02            0
contract-nli/raw/NDA%2009%20for%20website.pdf  2021-10-04 17:57:32       102888
contract-nli/raw/Non-Disclosure-Agreement_12.pdf 2021-10-04 17:57:32       387795
contract-nli/raw/NDA-ROI-Corporation.pdf       2021-10-04 17:57:32        71906
contract-nli/raw/stony_hill_buyer_nda.

In [13]:
def get_sent1_token_type(sent):
    try:
        return [0]* len(sent)
    except:
        return []

def get_sent2_token_type(sent):
    try:
        return [1]* len(sent)
    except:
        return []
    
def combine_seq(seq):
    return " ".join(seq)

def combine_mask(mask):
    mask = [str(m) for m in mask]
    return " ".join(mask)

In [14]:
from typing import List, Tuple
import enum


label2index = {"Entailment":1,"NotMentioned":0,"Contradiction":2}


class ContractNLIExample:
    """
    A single training/test example for the contract NLI.

    Args:
        data_id: The example's unique identifier
        hypothesis_text: The hypothesis string
        context_text: The context string
        answer_text: The answer string
        start_position_character: The character position of the start of the answer
    """

    def __init__(
        self,
        *,
        data_id,
        document_id,
        hypothesis_id,
        file_name,
        hypothesis_text,
        hypothesis_tokens,
        context_text,
        tokens,
        splits,
        spans,
        char_to_word_offset,
        label,
        annotated_spans
    ):
        self.data_id: str = data_id
        self.document_id: str = document_id
        self.hypothesis_id: str = hypothesis_id
        self.hypothesis_symbol: str = f'[{hypothesis_id}]'
        self.file_name: str = file_name
        self.hypothesis_text: str = hypothesis_text
        self.hypothesis_tokens: List[str] = hypothesis_tokens
        self.context_text: str = context_text
        self.tokens: List[str] = tokens
        # Note that splits are NOT unique
        self.splits: List[int] = splits
        self.spans: List[Tuple[int, int]] = spans
        self.char_to_word_offset: List[int] = char_to_word_offset
        self.label: int = label
        self.annotated_spans: List[int] = annotated_spans

    @staticmethod
    def tokenize_and_align(text: str, spans: List[Tuple[int, int]]):
        """
        spans: Spans as character offsets. e.g. "world" in "Hello, world" will
            be represented as (7, 12).
        """
        # Split on whitespace so that different tokens may be attributed to their original position.
        tokens = []
        char_to_word_offset = []
        prev_is_whitespace = True
        splits = {si for s in spans for si in s}

        for i, c in enumerate(text):
            if c == ' ':
                # splits will be ignored on space
                prev_is_whitespace = True
            else:
                if prev_is_whitespace or i in splits:
                    tokens.append(c)
                else:
                    tokens[-1] += c
                prev_is_whitespace = False
            # len(tokens) == 0 when first characters are spaces
            char_to_word_offset.append(max(len(tokens) - 1, 0))
        
        splits = [char_to_word_offset[s[0]] for s in spans]
        return tokens, splits, char_to_word_offset

    @classmethod
    def load(cls, input_data) -> List['ContractNLIExample']:
        examples = []
        label_dict = {
            label_id: label_info['hypothesis']
            for label_id, label_info in input_data['labels'].items()}
        for document in tqdm(input_data['documents']):
            if len(document['annotation_sets']) != 1:
                raise RuntimeError(
                    f'{len(document["annotation_sets"])} annotation sets given but '
                    'we only support single annotation set.')
            for label_id, annotation in document['annotation_sets'][0]['annotations'].items():
                data_id = f'{document["id"]}_{label_id}'
                context_text = document['text']
                hypothesis_text = label_dict[label_id]

                tokens, splits, char_to_word_offset = cls.tokenize_and_align(
                    context_text, document['spans'])
                hypothesis_tokens, _, _ = cls.tokenize_and_align(
                    hypothesis_text, [])
                assert len(splits) == len(document['spans'])
                example = cls(
                    data_id=data_id,
                    document_id=document["id"],
                    hypothesis_id=label_id,
                    file_name=document['file_name'],
                    hypothesis_text=hypothesis_text,
                    hypothesis_tokens=hypothesis_tokens,
                    context_text=context_text,
                    tokens=tokens,
                    splits=splits,
                    spans=document['spans'],
                    char_to_word_offset=char_to_word_offset,
                    label=label2index[annotation['choice']],
                    annotated_spans=annotation['spans']
                )
                examples.append(example)
        return examples

In [15]:
SPAN_TOKEN = "[SPAN]"

def addAdditionalToken(token_tobe_added):
    if (token_tobe_added not in additional_tokens):
        additional_tokens.append(token_tobe_added)
        tokenizer.add_special_tokens({'additional_special_tokens': tokenizer.additional_special_tokens + [token_tobe_added]})

addAdditionalToken(SPAN_TOKEN)

In [16]:
import json
def getExamples(dataset_name='train'):
    path=f'/content/contract-nli/{dataset_name}.json' 
    with open(path) as fin:
        input_dict = json.load(fin)
    examples = ContractNLIExample.load(input_dict)
    return examples

In [17]:

def preprocessingContract(examples, SPAN_TOKEN, l = 300, n = 20):
    max_length=332
    data_examples = {"sequence": [], "label": [], "span_position_vector": [], "span_index_vector": [], "span_attention_hypothesis": [] ,"attention_mask":[], "token_type":[] }
    # print(len(examples))
    for i_example in examples:
        # x+=1
        updated_text = ""
        for i in i_example.spans:
            updated_text += " " + SPAN_TOKEN + " " + i_example.context_text[i[0]:i[1]]
        
        start = 0
        current_span_index = 0
        end = -1
        prevpos = 0
        tokenized_text = tokenize_bert(updated_text)
        prevstart = start
        hypo_token = tokenize_bert(i_example.hypothesis_text)
        while(end < len(tokenized_text)-1):

            span_position_vector = []
            span_index_vector = []
            span_attention_hypothesis = []
            temstart = start
            curcontext = []
            prevpos = -1
            for i in range(start, min(start+l, len(tokenized_text))):
                if(tokenized_text[i] == SPAN_TOKEN):
                    span_position_vector.append(1)
                    span_index_vector.append(current_span_index)
                    if(current_span_index in i_example.annotated_spans):
                        span_attention_hypothesis.append(1)
                    else:
                        span_attention_hypothesis.append(0)
                    temstart = i
                    current_span_index += 1
                    prevpos = i-start
                else:
                    span_position_vector.append(0)
                    span_index_vector.append(0)
                    span_attention_hypothesis.append(0)
                curcontext.append(tokenized_text[i])

                end = i
            


            if(end < len(tokenized_text) -1 ):
                if(tokenized_text[end+1] == SPAN_TOKEN):
                    if (prevpos != -1):
                        span_position_vector[prevpos] = 0
                        span_index_vector[prevpos] = 0
                        span_attention_hypothesis[prevpos] = 0

            # print(prevstart, start)
            if(temstart==start):
                start += n
            else:
                start = temstart - n 
            
            if(prevstart == start):
                start += n
            prevstart = start

            for i in range(end, temstart - n - 1, -1):
                if(tokenized_text[i] == SPAN_TOKEN):
                    current_span_index -= 1
            # print(i_example.spans[current_span_index])
            token_type = [0]*(len(curcontext)+2) + [1]*(len(hypo_token)+1)
            curcontext = [init_token] + curcontext + [eos_token] + hypo_token + [eos_token]
            curlen = max_length - len(curcontext)
            attention_mask = [1]*len(curcontext) + [0]*curlen
            curcontext += [pad_token] * curlen
            span_position_vector = [0] + span_position_vector + [0] * (len(hypo_token)+2 + curlen)
            token_type += [1]*curlen
            span_index_vector = [0] + span_index_vector + [0] * (len(hypo_token)+2+curlen)
            span_attention_hypothesis = [0] + span_attention_hypothesis + [0] * (len(hypo_token)+2+curlen)
            
            if(sum(span_attention_hypothesis) == 0):
                continue

            # max_length = max(max_length, len(curcontext))
            data_examples["sequence"].append(tokenizer.convert_tokens_to_ids(curcontext))
            data_examples["label"].append(i_example.label)            
            data_examples["span_position_vector"].append(span_position_vector)            
            data_examples["span_index_vector"].append(span_index_vector)            
            data_examples["span_attention_hypothesis"].append(span_attention_hypothesis) 
            data_examples["attention_mask"].append(attention_mask)
            data_examples["token_type"].append(token_type)

               
    # print(max_length)
    return data_examples

In [18]:
train_examples = getExamples("train")
train_dataset = preprocessingContract(train_examples,SPAN_TOKEN)
del train_examples
# print(max_len)

# if len(train_dataset["sequence"][0]) != len(train_dataset["span_position_vector"][0]):
#     print("FAILURE")

# for i in range(len(train_dataset["span_position_vector"][0])):
#     if train_dataset["span_position_vector"][0][i] == 1:
#         print(train_dataset["sequence"][0][i])

dev_examples = getExamples("dev")
dev_dataset=[]
for i in tqdm(dev_examples):
    dev_dataset.append(preprocessingContract([i],SPAN_TOKEN))
del dev_examples

test_examples = getExamples("test")
test_dataset=[]
for i in tqdm(test_examples):
    test_dataset.append(preprocessingContract([i],SPAN_TOKEN))
del test_examples

100%|██████████| 423/423 [00:53<00:00,  7.95it/s]
Token indices sequence length is longer than the specified maximum sequence length for this model (1909 > 512). Running this sequence through the model will result in indexing errors
100%|██████████| 61/61 [00:09<00:00,  6.61it/s]
100%|██████████| 1037/1037 [00:12<00:00, 82.07it/s]
100%|██████████| 123/123 [00:10<00:00, 11.40it/s]
100%|██████████| 2091/2091 [00:26<00:00, 79.74it/s]


In [19]:
# print(max_len)

In [20]:
BATCH_SIZE = 8

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def getDataloader(dataset):
    sequence = torch.tensor(dataset["sequence"],dtype=torch.long)
    attention_mask = torch.tensor(dataset["attention_mask"],dtype=torch.long)
    token_type = torch.tensor(dataset["token_type"],dtype=torch.long)
    label = torch.tensor(dataset["label"],dtype=torch.long)
    span_position_vector = torch.tensor(dataset["span_position_vector"],dtype=torch.long)
    span_index_vector = torch.tensor(dataset["span_index_vector"],dtype=torch.long)
    span_attention_hypothesis = torch.tensor(dataset["span_attention_hypothesis"],dtype=torch.long)

    dataset = torch.utils.data.TensorDataset(sequence, label, span_position_vector, span_index_vector, span_attention_hypothesis ,attention_mask, token_type)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    return dataloader
  
train_dataloader = getDataloader(train_dataset)
valid_dataloader = []
test_dataloader = []
for i in dev_dataset:
    # print(i)
    if len(i["label"]) != 0:
        valid_dataloader.append(getDataloader(i))
for i in test_dataset:
    if len(i["label"]) != 0:
        test_dataloader.append(getDataloader(i))

In [21]:
from transformers import AutoModel

bert_model = AutoModel.from_pretrained(model_name,output_hidden_states=True, add_pooling_layer=True)
bert_model.resize_token_embeddings(len(tokenizer))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Embedding(30523, 768)

In [22]:
import torch.nn as nn

class BERTNLIModel(nn.Module):
    def __init__(self,
                 bert_model,
                 hidden_dim,
                 output_dim,
                ):
        
        super().__init__()
        
        self.bert = bert_model
        
        embedding_dim = bert_model.config.to_dict()['hidden_size']

        self.class_outputs = nn.Linear(embedding_dim, 3)
        self.span_outputs = nn.Linear(embedding_dim, 2)
        print(bert_model.config.hidden_dropout_prob)
        print(bert_model.config.hidden_size)
        self.dropout = nn.Dropout(bert_model.config.hidden_dropout_prob)
        self.activation2 = nn.Sigmoid()
        self.activation1 = nn.Softmax()


    def forward(self, sequence, attn_mask, token_type):
        output = self.bert(input_ids = sequence, attention_mask = attn_mask, token_type_ids= token_type)
        last_hidden_state = output.last_hidden_state
        last_hidden_state = self.dropout(last_hidden_state)
        span_out = self.span_outputs(last_hidden_state)
        span_out = self.activation2(span_out)

        pooler_output = output.pooler_output
        pooler_output = self.dropout(pooler_output)
        label_out = self.class_outputs(pooler_output)
        label_out = self.activation1(label_out)

        return span_out, label_out

In [23]:
HIDDEN_DIM = 512
OUTPUT_DIM = len(label2index)

model = BERTNLIModel(bert_model,HIDDEN_DIM,OUTPUT_DIM,).to(device)


0.1
768


In [24]:
from transformers import *

Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [25]:
import torch.optim as optim

#optimizer = optim.Adam(model.parameters())
optimizer = AdamW(model.parameters(),lr=2e-5,eps=1e-6,correct_bias=False)

def get_scheduler(optimizer, warmup_steps):
    scheduler = get_constant_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps)
    return scheduler



In [26]:
criterion = nn.CrossEntropyLoss().to(device)

In [27]:
def categorical_accuracy(preds, y):
    max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
    correct = (max_preds.squeeze(1)==y).float()
    return correct.sum() / len(y)

In [28]:
max_grad_norm = 1

def train(model, iterator, optimizer, criterion, scheduler, weight_span=0.1):
    #print(iterator)
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for sequence, label, span_position_vector, span_index_vector, span_attention_hypothesis ,attn_mask, token_type in tqdm(iterator):

        optimizer.zero_grad() # clear gradients first
        torch.cuda.empty_cache() # releases all unoccupied cached memory 
        
        sequence,attn_mask,token_type,label = sequence.to(device),attn_mask.to(device),token_type.to(device),label.to(device)
        # span_position_vector, span_index_vector, span_attention_hypothesis = span_position_vector.to(device), span_index_vector.to(device), span_attention_hypothesis.to(device)
        
        predictions_span, predictions_label = model(sequence, attn_mask, token_type)

        final_list=[]
        
        # print(predictions_span)

        # print(predictions_span.shape)
        for i in range(len(predictions_span)):
            predictions_span[i] = torch.mul(predictions_span[i].clone().T, span_position_vector[i].clone()).T        
            # print(predictions_span[i].shape)

            spv = torch.unsqueeze(span_position_vector[i].clone(),0)
            
            # print(spv.shape)
            spah = torch.unsqueeze(span_attention_hypothesis[i].clone(),0)

            true_span =  torch.cat([spah, spv- spah]).T
            # print(true_span.shape)
            true_span = torch.unsqueeze(true_span,0)
            # print(true_span.shape)
            
            final_list.append(true_span)

        final_true_span= torch.cat(final_list)
        final_true_span = final_true_span.to(device) 

        # print(predictions_span)

        loss_span = criterion(predictions_span,  torch.tensor(final_true_span,dtype=torch.float))
        loss_label = None
        # print(span_attention_hypothesis[0].shape)
        for i in range(len(label)):
            
            if torch.sum(span_attention_hypothesis[i]) >= 1:
                if loss_label != None:
                    loss_label += criterion(predictions_label[i], label[i])
                else:
                    loss_label = criterion(predictions_label[i], label[i])

        # loss = loss_label + weight_span * loss_span
        if loss_label == None:
            continue
        loss = loss_label + weight_span * loss_span
        
        acc = categorical_accuracy(predictions_label, label)
        loss.backward()
        
        optimizer.step()
        scheduler.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [29]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
import pandas as pd



def categorical_accuracy1(preds, y):
    max_preds = preds.argmax() # get the index of the max probability
    correct = (max_preds==y)
    return correct


def evaluate(model, iterator, criterion):
    #print(iterator)
    epoch_loss = 0
    epoch_acc = 0
    y_true = [torch.tensor(0), torch.tensor(1), torch.tensor(2)]
    y_pred = [torch.tensor(0), torch.tensor(1), torch.tensor(2)]
    model.eval()
    
    with torch.no_grad():
        for itl in tqdm(iterator):
            pl = torch.zeros(3).to(device)
            wt = 0
            llabel = None
            for sequence, labels, span_position_vector, span_index_vector, span_attention_hypothesis ,attn_mask, token_type in itl:

                sequence,attn_mask,token_type,labels = sequence.to(device),attn_mask.to(device),token_type.to(device),labels.to(device)
                predictions_span, predictions_label = model(sequence, attn_mask, token_type)
                for i in range(len(predictions_label)):
                    temwt = torch.sum(span_attention_hypothesis)/torch.sum(span_position_vector)
                    pl = pl + temwt.to(device) * predictions_label[i]
                    wt += temwt                    
                llabel = labels[0]

            pl = pl/wt
            pl =  torch.mul(pl, torch.tensor([0,1,1]).to(device))
            # print(pl)
            # print(llabel)
            # print(pl.cpu().argmax())
            loss = criterion(pl, llabel)
            y_true.append(llabel.cpu())
                
            y_pred.append(pl.cpu().argmax())
            acc = categorical_accuracy1(pl, llabel)
            
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        # print("Test Classification report")
        # print(y_true)
        # print(y_pred)
        # print(classification_report(y_true, y_pred, target_names=["NotMentioned","Entailment","Contradiction"]))
        # cf_matrix = confusion_matrix(y_true, y_pred)
        # df_cm = pd.DataFrame(cf_matrix / np.sum(cf_matrix, axis=1)[:, None], index = [i for i in ["NotMentioned","Entailment","Contradiction"]],columns = [i for i in ["NotMentioned","Entailment","Contradiction"]])
        # # plt.figure(figsize = (12,7))
        # sn.heatmap(df_cm, annot=True)
        # plt.savefig('output.png')
        # plt.show()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [30]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [31]:
import math
N_EPOCHS = 6

warmup_percent = 0.2
total_steps = math.ceil(N_EPOCHS*len(train_dataset)*1./BATCH_SIZE)
warmup_steps = int(total_steps*warmup_percent)
scheduler = get_scheduler(optimizer, warmup_steps)

best_valid_loss = float('inf')
best_epoch=0

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_dataloader, optimizer, criterion, scheduler)
    valid_loss, valid_acc = evaluate(model, valid_dataloader, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        best_epoch=epoch
    torch.save(model.state_dict(), f'/content/gdrive/MyDrive/NLPProject/models/bert-cnli_{epoch}.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

  label_out = self.activation1(label_out)
100%|██████████| 868/868 [08:16<00:00,  1.75it/s]
100%|██████████| 614/614 [00:23<00:00, 25.99it/s]


Epoch: 01 | Epoch Time: 8m 40s
	Train Loss: 5.880 | Train Acc: 81.64%
	 Val. Loss: 0.706 |  Val. Acc: 84.53%


100%|██████████| 868/868 [08:20<00:00,  1.73it/s]
100%|██████████| 614/614 [00:23<00:00, 26.00it/s]


Epoch: 02 | Epoch Time: 8m 44s
	Train Loss: 5.862 | Train Acc: 81.80%
	 Val. Loss: 0.706 |  Val. Acc: 84.53%


100%|██████████| 868/868 [08:21<00:00,  1.73it/s]
100%|██████████| 614/614 [00:23<00:00, 26.06it/s]


Epoch: 03 | Epoch Time: 8m 44s
	Train Loss: 5.862 | Train Acc: 81.83%
	 Val. Loss: 0.706 |  Val. Acc: 84.53%


100%|██████████| 868/868 [08:20<00:00,  1.73it/s]
100%|██████████| 614/614 [00:23<00:00, 25.98it/s]


Epoch: 04 | Epoch Time: 8m 44s
	Train Loss: 5.862 | Train Acc: 81.83%
	 Val. Loss: 0.706 |  Val. Acc: 84.53%


100%|██████████| 868/868 [08:20<00:00,  1.73it/s]
100%|██████████| 614/614 [00:23<00:00, 26.14it/s]


Epoch: 05 | Epoch Time: 8m 44s
	Train Loss: 5.862 | Train Acc: 81.83%
	 Val. Loss: 0.706 |  Val. Acc: 84.53%


100%|██████████| 868/868 [08:20<00:00,  1.73it/s]
100%|██████████| 614/614 [00:23<00:00, 25.98it/s]


Epoch: 06 | Epoch Time: 8m 44s
	Train Loss: 5.862 | Train Acc: 81.80%
	 Val. Loss: 0.706 |  Val. Acc: 84.53%


In [32]:
model.load_state_dict(torch.load(f'/content/gdrive/MyDrive/NLPProject/models/bert-cnli_5.pt'))

test_loss, test_acc = evaluate(model, test_dataloader, criterion)

print(f'Test Loss: {test_loss:.3f} |  Test Acc: {test_acc*100:.2f}%')

  label_out = self.activation1(label_out)
100%|██████████| 1188/1188 [00:45<00:00, 26.14it/s]

Test Loss: 0.737 |  Test Acc: 81.48%



