In [1]:
import csv

In [2]:
train, dev, test = [], [], []

In [3]:
with open('./pnli_train.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        x[2] = int(x[2])
        train.append(x)
print (len(train))
print (train[:3])


5983
[['Sometimes do exercise.', 'A person typically desire healthy life.', 1], ['Who eats junk foods.', 'A person typically desire healthy life.', 0], ['A person is sick.', 'A person typically desire healthy life.', 1]]


In [4]:
with open('./pnli_dev.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        # labels of dev set to int
        x[2] = int(x[2])
        dev.append(x)
print (len(dev))
print (dev[:3])

1055
[['A person is looking for accuracy.', 'A person typically desires accurate results.', 1], ['A person does not care for accuracy.', 'A person typically desires accurate results.', 0], ['The person double checks their data.', 'A person typically desires accurate results.', 1]]


In [5]:
with open('./pnli_test_unlabeled.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        test.append(x)
print (len(test))
print (test[:3])

4850
[['The people want to have a romantic and pleasant feel.', 'People typically does desire to smell violets.'], ['The contract is to buy products from you.', 'Getting contract typically cause to make money or spend money.'], ['Train station is closed.', 'Line can typically be used to move train along tracks.']]


In [6]:
!pip install datasets==1.0.1
!pip install transformers==3.1.0

import os
import copy
import random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler

from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel, AdamW, get_linear_schedule_with_warmup
from datasets import load_metric
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# to check if 100% of GPU is available to execute the code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip -q install gputil
!pip -q install psutil
!pip -q install humanize

import os
import psutil
import GPUtil as GPU
import humanize

GPUs = GPU.getGPUs()
gpu = GPUs[0]

Collecting datasets==1.0.1
  Downloading datasets-1.0.1-py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 12.7 MB/s 
Collecting xxhash
  Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[K     |████████████████████████████████| 212 kB 49.5 MB/s 
Installing collected packages: xxhash, datasets
Successfully installed datasets-1.0.1 xxhash-3.0.0
Collecting transformers==3.1.0
  Downloading transformers-3.1.0-py3-none-any.whl (884 kB)
[K     |████████████████████████████████| 884 kB 12.7 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 43.3 MB/s 
Collecting sentencepiece!=0.1.92
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 40.0 MB/s 
[?25hCollecting tokenizers==0.8.1.rc2
  Downloading tokenizers-0.8.1rc2-cp37-cp37m-manylinux1_x8

PyTorch version 1.10.0+cu111 available.
TensorFlow version 2.8.0 available.


  Building wheel for gputil (setup.py) ... [?25l[?25hdone


In [7]:
def mem_usage_print():
 process = psutil.Process(os.getpid())
 print("RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ))
 print("Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f} MB | Used: {1:.0f} MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))

mem_usage_print()

# execute to kill ongoing process and to utilize full GPU
# !kill -9 -1

RAM Free: 11.8 GB
Proc size: 1.6 GB
GPU RAM Free: 11441 MB | Used: 0 MB | Util   0% | Total 11441MB


In [8]:
# load the datasets in dataframes. transform data into pandas dataframes

# training dataset
df_train = pd.DataFrame(train, columns=['precondition', 'statement', 'label'])

# dev dataset
df_val = pd.DataFrame(dev, columns=['precondition', 'statement', 'label'])  

print(df_train.dtypes)
print(df_val.dtypes)

precondition    object
statement       object
label            int64
dtype: object
precondition    object
statement       object
label            int64
dtype: object


In [10]:
print(df_train.shape)
print(df_val.shape)

print()
print('Training dataset:::')
df_train.head()

(5983, 3)
(1055, 3)

Training dataset:::


Unnamed: 0,precondition,statement,label
0,Sometimes do exercise.,A person typically desire healthy life.,1
1,Who eats junk foods.,A person typically desire healthy life.,0
2,A person is sick.,A person typically desire healthy life.,1
3,A person is dead.,A person typically desire healthy life.,0
4,A person eats properly and do exercise regularly.,A person typically desire healthy life.,1


In [11]:
class CustomizedDataset(Dataset):

    def __init__(self, data, maxlen, with_labels = True, bert_model='albert-base-v2'):
        # pandas dataframe
        self.data = data  
        # initialize the tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(bert_model)
        self.maxlen = maxlen
        self.with_labels = with_labels 

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        # precondition and statement at the specified index in dataframe
        precnd = str(self.data.loc[index, 'precondition'])
        stmt = str(self.data.loc[index, 'statement'])
        # tokenize the pair of sentences to get token ids, attention masks and token type ids
        encoded_pair_sentences = self.tokenizer(precnd, stmt, padding='max_length',  # max_length padding
                                      truncation=True,  # truncate to max_length
                                      max_length=self.maxlen, return_tensors='pt')  # returns torch.Tensor objects
        
        # tensor of token ids
        token_ids = encoded_pair_sentences['input_ids'].squeeze(0)  
        # binary tensor: "0" -> padded values and "1" -> other values
        attn_masks = encoded_pair_sentences['attention_mask'].squeeze(0)  
        # binary tensor: "0" -> sentence1 (precondition) tokens and "1" -> sentence2 (statement) tokens
        token_type_ids = encoded_pair_sentences['token_type_ids'].squeeze(0)  

        
        if self.with_labels:  # for dataset with labels (train)
            label = self.data.loc[index, 'label']
            return token_ids, attn_masks, token_type_ids, label  
        else: # for dataset without labels (test)
            return token_ids, attn_masks, token_type_ids

In [12]:
class SentencePairClassificationModule(nn.Module):

    def __init__(self, bert_model="albert-base-v2", freeze_bert=False):
        super(SentencePairClassificationModule, self).__init__()
        #  instantiate BERT-based model object
        self.bert_layer = AutoModel.from_pretrained(bert_model)

        #  fix the hidden-state size of the encoder outputs
        if bert_model == "albert-base-v2":  # 12M parameters
            hidden_size = 768
        elif bert_model == "albert-large-v2":  # 18M parameters
            hidden_size = 1024
        elif bert_model == "albert-xlarge-v2":  # 60M parameters
            hidden_size = 2048
        elif bert_model == "albert-xxlarge-v2":  # 235M parameters
            hidden_size = 4096
        elif bert_model == "bert-base-uncased": # 110M parameters
            hidden_size = 768

        # freeze bert layers and only train the classification layer weights
        if freeze_bert:
            for p in self.bert_layer.parameters():
                p.requires_grad = False

        # classification layer
        self.cls_layer = nn.Linear(hidden_size, 1)
        self.dropout = nn.Dropout(p=0.1)

    @autocast()
    def forward(self, input_ids, attn_masks, token_type_ids):
        # input_ids, attn_masks, token_type_ids : tensor with token ids, attention masks to handle non-padded values, token type ids identify precond and stmt resp.
        
        # feed the inputs to the BERT-based model to obtain contextualized representations
        cont_reps, pooler_output = self.bert_layer(input_ids, attn_masks, token_type_ids)

        # feed to the classifier layer the last layer hidden-state of the [CLS] token further processed by a linear layer and a tanh activation. 
        # the linear layer weights were trained from the sentence order prediction (ALBERT) objective during pre-training.
        logits = self.cls_layer(self.dropout(pooler_output))

        return logits

In [13]:
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

In [14]:
# evaluate function for validation/dev set
def evaluate_loss(net, device, criterion, dataloader):
    net.eval()
    val_loss = 0
    count = 0

    with torch.no_grad():
        for i, (sequence, attn_masks, token_type_ids, labels) in enumerate(tqdm(dataloader)):
            sequence, attn_masks, token_type_ids, labels = \
                sequence.to(device), attn_masks.to(device), token_type_ids.to(device), labels.to(device)
            logits = net(sequence, attn_masks, token_type_ids)
            val_loss += criterion(logits.squeeze(-1), labels.float()).item()  
            count += 1

    return val_loss / count

!mkdir models
print("models folder created")

models folder created


In [18]:
# def tuple_of_tensors_to_tensor(tuple_of_tensors):
#     print(torch.stack(list(tuple_of_tensors), dim = 0))
#     return  torch.stack(list(tuple_of_tensors), dim=0)

# def categorical_accuracy(preds, y):
#     max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
#     correct = (max_preds.squeeze(1)==y).float()
#     return correct.sum() / len(y)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def train_bert(net, criterion, opti, lr, lr_scheduler, train_loader, val_loader, epochs, iterations_to_accumulate):

    best_loss = np.Inf
    best_ep = 1
    nb_iterations = len(train_loader)
    print_every = nb_iterations // 5 
    iters = []
    train_losses = []
    val_losses = []
    scaler = GradScaler()

    for ep in range(epochs):

        net.train()
        running_loss = 0.0
        for i, (sequence, attn_masks, token_type_ids, labels) in enumerate(tqdm(train_loader)):
            # print('sequence', sequence)
            # print('attn_masks' , attn_masks)
            # print('token_type_ids' , token_type_ids)
            # print('labels', labels)

            # converting to cuda tensors
            sequence, attn_masks, token_type_ids, labels = \
                sequence.to(device), attn_masks.to(device), token_type_ids.to(device), labels.to(device)

            # enables autocasting for the forward pass
            with autocast():
                # get logits from the model
                logits = net(sequence, attn_masks, token_type_ids)
                loss = criterion(logits.squeeze(-1), labels.float())
                # accuracy calculations
                # acc = categorical_accuracy(logits.squeeze(-1), labels.float())
                # normalize loss
                loss = loss / iterations_to_accumulate  

            # back-propagating the gradients. scales loss
            scaler.scale(loss).backward() # calls backward() on scaled loss to create scaled gradients.

            if (i + 1) % iterations_to_accumulate == 0:
                # Optimization step
                scaler.step(opti) # unscales the gradients of the optimizer's assigned params
                scaler.update() # update scale for next iteration
                lr_scheduler.step() # adjust learning rate based on the number of iterations.
                opti.zero_grad() # clear gradients


            running_loss += loss.item()
            if (i + 1) % print_every == 0:  # print training loss information
                print()
                print("Iteration {}/{} of epoch {} complete. Training Loss : {} "
                      .format(i + 1, nb_iterations, ep+1, running_loss / print_every))
                running_loss = 0.0

        # for validation dataset only
        val_loss = evaluate_loss(net, device, criterion, val_loader)  # compute validation loss
        print()
        print(" Epoch {} completed ".format(ep+1))
        print(" Validation Loss: {} ".format(val_loss))

        if val_loss < best_loss:
            print(" Validation loss improved from {} to {} ".format(best_loss, val_loss))
            print()
            net_copy = copy.deepcopy(net)  # save a copy of the model
            best_loss = val_loss
            best_ep = ep + 1

    # save model
    model_path='models/{}_lr_{}_ep_{}.pt'.format(bert_model, lr, best_ep)
    torch.save(net_copy.state_dict(), model_path)
    print("Path on which model is saved {}".format(model_path))

    del loss
    torch.cuda.empty_cache()

In [19]:
bert_model = "albert-base-v2"  # 'albert-base-v2', 'albert-large-v2', 'albert-xlarge-v2', 'albert-xxlarge-v2', 'bert-base-uncased', ...
maxlen = 128  # maximum length of the tokenized input sentence pair : if greater than "maxlen", the input is truncated and else if smaller, the input is padded
bs = 16  # batch size
iterations_to_accumulate = 2  # the gradient accumulation adds gradients over an effective batch of size : bs * iters_to_accumulate
lr = 2e-5  # learning rate
freeze_bert = False  # if true, freeze the encoder weights and only update the classification layer weights
epochs = 4  # number of training epochs

In [20]:
set_seed(1) # set all seeds to make reproducible results

print("Read training and validation set data")
train_set = CustomizedDataset(df_train, maxlen, bert_model)
val_set = CustomizedDataset(df_val, maxlen, bert_model)
train_loader = DataLoader(train_set, batch_size=bs, num_workers=5)
val_loader = DataLoader(val_set, batch_size=bs, num_workers=5)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = SentencePairClassificationModule(bert_model, freeze_bert=freeze_bert)

if torch.cuda.device_count() > 1:
    print(" use ", torch.cuda.device_count(), "GPUs!")
    net = nn.DataParallel(net)

net.to(device)
criterion = nn.BCEWithLogitsLoss()
opti = AdamW(net.parameters(), lr=lr, weight_decay=1e-2)
num_warmup_steps = 0   # number of steps for the warmup phase.
num_training_steps = epochs * len(train_loader)  # total training steps
t_total = (len(train_loader) // iterations_to_accumulate) * epochs  # gradient accumulation
learn_rate_scheduler = get_linear_schedule_with_warmup(optimizer=opti, num_warmup_steps=num_warmup_steps, num_training_steps=t_total)

# train the model
train_bert(net, criterion, opti, lr, learn_rate_scheduler, train_loader, val_loader, epochs, iterations_to_accumulate)

Read training and validation set data


  cpuset_checked))
 20%|█▉        | 74/374 [01:54<07:43,  1.55s/it]


Iteration 74/374 of epoch 1 complete. Training Loss : 0.28538926429039724 


 40%|███▉      | 148/374 [03:49<05:49,  1.54s/it]


Iteration 148/374 of epoch 1 complete. Training Loss : 0.26154422014951706 


 59%|█████▉    | 222/374 [05:43<03:55,  1.55s/it]


Iteration 222/374 of epoch 1 complete. Training Loss : 0.19854044269871068 


 79%|███████▉  | 296/374 [07:38<02:00,  1.54s/it]


Iteration 296/374 of epoch 1 complete. Training Loss : 0.21111392310342272 


 99%|█████████▉| 370/374 [09:32<00:06,  1.55s/it]


Iteration 370/374 of epoch 1 complete. Training Loss : 0.20051204242013596 


100%|██████████| 374/374 [09:38<00:00,  1.55s/it]
100%|██████████| 66/66 [00:36<00:00,  1.81it/s]



 Epoch 1 completed 
 Validation Loss: 0.38679518392591766 
 Validation loss improved from inf to 0.38679518392591766 



 20%|█▉        | 74/374 [01:54<07:41,  1.54s/it]


Iteration 74/374 of epoch 2 complete. Training Loss : 0.1824047935472147 


 40%|███▉      | 148/374 [03:49<05:50,  1.55s/it]


Iteration 148/374 of epoch 2 complete. Training Loss : 0.18509920464979635 


 59%|█████▉    | 222/374 [05:43<03:54,  1.55s/it]


Iteration 222/374 of epoch 2 complete. Training Loss : 0.1429833250774725 


 79%|███████▉  | 296/374 [07:38<02:00,  1.55s/it]


Iteration 296/374 of epoch 2 complete. Training Loss : 0.16142272627031481 


 99%|█████████▉| 370/374 [09:32<00:06,  1.56s/it]


Iteration 370/374 of epoch 2 complete. Training Loss : 0.16815843991935253 


100%|██████████| 374/374 [09:39<00:00,  1.55s/it]
100%|██████████| 66/66 [00:36<00:00,  1.79it/s]



 Epoch 2 completed 
 Validation Loss: 0.38340790434317157 
 Validation loss improved from 0.38679518392591766 to 0.38340790434317157 



 20%|█▉        | 74/374 [01:54<07:42,  1.54s/it]


Iteration 74/374 of epoch 3 complete. Training Loss : 0.1405274291094896 


 40%|███▉      | 148/374 [03:48<05:49,  1.55s/it]


Iteration 148/374 of epoch 3 complete. Training Loss : 0.14312599478541194 


 59%|█████▉    | 222/374 [05:42<03:53,  1.54s/it]


Iteration 222/374 of epoch 3 complete. Training Loss : 0.10852103081305285 


 79%|███████▉  | 296/374 [07:36<02:00,  1.55s/it]


Iteration 296/374 of epoch 3 complete. Training Loss : 0.11573388278987762 


 99%|█████████▉| 370/374 [09:31<00:06,  1.54s/it]


Iteration 370/374 of epoch 3 complete. Training Loss : 0.12828204841227145 


100%|██████████| 374/374 [09:37<00:00,  1.54s/it]
100%|██████████| 66/66 [00:36<00:00,  1.80it/s]



 Epoch 3 completed 
 Validation Loss: 0.3774237741123546 
 Validation loss improved from 0.38340790434317157 to 0.3774237741123546 



 20%|█▉        | 74/374 [01:54<07:43,  1.54s/it]


Iteration 74/374 of epoch 4 complete. Training Loss : 0.11012472015981739 


 40%|███▉      | 148/374 [03:48<05:48,  1.54s/it]


Iteration 148/374 of epoch 4 complete. Training Loss : 0.10727565030793886 


 59%|█████▉    | 222/374 [05:42<03:53,  1.53s/it]


Iteration 222/374 of epoch 4 complete. Training Loss : 0.09133592344870842 


 79%|███████▉  | 296/374 [07:35<01:59,  1.53s/it]


Iteration 296/374 of epoch 4 complete. Training Loss : 0.08917517125656879 


 99%|█████████▉| 370/374 [09:28<00:06,  1.53s/it]


Iteration 370/374 of epoch 4 complete. Training Loss : 0.09269116393516998 


100%|██████████| 374/374 [09:35<00:00,  1.54s/it]
100%|██████████| 66/66 [00:36<00:00,  1.81it/s]



 Epoch 4 completed 
 Validation Loss: 0.37586927233320294 
 Validation loss improved from 0.3774237741123546 to 0.37586927233320294 

Path on which model is saved models/albert-base-v2_lr_2e-05_ep_4.pt


In [21]:
mem_usage_print()
# !kill -9 -1

!mkdir predictions
print("predictions folder created")

RAM Free: 10.5 GB
Proc size: 4.2 GB
GPU RAM Free: 11441 MB | Used: 0 MB | Util   0% | Total 11441MB
predictions folder created


In [22]:
# converts a tensor of logits to an array of probabilities by using sigmoid function
def get_probs_from_logits(logits): 
    probs = torch.sigmoid(logits.unsqueeze(-1))
    return probs.detach().cpu().numpy()

# predict the probabilities on test dataset , print the results to a file
def predict_test(net, device, dataloader, with_labels= False, res_output_file="predictions/output.txt"):
    net.eval()
    w = open(res_output_file, 'w')
    probability_all = []

    with torch.no_grad():
        if with_labels:
            for sequence, attn_masks, token_type_ids, _ in tqdm(dataloader):
                sequence, attn_masks, token_type_ids = sequence.to(device), attn_masks.to(device), token_type_ids.to(device)
                logits = net(sequence, attn_masks, token_type_ids)
                probs = get_probs_from_logits(logits.squeeze(-1)).squeeze(-1)
                probability_all += probs.tolist()
        else:
            for sequence, attn_masks, token_type_ids in tqdm(dataloader):
                sequence, attn_masks, token_type_ids = sequence.to(device), attn_masks.to(device), token_type_ids.to(device)
                logits = net(sequence, attn_masks, token_type_ids)
                probs = get_probs_from_logits(logits.squeeze(-1)).squeeze(-1)
                probability_all += probs.tolist()

    w.writelines(str(prob)+'\n' for prob in probability_all)
    w.close()

In [23]:
# test dataset
df_test = pd.DataFrame(test, columns=['precondition', 'statement'])
print(df_test)

                                           precondition  \
0     The people want to have a romantic and pleasan...   
1             The contract is to buy products from you.   
2                              Train station is closed.   
3              There is no water for driving the boats.   
4                                     The poet is busy.   
...                                                 ...   
4845                                   Cats like night.   
4846                 The screwdriver is the right size.   
4847                             A person lacks morals.   
4848                                     You are dirty.   
4849                              The rook is captured.   

                                              statement  
0        People typically does desire to smell violets.  
1     Getting contract typically cause to make money...  
2     Line can typically be used to move train along...  
3         People typically desires drive boats for fun.  
4

In [24]:
# model_path = '/content/models/albert-base-v2_lr_2e-05_val_loss_0.38695_ep_2.pt'  
# model_path = '/content/models/albert-base-v2_lr_2e-05_val_loss_0.37587_ep_4.pt'  
# model_path = '/content/models/albert-base-v2_lr_2e-05_val_loss_0.38011_ep_2.pt'  
# model_path = '/content/models/albert-base-v2_lr_2e-05_val_loss_0.38011_ep_2.pt'

model_path = '/content/models/albert-base-v2_lr_2e-05_ep_4.pt'

# test dataset prediction values
path_to_output_file = 'predictions/output.txt'

print(" Reading test dataset ")
test_set = CustomizedDataset(df_test, maxlen, False, bert_model) # with_labels = False as test dataset is without labels
test_loader = DataLoader(test_set, batch_size=bs, num_workers=5)

model = SentencePairClassificationModule(bert_model)
if torch.cuda.device_count() > 1:
    print(" use", torch.cuda.device_count(), "GPUs")
    model = nn.DataParallel(model)

print()
print(" Load weights of the model ")
model.load_state_dict(torch.load(model_path))
model.to(device)

print(" Predicting on test data ")
predict_test(net=model, device=device, dataloader=test_loader, res_output_file=path_to_output_file)
print()
print(" Predictions in file : {} ".format(path_to_output_file))

 Reading test dataset 


  cpuset_checked))



 Load weights of the model 
 Predicting on test data 


100%|██████████| 304/304 [02:47<00:00,  1.82it/s]


 Predictions in file : predictions/output.txt 





In [25]:
# predict the probabilities on validation dataset , print the result in a file
def predict_dev(net, device, dataloader, with_labels= False, result_file="predictions/output_dev.txt"):
    net.eval()
    w = open(result_file, 'w')
    probs_all = []

    with torch.no_grad():
        if with_labels:
            for seq, attn_masks, token_type_ids, _ in tqdm(dataloader):
                seq, attn_masks, token_type_ids = seq.to(device), attn_masks.to(device), token_type_ids.to(device)
                logits = net(seq, attn_masks, token_type_ids)
                probs = get_probs_from_logits(logits.squeeze(-1)).squeeze(-1)
                probs_all += probs.tolist()
        else:
            for seq, attn_masks, token_type_ids in tqdm(dataloader):
                seq, attn_masks, token_type_ids = seq.to(device), attn_masks.to(device), token_type_ids.to(device)
                logits = net(seq, attn_masks, token_type_ids)
                probs = get_probs_from_logits(logits.squeeze(-1)).squeeze(-1)
                probs_all += probs.tolist()

    w.writelines(str(prob)+'\n' for prob in probs_all)
    w.close()


path_to_output_file1 = 'predictions/output_dev.txt'

print("Reading validation dataset")
dev_set = CustomizedDataset(df_val, maxlen, False, bert_model)
dev_loader = DataLoader(dev_set, batch_size=bs, num_workers=5)

model = SentencePairClassificationModule(bert_model)
if torch.cuda.device_count() > 1:
    print(" use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)

print()
print(" Load weights of the model ")
model.load_state_dict(torch.load(model_path))
model.to(device)

print(" Predicting on validation dataset ")
predict_dev(net=model, device=device, dataloader=dev_loader, result_file=path_to_output_file1)
# predict_test(net=model, device=device, dataloader=dev_loader, res_file=path_to_output_file1)
print()
print("Predictions in file : {}".format(path_to_output_file1))

Reading validation dataset


  cpuset_checked))



 Load weights of the model 
 Predicting on validation dataset 


100%|██████████| 66/66 [00:36<00:00,  1.81it/s]


Predictions in file : predictions/output_dev.txt





In [26]:
# predict validation dataset labels, find accuracy on validation dataset
output_filepath_dev = 'predictions/output_dev.txt'

labels_dev = df_val['label']  # true labels
print('val:: ' , len(labels_dev))

probs_dev = pd.read_csv(output_filepath_dev, header=None)[0]
threshold = 0.5
# predicted labels with the given fixed threshold
preds_test_dev=(probs_dev>=threshold).astype('uint8') 
print(preds_test_dev)
print(len(preds_test_dev))
metric = load_metric("glue", "mrpc")

# to predict the validation dataset labels
res = []
for x in preds_test_dev:
  res.append(x)
print(res)

# write the predicted validation dataset results in upload_predictions_dev.txt for reference
with open('upload_predictions_dev.txt', 'w', encoding = 'utf-8') as fp:
    for x in res:
        fp.write(str(x) + '\n')

val::  1055
0       1
1       0
2       0
3       1
4       1
       ..
1050    0
1051    0
1052    1
1053    1
1054    0
Name: 0, Length: 1055, dtype: uint8
1055


https://raw.githubusercontent.com/huggingface/datasets/1.0.1/metrics/glue/glue.py not found in cache or force_download set to True, downloading to /root/.cache/huggingface/datasets/tmpetzsa6s8


Downloading:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

storing https://raw.githubusercontent.com/huggingface/datasets/1.0.1/metrics/glue/glue.py in cache at /root/.cache/huggingface/datasets/50d5843bbbbd80c47809bc76a5b03c0fd87d068509b0060103ae8182e4f5cfb9.ec871b06a00118091ec63eff0a641fddcb8d3c7cd52e855bbb2be28944df4b82.py
creating metadata file for /root/.cache/huggingface/datasets/50d5843bbbbd80c47809bc76a5b03c0fd87d068509b0060103ae8182e4f5cfb9.ec871b06a00118091ec63eff0a641fddcb8d3c7cd52e855bbb2be28944df4b82.py
Checking /root/.cache/huggingface/datasets/50d5843bbbbd80c47809bc76a5b03c0fd87d068509b0060103ae8182e4f5cfb9.ec871b06a00118091ec63eff0a641fddcb8d3c7cd52e855bbb2be28944df4b82.py for additional imports.
Creating main folder for metric https://raw.githubusercontent.com/huggingface/datasets/1.0.1/metrics/glue/glue.py at /root/.cache/huggingface/modules/datasets_modules/metrics/glue
Creating specific version folder for metric https://raw.githubusercontent.com/huggingface/datasets/1.0.1/metrics/glue/glue.py at /root/.cache/huggingface/mod

[1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 

In [27]:
# compute the accuracy and F1 scores on validation (dev) dataset
metric._compute(predictions=preds_test_dev, references=labels_dev)

{'accuracy': 0.8568720379146919, 'f1': 0.8623518687329079}

In [28]:
# predict test dataset labels
# path to the file with test file prediction probabilities
output_filepath_test = 'predictions/output.txt'  

probs_test = pd.read_csv(output_filepath_test, header=None)[0]  # prediction probabilities
threshold = 0.5
preds_test = (probs_test >= threshold).astype('uint8') # predicted labels using the above threshold
print(len(preds_test))

4850


In [29]:
# test dataset predicted labels
results = []
for row in preds_test:
  results.append(row)
print(results)

[1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 

### Output Prediction Result File

You will need to submit a prediction result file. It should have 4850 lines, every line should be either 0 or 1, which is your model's prediction on the respective test set instance.

In [30]:
assert (len(results) == 4850)

In [31]:
# make sure the results are not float numbers, but intergers 0 and 1
results = [int(x) for x in results]

In [32]:
# write your prediction results to 'upload_predictions.txt' and upload that later
print('Count of 0\'s :' , results.count(0))
print('Count of 1\'s :' , results.count(1))

with open('upload_predictions.txt', 'w', encoding = 'utf-8') as fp:
    for x in results:
        fp.write(str(x) + '\n')

Count of 0's : 2303
Count of 1's : 2547
