In [0]:
## Mount Drive into Colab
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/a3/78/92cedda05552398352ed9784908b834ee32a0bd071a9b32de287327370b7/transformers-2.8.0-py3-none-any.whl (563kB)
[K     |▋                               | 10kB 20.8MB/s eta 0:00:01[K     |█▏                              | 20kB 4.6MB/s eta 0:00:01[K     |█▊                              | 30kB 5.8MB/s eta 0:00:01[K     |██▎                             | 40kB 5.8MB/s eta 0:00:01[K     |███                             | 51kB 5.1MB/s eta 0:00:01[K     |███▌                            | 61kB 5.7MB/s eta 0:00:01[K     |████                            | 71kB 6.0MB/s eta 0:00:01[K     |████▋                           | 81kB 6.4MB/s eta 0:00:01[K     |█████▎                          | 92kB 6.8MB/s eta 0:00:01[K     |█████▉                          | 102kB 6.9MB/s eta 0:00:01[K     |██████▍                         | 112kB 6.9MB/s eta 0:00:01[K     |███████                         | 122kB 6.9M

In [0]:
# !cd drive/My\ Drive/ && git clone https://github.com/wangcunxiang/SemEval2020-Task4-Commonsense-Validation-and-Explanation

In [0]:
import torch
from torch.utils.data import Dataset
from transformers import RobertaTokenizer
import numpy as np
import csv

def prepare_features(seq_1, tokenizer, max_seq_length = 64, 
             zero_pad = True, include_CLS_token = True, include_SEP_token = True):
    ## Tokenzine Input
    tokens_a = tokenizer.tokenize(seq_1)

    ## Truncate
    if len(tokens_a) > max_seq_length - 2:
        tokens_a = tokens_a[0:(max_seq_length - 2)]
    ## Initialize Tokens
    tokens = []
    if include_CLS_token:
        tokens.append(tokenizer.cls_token)
    ## Add Tokens and separators
    for token in tokens_a:
        tokens.append(token)

    if include_SEP_token:
        tokens.append(tokenizer.sep_token)

    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    ## Input Mask 
    input_mask = [1] * len(input_ids)
    ## Zero-pad sequence lenght
    if zero_pad:
        while len(input_ids) < max_seq_length:
            input_ids.append(0)
            input_mask.append(0)
    return torch.tensor(input_ids), torch.tensor(input_mask)

class CVEdatasetA(Dataset):

    def __init__(self, root='../SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training_Data/', maxlen=64):

        #Load data and labels
        print('Getting data from: ', root)
        fa = open(root+'subtaskB_answers.csv')
        fd = open(root+'subtaskB_data.csv')
        self.answers = []
        self.data = []
        c2l = {'A':0, 'B':1, 'C':2}
        
        ra = csv.reader(fa)
        for row in ra:
            if row[0] == 'id':
              continue
            id_n = int(row[0])
            label = int(c2l[row[1]])
            self.answers.append((id_n, label))

        rd = csv.reader(fd)    
        for row in rd:
            if row[0] == 'id':
              continue
            id_n = int(row[0])
            sen = str(row[1])
            exp1 = str(row[2])
            exp2 = str(row[3])
            exp3 = str(row[4])
            
            if sen[-1] == '.':
              sen = sen[:-1]
            sen = sen + ' is against commonsense because '
            if exp1 != '' and exp1[-1] == '.':
              exp1 = exp1[:-1]
            if exp2 != '' and exp2[-1] == '.':
              exp2 = exp2[:-1]
            if exp3 != '' and exp3[-1] == '.':
              exp3 = exp3[:-1]
          
            self.data.append((id_n, sen, exp1, exp2, exp3))

        #Initialize the BERT tokenizer
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case=True)

        self.maxlen = maxlen

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):

        #Selecting the sentence and label at the specified index in the data frame
        sent = self.data[index][1]
        exp1 = self.data[index][2]
        exp2 = self.data[index][3]
        exp3 = self.data[index][4]
        id_n = self.data[index][0]
        
        assert id_n == self.answers[index][0]
        answer = self.answers[index][1]
        
        #Construct target labels
        label = torch.eye(3)[answer]

        #Preprocessing the text to be suitable for BERT
        tok_id1_tensor, attn_mask1 = prepare_features(sent+exp1, self.tokenizer) #Tokenize the sentence
        tok_id2_tensor, attn_mask2 = prepare_features(sent+exp2, self.tokenizer) #Tokenize the sentence
        tok_id3_tensor, attn_mask3 = prepare_features(sent+exp3, self.tokenizer) #Tokenize the sentence
        
  
        # tok1 = ['[CLS]'] + tok1 + ['[SEP]']
        # tok2 = ['[CLS]'] + tok2 + ['[SEP]']
        # tok3 = ['[CLS]'] + tok3 + ['[SEP]']
        
        # if len(tok1) < self.maxlen:
        #     tok1 = tok1 + ['[PAD]' for _ in range(self.maxlen - len(tok1))] #Padding sentences
        # else:
        #     tok1 = tok1[:self.maxlen-1] + ['[SEP]'] #Prunning the list to be of specified max length
        
        # if len(tok2) < self.maxlen:
        #     tok2 = tok2 + ['[PAD]' for _ in range(self.maxlen - len(tok2))] #Padding sentences
        # else:
        #     tok2 = tok2[:self.maxlen-1] + ['[SEP]'] #Prunning the list to be of specified max length

        # if len(tok3) < self.maxlen:
        #     tok3 = tok3 + ['[PAD]' for _ in range(self.maxlen - len(tok3))] #Padding sentences
        # else:
        #     tok3 = tok3[:self.maxlen-1] + ['[SEP]'] #Prunning the list to be of specified max length

        # tok_id1 = self.tokenizer.convert_tokens_to_ids(tok1) #Obtaining the indices of the tokens in the BERT Vocabulary
        # tok_id1_tensor = torch.tensor(tok_id1) #Converting the list to a pytorch tensor
        # tok_id2 = self.tokenizer.convert_tokens_to_ids(tok2) #Obtaining the indices of the tokens in the BERT Vocabulary
        # tok_id2_tensor = torch.tensor(tok_id2) #Converting the list to a pytorch tensor
        # tok_id3 = self.tokenizer.convert_tokens_to_ids(tok3) #Obtaining the indices of the tokens in the BERT Vocabulary
        # tok_id3_tensor = torch.tensor(tok_id3) #Converting the list to a pytorch tensor
        
        # #Obtaining the attention mask i.e a tensor containing 1s for no padded tokens and 0s for padded ones
        # attn_mask1 = (tok_id1_tensor != 0).long()
        # attn_mask2 = (tok_id2_tensor != 0).long()
        # attn_mask3 = (tok_id3_tensor != 0).long()
        
        return tok_id1_tensor, tok_id2_tensor, tok_id3_tensor, attn_mask1, attn_mask2, attn_mask3, label, id_n
    
if __name__=='__main__':
    trainset = CVEdatasetA(root="drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training Data/")
    valset = CVEdatasetA(root="drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Dev Data/")
    a,b,c,d,e,f,g,h = trainset.__getitem__(1000)
    print(a.shape,b.shape,c.shape,d.shape,e.shape,f.shape,g.shape)

Getting data from:  drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training Data/


HBox(children=(IntProgress(value=0, description='Downloading', max=898823, style=ProgressStyle(description_wid…




HBox(children=(IntProgress(value=0, description='Downloading', max=456318, style=ProgressStyle(description_wid…


Getting data from:  drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Dev Data/
torch.Size([64]) torch.Size([64]) torch.Size([64]) torch.Size([64]) torch.Size([64]) torch.Size([64]) torch.Size([3])


In [0]:
import torch
import torch.nn as nn
from transformers import RobertaModel

class CVEclassifier(nn.Module):

    def __init__(self, freeze_bert = False, hidden_dropout_prob=0.15, num_labels=1):
        super(CVEclassifier, self).__init__()
        #Instantiating BERT model object 
        self.bert_layer = RobertaModel.from_pretrained('roberta-base')
        
        #Freeze bert layers
        if freeze_bert:
            for p in self.bert_layer.parameters():
                p.requires_grad = False
        
        #Dropout
        self.dropout = nn.Dropout(hidden_dropout_prob)
        #Classifier layer
        #We are predicting scores for a sentence
        self.classifier = nn.Linear(768, num_labels)
        
    def forward(self, tok_id1_tensor, tok_id2_tensor, tok_id3_tensor, attn_mask1, attn_mask2, attn_mask3):
        #Feeding the input to BERT model to obtain contextualized representations
        bert_hidden_states1, _ = self.bert_layer(tok_id1_tensor, attention_mask = attn_mask1)
        bert_hidden_states2, _ = self.bert_layer(tok_id2_tensor, attention_mask = attn_mask2)
        bert_hidden_states3, _ = self.bert_layer(tok_id3_tensor, attention_mask = attn_mask3)
        
        #Extract [CLS] embeddings
        sent_emb1 = bert_hidden_states1[:,0]
        sent_emb2 = bert_hidden_states2[:,0]
        sent_emb3 = bert_hidden_states3[:,0]

        #Calculate sentence scores/logit
        logit1 = self.classifier(self.dropout(sent_emb1))
        logit2 = self.classifier(self.dropout(sent_emb2))
        logit3 = self.classifier(self.dropout(sent_emb3))
        
        #Concatenate to get full logits
        logits = torch.cat((logit1, logit2, logit3), 1)
        
        return logits


In [0]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from transformers import AdamW, get_linear_schedule_with_warmup

import os
import time
import datetime
import numpy as np

# from model import CVEclassifier
# from CVEdataset import CVEdatasetA

# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))



def train(model, criterion, optimizer, scheduler, train_dataloader, val_dataloader, device, epochs=6):

    # Store the average loss after each epoch so we can plot them.
    loss_values = []

    # For each epoch...
    for epoch_i in range(0, epochs):

        # ========================================
        #               Training
        # ========================================

        # Perform one full pass over the training set.

        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print('Training...')

        # Measure how long the training epoch takes.
        t0 = time.time()

        # Reset the total loss for this epoch.
        total_loss = 0

        # Put the model into training mode
        # `dropout` and `batchnorm` layers behave differently during training vs. test
        model.train()

        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):

            # Progress update every 10 batches.
            if step % 20 == 0 and not step == 0:
                # Calculate elapsed time in minutes.
                elapsed = format_time(time.time() - t0)

                # Report progress.
                print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))


            # unpack the batch received from train_dataloader
            b_input_id1 = batch[0].to(device)
            b_input_id2 = batch[1].to(device)
            b_input_id3 = batch[2].to(device)
            b_input_mask1 = batch[3].to(device)
            b_input_mask2 = batch[4].to(device)
            b_input_mask3 = batch[5].to(device)
            b_labels = batch[6].to(device)

            # Clear any previously calculated gradients before performing a
            # backward pass. PyTorch doesn't do this automatically because 
            # accumulating the gradients is "convenient while training RNNs". 
            # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
            model.zero_grad()        

            # Perform a forward pass (evaluate the model on this training batch).
            # This will return the logits
            outputs = model(b_input_id1, b_input_id2, b_input_id3, b_input_mask1, b_input_mask2, b_input_mask3)

            # The call to `model` returns logits
            # criterion requires class labels and not the one-hot vectors
            loss = criterion(outputs, torch.argmax(b_labels, dim=1))

            # Accumulate the training loss over all of the batches to calculate average loss
            total_loss += loss.item()

            # Perform a backward pass to calculate the gradients.
            loss.backward()

            # Clip the norm of the gradients to 1.0.
            # This is to help prevent the "exploding gradients" problem.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters and take a step using the computed gradient.
            optimizer.step()

            # Update the learning rate.
            scheduler.step()

        # Calculate the average loss over the training data.
        avg_train_loss = total_loss / len(train_dataloader)            

        # Store the loss value for plotting the learning curve.
        loss_values.append(avg_train_loss)

        print("")
        print("  Average training loss: {0:.2f}".format(avg_train_loss))
        print("  Training epoch took: {:}".format(format_time(time.time() - t0)))

        # ========================================
        #               Validation
        # ========================================
        # After the completion of each training epoch, measure our performance on
        # our validation set.

        print("")
        print("Running Validation...")

        t0 = time.time()
        
        total_loss = 0

        # Put the model in evaluation mode--the dropout layers behave differently
        # during evaluation.
        model.eval()

        # Tracking variables 
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0

        # Evaluate data for one epoch
        for batch in val_dataloader:

            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)

            b_input_id1, b_input_id2, b_input_id3, b_input_mask1, b_input_mask2, b_input_mask3, b_labels, _ = batch
            
            # Telling the model not to compute or store gradients, saving memory and
            # speeding up validation
            with torch.no_grad():        
                # Forward pass, calculate logit predictions.
                logits = model(b_input_id1, b_input_id2, b_input_id3, b_input_mask1, b_input_mask2, b_input_mask3)
            
            # Calculate loss for the batch
            loss = criterion(logits, torch.argmax(b_labels, dim=1))

            # Accumulate the training loss over all of the batches to calculate average loss
            total_loss += loss.item()
            
            # Move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = torch.argmax(b_labels, dim=1)
            label_ids = label_ids.to('cpu').numpy()

            # Calculate the accuracy for this batch of test sentences.
            tmp_eval_accuracy = flat_accuracy(logits, label_ids)

            # Accumulate the total accuracy.
            eval_accuracy += tmp_eval_accuracy

            # Track the number of batches
            nb_eval_steps += 1

        # Calculate the average loss over the training data.
        eval_loss = total_loss / len(val_dataloader)
        
        # Report the final accuracy for this validation run.
        print("  Accuracy: {0:.4f}".format(eval_accuracy/nb_eval_steps))
        print("  Average validation loss: {0:.4f}".format(eval_loss))
        print("  Validation took: {:}".format(format_time(time.time() - t0)))

    chkpt_dict = {'model_state_dist':model.state_dict(),
                    'optimizer_state_dict':optimizer.state_dict(),
                     'scheduler_state_dict':scheduler.state_dict()}
    
    torch.save(chkpt_dict, 'drive/My Drive/sem_eval/weights/'+'roberta-e4-ep-'+str(epoch_i+1)+'-vacc-'+str((100*eval_accuracy/nb_eval_steps))+'.pt')
    
    print("")
    print("Training complete!")

if __name__=='__main__':
                                       
    if torch.cuda.is_available():    
        device = torch.device("cuda")
        print('There are %d GPU(s) available.' % torch.cuda.device_count())
        print('Using the GPU:', torch.cuda.get_device_name(0))
    else:
        print('No GPU available, using the CPU instead.')
        device = torch.device("cpu")
    
    trainset = CVEdatasetA(root='drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training Data/')
    valset = CVEdatasetA(root='drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Dev Data/')
    
    # Training logs
    os.makedirs('drive/My Drive/sem_eval/logs/', exist_ok=True)
    # Weight checkpoint
    os.makedirs('drive/My Drive/sem_eval/weights/', exist_ok=True)
    
    #Creating intsances of training and validation dataloaders
    train_dataloader = DataLoader(trainset, batch_size = 32, num_workers = 5, shuffle=True)
    val_dataloader = DataLoader(valset, batch_size = 128, num_workers = 5, shuffle=False)
    
    model = CVEclassifier()
    model.to(device)
    
    criterion = nn.CrossEntropyLoss()
    
    # Note: AdamW is a class from the huggingface library (as opposed to pytorch)
    # args.learning_rate - default is 5e-5, our notebook had 2e-5
    # args.adam_epsilon  - default is 1e-8
    optimizer = AdamW(model.parameters(), lr = 2e-5, eps = 1e-8)
    
    epochs = 4
    # Total number of training steps is number of batches * number of epochs.
    total_steps = len(train_dataloader) * epochs
    warmup_steps = int(0.1*total_steps) # Can change warmup proportion, default value in run_glue.py
    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = warmup_steps, num_training_steps = total_steps)
                                       
    train(model, criterion, optimizer, scheduler, train_dataloader, val_dataloader, device, epochs)

There are 1 GPU(s) available.
Using the GPU: Tesla K80
Getting data from:  drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training Data/
Getting data from:  drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Dev Data/


HBox(children=(IntProgress(value=0, description='Downloading', max=481, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='Downloading', max=501200538, style=ProgressStyle(description_…



Training...
  Batch    20  of    313.    Elapsed: 0:00:36.
  Batch    40  of    313.    Elapsed: 0:01:11.
  Batch    60  of    313.    Elapsed: 0:01:48.
  Batch    80  of    313.    Elapsed: 0:02:25.
  Batch   100  of    313.    Elapsed: 0:03:02.
  Batch   120  of    313.    Elapsed: 0:03:39.
  Batch   140  of    313.    Elapsed: 0:04:16.
  Batch   160  of    313.    Elapsed: 0:04:53.
  Batch   180  of    313.    Elapsed: 0:05:30.
  Batch   200  of    313.    Elapsed: 0:06:06.
  Batch   220  of    313.    Elapsed: 0:06:43.
  Batch   240  of    313.    Elapsed: 0:07:20.
  Batch   260  of    313.    Elapsed: 0:07:57.
  Batch   280  of    313.    Elapsed: 0:08:34.
  Batch   300  of    313.    Elapsed: 0:09:11.

  Average training loss: 0.61
  Training epoch took: 0:09:34

Running Validation...
  Accuracy: 0.8655
  Average validation loss: 0.3768
  Validation took: 0:00:20

Training...
  Batch    20  of    313.    Elapsed: 0:00:37.
  Batch    40  of    313.    Elapsed: 0:01:14.
  Batch  

In [0]:
# ###################### SUBMISSION FILE ############################


# import torch
# import torch.nn as nn
# from torch.utils.data import DataLoader

# from transformers import AdamW, get_linear_schedule_with_warmup

# import os
# import time
# import datetime
# import numpy as np

# # from model import CVEclassifier
# # from CVEdataset import CVEdatasetA

# def format_time(elapsed):
#     '''
#     Takes a time in seconds and returns a string hh:mm:ss
#     '''
#     # Round to the nearest second.
#     elapsed_rounded = int(round((elapsed)))
    
#     # Format as hh:mm:ss
#     return str(datetime.timedelta(seconds=elapsed_rounded))

# def test():
#   if torch.cuda.is_available():    
#       device = torch.device("cuda")
#       print('There are %d GPU(s) available.' % torch.cuda.device_count())
#       print('Using the GPU:', torch.cuda.get_device_name(0))
#   else:
#       print('No GPU available, using the CPU instead.')
#       device = torch.device("cpu")
          
#   valset = CVEdatasetA(root='drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Test Data/')
#   val_dataloader = DataLoader(valset, batch_size = 1, num_workers = 1, shuffle=False)
#   l2c = {'0':'A', '1':'B', '2':'C'}

#   chkpt = torch.load('drive/My Drive/sem_eval/weights/roberta-e3-ep-3-vacc-87.94187035891089.pt')
#   model =  CVEclassifier()
#   model.load_state_dict(chkpt['model_state_dist'])
#   model.to(device)

#   criterion = nn.CrossEntropyLoss()

#   print("Running Validation...")

#   t0 = time.time()

#   total_loss = 0

#   # Put the model in evaluation mode--the dropout layers behave differently
#   # during evaluation.
#   model.eval()

#   # Tracking variables 
#   eval_loss, eval_accuracy = 0, 0
#   nb_eval_steps, nb_eval_examples = 0, 0
#   true_labels = []
#   pred_labels = []
#   cls_losses = []

#   # Evaluate data for one epoch
#   for batch in val_dataloader:

#       # Add batch to GPU
#       batch = tuple(t.to(device) for t in batch)

#       b_input_id1, b_input_id2, b_input_id3, b_input_mask1, b_input_mask2, b_input_mask3, b_labels, id_n = batch

#       # Telling the model not to compute or store gradients, saving memory and
#       # speeding up validation
#       with torch.no_grad():        
#           # Forward pass, calculate logit predictions.
#           logits = model(b_input_id1, b_input_id2, b_input_id3, b_input_mask1, b_input_mask2, b_input_mask3)


#       pred_label = torch.argmax(logits, dim=1)
#       pred_label = pred_label.to('cpu').numpy()
#   #     logits = logits.detach().cpu().numpy()
#       true_label = torch.argmax(b_labels, dim=1)
#       true_label = true_label.to('cpu').numpy()
#       id_n = id_n.to('cpu').numpy()
      
#       pred_labels.append((id_n, pred_label))
#       true_labels.append((id_n, true_label))

#       with open('drive/My Drive/sem_eval/logs/roberta-test.csv', 'a') as f:
#         f.write(str(id_n[0])+','+l2c[str(pred_label[0])]+'\n')
        
#   print("  Validation took: {:}".format(format_time(time.time() - t0)))

# if __name__=='__main__':
#   test()

There are 1 GPU(s) available.
Using the GPU: Tesla P100-PCIE-16GB
Getting data from:  drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Test Data/
Running Validation...
  Validation took: 0:00:35


In [0]:
import torch
torch.cuda.empty_cache()

In [0]:
############################ WRITE LOGITS FOR ENSEMBLING######################
# import torch
# import torch.nn as nn
# from torch.utils.data import DataLoader

# from transformers import AdamW, get_linear_schedule_with_warmup

# import os
# import time
# import datetime
# import numpy as np

# # from model import CVEclassifier
# # from CVEdataset import CVEdatasetA

# def format_time(elapsed):
#     '''
#     Takes a time in seconds and returns a string hh:mm:ss
#     '''
#     # Round to the nearest second.
#     elapsed_rounded = int(round((elapsed)))
    
#     # Format as hh:mm:ss
#     return str(datetime.timedelta(seconds=elapsed_rounded))

# def test():
#   if torch.cuda.is_available():    
#       device = torch.device("cuda")
#       print('There are %d GPU(s) available.' % torch.cuda.device_count())
#       print('Using the GPU:', torch.cuda.get_device_name(0))
#   else:
#       print('No GPU available, using the CPU instead.')
#       device = torch.device("cpu")
          
#   valset = CVEdatasetA(root='drive/My Drive/SemEval2020-Task4-Commonsense-Validation-and-Explanation/Training Data/')
#   val_dataloader = DataLoader(valset, batch_size = 1, num_workers = 1, shuffle=False)

#   chkpt = torch.load('drive/My Drive/sem_eval/weights/roberta-e3-ep-3-vacc-87.94187035891089.pt')
#   model =  CVEclassifier()
#   model.load_state_dict(chkpt['model_state_dist'])
#   model.to(device)

#   criterion = nn.CrossEntropyLoss()
#   prob_layer = nn.Softmax(dim=1)
#   with open('drive/My Drive/sem_eval/ensemble/Training Data/roberta-e3-ep-3-vacc-87.94187035891089.csv', 'w') as f:
#       f.write('id'+','+'logit1'+','+'logit2'+','+'logit3'+'\n')


#   print("Running Validation...")

#   t0 = time.time()

#   total_loss = 0

#   # Put the model in evaluation mode--the dropout layers behave differently
#   # during evaluation.
#   model.eval()

#   # Tracking variables 
#   eval_loss, eval_accuracy = 0, 0
#   nb_eval_steps, nb_eval_examples = 0, 0
#   true_labels = []
#   pred_labels = []
#   cls_losses = []

#   # Evaluate data for one epoch
#   for batch in val_dataloader:

#       # Add batch to GPU
#       batch = tuple(t.to(device) for t in batch)

#       b_input_id1, b_input_id2, b_input_id3, b_input_mask1, b_input_mask2, b_input_mask3, b_labels, id_n = batch

#       # Telling the model not to compute or store gradients, saving memory and
#       # speeding up validation
#       with torch.no_grad():        
#           # Forward pass, calculate logit predictions.
#           logits = model(b_input_id1, b_input_id2, b_input_id3, b_input_mask1, b_input_mask2, b_input_mask3)

#       # Calculate loss for the batch
#       loss = criterion(logits, torch.argmax(b_labels, dim=1))

#       # Accumulate the training loss over all of the batches to calculate average loss
#       total_loss += loss.item()
#       cls_losses.append((id_n, loss.item()))

#       # Move logits and labels to CPU
#       pred_label = torch.argmax(logits, dim=1)
#       pred_label = pred_label.to('cpu').numpy()
#   #     logits = logits.detach().cpu().numpy()
#       true_label = torch.argmax(b_labels, dim=1)
#       true_label = true_label.to('cpu').numpy()
#       id_n = id_n.to('cpu').numpy()
      
#       pred_labels.append((id_n, pred_label))
#       true_labels.append((id_n, true_label))

#       probs = prob_layer(logits)
#       probs = probs.to('cpu').numpy()
#       nb_eval_steps+=1
#       with open('drive/My Drive/sem_eval/ensemble/Training Data/roberta-e3-ep-3-vacc-87.94187035891089.csv', 'a') as f:
#         f.write('{},{},{},{}\n'.format(id_n[0], probs[0][0], probs[0][1], probs[0][2]))
#         print('{},{},{},{}'.format(id_n[0], probs[0][0], probs[0][1], probs[0][2]))

#   # Calculate the average loss over the training data.
#   eval_loss = total_loss / len(val_dataloader)


#   # Report the final accuracy for this validation run.
#   print("  Accuracy: {0:.2f}".format(eval_accuracy/nb_eval_steps))
#   print("  Average validation loss: {0:.2f}".format(eval_loss))
#   print("  Validation took: {:}".format(format_time(time.time() - t0)))

# if __name__=='__main__':
#   test()