### Read training, dev and unlabeled test data

The following starter code (Python 3) shows how to read the labeled training and dev sentence pairs, and the unlabeled test sentence pairs, into lists.

In [1]:
import csv

In [2]:
# Row containers for the three data splits; each starts out as its own empty list.
train = []
dev = []
test = []

In [None]:
# Read the labeled training pairs.
# newline='' hands line-ending handling to the csv module, so quoted fields that
# contain embedded newlines are parsed correctly.
with open('./data/pnli_train.csv', encoding='utf-8', newline='') as fp:
    # Each row x: x[0] and x[1] are the sentence pair, x[2] is the label (0 or 1).
    # The list is rebuilt from scratch so re-running this cell does not duplicate rows.
    train = list(csv.reader(fp))
print (len(train))
print (train[:3])

In [None]:
# Read the labeled dev pairs.
# newline='' hands line-ending handling to the csv module, so quoted fields that
# contain embedded newlines are parsed correctly.
with open('./data/pnli_dev.csv', encoding='utf-8', newline='') as fp:
    # Each row x: x[0] and x[1] are the sentence pair, x[2] is the label (0 or 1).
    # The list is rebuilt from scratch so re-running this cell does not duplicate rows.
    dev = list(csv.reader(fp))
print (len(dev))
print (dev[:3])

In [None]:
# Read the unlabeled test pairs.
# newline='' hands line-ending handling to the csv module, so quoted fields that
# contain embedded newlines are parsed correctly.
with open('./data/pnli_test_unlabeled.csv', encoding='utf-8', newline='') as fp:
    # Each row x: x[0] and x[1] are the sentence pair (no label column is read here).
    # The list is rebuilt from scratch so re-running this cell does not duplicate rows.
    test = list(csv.reader(fp))
print (len(test))
print (test[:3])

### Main Code Body

You may choose to experiment with different methods using your program. However, you need to embed the training and inference processes here. We will use your predictions on the unlabeled test data for grading, and we will check this part to understand how your method produced those predictions.

In [6]:
%%capture
!pip install transformers
!pip install sentencepiece

In [7]:
import numpy as np
import pandas as pd
import random
import torch
import gc
from transformers import BertTokenizer
from transformers import AdamW
from sklearn.metrics import accuracy_score
import warnings

In [8]:
# Wrap the raw CSV rows in DataFrames. The labeled splits carry a third 'label' column,
# which is read as text and therefore cast to int; the test split has only the sentence pair.
labeled_columns = ['precondition', 'statement', 'label']

train_df = pd.DataFrame(train, columns=labeled_columns).astype({'label': int})
dev_df = pd.DataFrame(dev, columns=labeled_columns).astype({'label': int})
test_df = pd.DataFrame(test, columns=labeled_columns[:2])

In [None]:
# Preview the first rows of the training DataFrame.
train_df.head()

In [None]:
# Preview the first rows of the dev DataFrame.
dev_df.head()

In [None]:
# Preview the first rows of the (unlabeled) test DataFrame.
test_df.head()

In [12]:
# --- Experiment configuration -------------------------------------------------

# Name of the pretrained checkpoint used for both the tokenizer and the model.
model_type = 'roberta-base'

# Tokenization: every encoded pair is padded/truncated to this many tokens.
max_length = 128

# Optimizer settings (passed to AdamW as lr and eps).
learning_rate = 2e-5
epsilon = 1e-8

# Training schedule.
batch_size = 32
num_epochs = 4


In [None]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification

# Load the pretrained tokenizer for the checkpoint named by `model_type`.
print('Load tokenizer', model_type)
tokenizer = RobertaTokenizer.from_pretrained(model_type)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

print(device)

In [15]:
warnings.filterwarnings('ignore')
from torch.utils.data import Dataset

class CreateTrainValDataset(Dataset):
  """Dataset of labeled (statement, precondition) pairs, tokenized on access.

  Args:
    statement: indexable collection of statement strings.
    precondition: indexable collection of precondition strings, aligned with `statement`.
    label: indexable collection of labels, aligned with `statement`.
    tokenizer: object exposing `encode_plus`; used to encode each pair.
    max_length: length every encoded pair is padded/truncated to.
  """

  def __init__(self, statement, precondition, label, tokenizer, max_length):
    self.statement = statement
    self.precondition = precondition
    self.label = label
    self.tokenizer = tokenizer
    self.max_length = max_length

  def __len__(self):
    """Return the number of pairs (the length of `statement`)."""
    return len(self.statement)

  def __getitem__(self, index):
    """Encode the pair at `index`.

    Returns:
      dict with 'input_ids' and 'attention_mask' (both flattened) and
      'targets' (the label wrapped in a tensor).
    """
    statement_item = self.statement[index]
    precondition_item = self.precondition[index]
    label_item = self.label[index]

    # Use the tokenizer given to the constructor rather than a module-level global,
    # so the dataset works with whatever tokenizer the caller passed in.
    tokenized_input = self.tokenizer.encode_plus(statement_item, precondition_item,
                                                 add_special_tokens = True,
                                                 max_length = self.max_length,
                                                 padding = 'max_length',
                                                 truncation = True,
                                                 return_attention_mask = True,
                                                 return_tensors = 'pt')
    # flatten() is applied to each encoded field before it is returned to the DataLoader.
    return {
            'input_ids': tokenized_input['input_ids'].flatten(),
            'attention_mask': tokenized_input['attention_mask'].flatten(),
            'targets': torch.tensor(label_item)
        }
                                          
        

In [None]:
# Wrap the train and dev DataFrames in datasets that tokenize each pair on access.
train_data = CreateTrainValDataset(
    statement=train_df['statement'],
    precondition=train_df['precondition'],
    label=train_df['label'],
    tokenizer=tokenizer,
    max_length=max_length,
)
dev_data = CreateTrainValDataset(
    statement=dev_df['statement'],
    precondition=dev_df['precondition'],
    label=dev_df['label'],
    tokenizer=tokenizer,
    max_length=max_length,
)

# Both loaders use identical batching options (the dev loader is shuffled as well).
shared_loader_options = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 2}
train_dataloader = torch.utils.data.DataLoader(train_data, **shared_loader_options)
val_dataloader = torch.utils.data.DataLoader(dev_data, **shared_loader_options)

# Report the number of batches per epoch for each loader.
for loader in (train_dataloader, val_dataloader):
    print(len(loader))

In [None]:
# Sanity check: fetch and display the first encoded training example.
train_data[0]

In [None]:
warnings.filterwarnings('ignore')
from transformers import RobertaForSequenceClassification

# Seed every RNG *before* the model is built. Any weights that are newly initialised
# when loading the checkpoint are drawn from torch's RNG, so without this the starting
# weights depend on whatever ran earlier in the kernel (e.g. when this cell is re-run
# to reset the model). The value matches the notebook's seed cell.
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# Two-class sequence classifier on top of the pretrained checkpoint, moved to `device`.
model = RobertaForSequenceClassification.from_pretrained(model_type, num_labels = 2)
model.to(device)

# AdamW over all model parameters, using the configured learning rate and epsilon.
optimizer = AdamW(model.parameters(), lr = learning_rate, eps = epsilon)

In [26]:
# Fix the seed of every random number generator in use: Python's `random`, NumPy,
# torch on the CPU, and torch on all CUDA devices.
seed_val = 42

for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
  seed_fn(seed_val)

In [None]:
# Fine-tune for `num_epochs` epochs; after each epoch evaluate on the dev loader and
# print the summed loss and the accuracy.
for epoch in range(num_epochs):

  print('\nEpoch', (epoch + 1),"/",num_epochs)

  ## Training
  print('Training Metrics')

  model.train()
  # Make sure autograd is on, even if an earlier cell disabled it globally.
  torch.set_grad_enabled(True)

  # Loss summed over all training batches of this epoch (not averaged).
  total_train_loss = 0

  for sample in train_dataloader:
      sample_input_ids = sample["input_ids"].to(device)
      sample_atten_mask = sample["attention_mask"].to(device)
      sample_labels = sample["targets"].to(device)

      # The optimizer was built over model.parameters(), so clearing gradients through
      # it covers every parameter; a separate model.zero_grad() is redundant.
      optimizer.zero_grad()
      outputs = model(sample_input_ids,
                      attention_mask=sample_atten_mask,
                      labels=sample_labels)

      # With labels supplied, the first output is the loss.
      train_loss = outputs[0]
      total_train_loss += train_loss.item()
      train_loss.backward()
      # Clip the gradient norm to 1.0 before the parameter update.
      torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
      optimizer.step()
  print('Train loss:' ,total_train_loss)

  ## Validation

  print('\nValidation Metrics')
  model.eval()
  # Loss summed over all validation batches of this epoch (not averaged).
  total_val_loss = 0
  ground_truth = []
  # Per-batch outputs are collected in a list and concatenated once after the loop,
  # instead of re-copying the whole array with np.vstack on every batch.
  val_pred_batches = []

  with torch.no_grad():
    for sample in val_dataloader:
      sample_input_ids = sample["input_ids"].to(device)
      sample_atten_mask = sample["attention_mask"].to(device)
      sample_labels = sample["targets"].to(device)

      outputs = model(sample_input_ids,
                      attention_mask=sample_atten_mask,
                      labels=sample_labels)

      # outputs[0] is the loss, outputs[1] the per-class scores for the batch.
      val_loss, preds = outputs[0], outputs[1]
      total_val_loss += val_loss.item()

      val_pred_batches.append(preds.detach().cpu().numpy())
      ground_truth.extend(sample_labels.to('cpu').numpy())

  # Stack all batches row-wise, then take the highest-scoring class per row.
  val_preds = np.concatenate(val_pred_batches, axis=0)
  y_pred = np.argmax(val_preds, axis=1)
  val_accuracy = accuracy_score(ground_truth, y_pred)

  print('Validation loss:' ,total_val_loss)
  print('Validation accuracy: ', val_accuracy)

  gc.collect()

In [28]:
import warnings
warnings.filterwarnings('ignore')
from torch.utils.data import Dataset

class CreateTestDataset(Dataset):
  """Dataset of unlabeled (statement, precondition) pairs, tokenized on access.

  Args:
    statement: indexable collection of statement strings.
    precondition: indexable collection of precondition strings, aligned with `statement`.
    tokenizer: object exposing `encode_plus`; used to encode each pair.
    max_length: length every encoded pair is padded/truncated to.
  """

  def __init__(self, statement, precondition, tokenizer, max_length):
    self.statement = statement
    self.precondition = precondition
    self.tokenizer = tokenizer
    self.max_length = max_length

  def __len__(self):
    """Return the number of pairs (the length of `statement`)."""
    return len(self.statement)

  def __getitem__(self, index):
    """Encode the pair at `index`.

    Returns:
      dict with 'input_ids' and 'attention_mask', both flattened.
    """
    statement_item = self.statement[index]
    precondition_item = self.precondition[index]

    # Use the tokenizer given to the constructor rather than a module-level global,
    # so the dataset works with whatever tokenizer the caller passed in.
    tokenized_input = self.tokenizer.encode_plus(statement_item, precondition_item,
                                                 add_special_tokens = True,
                                                 max_length = self.max_length,
                                                 padding = 'max_length',
                                                 truncation = True,
                                                 return_attention_mask = True,
                                                 return_tensors = 'pt')
    # flatten() is applied to each encoded field before it is returned to the DataLoader.
    return {
            'input_ids': tokenized_input['input_ids'].flatten(),
            'attention_mask': tokenized_input['attention_mask'].flatten(),
        }
                                          
        

In [29]:
# Wrap the test DataFrame in a dataset that tokenizes each pair on access.
test_data = CreateTestDataset(
    statement=test_df['statement'],
    precondition=test_df['precondition'],
    tokenizer=tokenizer,
    max_length=max_length,
)

# shuffle=False keeps the batches in dataset order, so predictions line up with the test rows.
test_loader_options = {'batch_size': batch_size, 'shuffle': False, 'num_workers': 2}
test_dataloader = torch.utils.data.DataLoader(test_data, **test_loader_options)

In [None]:
# Sanity check: fetch and display the first encoded test example.
test_data[0]


In [31]:
##Testing Code

# Put the model in evaluation mode explicitly instead of relying on the state left
# behind by the training cell, and disable autograd so no graph is built during inference.
model.eval()

# Per-batch outputs are collected in a list and concatenated once after the loop,
# instead of re-copying the whole array with np.vstack on every batch.
test_pred_batches = []
with torch.no_grad():
  for sample in test_dataloader:
    sample_input_ids = sample['input_ids'].to(device)
    sample_atten_mask = sample['attention_mask'].to(device)

    outputs = model(sample_input_ids,
                    attention_mask=sample_atten_mask)
    # No labels are passed here, so the first output holds the per-class scores.
    test_pred_batches.append(outputs[0].detach().cpu().numpy())

# Stack all batches row-wise, then take the highest-scoring class per row.
pred_label = np.concatenate(test_pred_batches, axis=0)
final_preds = np.argmax(pred_label, axis=1)

In [None]:
# Display how many predictions were produced.
len(final_preds)

In [33]:
# 'results' holds the final predictions; it must end up as a list of 0/1 labels, one per test pair.
# NOTE(review): the template text says 2028 entries, but the length assertion further down
# in this notebook checks for 4850 — confirm the expected count.
results = final_preds

In [None]:
# Display how many entries 'results' contains.
len(results)

### Output Prediction Result File

You will need to submit a prediction result file. It should have 2028 lines, every line should be either 0 or 1, which is your model's prediction on the respective test set instance.

In [35]:
# Sanity check on the number of predictions held in 'results'.
# NOTE(review): the original template comment referred to 2028 test cases read from
# test_enc_unlabeled.tsv, whereas this notebook loads ./data/pnli_test_unlabeled.csv and
# asserts 4850 — confirm the expected count against the actual test file.
assert (len(results) == 4850)

In [36]:
# Cast every prediction to a built-in int so the output holds integers 0 and 1, not floats.
results = list(map(int, results))

In [37]:
# Save the predictions to 'upload_predictions.txt', one per line, for later upload.
with open('upload_predictions.txt', 'w', encoding = 'utf-8') as fp:
    fp.writelines(str(x) + '\n' for x in results)