In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install transformers



In [3]:
import json
from transformers import BertForMultipleChoice, BertTokenizer
from torch.utils.data import DataLoader, Dataset
import torch
import os

In [4]:
validation_data = []

val_folder_path = '/content/drive/MyDrive/NLP/Project/data/validation_dataset/'

In [5]:
for item in os.listdir(val_folder_path):
    with open(os.path.join(val_folder_path, item), 'r') as f:
        data = json.load(f)
        validation_data.extend(data)

In [6]:
len(validation_data)

240

In [7]:
# Preprocess the data
class MultipleChoiceDataset(Dataset):
    def __init__(self, tokenizer, data):
        self.tokenizer = tokenizer
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        entry = self.data[idx]
        story = entry['Story']
        question = entry['Question']
        choices = entry['Answer Choices']
        labels = torch.tensor(choices.index(entry['Answer']))

        # Combine story, question, and each choice for tokenization
        input_ids = []
        attention_masks = []
        for choice in choices:
            inputs = self.tokenizer.encode_plus(
                text=story + " " + question,
                text_pair=choice,
                add_special_tokens=True,
                max_length=512,
                padding='max_length',
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt'
            )
            input_ids.append(inputs['input_ids'][0])
            attention_masks.append(inputs['attention_mask'][0])

        # Convert lists to tensors
        input_ids = torch.stack(input_ids, dim=0)
        attention_masks = torch.stack(attention_masks, dim=0)

        return input_ids, attention_masks, labels

In [8]:
# Load tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMultipleChoice.from_pretrained('bert-base-uncased')

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForMultipleChoice were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Create the dataset and dataloader
dataset = MultipleChoiceDataset(tokenizer, validation_data)
dataloader = DataLoader(dataset, batch_size=1)

In [10]:
model.eval()

ground_truth = []
predictions = []

with torch.no_grad():
    for i, batch in enumerate(dataloader):
        input_ids, attention_masks, labels = batch
        outputs = model(input_ids=input_ids, attention_mask=attention_masks)
        prediction = int(torch.argmax(outputs.logits, dim=1))
        predictions.append(prediction)
        ground_truth.append(int(labels))

In [11]:
from sklearn.metrics import classification_report

report = classification_report(ground_truth, predictions)
print(report)

              precision    recall  f1-score   support

           0       0.18      0.28      0.22        43
           1       0.33      0.24      0.28        86
           2       0.44      0.27      0.33        71
           3       0.18      0.30      0.23        40

    accuracy                           0.27       240
   macro avg       0.28      0.27      0.26       240
weighted avg       0.31      0.27      0.28       240

