In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Model, T5EncoderModel
import pandas as pd
import torch
from torch import nn
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split, Dataset

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
model = T5EncoderModel.from_pretrained('google-t5/t5-small').to(device)
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

In [4]:
data = np.load('/kaggle/input/nlpproject/SP-train.npy', allow_pickle = True)

In [5]:
data = pd.DataFrame(data)

In [6]:
df_train = data.iloc[:405,:]
df_dev = data.iloc[405:456,:]
df_test = data.iloc[456:507,:]
df_dev.reset_index(drop=True, inplace=True)
df_test.reset_index(drop=True, inplace=True)
df_train.reset_index(drop=True, inplace=True)

In [7]:
class TaskDataset(Dataset):
    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        entry = self.data[0][idx]
        question = entry['question']
        choices = entry['choice_list']
        label = entry['label']
        choice_order = entry['choice_order']
        correct_index = choice_order.index(label)

        tokenized_choices = [self.tokenizer.encode_plus(  question + "[SEP]" + choice, add_special_tokens = True, max_length = 128, truncation=True, padding='max_length', return_tensors = 'pt') for choice in choices]

        input_ids = torch.cat([choice['input_ids'].unsqueeze(0) for choice in tokenized_choices],dim=0)
        attention_mask = torch.cat([choice['attention_mask'].unsqueeze(0) for choice in tokenized_choices],dim=0)
        labels = torch.zeros(len(choices), dtype=torch.long)
        labels[label] = 1

        input_ids_m = []
        attention_mask_m = []
        labels_m = []

        for i in choice_order:
            input_ids_m.append(input_ids[i])
            attention_mask_m.append(attention_mask[i])
            labels_m.append(labels[i])
            

        return input_ids_m, attention_mask_m, torch.tensor(labels_m)

In [8]:
class TaskModel(nn.Module):
    def __init__(self, model):
        super(TaskModel, self).__init__()
        self.t5 = model
        # self.classifier = nn.Linear(self.t5.config.d_model, 1)
        self.classifier = nn.Sequential(
            nn.Linear(self.t5.config.d_model, 768), 
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(768, 1)
        )
        

    def forward(self, input_ids, attention_mask):
        output = self.t5(input_ids = input_ids, attention_mask = attention_mask, return_dict = True)
        last_hidden_states = output.last_hidden_state[:, 0, :]
        logits = self.classifier(last_hidden_states)
        return logits
        

In [9]:
train_dataset = TaskDataset(df_train, tokenizer)
val_dataset = TaskDataset(df_dev, tokenizer)
test_dataset = TaskDataset(df_test, tokenizer)

In [10]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle = False)

In [11]:
Modelarch = TaskModel(model)

In [12]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

In [13]:
# Training loop
# model.train()
Modelarch.to(device)
for epoch in range(10):  # Number of epochs
    total_loss = 0
    Modelarch.train()
    
    for input_ids, attention_mask, labels in train_loader:
        optimizer.zero_grad()
        logits = []
        for i,j in zip(input_ids, attention_mask):
            #print(i.squeeze(0).shape,j.squeeze(0).shape)
            logits.append(Modelarch(i.squeeze(0).to(device),j.squeeze(0).to(device)))
        logits = torch.stack(logits).squeeze()
        labels = labels.float().squeeze(0).to(device)
        # print(logits)
        # print(labels)
        # logits = Modelarch(input_ids, attention_mask)
        loss = criterion(logits, labels.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        
    loss = total_loss/len(train_loader)
    print(f'Epoch {epoch+1}, Loss: {loss}')
    
    Modelarch.eval()
    all_predictions = []
    all_labels = []
    total_loss = 0
    
    for input_ids, attention_mask, labels in validation_loader:
            optimizer.zero_grad()
            logits = []
            for i,j in zip(input_ids, attention_mask):
                #print(i.squeeze(0).shape,j.squeeze(0).shape)
                logits.append(Modelarch(i.squeeze(0).to(device),j.squeeze(0).to(device)))
            logits = torch.stack(logits).squeeze()
            labels = labels.float().squeeze(0).to(device)
            # print(logits)
            probabilities = F.softmax(logits, dim=0)
            # Get the predicted class index
            predictions = torch.argmax(probabilities, dim=0)
            predictions = F.one_hot(predictions, num_classes=4) 
            # print(predictions)
            # print(labels)
            # logits = Modelarch(input_ids, attention_mask)
            loss = criterion(logits, labels.float())
            total_loss += loss.item()
            all_predictions.extend(predictions.tolist())
            all_labels.extend(labels.squeeze(0).tolist())

    # Compute accuracy and F1 score
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions, average='macro')
    val_loss = total_loss / len(validation_loader)
    
    print(f"Accuracy: {accuracy}, F1 Score: {f1}, Loss : {val_loss}")

Epoch 1, Loss: 0.6394500051015689
Accuracy: 0.6862745098039216, F1 Score: 0.5816993464052287, Loss : 0.5937266560161815
Epoch 2, Loss: 0.5857192623762437
Accuracy: 0.7058823529411765, F1 Score: 0.6078431372549019, Loss : 0.5691428324755501
Epoch 3, Loss: 0.5710420386290844
Accuracy: 0.7254901960784313, F1 Score: 0.6339869281045752, Loss : 0.5629155518961888
Epoch 4, Loss: 0.5655007719993591
Accuracy: 0.7843137254901961, F1 Score: 0.7124183006535947, Loss : 0.5589232117521996
Epoch 5, Loss: 0.5555120978826358
Accuracy: 0.7843137254901961, F1 Score: 0.7124183006535947, Loss : 0.5389237935636558
Epoch 6, Loss: 0.5246656057275372
Accuracy: 0.8333333333333334, F1 Score: 0.7777777777777777, Loss : 0.5214376204154071
Epoch 7, Loss: 0.4786718423719759
Accuracy: 0.8137254901960784, F1 Score: 0.7516339869281046, Loss : 0.5025270896799424
Epoch 8, Loss: 0.43843165937765144
Accuracy: 0.8333333333333334, F1 Score: 0.7777777777777777, Loss : 0.49532198730637045
Epoch 9, Loss: 0.406305718642694
Accur

In [14]:
model.eval()
all_predictions = []
all_labels = []
total_loss = 0
for input_ids, attention_mask, labels in test_loader:
        optimizer.zero_grad()
        logits = []
        for i,j in zip(input_ids, attention_mask):
            #print(i.squeeze(0).shape,j.squeeze(0).shape)
            logits.append(Modelarch(i.squeeze(1).to(device),j.squeeze(1).to(device)))
        logits = torch.stack(logits, dim=0).squeeze()
        labels = labels.float().squeeze(0).to(device)
        # print(logits)
        probabilities = F.softmax(logits, dim=0)
        # Get the predicted class index
        predictions = torch.argmax(probabilities, dim=0)
        predictions = F.one_hot(predictions, num_classes=4)
    
        # for i in range(len(labels)):
        #     if labels[i] == 1:
        #         label = i
        # for i in range(len(predictions)):
        #     if predictions[i] == 1:
        #         prediction = i
        # print(label, prediction)
        # print(input_ids[label])
        # print(type(label))
        # print(type(input_ids[label]))
    
        # print("correct: "+tokenizer.decode(input_ids[label].squeeze(), skip_special_tokens = True))
        # print("predicted: "+tokenizer.decode(input_ids[prediction].squeeze(), skip_special_tokens = True))
    
        # print(predictions)
        # print(labels)
        # logits = Modelarch(input_ids, attention_mask)
        loss = criterion(logits, labels.float())
        all_predictions.extend(predictions.tolist())
        all_labels.extend(labels.squeeze(0).tolist())
        total_loss += loss.item()
    
# Compute accuracy and F1 score
accuracy = accuracy_score(all_labels, all_predictions)
f1 = f1_score(all_labels, all_predictions, average='macro')

print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")


Accuracy: 0.7450980392156863
F1 Score: 0.6601307189542484


In [15]:
torch.save(model, './model.pt')