In [2]:
import torch
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch.nn as nn
from torch.utils.data import Dataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions.

    Args:
        y_true (torch.Tensor): Truth labels for predictions.
        y_pred (torch.Tensor): Predictions to be compared to predictions.

    Returns:
        [torch.float]: Accuracy value between y_true and y_pred, e.g. 78.45
    """
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc

In [4]:
class DNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        return out

In [5]:
# Load the model
gpt_model = DNN(input_size=768*65,
               hidden_size=512,
               num_classes=1).to(device)

In [6]:
def pad_sequences(sequences):
    padded_sequences = []
    for seq in sequences:
        if seq.size(0) <= 65:
            padded_seq = torch.nn.functional.pad(seq, (0, 0, 0, 65 - seq.size(0)), mode='constant', value=0)
        else:
            padded_seq = seq[:65]
        padded_sequences.append(padded_seq)
    return torch.stack(padded_sequences)

In [7]:
class customDataset(Dataset):
    def __init__(self, path):
        df_data = pd.read_csv(path+"data.txt",
                              delimiter='\t',
                              names=['Target Word', 'PoS', 'Index', 'Context1', 'Context2'])
        df_label = pd.read_csv(path+'gold.txt',
                               delimiter='\t',
                               names=['label'])
        self.data = pd.concat([df_data, df_label], axis=1)
        self.data['Joined'] = self.data['Context1'] + " " + self.data['Context2']
        self.data['label'] = self.data['label'].map(lambda x: 0 if x == 'false' else 1)
        self.tokenizer = AutoTokenizer.from_pretrained('gpt2')
        self.model = AutoModel.from_pretrained('gpt2')
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        gpt_token = self.tokenizer(self.data['Joined'].iloc[idx], return_tensors='pt')
        gpt_outputs = self.model(gpt_token['input_ids'])[0]
        padded_outputs = pad_sequences(gpt_outputs)
        return (padded_outputs, torch.tensor(self.data['label'].iloc[idx], dtype=torch.float32))
    
train_path = r"C:\Users\joowa\OneDrive\Spring 2023\CS577\Project\WiC_dataset\train\train."
valid_path = r"C:\Users\joowa\OneDrive\Spring 2023\CS577\Project\WiC_dataset\dev\dev."
test_path = r"C:\Users\joowa\OneDrive\Spring 2023\CS577\Project\WiC_dataset\test\test."

In [8]:
test_data = customDataset(test_path)
test_dataloader = torch.utils.data.DataLoader(dataset=test_data,
                                             batch_size=32,
                                             shuffle=False)
gpt_model = DNN(input_size=768*65, hidden_size=512, num_classes=1).to(device)
gpt_model.load_state_dict(torch.load('gpt_model_wic_2.path'))

<All keys matched successfully>

In [9]:
pred_list, labels_list = [], []
gpt_model.eval()
with torch.inference_mode():
    for inputs, labels in test_dataloader:
        inputs = inputs.to(device)
        flattened_inputs = inputs.view(inputs.size(0), -1)
        test_logits = gpt_model(flattened_inputs)
        pred = torch.round(torch.sigmoid(test_logits))
        pred_list.append(pred.cpu().numpy())
        labels_list.append(labels.numpy())
        

In [13]:
pred_array = np.concatenate(pred_list)
labels_array = np.concatenate(labels_list)
pred_array = pred_array.ravel()

In [14]:
np.mean(pred_array == labels_array)

0.54

In [15]:
pred_array

array([0., 0., 0., ..., 1., 1., 1.], dtype=float32)

In [16]:
labels_array

array([1., 1., 1., ..., 1., 1., 1.], dtype=float32)