In [29]:
import torch
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch.nn as nn
from torch.utils.data import Dataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [30]:
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions.

    Args:
        y_true (torch.Tensor): Truth labels for predictions.
        y_pred (torch.Tensor): Predictions to be compared to predictions.

    Returns:
        [torch.float]: Accuracy value between y_true and y_pred, e.g. 78.45
    """
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc

In [31]:
class DNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        return out

In [32]:
class customDataset(Dataset):
    def __init__(self, path):
        df_data = pd.read_csv(path+"data.txt",
                              delimiter='\t',
                              names=['Target Word', 'PoS', 'Index', 'Context1', 'Context2'])
        df_label = pd.read_csv(path+'gold.txt',
                               delimiter='\t',
                               names=['label'])
        self.data = pd.concat([df_data, df_label], axis=1)
        self.data['Joined'] = self.data['Context1'] + " " + self.data['Context2']
        self.data['label'] = self.data['label'].map(lambda x: 0 if x == 'false' else 1)
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
        self.model = AutoModel.from_pretrained('bert-base-uncased').to(device)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        bert_token = self.tokenizer(self.data['Joined'].iloc[idx],
                                    padding='max_length',
                                    return_tensors='pt',
                                    max_length=68).to(device)
        bert_outputs = self.model(bert_token['input_ids'])[0]
        return (bert_outputs, torch.tensor(self.data['label'].iloc[idx], dtype=torch.float32))
    
train_path = r"C:\Users\joowa\OneDrive\Spring 2023\CS577\Project\WiC_dataset\train\train."
valid_path = r"C:\Users\joowa\OneDrive\Spring 2023\CS577\Project\WiC_dataset\dev\dev."
test_path = r"C:\Users\joowa\OneDrive\Spring 2023\CS577\Project\WiC_dataset\test\test."

In [34]:
test_data = customDataset(test_path)
test_dataloader = torch.utils.data.DataLoader(dataset=test_data,
                                             batch_size=32,
                                             shuffle=False)
bert_model = DNN(input_size=768*68, hidden_size=512, num_classes=1).to(device)
bert_model.load_state_dict(torch.load('bert_model_wic_1.pth'))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [35]:
pred_list, labels_list = [], []
bert_model.eval()
with torch.inference_mode():
    for inputs, labels in test_dataloader:
        inputs = inputs.to(device)
        flattened_inputs = inputs.view(inputs.size(0), -1)
        test_logits = bert_model(flattened_inputs)
        pred = torch.round(torch.sigmoid(test_logits))
        pred_list.append(pred.cpu().numpy())
        labels_list.append(labels.numpy())
        

In [36]:
pred_array = np.concatenate(pred_list)
labels_array = np.concatenate(labels_list)
pred_array = pred_array.ravel()

In [37]:
np.mean(pred_array == labels_array)

0.8814285714285715

array([1., 1., 1., ..., 1., 0., 1.], dtype=float32)

array([1., 1., 1., ..., 1., 1., 1.], dtype=float32)