In [None]:
# Mount Google Drive so the dataset and checkpoint paths under /content/drive/MyDrive
# used by the later cells are readable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np

In [None]:
from sklearn.metrics import classification_report

In [None]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.optim import Adam
import torch

In [None]:
from tqdm import tqdm

# Seed

In [None]:
# Selects which trained checkpoint to evaluate: the value is interpolated into the
# checkpoint path when the model weights are loaded. In the cells visible here it is
# not used to seed any random number generator.
seed = 43

# Data

In [None]:
# Load the test DataFrames (HDF5 files, key 'df') from the embedded_data folder on Drive.
# test_df is the frame evaluated below; test_cases_data is loaded but not referenced by
# any later cell shown in this notebook.
test_cases_data = pd.read_hdf('/content/drive/MyDrive/master_thesis/dataset_data/embedded_data/test_eval_cases_emb.h5', 'df')
test_df = pd.read_hdf('/content/drive/MyDrive/master_thesis/dataset_data/embedded_data/test_eval_emb.h5', 'df')

In [None]:
class ExperimentDataset(Dataset):
    """Map-style dataset over a DataFrame of embedding pairs and labels.

    Each row must provide the columns 'abstract_embedding',
    'ChallengeDescription_embedding' and 'label'.
    """

    def __init__(self, df):
        """Keep a reference to ``df`` (no copy is made)."""
        super().__init__()
        self.df = df

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(features, label)`` tensors for the row at position ``index``.

        ``features`` is the abstract embedding followed by the challenge
        description embedding, joined into one vector.
        """
        row = self.df.iloc[index]
        embedding_columns = ('abstract_embedding', 'ChallengeDescription_embedding')
        features = np.concatenate([row[column] for column in embedding_columns])
        target = row['label']
        return torch.tensor(features), torch.tensor(target)

# Model

In [None]:
from torch import nn
from torch import cat

class Classifier(nn.Module):
    """Three stacked linear layers with dropout, producing one score per input.

    There is no non-linear activation between the layers, and no sigmoid is
    applied here; the caller converts the raw score to a probability.
    """

    def __init__(self, embedding_size=1536):
        """Build the layers.

        Args:
            embedding_size: Length of the input feature vector.
                NOTE(review): the default presumably matches two concatenated
                768-d embeddings; confirm against the embedding model.
        """
        super().__init__()
        # One dropout module (p=0.2) is shared by both hidden layers.
        self.dropout = nn.Dropout(0.2)
        # Attribute names are the state_dict keys of saved checkpoints; keep them.
        self.h_1 = nn.Linear(embedding_size, 512)
        self.h_2 = nn.Linear(512, 128)
        self.o = nn.Linear(128, 1)

    def forward(self, input):
        """Return the raw score with last dimension 1 for ``input``."""
        hidden = input
        for layer in (self.h_1, self.h_2):
            hidden = self.dropout(layer(hidden))
        return self.o(hidden)

In [None]:
def make_pred_for_eval(model, dataloader):
    """Run ``model`` over ``dataloader`` in eval mode and collect sigmoid outputs.

    Args:
        model: Module mapping a float input batch to raw scores. It is moved
            to the GPU when CUDA is available and switched to eval mode.
        dataloader: Iterable yielding ``(inputs, labels)`` pairs. ``labels``
            are not used for prediction and are ignored.

    Returns:
        A list with one numpy array per batch, each holding
        ``sigmoid(model(inputs))`` for that batch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    pred = []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for inputs, _labels in tqdm(dataloader):
            # Inputs are cast to float32 to match the model's parameter dtype.
            output = torch.sigmoid(model(inputs.float().to(device)))
            pred.append(output.cpu().detach().numpy())
    return pred

In [None]:
# Build the classifier and restore the trained weights of the checkpoint named by `seed`.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only runtime;
# make_pred_for_eval moves the model to the GPU later when one is available.
model = Classifier()
model.load_state_dict(
    torch.load(
        f"/content/drive/MyDrive/master_thesis/bert_embedding_nn/linear/{seed}",
        map_location="cpu",
    )
)
# Switch to eval mode (disables dropout); as the last expression it also displays the model.
model.eval()

# Eval

In [None]:
# Evaluation input: alias the test frame (no copy) and wrap it for batched inference.
# shuffle=False keeps the predictions in row order so that they line up with
# eval_df['label'] when the classification report is computed.
eval_df = test_df
eval_ds = ExperimentDataset(eval_df)
eval_dataloader = DataLoader(eval_ds, batch_size=16, shuffle=False)

In [None]:
# Run inference; preds is a list holding one array of sigmoid outputs per batch.
# Join the batches and flatten to a single 1-D array, in the same order as the rows of eval_df.
preds = make_pred_for_eval(model, eval_dataloader)
flattened_preds = np.concatenate(preds).ravel()

In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain binary predictions, then print the
# classification report against the true labels, formatted with 4 decimal digits.
print(classification_report(eval_df['label'].tolist(), flattened_preds >= 0.5, digits=4))