In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
from torch import nn

# Test Model

### Get Test Data and Model

In [2]:
# MAKE SURE THESE ARE UP TO DATE WITH THE CLASS DEFINED IN model_training.ipynb

class RelevanceDataset(Dataset):
    """Map-style dataset that pairs an embedding tensor with a relevance label.

    Both inputs are pandas objects: ``X_data`` must expose an ``embeddings``
    column and ``y_data`` a ``relevance`` column.
    """

    def __init__(self, X_data, y_data):
        """Convert the ``embeddings`` and ``relevance`` columns to tensors.

        Args:
            X_data: frame whose ``embeddings`` column holds the model inputs.
            y_data: frame whose ``relevance`` column holds the class labels.
        """
        # The index is dropped before conversion -- presumably so that
        # positional lookups 0..n-1 made while building the tensor resolve
        # against the index labels; confirm before removing.
        embedding_column = X_data.reset_index(drop=True).embeddings
        self.X_data = torch.tensor(embedding_column)

        relevance_column = y_data.reset_index(drop=True).relevance
        label_tensor = torch.tensor(relevance_column)
        # Labels are stored as 64-bit integers.
        self.y_data = label_tensor.long()

    def __len__(self):
        """Return the number of samples (length of the embedding tensor)."""
        return len(self.X_data)

    def __getitem__(self, item):
        """Return the ``(embedding, label)`` pair stored at ``item``."""
        features = self.X_data[item]
        label = self.y_data[item]
        return features, label


class Classifier(nn.Module):
    """1-D convolutional classifier: two conv + max-pool stages, then two linear layers.

    The attribute names and layer sizes below determine the ``state_dict``
    layout, so they have to stay in step with whatever checkpoint is loaded
    into this module.
    """

    def __init__(self, num_features, hidden_layer_size, kernel_size, pool_size, num_class):
        """Build the layers.

        Args:
            num_features: input channel count of the first convolution.
            hidden_layer_size: output channel count of both convolutions.
            kernel_size: kernel width shared by both convolutions.
            pool_size: window of the max-pool applied after each convolution.
            num_class: number of logits produced by the output layer.
        """
        super().__init__()

        # Convolutional feature extractor.
        self.conv1 = nn.Conv1d(num_features, hidden_layer_size, kernel_size)
        self.conv2 = nn.Conv1d(hidden_layer_size, hidden_layer_size, kernel_size)

        # fc1 is fed the flattened output of the second pooling step, so that
        # output must contain exactly this many values per sample. The number
        # is hard-coded rather than derived from the constructor arguments.
        flat_width = 48384
        self.fc1 = nn.Linear(flat_width, flat_width)
        self.fc_out = nn.Linear(flat_width, num_class)

        # Parameter-free layers, shared between stages.
        self.pool = nn.MaxPool1d(pool_size)
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout()

    def forward(self, x):
        """Return the ``num_class`` logits for a batch ``x``.

        No activation is applied between the convolution stages; ReLU and
        dropout are applied only after ``fc1``.
        """
        stage_one = self.pool(self.conv1(x))
        stage_two = self.pool(self.conv2(stage_one))

        hidden = self.fc1(self.flatten(stage_two))
        hidden = self.dropout(self.relu(hidden))

        return self.fc_out(hidden)

# --- Model architecture: passed to Classifier(...) when the model is rebuilt ---
NUM_FEATURES = 400        # input channels of the first convolution
NUM_CLASSES = 4           # number of output logits
HIDDEN_LAYER_SIZE = 256   # output channels of both convolutions
KERNEL_SIZE = 5           # convolution kernel width
POOL_SIZE = 2             # max-pool window

# --- Training settings: not referenced by the evaluation cells visible here;
# presumably mirrored from the training notebook -- confirm before removing. ---
EPOCHS = 300
BATCH_SIZE = 16
LEARNING_RATE = 7e-4

In [None]:
# Load the serialized test split and wrap it in a loader that yields one
# sample per batch.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# files from a trusted source. Recent PyTorch releases default to
# weights_only=True, which may refuse to unpickle a custom Dataset object;
# confirm against the installed version.
test_dataset = torch.load('data/test_dataset.pt')
test_loader = DataLoader(dataset=test_dataset, batch_size=1)

# Rebuild the architecture with the same hyper-parameters, then restore the
# trained weights into it.
model = Classifier(
    num_features=NUM_FEATURES,
    hidden_layer_size=HIDDEN_LAYER_SIZE,
    kernel_size=KERNEL_SIZE,
    num_class=NUM_CLASSES,
    pool_size=POOL_SIZE,
)
# map_location='cpu' lets a checkpoint that was saved from CUDA tensors be
# restored on a machine without a GPU; load_state_dict copies the values into
# the module's existing parameters either way.
model.load_state_dict(torch.load('models/model_v1.pt', map_location='cpu'))

#### Conduct Test

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# Run the classifier over the test set and collect one predicted class index
# per sample, in loader order.

# The inputs are moved to `device` below, so the weights must live there too;
# otherwise the forward pass fails with a device mismatch on CUDA machines.
model.to(device)
model.eval()  # disable dropout for inference

y_pred_list = []

with torch.no_grad():
    for X_batch, _labels in test_loader:
        X_batch = X_batch.to(device)

        y_test_pred = model(X_batch)

        # Index of the largest logit along the class dimension.
        _max_logits, y_pred_tags = torch.max(y_test_pred, dim=1)

        # extend() keeps the result a flat list of ints for any batch size.
        y_pred_list.extend(y_pred_tags.cpu().tolist())

#### Confusion Matrix and Classification Report

In [None]:
# Ground-truth labels: the second element of every dataset sample, unwrapped
# to a plain Python number, in dataset order.
y_test = list(sample[1].item() for sample in test_dataset)
y_test

In [None]:
# Confusion matrix of the test predictions: rows are true classes, columns
# are predicted classes (scikit-learn's convention).
#
# Passing `labels` pins the matrix to NUM_CLASSES x NUM_CLASSES even when a
# class never occurs in y_test / y_pred_list, so row and column positions
# always equal the class id.
# NOTE(review): assumes the classes are the integers 0..NUM_CLASSES-1 -- confirm.
class_ids = list(range(NUM_CLASSES))
cm_df = pd.DataFrame(
    confusion_matrix(y_test, y_pred_list, labels=class_ids),
    index=class_ids,
    columns=class_ids,
)

# fmt="d" prints the counts as integers; the default annotation format
# switches to scientific notation for larger counts.
ax = sns.heatmap(cm_df, annot=True, fmt="d")
ax.set(xlabel="Predicted class", ylabel="True class", title="Test-set confusion matrix");

In [None]:
# Per-class precision / recall / F1 for the test set. zero_division=0 reports
# 0 for metrics that would otherwise divide by zero.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred_list,
        zero_division=0,
    )
)