In [3]:
import numpy as np
import pickle

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, f1_score

In [18]:
X_test = np.load('../Data/X_test.npy')
rnn_X_test = np.load('../Data/rnn_X_test.npy')
y_test = np.load('../Data/y_test.npy')

In [31]:
import torch
import torch.nn as nn


class GRU(nn.Module):
    def __init__(self, voc_size, embedding_size, padding_idx, hidden_state_size, n_rec_layers, output_size):
        super().__init__()
        self.n_rec_layers = n_rec_layers
        self.hidden_state_size = hidden_state_size

        self.embedding = nn.Embedding(voc_size, embedding_size, padding_idx)
        self.rnn = nn.GRU(embedding_size, hidden_state_size, n_rec_layers, batch_first=True, dropout=0.2)
        self.bn = nn.BatchNorm1d(hidden_state_size)
        self.fc = nn.Linear(hidden_state_size, output_size)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, X):
        h_0 = torch.zeros(self.n_rec_layers, X.size(0), self.hidden_state_size).to(device)
        print(X)
        X = self.embedding(X)
        X, _ = self.rnn(X, h_0)
        X = X[:, -1, :]
        X = self.bn(X)
        X = self.fc(X)
        X = self.logsoftmax(X)

        return X

    def predict(self, X_test):
        return torch.argmax(self(torch.from_numpy(X_test).to(device)), dim=1).cpu().numpy() - 2


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

voc_size = 5412  # "Data_preparation_RNN.ipynb"
padding_idx = 5411

gru = GRU(
    voc_size=voc_size,
    embedding_size=512,
    padding_idx=padding_idx,
    hidden_state_size=512,
    n_rec_layers=2,
    output_size=5
).to(device)

gru.load_state_dict(torch.load('../Trained_models/GRU.pth', map_location=torch.device('cpu')))
gru.eval()

GRU(
  (embedding): Embedding(5412, 512, padding_idx=5411)
  (rnn): GRU(512, 512, num_layers=2, batch_first=True, dropout=0.2)
  (bn): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc): Linear(in_features=512, out_features=5, bias=True)
  (logsoftmax): LogSoftmax(dim=1)
)

In [32]:
with open("../Trained_models/Random_Forest.pkl", "rb") as f:
    rf = pickle.load(f)
with open("../Trained_models/KNN.pkl", "rb") as f:
    knn = pickle.load(f)

models = [
    (rf, 0.45),
    (gru, 0.35),
    (knn, 0.2)
]

In [33]:
n_models = len(models)

ensem_pred = np.zeros_like(y_test, dtype=np.float64)

for model, w in models:
    if isinstance(model, GRU):
        pred = model.predict(rnn_X_test)
    else:
        pred = model.predict(X_test)
    ensem_pred += pred * w
    print(pred)

[ 0  0  1 ... -2 -1  0]
tensor([[14867, 14867, 14867,  ...,  4522,  3740,   977],
        [14867, 14867, 14867,  ...,  4879,  1416, 14284],
        [14867, 14867, 14867,  ...,  7643, 11940,  8430],
        ...,
        [ 2799, 12002,  9365,  ...,  7713,  7713,  5147],
        [14867, 14867, 14867,  ..., 10581,  1416,  2198],
        [11671, 11391,  2799,  ..., 10755,  2451,  5959]])


IndexError: index out of range in self

In [8]:
ensem_pred = np.round(ensem_pred)
print(classification_report(y_test, ensem_pred))

              precision    recall  f1-score   support

          -2       0.52      0.71      0.60       473
          -1       0.65      0.71      0.68      2746
           0       0.79      0.69      0.74      4174
           1       0.50      0.61      0.55       522
           2       0.61      0.51      0.56       105

    accuracy                           0.69      8020
   macro avg       0.62      0.65      0.63      8020
weighted avg       0.71      0.69      0.70      8020

