In [1]:
# core
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

# modeling
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# functional
import joblib
import pickle

# warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Load Data and Models

In [2]:
# Names of the pickled objects to restore; each one is read from
# data/<name>.pkl and bound to a module-level variable of the same name.
# (Presumably per-embedding feature sets plus label vectors -- inferred from
# the names only; confirm against the notebook that wrote these files.)
data_names = [
    'tfidf_train', 'w2v_train', 'bert_train', 'gpt_train',
    'tfidf_test', 'w2v_test', 'bert_test', 'gpt_test',
    'y_train', 'y_test',
]

# NOTE: unpickling can execute arbitrary code, so only load trusted files.
for var_name in data_names:
    with open(f'data/{var_name}.pkl', 'rb') as f:
        globals().update({var_name: pickle.load(f)})

In [3]:
class RnnTextClassifier(nn.Module):
    """Vanilla RNN followed by a linear layer, fed one feature vector per sample.

    Every input row is treated as a sequence of length 1: ``forward`` inserts
    a time dimension before calling the RNN and passes the output of the last
    (only) time step to the linear layer.

    Args:
        input_size: Number of features in each input vector.
        output_size: Number of values the final linear layer emits per sample.
        hidden_size: Width of the RNN hidden state.
        num_layers: Number of stacked RNN layers.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers):
        super().__init__()

        # model params (read again in forward() to size the hidden state)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run a batch through the RNN and the linear head.

        Args:
            x: Tensor of shape ``(batch, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)`` holding the raw outputs
            of the final linear layer.
        """
        # (batch, input_size) -> (batch, 1, input_size): a one-step sequence
        x = x.unsqueeze(1)

        # Zero initial hidden state. new_zeros() inherits x's device and dtype,
        # so the model also works on GPU or with non-float32 inputs; a plain
        # torch.zeros() would always be a float32 CPU tensor and mismatch.
        hidden = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)

        # get RNN output and keep only the last time step
        out, _ = self.rnn(x, hidden)
        return self.fc(out[:, -1, :])

class RnnDataset(Dataset):
    """Pairs two index-aligned containers so a DataLoader can batch them.

    Args:
        X_data: Indexable container of inputs; its length defines the dataset size.
        y_data: Indexable container of targets, indexed with the same positions.
    """

    def __init__(self, X_data, y_data):
        self.X_data, self.y_data = X_data, y_data

    def __len__(self):
        """Return the number of samples, taken from ``X_data``."""
        n_samples = len(self.X_data)
        return n_samples

    def __getitem__(self, index):
        """Return the ``(input, target)`` pair stored at ``index``."""
        sample = self.X_data[index]
        target = self.y_data[index]
        return sample, target

def train_rnn(model, data_loader, criterion, optimizer, n_epochs):
    """Train ``model`` in place by making full passes over ``data_loader``.

    Args:
        model: Module to train; it is switched to training mode.
        data_loader: Iterable yielding ``(X_batch, y_batch)`` pairs. It is
            iterated once per epoch.
        criterion: Callable ``criterion(y_pred, y_batch)`` returning a loss
            that supports ``.backward()``.
        optimizer: Optimizer that updates ``model``'s parameters.
        n_epochs: Number of passes over ``data_loader``. An iterable is also
            accepted for backward compatibility; one pass is made per element.

    Returns:
        The same ``model`` object, after training.
    """
    model.train()

    # The original looped over ``n_epochs`` directly, which raises TypeError
    # for an integer count. Integer-like values are wrapped in range();
    # anything else is iterated as before.
    epochs = range(n_epochs) if hasattr(n_epochs, "__index__") else n_epochs

    for _ in epochs:

        for X_batch, y_batch in data_loader:

            # forward pass
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)

            # clear stale gradients, backpropagate, update parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    return model

In [4]:
# Restore the fitted models from disk, one per (algorithm, embedding) pair.
# NOTE: joblib.load and torch.load unpickle their input, which can execute
# arbitrary code -- only load model files from a trusted source.

# logistic regression
lr_tfidf = joblib.load('models/lr_model_tfidf.joblib')
lr_w2v = joblib.load('models/lr_model_w2v.joblib')
lr_bert = joblib.load('models/lr_model_bert.joblib')
lr_gpt = joblib.load('models/lr_model_gpt.joblib')

# random forest
rf_tfidf = joblib.load('models/rf_model_tfidf.joblib')
rf_w2v = joblib.load('models/rf_model_w2v.joblib')
rf_bert = joblib.load('models/rf_model_bert.joblib')
rf_gpt = joblib.load('models/rf_model_gpt.joblib')

# support vector machine
svm_tfidf = joblib.load('models/svm_model_tfidf.joblib')
svm_w2v = joblib.load('models/svm_model_w2v.joblib')
svm_bert = joblib.load('models/svm_model_bert.joblib')
svm_gpt = joblib.load('models/svm_model_gpt.joblib')

# RNN
# The loaded objects are bound directly to the model variables, so the .pth
# files presumably hold whole pickled modules (confirm against the notebook
# that saved them). Since PyTorch 2.6 torch.load defaults to
# weights_only=True, which refuses such pickles, so the legacy behaviour is
# requested explicitly. Unpickling them needs RnnTextClassifier to be defined.
rnn_tfidf = torch.load('models/rnn_model_tfidf.pth', weights_only=False)
rnn_w2v = torch.load('models/rnn_model_w2v.pth', weights_only=False)
rnn_bert = torch.load('models/rnn_model_bert.pth', weights_only=False)
rnn_gpt = torch.load('models/rnn_model_gpt.pth', weights_only=False)