In [None]:
from functools import partial

import numpy as np
import pandas as pd
import tiktoken
import torch
from torch import nn
from torch.nn.utils.rnn import pad_sequence,pack_padded_sequence
from torch.utils.data import random_split, DataLoader, Dataset
from torchvision import transforms, datasets

In [None]:
import matplotlib.pyplot as plt
from tqdm import tqdm
tqdm.pandas()

import os
import csv

In [None]:
def get_save_path(model_name, dataset_name, model_type):
    """Return the checkpoint file path for a model/dataset/variant combination.

    The file lives in a ``model`` folder under the current working directory
    and is named ``best_{model_name}_on_{dataset_name}_{model_type}.pth``.

    Side effect: the ``model`` folder is created if it does not exist yet.
    """
    checkpoint_dir = os.path.join(os.getcwd(), "model")
    # Created on every call so a caller can save to the returned path right away.
    os.makedirs(checkpoint_dir, exist_ok=True)

    return os.path.join(
        checkpoint_dir,
        f"best_{model_name}_on_{dataset_name}_{model_type}.pth",
    )

def append_metrics_to_log(log_file_path, epoch, train_loss, val_loss=None, val_accuracy=None, val_mae=None):
    """Append one epoch's metrics as a row of a CSV log file.

    Args:
        log_file_path: path of the CSV file; its parent directory is created
            when missing.
        epoch: epoch identifier written verbatim into the first column.
        train_loss: training loss, written with 4 decimals.
        val_loss: optional validation loss, written with 4 decimals.
        val_accuracy: optional validation accuracy as a fraction; written as a
            percentage with 2 decimals (e.g. ``0.9`` -> ``90.00%``).
        val_mae: optional validation MAE, written with 4 decimals.

    A header row is written first when the file does not exist yet or exists
    but is empty. Only the columns whose value is not ``None`` are emitted, so
    callers should pass the same set of metrics on every call to one file.
    """
    log_dir = os.path.dirname(log_file_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # An empty file (e.g. pre-created or truncated) needs a header just as much
    # as a missing one; checking existence alone would leave it headerless.
    write_header = (not os.path.exists(log_file_path)) or os.path.getsize(log_file_path) == 0

    header = ['Epoch', 'Train Loss']
    row_data = [epoch, f"{float(train_loss):.4f}"]

    if val_loss is not None:
        header.append('Validation Loss')
        row_data.append(f"{float(val_loss):.4f}")

    if val_accuracy is not None:
        header.append('Validation Accuracy')
        row_data.append(f"{float(val_accuracy)*100:.2f}%")

    if val_mae is not None:
        header.append('Validation MAE')
        row_data.append(f"{float(val_mae):.4f}")

    with open(log_file_path, mode='a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow(header)
        writer.writerow(row_data)


In [None]:
def collate_fn_imdb(data, pad_value=50257, use_embedding=True):
    """Collate ``(token_ids, score, label)`` samples into one length-sorted batch.

    Samples are ordered by descending sequence length (required by
    ``pack_padded_sequence(..., enforce_sorted=True)``) and the token
    sequences are right-padded with ``pad_value``.

    Args:
        data: iterable of samples; element 0 is a 1-D tensor of token ids,
            element 1 the score and element 2 the label.
        pad_value: value used to pad shorter sequences.
        use_embedding: selects the return format (see below).

    Returns:
        ``(padded_ids, lengths, scores, labels)`` when ``use_embedding`` is
        true; otherwise ``(packed_input, scores, labels)`` where
        ``packed_input`` is a PackedSequence of the padded ids cast to float
        with a trailing feature dimension of size 1.
    """
    # sorted() rather than list.sort(): the caller's list must not be reordered
    # as a side effect of collating.
    ordered = sorted(data, key=lambda sample: len(sample[0]), reverse=True)
    sequences = [sample[0] for sample in ordered]
    scores = torch.tensor([sample[1] for sample in ordered], dtype=torch.float32)
    labels = torch.tensor([sample[2] for sample in ordered], dtype=torch.float32)

    original_seq_lengths = torch.tensor([len(seq) for seq in sequences], dtype=torch.long)
    padded_seqs_long = pad_sequence(sequences, batch_first=True, padding_value=pad_value)

    if use_embedding:
        return padded_seqs_long, original_seq_lengths, scores, labels

    # Without an embedding layer the raw token id itself is the single input feature.
    padded_seqs_float = padded_seqs_long.unsqueeze(-1).float()
    packed_input = pack_padded_sequence(
        padded_seqs_float, original_seq_lengths.cpu(), batch_first=True, enforce_sorted=True
    )
    return packed_input, scores, labels
    

def collate_fn_mnist(x):
    """Collate ``(image, label)`` samples into an image tensor and a label tensor.

    The image tensors are concatenated along their first dimension with
    ``torch.vstack``; dimension 1 of the result is then dropped if it has
    size 1. Labels become a ``torch.long`` tensor.

    # assumes each image is (1, H, W), giving a (batch, H, W) result — TODO confirm
    """
    image_list = []
    label_list = []
    for item in x:
        image_list.append(item[0])
        label_list.append(item[1])

    stacked = torch.vstack(image_list).squeeze(1)
    targets = torch.tensor(label_list, dtype=torch.long)

    return stacked, targets

def tokenize_text(text_list, tokenizer):
    """Encode every text in ``text_list`` and return the token ids as tensors.

    Args:
        text_list: iterable of texts.
        tokenizer: object exposing ``encode(text)``; its result is passed to
            ``torch.tensor``.

    Returns:
        A list with one tensor per input text, in input order.
    """
    return [torch.tensor(tokenizer.encode(text)) for text in text_list]

class IMDBDataset(Dataset):
    """Index-aligned view over tokenized comments, sentiments and scores.

    The three containers are stored as given; item ``idx`` is built from the
    ``idx``-th entry of each. The dataset length is the number of comments.
    """

    def __init__(self, comments_token_ids, sentiments, scores):
        self.comments_token_ids = comments_token_ids
        self.sentiments = sentiments
        self.scores = scores

    def __len__(self):
        """Number of comments held by the dataset."""
        return len(self.comments_token_ids)

    def __getitem__(self, idx):
        """Return ``(token_ids, score, sentiment)`` — note score comes before sentiment."""
        token_ids = self.comments_token_ids[idx]
        score = self.scores[idx]
        sentiment = self.sentiments[idx]
        return token_ids, score, sentiment
    
def create_IMDB_dataloader(dataset, batch_size=32, shuffle=True, num_workers=0, use_embedding=True, pad_value=50257):
    """Build a DataLoader whose batches are collated by ``collate_fn_imdb``.

    Args:
        dataset: dataset handed to the DataLoader.
        batch_size, shuffle, num_workers: forwarded to the DataLoader.
        use_embedding, pad_value: forwarded to ``collate_fn_imdb`` for every batch.

    Returns:
        The configured ``DataLoader``.
    """
    # functools.partial of a module-level function can be pickled, whereas a
    # lambda cannot; that matters when num_workers > 0 and worker processes
    # are started with the "spawn" method.
    collate_wrapper = partial(collate_fn_imdb, pad_value=pad_value, use_embedding=use_embedding)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, collate_fn=collate_wrapper)
    return dataloader

In [None]:
# Convert images to tensors and normalise them with fixed mean/std constants.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# NOTE(review): the test transform normalises with different constants than the
# training transform. Evaluation data is normally normalised with the training
# statistics — confirm this difference is intentional.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1326,), (0.3106,)),
])

mnist_train = datasets.MNIST(root='data', train=True, download=True, transform=train_transform)
mnist_test = datasets.MNIST(root='data', train=False, download=True, transform=test_transform)

# 90/10 train/validation split. The generator is seeded so that the same
# samples land in the validation set after every kernel restart; otherwise a
# model re-trained or re-evaluated later would see a different split.
MNIST_SPLIT_SEED = 42
train_size = int(0.9 * len(mnist_train))
val_size = len(mnist_train) - train_size
mnist_train_set, mnist_val_set = random_split(
    mnist_train,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(MNIST_SPLIT_SEED),
)

# Quick sanity report on the first training sample and the split sizes.
sample, target = mnist_train[0]
print(f"Sample count: {len(mnist_train)}")
print(f"Sample shape: {sample.shape}, Target: {target}")
print(f"Sample type: {sample.dtype}, Target type: {type(target)}")
print(f"Sample min: {sample.min()}, Sample max: {sample.max()}")
print(f"Sample training set size: {len(mnist_train_set)}")
print(f"Sample validation set size: {len(mnist_val_set)}")

In [None]:
# Read the train/test review tables from CSV.
IMDB_train = pd.read_csv("data/IMDB_train.csv")
IMDB_test = pd.read_csv("data/IMDB_test.csv")

# Extract the comment text, sentiment and score columns as plain Python lists.
train_comments, train_sentiments, train_scores = (
    IMDB_train[column].to_list() for column in ("preprocessed_comments", "sentiment", "score")
)
test_comments, test_sentiments, test_scores = (
    IMDB_test[column].to_list() for column in ("preprocessed_comments", "sentiment", "score")
)

# Tokenize every comment with tiktoken's "gpt2" encoding.
tokenizer = tiktoken.get_encoding("gpt2")
tokenized_train_comments = tokenize_text(train_comments, tokenizer)
tokenized_test_comments = tokenize_text(test_comments, tokenizer)

# Round-trip the first training comment through the tokenizer as a sanity check.
sample, sample_sentiment, sample_score = train_comments[0], train_sentiments[0], train_scores[0]
token_ids = tokenized_train_comments[0]
reconstructed = tokenizer.decode(token_ids.tolist())

print(f"Sample: {sample}")
print(f"Sentiment: {sample_sentiment}")
print(f"Score: {sample_score}")
print(f"Token IDs: {token_ids}")
print(f"Reconstructed: {reconstructed}")

In [None]:
class Basic_RNN_MNIST(nn.Module):
    """Single-layer ``nn.RNN`` whose last-step output is used directly as the prediction.

    Args:
        input_size: number of features per time step.
        out_dim: hidden size of the RNN, which is also the size of the
            returned vector (there is no separate output layer).
    """
    def __init__(self, input_size=28, out_dim=10):
        super().__init__()
        self.rnn = nn.RNN(input_size, out_dim, batch_first=True)

    def forward(self,x):
        """Return the RNN output at the final time step, shape ``(batch, out_dim)``.

        ``x`` is batch-first: ``(batch, seq_len, input_size)``.
        """
        # No explicit h0: PyTorch starts from a zero hidden state of the right
        # size, so the model also works when out_dim is not 10.
        output, _ = self.rnn(x)

        return output[:, -1, :]
    
class Basic_LSTM_MNIST(nn.Module):
    """Single-layer ``nn.LSTM`` whose last-step output is used directly as the prediction.

    Args:
        input_size: number of features per time step.
        out_dim: hidden size of the LSTM, which is also the size of the
            returned vector (there is no separate output layer).
    """
    def __init__(self, input_size=28, out_dim=10):
        super().__init__()
        self.lstm = nn.LSTM(input_size, out_dim, batch_first=True)

    def forward(self,x):
        """Return the LSTM output at the final time step, shape ``(batch, out_dim)``.

        ``x`` is batch-first: ``(batch, seq_len, input_size)``.
        """
        # No explicit (h0, c0): PyTorch starts from zero states of the right
        # size, so the model also works when out_dim is not 10.
        output, _ = self.lstm(x)

        return output[:, -1, :]
    
class Basic_GRU_MNIST(nn.Module):
    """Single-layer ``nn.GRU`` whose last-step output is used directly as the prediction.

    Args:
        input_size: number of features per time step.
        out_dim: hidden size of the GRU, which is also the size of the
            returned vector (there is no separate output layer).
    """
    def __init__(self, input_size=28, out_dim=10):
        super().__init__()
        self.gru = nn.GRU(input_size, out_dim, batch_first=True)

    def forward(self,x):
        """Return the GRU output at the final time step, shape ``(batch, out_dim)``.

        ``x`` is batch-first: ``(batch, seq_len, input_size)``.
        """
        # No explicit h0: PyTorch starts from a zero hidden state of the right
        # size, so the model also works when out_dim is not 10.
        output, _ = self.gru(x)

        return output[:, -1, :]

In [None]:
class Hymmn0s_RNN_MNIST(nn.Module):
    """Stacked ``nn.RNN`` followed by a linear layer applied to the last time step.

    Args:
        input_size: number of features per time step.
        hiddin_dim: hidden size of every RNN layer (also stored as ``hiden_dim``).
        out_dim: size of the linear layer's output.
        num_layers: number of stacked RNN layers.
    """

    def __init__(self, input_size=28, hiddin_dim=14, out_dim=10, num_layers=3):
        super().__init__()
        self.hiden_dim = hiddin_dim
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hiddin_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hiddin_dim, out_dim)

    def forward(self, x):
        """Map a batch-first sequence batch to ``(batch, out_dim)`` outputs."""
        step_outputs, _ = self.rnn(x)
        # Only the top layer's output at the final time step feeds the linear layer.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)
    
class Hymmn0s_LSTM_MNIST(nn.Module):
    """Stacked ``nn.LSTM`` followed by a linear layer applied to the last time step.

    Args:
        input_size: number of features per time step.
        hiddin_dim: hidden size of every LSTM layer (also stored as ``hiden_dim``).
        out_dim: size of the linear layer's output.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size=28, hiddin_dim=14, out_dim=10, num_layers=3):
        super().__init__()
        self.hiden_dim = hiddin_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hiddin_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hiddin_dim, out_dim)

    def forward(self, x):
        """Map a batch-first sequence batch to ``(batch, out_dim)`` outputs."""
        step_outputs, _ = self.lstm(x)
        # Only the top layer's output at the final time step feeds the linear layer.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)
    
class Hymmn0s_GRU_MNIST(nn.Module):
    """Stacked ``nn.GRU`` followed by a linear layer applied to the last time step.

    Args:
        input_size: number of features per time step.
        hiddin_dim: hidden size of every GRU layer (also stored as ``hiden_dim``).
        out_dim: size of the linear layer's output.
        num_layers: number of stacked GRU layers.
    """

    def __init__(self, input_size=28, hiddin_dim=14, out_dim=10, num_layers=3):
        super().__init__()
        self.hiden_dim = hiddin_dim
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hiddin_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hiddin_dim, out_dim)

    def forward(self, x):
        """Map a batch-first sequence batch to ``(batch, out_dim)`` outputs."""
        step_outputs, _ = self.gru(x)
        # Only the top layer's output at the final time step feeds the linear layer.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

In [None]:
def train_mnist(model, train_dataset, val_dataset, model_name, dataset_name, model_type, epochs = 50):
    """Train a classifier with Adam and cross-entropy, validating after every epoch.

    Fixed hyper-parameters: learning rate 2e-3, batch size 256, weight decay 2e-4.
    The model is moved to CUDA when available, otherwise it stays on the CPU.

    After each epoch the validation loss and accuracy are computed. Whenever
    the validation accuracy strictly improves on the best value so far, the
    model's ``state_dict`` is saved to ``get_save_path(model_name,
    dataset_name, model_type)``. One row per epoch is appended to
    ``log/{model_name}_on_{dataset_name}_training_log_{model_type}.csv``.

    Args:
        model: the ``nn.Module`` to train (modified in place).
        train_dataset: dataset used for optimisation (shuffled every epoch).
        val_dataset: dataset used for validation.
        model_name, dataset_name, model_type: strings used to build the
            checkpoint and log file names.
        epochs: number of passes over ``train_dataset``.
    """
    lr = 2e-3
    batch = 256
    weight_decay = 2e-4
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    loss_fn = nn.CrossEntropyLoss()

    train_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True, collate_fn=collate_fn_mnist)
    val_loader = DataLoader(val_dataset, batch_size=batch, shuffle=False, collate_fn=collate_fn_mnist)

    best_val_accuracy = 0.0

    model_save_path = get_save_path(model_name, dataset_name, model_type)
    log_file_path = f"log/{model_name}_on_{dataset_name}_training_log_{model_type}.csv"

    model.to(device)
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for x, y in tqdm(train_loader):
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()
            output = model(x)
            loss = loss_fn(output, y)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_train_loss}")

        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for x, y in val_loader:
                x = x.to(device)
                y = y.to(device)

                output = model(x)
                loss = loss_fn(output, y)
                val_loss += loss.item()

                predicted = torch.argmax(output, 1)
                total += y.size(0)
                correct += (predicted == y).sum().item()

        # Guard the averages so an empty validation set reports zeros instead
        # of raising ZeroDivisionError.
        avg_val_loss = val_loss / len(val_loader) if len(val_loader) > 0 else 0.0
        val_accuracy = correct / total if total > 0 else 0.0

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), model_save_path)
            print(f"Model saved with accuracy: {best_val_accuracy}")

        # Log the 1-based epoch number so the CSV matches the printed progress
        # (and the numbering used by train_imdb).
        append_metrics_to_log(log_file_path, epoch + 1, avg_train_loss, avg_val_loss, val_accuracy)

        print(f"Validation Loss: {avg_val_loss}, Accuracy: {100 * correct / total if total > 0 else 0.0}%")


def test_mnist(model, test_dataset):
    """Evaluate ``model`` on ``test_dataset`` and print the classification accuracy.

    The model is put in eval mode and moved to CUDA when available (CPU
    otherwise); batches of 256 samples are collated with ``collate_fn_mnist``.
    The predicted class is the arg-max of the model output along dimension 1.
    Nothing is returned — the accuracy is only printed.
    """
    batch = 256
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch, shuffle=False, collate_fn=collate_fn_mnist)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # The batches are moved to `device` below, so the model has to live there
    # too; otherwise a CPU model on a CUDA machine fails with a device mismatch.
    model.to(device)
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for data, labels in test_loader:
            data, labels = data.to(device), labels.to(device)
            outputs = model(data)
            predicted = torch.argmax(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty dataset reports 0% instead of raising ZeroDivisionError.
    accuracy = 100 * correct / total if total > 0 else 0.0
    print(f"Test Accuracy: {accuracy:.2f}%")

In [None]:
# Evaluate the baseline RNN on the MNIST test set from its saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rnn_mnist = Basic_RNN_MNIST(input_size=28, out_dim=10).to(device)

# Training is disabled here, so this cell needs a checkpoint from an earlier run.
#train_mnist(rnn_mnist, mnist_train_set, mnist_val_set, "RNN", "MNIST","Basic")
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
rnn_mnist.load_state_dict(torch.load(get_save_path("RNN", "MNIST", "Basic"), map_location=device))
test_mnist(rnn_mnist, mnist_test)

In [None]:
# Train the baseline LSTM on MNIST, then evaluate its best checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lstm_mnist = Basic_LSTM_MNIST(input_size=28, out_dim=10).to(device)

train_mnist(lstm_mnist, mnist_train_set, mnist_val_set, "LSTM", "MNIST","Basic", epochs=80)
# Reload the best-validation weights rather than testing the last epoch's weights.
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
lstm_mnist.load_state_dict(torch.load(get_save_path("LSTM", "MNIST", "Basic"), map_location=device))
test_mnist(lstm_mnist, mnist_test)

In [None]:
# Evaluate the baseline GRU on the MNIST test set from its saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
GRU_mnist = Basic_GRU_MNIST(input_size=28, out_dim=10).to(device)

# Training is disabled here, so this cell needs a checkpoint from an earlier run.
#train_mnist(GRU_mnist, mnist_train_set, mnist_val_set, "GRU", "MNIST","Basic")
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
GRU_mnist.load_state_dict(torch.load(get_save_path("GRU", "MNIST", "Basic"), map_location=device))
test_mnist(GRU_mnist, mnist_test)

In [None]:
# Evaluate the 5-layer RNN + linear head on the MNIST test set from its saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Hymmn0s_rnn_mnist = Hymmn0s_RNN_MNIST(input_size=28, hiddin_dim=24, out_dim=10, num_layers=5).to(device)

# Training is disabled here, so this cell needs a checkpoint from an earlier run.
#train_mnist(Hymmn0s_rnn_mnist, mnist_train_set, mnist_val_set, "RNN", "MNIST","Hymmn0s")
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
Hymmn0s_rnn_mnist.load_state_dict(torch.load(get_save_path("RNN", "MNIST", "Hymmn0s"), map_location=device))
test_mnist(Hymmn0s_rnn_mnist, mnist_test)

In [None]:
# Train the 5-layer LSTM + linear head on MNIST, then evaluate its best checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Hymmn0s_lstm_mnist = Hymmn0s_LSTM_MNIST(input_size=28, hiddin_dim=24, out_dim=10, num_layers=5).to(device)

train_mnist(Hymmn0s_lstm_mnist, mnist_train_set, mnist_val_set, "LSTM", "MNIST","Hymmn0s")
# Reload the best-validation weights rather than testing the last epoch's weights.
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
Hymmn0s_lstm_mnist.load_state_dict(torch.load(get_save_path("LSTM", "MNIST", "Hymmn0s"), map_location=device))
test_mnist(Hymmn0s_lstm_mnist, mnist_test)

In [None]:
# Evaluate the 5-layer GRU + linear head on the MNIST test set from its saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Hymmn0s_gru_mnist = Hymmn0s_GRU_MNIST(input_size=28, hiddin_dim=24, out_dim=10, num_layers=5).to(device)

# Training is disabled here, so this cell needs a checkpoint from an earlier run.
#train_mnist(Hymmn0s_gru_mnist, mnist_train_set, mnist_val_set, "GRU", "MNIST","Hymmn0s")
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
Hymmn0s_gru_mnist.load_state_dict(torch.load(get_save_path("GRU", "MNIST", "Hymmn0s"), map_location=device))
test_mnist(Hymmn0s_gru_mnist, mnist_test)

In [None]:
# Embedding configuration. The padding id is one past the last vocabulary id,
# so it cannot collide with a real token id below vocab_size.
vocab_size = 50257
embedding_dim = 64
padding_token_id = vocab_size

imdb_train = IMDBDataset(tokenized_train_comments, train_sentiments, train_scores)
imdb_test = IMDBDataset(tokenized_test_comments, test_sentiments, test_scores)

# 90/10 train/validation split. The generator is seeded so that the same
# reviews land in the validation set after every kernel restart; otherwise a
# model re-trained or re-evaluated later would see a different split.
IMDB_SPLIT_SEED = 42
train_size = int(0.9 * len(imdb_train))
val_size = len(imdb_train) - train_size
imdb_train_set, imdb_val_set = random_split(
    imdb_train,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(IMDB_SPLIT_SEED),
)

# Sanity check on the first sample; a dataset item is (token_ids, score, sentiment).
sample = imdb_train[0]
sample_sentiment = imdb_train.sentiments[0]
sample_score = imdb_train.scores[0]

print(f"Sample: {sample}")
print(f"Sentiment: {sample_sentiment}")
print(f"Score: {sample_score}")
print(f"Reconstructed: {tokenizer.decode(sample[0].tolist())}")

In [None]:
class Basic_RNN_IMDB(nn.Module):
    """``nn.RNN`` followed by a linear layer applied to the final hidden state.

    Args:
        input_dim: number of features per time step.
        hidden_dim: hidden size of each RNN layer.
        output_dim: size of the linear layer's output.
        num_layers: number of stacked RNN layers.
    """
    def __init__(self, input_dim=1, hidden_dim=32, output_dim=1, num_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, packed_x):
        """Run the RNN on ``packed_x`` and return ``(batch, output_dim)`` predictions.

        ``packed_x`` is handed to the RNN unchanged; the notebook's collate
        function supplies a PackedSequence here.
        """
        _, hn = self.rnn(packed_x)

        # hn is (num_layers, batch, hidden_dim). Taking the last layer works
        # for any depth, whereas squeeze(0) only drops the layer axis when
        # num_layers == 1 and would otherwise yield one output per layer.
        out = self.fc(hn[-1])
        return out

class Basic_LSTM_IMDB(nn.Module):
    """``nn.LSTM`` followed by a linear layer applied to the final hidden state.

    Args:
        input_dim: number of features per time step.
        hidden_dim: hidden size of each LSTM layer.
        output_dim: size of the linear layer's output.
        num_layers: number of stacked LSTM layers.
    """
    def __init__(self, input_dim=1, hidden_dim=32, output_dim=1, num_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, packed_x):
        """Run the LSTM on ``packed_x`` and return ``(batch, output_dim)`` predictions.

        ``packed_x`` is handed to the LSTM unchanged; the notebook's collate
        function supplies a PackedSequence here.
        """
        _, (hn, _) = self.lstm(packed_x)
        # hn is (num_layers, batch, hidden_dim). Taking the last layer works
        # for any depth, whereas squeeze(0) only drops the layer axis when
        # num_layers == 1 and would otherwise yield one output per layer.
        out = self.fc(hn[-1])
        return out

class Basic_GRU_IMDB(nn.Module):
    """``nn.GRU`` followed by a linear layer applied to the final hidden state.

    Args:
        input_dim: number of features per time step.
        hidden_dim: hidden size of each GRU layer.
        output_dim: size of the linear layer's output.
        num_layers: number of stacked GRU layers.
    """
    def __init__(self, input_dim=1, hidden_dim=32, output_dim=1, num_layers=1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, packed_x):
        """Run the GRU on ``packed_x`` and return ``(batch, output_dim)`` predictions.

        ``packed_x`` is handed to the GRU unchanged; the notebook's collate
        function supplies a PackedSequence here.
        """
        _, hn = self.gru(packed_x)
        # hn is (num_layers, batch, hidden_dim). Taking the last layer works
        # for any depth, whereas squeeze(0) only drops the layer axis when
        # num_layers == 1 and would otherwise yield one output per layer.
        out = self.fc(hn[-1])
        return out

In [None]:
class Hymmn0s_RNN_IMDB(nn.Module):
    """Token embedding -> stacked ``nn.RNN`` -> linear layer on the last layer's final hidden state.

    Args:
        vocab_size: number of real token ids; the embedding table has
            ``vocab_size + 1`` rows to leave room for one padding id.
        embedding_dim: size of each token embedding.
        hidden_dim: hidden size of each RNN layer.
        output_dim: size of the linear layer's output.
        num_layers: number of stacked RNN layers.
        padding_idx: token id treated as padding by the embedding layer.
    """
    def __init__(self, vocab_size=50257, embedding_dim=64, hidden_dim=32, output_dim=1, num_layers=5, padding_idx=50257):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size + 1, embedding_dim, padding_idx=padding_idx)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text_token_ids, text_lengths):
        """Return ``(batch, output_dim)`` predictions for padded token ids.

        Args:
            text_token_ids: ``(batch, max_len)`` integer token ids (batch-first).
            text_lengths: tensor with the unpadded length of every sequence.
        """
        embedded_seqs = self.embedding(text_token_ids)
        # enforce_sorted=False lets PyTorch sort the batch internally and
        # return hn in the caller's original order, so the model no longer
        # raises on batches that are not pre-sorted by length.
        packed_input = pack_padded_sequence(embedded_seqs, text_lengths.cpu(), batch_first=True, enforce_sorted=False)

        _, hn = self.rnn(packed_input)

        # hn[-1] is the last layer's hidden state at each sequence's true final step.
        out = self.fc(hn[-1, :, :])
        return out
    
class Hymmn0s_LSTM_IMDB(nn.Module):
    """Token embedding -> stacked ``nn.LSTM`` -> linear layer on the last layer's final hidden state.

    Args:
        vocab_size: number of real token ids; the embedding table has
            ``vocab_size + 1`` rows to leave room for one padding id.
        embedding_dim: size of each token embedding.
        hidden_dim: hidden size of each LSTM layer.
        output_dim: size of the linear layer's output.
        num_layers: number of stacked LSTM layers.
        padding_idx: token id treated as padding by the embedding layer.
    """
    def __init__(self, vocab_size=50257, embedding_dim=64, hidden_dim=32, output_dim=1, num_layers=5, padding_idx=50257):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size + 1, embedding_dim, padding_idx=padding_idx)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text_token_ids, text_lengths):
        """Return ``(batch, output_dim)`` predictions for padded token ids.

        Args:
            text_token_ids: ``(batch, max_len)`` integer token ids (batch-first).
            text_lengths: tensor with the unpadded length of every sequence.
        """
        embedded_seqs = self.embedding(text_token_ids)
        # enforce_sorted=False lets PyTorch sort the batch internally and
        # return hn in the caller's original order, so the model no longer
        # raises on batches that are not pre-sorted by length.
        packed_input = pack_padded_sequence(embedded_seqs, text_lengths.cpu(), batch_first=True, enforce_sorted=False)

        _, (hn, _) = self.lstm(packed_input)

        # hn[-1] is the last layer's hidden state at each sequence's true final step.
        out = self.fc(hn[-1, :, :])
        return out
    
class Hymmn0s_GRU_IMDB(nn.Module):
    """Token embedding -> stacked ``nn.GRU`` -> linear layer on the last layer's final hidden state.

    Args:
        vocab_size: number of real token ids; the embedding table has
            ``vocab_size + 1`` rows to leave room for one padding id.
        embedding_dim: size of each token embedding.
        hidden_dim: hidden size of each GRU layer.
        output_dim: size of the linear layer's output.
        num_layers: number of stacked GRU layers.
        padding_idx: token id treated as padding by the embedding layer.
    """
    def __init__(self, vocab_size=50257, embedding_dim=64, hidden_dim=32, output_dim=1, num_layers=5, padding_idx=50257):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size + 1, embedding_dim, padding_idx=padding_idx)
        self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text_token_ids, text_lengths):
        """Return ``(batch, output_dim)`` predictions for padded token ids.

        Args:
            text_token_ids: ``(batch, max_len)`` integer token ids (batch-first).
            text_lengths: tensor with the unpadded length of every sequence.
        """
        embedded_seqs = self.embedding(text_token_ids)
        # enforce_sorted=False lets PyTorch sort the batch internally and
        # return hn in the caller's original order, so the model no longer
        # raises on batches that are not pre-sorted by length.
        packed_input = pack_padded_sequence(embedded_seqs, text_lengths.cpu(), batch_first=True, enforce_sorted=False)

        _, hn = self.gru(packed_input)

        # hn[-1] is the last layer's hidden state at each sequence's true final step.
        out = self.fc(hn[-1, :, :])
        return out

In [None]:
def predict_sentiment_from_score(score_tensor, threshold=5.0):
    """Binarise scores: 1.0 where the score is strictly above ``threshold``, else 0.0.

    Returns a float tensor with the same shape as ``score_tensor``.
    """
    is_above_threshold = score_tensor > threshold
    return is_above_threshold.float()

def _imdb_forward_batch(model, batch_data, use_embedding, device):
    """Unpack one collated IMDB batch, run the model, and return ``(predicted_scores, true_scores, true_sentiments)``.

    With ``use_embedding`` the batch is ``(inputs, lengths, scores, sentiments)``
    and the model is called as ``model(inputs, lengths)``; otherwise the batch
    is ``(inputs, scores, sentiments)`` and the model is called as
    ``model(inputs)``. Scores are returned as a ``(batch, 1)`` float column and
    1-D predictions are reshaped to match.
    """
    if use_embedding:
        input_seq, lengths, true_scores, true_sentiments = batch_data
    else:
        input_seq, true_scores, true_sentiments = batch_data
        lengths = None

    input_seq = input_seq.to(device)
    true_scores = true_scores.to(device).float().unsqueeze(1)
    true_sentiments = true_sentiments.to(device).float()

    if use_embedding and lengths is not None:
        predicted_scores = model(input_seq, lengths)
    else:
        predicted_scores = model(input_seq)

    if predicted_scores.ndim == 1:
        predicted_scores = predicted_scores.unsqueeze(1)

    return predicted_scores, true_scores, true_sentiments


def train_imdb(model, train_dataset_obj, val_dataset_obj, model_name, dataset_name, model_type,
               epochs=50, use_embedding=True, padding_token_id=50257, lr=2e-3, batch_size_param=256, weight_decay=2e-4):
    """Train a score-regression model with Adam and MSE loss, validating after every epoch.

    After each epoch the validation MSE (mean over batches), MAE (mean over
    samples) and sentiment accuracy are computed; a predicted sentiment is
    ``predict_sentiment_from_score`` applied to the predicted score with its
    default threshold. The model's ``state_dict`` is saved to
    ``get_save_path(model_name, dataset_name, model_type)`` whenever the
    validation MAE strictly improves, and one row per epoch (1-based) is
    appended to ``log/{model_name}_on_{dataset_name}_{model_type}_train_log.csv``.

    Args:
        model: the ``nn.Module`` to train (modified in place, moved to CUDA
            when available).
        train_dataset_obj: dataset used for optimisation (shuffled every epoch).
        val_dataset_obj: dataset used for validation.
        model_name, dataset_name, model_type: strings used to build the
            checkpoint and log file names.
        epochs: number of passes over the training data.
        use_embedding: selects the batch format and the model call signature
            (``model(ids, lengths)`` when true, ``model(packed)`` otherwise).
        padding_token_id: pad value forwarded to the data loaders.
        lr, batch_size_param, weight_decay: optimiser / loader settings.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    regression_loss_fn = nn.MSELoss()

    train_loader = create_IMDB_dataloader(train_dataset_obj, batch_size=batch_size_param, shuffle=True, use_embedding=use_embedding, pad_value=padding_token_id)
    val_loader = create_IMDB_dataloader(val_dataset_obj, batch_size=batch_size_param, shuffle=False, use_embedding=use_embedding, pad_value=padding_token_id)

    best_val_mae = float('inf')
    model_save_path = get_save_path(model_name, dataset_name, model_type)
    log_file_path = f"log/{model_name}_on_{dataset_name}_{model_type}_train_log.csv"

    model.to(device)

    for epoch in range(epochs):
        model.train()
        total_train_loss_regr = 0

        for batch_data in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Training]"):
            optimizer.zero_grad()
            predicted_scores, true_scores, _ = _imdb_forward_batch(model, batch_data, use_embedding, device)

            loss_regr = regression_loss_fn(predicted_scores, true_scores)
            loss_regr.backward()
            optimizer.step()
            total_train_loss_regr += loss_regr.item()

        avg_train_loss_regr = total_train_loss_regr / len(train_loader)
        print(f"Epoch {epoch+1}/{epochs}, Regression Train Loss (MSE): {avg_train_loss_regr:.4f}")

        model.eval()
        total_val_loss_regr = 0
        total_val_mae = 0
        correct_sentiments = 0
        total_sentiments_samples = 0

        with torch.no_grad():
            for batch_data_val in tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} [Validation]"):
                predicted_scores_val, true_scores_val, true_sentiments_val = _imdb_forward_batch(
                    model, batch_data_val, use_embedding, device
                )

                total_val_loss_regr += regression_loss_fn(predicted_scores_val, true_scores_val).item()
                # Sum (not mean) of absolute errors so the epoch MAE is a true per-sample average.
                total_val_mae += nn.functional.l1_loss(predicted_scores_val, true_scores_val, reduction="sum").item()

                # Squeeze only the feature axis so a batch of one stays 1-D.
                predicted_sentiments = predict_sentiment_from_score(predicted_scores_val.squeeze(1))
                correct_sentiments += (predicted_sentiments == true_sentiments_val).sum().item()
                total_sentiments_samples += true_sentiments_val.size(0)

        # All three averages are guarded: previously the MSE division ran
        # first and raised on an empty validation set, making the other two
        # guards unreachable.
        avg_val_loss_regr = total_val_loss_regr / len(val_loader) if len(val_loader) > 0 else 0
        avg_val_mae = total_val_mae / total_sentiments_samples if total_sentiments_samples > 0 else 0
        val_sentiment_accuracy = correct_sentiments / total_sentiments_samples if total_sentiments_samples > 0 else 0

        print(f"Validation MSE: {avg_val_loss_regr:.4f}, Validation MAE: {avg_val_mae:.4f}, Validation Sentiment Acc: {val_sentiment_accuracy*100:.2f}%")
        append_metrics_to_log(log_file_path, epoch + 1, avg_train_loss_regr, avg_val_loss_regr, val_accuracy=val_sentiment_accuracy, val_mae=avg_val_mae)

        # Model selection is by validation MAE only; accuracy is reported for information.
        if avg_val_mae < best_val_mae:
            best_val_mae = avg_val_mae
            torch.save(model.state_dict(), model_save_path)
            print(f"New best model (by MAE) saved with Val MAE: {best_val_mae:.4f}, Sentiment Acc: {val_sentiment_accuracy*100:.2f}%")

def test_imdb(model, test_dataset_obj, use_embedding=True, threshold=5.0, padding_token_id=50257, batch_size_param=256):
    """Evaluate a score-regression model and print its MAE and sentiment accuracy.

    The model is put in eval mode and moved to CUDA when available (CPU
    otherwise). MAE is the sum of absolute score errors divided by the number
    of samples; a predicted sentiment is 1 when the predicted score is
    strictly above ``threshold``. Both metrics are only printed.

    Args:
        model: the ``nn.Module`` to evaluate.
        test_dataset_obj: dataset to evaluate on.
        use_embedding: selects the batch format and the model call signature
            (``model(ids, lengths)`` when true, ``model(packed)`` otherwise).
        threshold: score cut-off used to derive the predicted sentiment.
        padding_token_id: pad value forwarded to the data loader.
        batch_size_param: evaluation batch size.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    test_loader = create_IMDB_dataloader(test_dataset_obj, batch_size=batch_size_param, shuffle=False, use_embedding=use_embedding, pad_value=padding_token_id)

    model.eval()
    model.to(device)

    abs_error_sum = 0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Testing IMDB"):
            # The embedding path carries sequence lengths; the packed path does not.
            if use_embedding:
                model_input, seq_lengths, target_scores, target_sentiments = batch
            else:
                model_input, target_scores, target_sentiments = batch
                seq_lengths = None

            model_input = model_input.to(device)
            target_scores = target_scores.to(device).float().unsqueeze(1)
            target_sentiments = target_sentiments.to(device).float()

            if seq_lengths is None:
                raw_scores = model(model_input)
            else:
                raw_scores = model(model_input, seq_lengths)

            # Bring 1-D predictions to the (batch, 1) layout of the targets.
            if raw_scores.ndim == 1:
                raw_scores = raw_scores.unsqueeze(1)

            abs_error_sum += nn.functional.l1_loss(raw_scores, target_scores, reduction='sum').item()

            sentiment_guess = predict_sentiment_from_score(raw_scores.squeeze(), threshold)
            n_correct += (sentiment_guess == target_sentiments).sum().item()
            n_seen += target_sentiments.size(0)

    if n_seen > 0:
        average_mae = abs_error_sum / n_seen
        sentiment_accuracy = n_correct / n_seen
    else:
        average_mae = 0
        sentiment_accuracy = 0

    print(f"Test MAE (Average Absolute Error): {average_mae:.4f}")
    print(f"Test Sentiment Classification Accuracy (Threshold > {threshold}): {sentiment_accuracy*100:.2f}%")

    #return average_mae, sentiment_accuracy

In [None]:
# Evaluate the baseline (no-embedding) RNN on the IMDB test set from its saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rnn_imdb = Basic_RNN_IMDB(input_dim=1, hidden_dim=32, output_dim=1, num_layers=1).to(device)

# Training is disabled here, so this cell needs a checkpoint from an earlier run.
#train_imdb(rnn_imdb, imdb_train_set, imdb_val_set, "RNN", "IMDB","Basic", use_embedding=False)
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
rnn_imdb.load_state_dict(torch.load(get_save_path("RNN", "IMDB", "Basic"), map_location=device))
test_imdb(rnn_imdb, imdb_test, use_embedding=False)

In [None]:
# Evaluate the baseline (no-embedding) LSTM on the IMDB test set from its saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lstm_imdb = Basic_LSTM_IMDB(input_dim=1, hidden_dim=32, output_dim=1, num_layers=1).to(device)

# Training is disabled here, so this cell needs a checkpoint from an earlier run.
#train_imdb(lstm_imdb, imdb_train_set, imdb_val_set, "LSTM", "IMDB","Basic", use_embedding=False)
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
lstm_imdb.load_state_dict(torch.load(get_save_path("LSTM", "IMDB", "Basic"), map_location=device))
test_imdb(lstm_imdb, imdb_test, use_embedding=False)

In [None]:
# Evaluate the baseline (no-embedding) GRU on the IMDB test set from its saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
gru_imdb = Basic_GRU_IMDB(input_dim=1, hidden_dim=32, output_dim=1, num_layers=1).to(device)

# Training is disabled here, so this cell needs a checkpoint from an earlier run.
#train_imdb(gru_imdb, imdb_train_set, imdb_val_set, "GRU", "IMDB","Basic", use_embedding=False)
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
gru_imdb.load_state_dict(torch.load(get_save_path("GRU", "IMDB", "Basic"), map_location=device))
test_imdb(gru_imdb, imdb_test, use_embedding=False)

In [None]:
# Evaluate the 3-layer embedding RNN on the IMDB test set from its saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Hymmn0s_rnn_imdb = Hymmn0s_RNN_IMDB(vocab_size=vocab_size, embedding_dim=embedding_dim, hidden_dim=32, output_dim=1, num_layers=3).to(device)

# Training is disabled here, so this cell needs a checkpoint from an earlier run.
#train_imdb(Hymmn0s_rnn_imdb, imdb_train_set, imdb_val_set, "RNN", "IMDB","Hymmn0s", use_embedding=True)
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
Hymmn0s_rnn_imdb.load_state_dict(torch.load(get_save_path("RNN", "IMDB", "Hymmn0s"), map_location=device))
test_imdb(Hymmn0s_rnn_imdb, imdb_test, use_embedding=True)

In [None]:
# Evaluate the 3-layer embedding LSTM on the IMDB test set from its saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Hymmn0s_lstm_imdb = Hymmn0s_LSTM_IMDB(vocab_size=vocab_size, embedding_dim=embedding_dim, hidden_dim=32, output_dim=1, num_layers=3).to(device)

# Training is disabled here, so this cell needs a checkpoint from an earlier run.
#train_imdb(Hymmn0s_lstm_imdb, imdb_train_set, imdb_val_set, "LSTM", "IMDB","Hymmn0s", use_embedding=True)
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
Hymmn0s_lstm_imdb.load_state_dict(torch.load(get_save_path("LSTM", "IMDB", "Hymmn0s"), map_location=device))
test_imdb(Hymmn0s_lstm_imdb, imdb_test, use_embedding=True)

In [None]:
# Evaluate the 3-layer embedding GRU on the IMDB test set from its saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Hymmn0s_gru_imdb = Hymmn0s_GRU_IMDB(vocab_size=vocab_size, embedding_dim=embedding_dim, hidden_dim=32, output_dim=1, num_layers=3).to(device)

# Training is disabled here, so this cell needs a checkpoint from an earlier run.
#train_imdb(Hymmn0s_gru_imdb, imdb_train_set, imdb_val_set, "GRU", "IMDB","Hymmn0s", use_embedding=True)
# map_location keeps a checkpoint saved on a GPU loadable on a CPU-only machine.
Hymmn0s_gru_imdb.load_state_dict(torch.load(get_save_path("GRU", "IMDB", "Hymmn0s"), map_location=device))
test_imdb(Hymmn0s_gru_imdb, imdb_test, use_embedding=True)