In [1]:
import json
from collections import Counter
import re

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score, confusion_matrix

from tqdm import tqdm

from seq_helpers import *

should_downsample = False
should_include_kcs = True

In [2]:
# Set device to MPS (if using MacOS 12.3+). If not available, use CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Set device to CUDA (if using an Nvidia GPU). If not available, use CPU.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Grid Search
We perform a grid search over N (the sequence length in the input data) and n_layers (the number of layers in each model). We select only these two hyperparemeters because we want to approximate the effect of varying the input sequence length on model performance (data source variation) AND the effect of increasing model complexity via n_layers. We could have chosen a different complexity hyperparameter (like hidden_size), but opt to choose only one due to computational constraints (adding one extra hyperparameter to the search increases runtime significantly).

In [3]:
hparams = {
    'batch_size': 32,
    'lr': 0.001,
    'epochs': 100,
    'input_size': 2
}

In [9]:
best_metrics = {
    'RNN': {
        'val': {'acc': 0, 'bal_acc': 0, 'f1_0': 0, 'f1_1': 0, 'sup_0': 0, 'sup_1': 0, 'auc': 0, 'epoch': 0},
        'test': {'acc': 0, 'bal_acc': 0, 'f1_0': 0, 'f1_1': 0, 'sup_0': 0, 'sup_1': 0, 'auc': 0, 'epoch': 0}
        },
    'LSTM': {
        'val': {'acc': 0, 'bal_acc': 0, 'f1_0': 0, 'f1_1': 0, 'sup_0': 0, 'sup_1': 0, 'auc': 0, 'epoch': 0},
        'test': {'acc': 0, 'bal_acc': 0, 'f1_0': 0, 'f1_1': 0, 'sup_0': 0, 'sup_1': 0, 'auc': 0, 'epoch': 0}
        },
    'Transformer': {
        'val': {'acc': 0, 'bal_acc': 0, 'f1_0': 0, 'f1_1': 0, 'sup_0': 0, 'sup_1': 0, 'auc': 0, 'epoch': 0},
        'test': {'acc': 0, 'bal_acc': 0, 'f1_0': 0, 'f1_1': 0, 'sup_0': 0, 'sup_1': 0, 'auc': 0, 'epoch': 0}
        },
}

for N in [5, 10, 20]:
    for n_layers in [1, 2, 5]:
        for model_name in ['RNN', 'LSTM', 'Transformer']:
            print(f"Training {model_name} with N = {N} and n_layers = {n_layers}")

            train_data = load_and_process_data(f'data_outputs/train_fold_1_n_{N}.json', seq_len = N)
            val_data = load_and_process_data(f'data_outputs/val_fold_1_n_{N}.json', seq_len = N)
            test_data = load_and_process_data(f'data_outputs/test_n_{N}.json', seq_len = N)

            train_dataset = StudentSequenceDataset(train_data, embedding_dim=0)
            val_dataset = StudentSequenceDataset(val_data, embedding_dim=0)
            test_dataset = StudentSequenceDataset(test_data, embedding_dim=0)

            train_data_loader = DataLoader(train_dataset, batch_size=hparams['batch_size'], shuffle=True)
            val_data_loader = DataLoader(val_dataset, batch_size=hparams['batch_size'], shuffle=False)
            test_data_loader = DataLoader(test_dataset, batch_size=hparams['batch_size'], shuffle=False)

            if model_name == 'RNN':
                model = SimpleRNN(input_size=hparams['input_size'], num_layers=n_layers).to(device)
            elif model_name == 'LSTM':
                model = SimpleLSTM(input_size=hparams['input_size'], num_layers=n_layers).to(device)
            elif model_name == 'Transformer':
                model = SimpleTransformer(input_size=hparams['input_size'], num_layers=n_layers).to(device)
            evaluator = ModelEvaluator({f'{model_name}_{N}_{n_layers}_epoch': model}, train_data_loader, val_data_loader, test_data_loader, device, epochs=hparams['epochs'], lr=hparams['lr'])
            evaluator.train_and_evaluate()
            val_metrics, test_metrics = evaluator.compute_best_metrics()

            if val_metrics['bal_acc'] > best_metrics[model_name]['val']['bal_acc']:
                best_metrics[model_name]['val'] = val_metrics
                best_metrics[model_name]['test'] = test_metrics
                print(f"New best {model_name} model with N = {N} and n_layers = {n_layers}!")
                print(f"    Val: {val_metrics}")
                print(f"    Test: {test_metrics}")


Training RNN with N = 5 and n_layers = 1


100%|██████████| 11302/11302 [00:00<00:00, 922311.10it/s]
100%|██████████| 5675/5675 [00:00<00:00, 974560.89it/s]
100%|██████████| 3130/3130 [00:00<00:00, 924193.70it/s]
Epoch [1/1]: 100%|██████████| 354/354 [00:04<00:00, 85.19it/s, loss=0.653]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 90.67it/s, loss=0.626]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 92.44it/s, loss=0.617]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 92.65it/s, loss=0.616]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 93.56it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 93.01it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 93.49it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 92.96it/s, loss=0.616]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 93.48it/s, loss=0.614]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 92.72it/s, loss=0.613]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 93.21it/s, loss=0.612]
Epoch

Best Metrics:
    Validation Set: RNN_5_1_epoch_92: {'acc': 0.7305726872246696, 'bal_acc': 0.6918040060299004, 'f1_0': 0.5992136304062909, 'f1_1': 0.797080291970803, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6918040060299004, 'epoch': 92}
    Test Set: RNN_5_1_epoch_91: {'acc': 0.7424920127795527, 'bal_acc': 0.702279039485805, 'f1_0': 0.6121270452358035, 'f1_1': 0.8072692491630798, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.702279039485805, 'epoch': 91}

New best RNN model with N = 5 and n_layers = 1!
    Val: {'acc': 0.7305726872246696, 'bal_acc': 0.6918040060299004, 'f1_0': 0.5992136304062909, 'f1_1': 0.797080291970803, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6918040060299004, 'epoch': 92}
    Test: {'acc': 0.7424920127795527, 'bal_acc': 0.702279039485805, 'f1_0': 0.6121270452358035, 'f1_1': 0.8072692491630798, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.702279039485805, 'epoch': 91}
Training LSTM with N = 5 and n_layers = 1


100%|██████████| 11302/11302 [00:00<00:00, 940425.41it/s]
100%|██████████| 5675/5675 [00:00<00:00, 957661.44it/s]
100%|██████████| 3130/3130 [00:00<00:00, 996143.22it/s]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 109.45it/s, loss=0.646]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 111.76it/s, loss=0.627]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 111.22it/s, loss=0.62]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 111.62it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 112.21it/s, loss=0.609]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 112.24it/s, loss=0.613]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 111.81it/s, loss=0.612]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 112.21it/s, loss=0.611]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 112.12it/s, loss=0.608]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 111.44it/s, loss=0.604]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 111.17it/s, loss=0.

Best Metrics:
    Validation Set: LSTM_5_1_epoch_66: {'acc': 0.733568281938326, 'bal_acc': 0.6920919937818963, 'f1_0': 0.5970149253731345, 'f1_1': 0.801000263227165, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6920919937818965, 'epoch': 66}
    Test Set: LSTM_5_1_epoch_77: {'acc': 0.736741214057508, 'bal_acc': 0.7010258448133757, 'f1_0': 0.6145930776426567, 'f1_1': 0.8000970402717127, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.7010258448133757, 'epoch': 77}

New best LSTM model with N = 5 and n_layers = 1!
    Val: {'acc': 0.733568281938326, 'bal_acc': 0.6920919937818963, 'f1_0': 0.5970149253731345, 'f1_1': 0.801000263227165, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6920919937818965, 'epoch': 66}
    Test: {'acc': 0.736741214057508, 'bal_acc': 0.7010258448133757, 'f1_0': 0.6145930776426567, 'f1_1': 0.8000970402717127, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.7010258448133757, 'epoch': 77}
Training Transformer with N = 5 and n_layers = 1


100%|██████████| 11302/11302 [00:00<00:00, 919092.30it/s]
100%|██████████| 5675/5675 [00:00<00:00, 967549.09it/s]
100%|██████████| 3130/3130 [00:00<00:00, 40375.74it/s]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 53.19it/s, loss=0.658]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 56.34it/s, loss=0.652]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 57.51it/s, loss=0.66]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 57.56it/s, loss=0.653]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 57.29it/s, loss=0.655]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 57.53it/s, loss=0.655]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 57.41it/s, loss=0.653]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 57.31it/s, loss=0.661]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 57.60it/s, loss=0.655]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 57.03it/s, loss=0.661]
Epoch [1/1]: 100%|██████████| 354/354 [00:06<00:00, 57.51it/s, loss=0.661]
Epoch [

Best Metrics:
    Validation Set: Transformer_5_1_epoch_0: {'acc': 0.5524229074889868, 'bal_acc': 0.5591669448742013, 'f1_0': 0.49722882026920034, 'f1_1': 0.5966973642426167, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.5591669448742012, 'epoch': 0}
    Test Set: Transformer_5_1_epoch_0: {'acc': 0.5575079872204473, 'bal_acc': 0.5642007951184713, 'f1_0': 0.5048265999284948, 'f1_1': 0.6000577533930118, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.5642007951184713, 'epoch': 0}

New best Transformer model with N = 5 and n_layers = 1!
    Val: {'acc': 0.5524229074889868, 'bal_acc': 0.5591669448742013, 'f1_0': 0.49722882026920034, 'f1_1': 0.5966973642426167, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.5591669448742012, 'epoch': 0}
    Test: {'acc': 0.5575079872204473, 'bal_acc': 0.5642007951184713, 'f1_0': 0.5048265999284948, 'f1_1': 0.6000577533930118, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.5642007951184713, 'epoch': 0}
Training RNN with N = 5 and n_layers = 2


100%|██████████| 11302/11302 [00:00<00:00, 991923.49it/s]
100%|██████████| 5675/5675 [00:00<00:00, 60486.88it/s]
100%|██████████| 3130/3130 [00:00<00:00, 987377.52it/s]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 64.69it/s, loss=0.641]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 64.12it/s, loss=0.617]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 64.62it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 65.51it/s, loss=0.614]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 64.61it/s, loss=0.614]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 65.12it/s, loss=0.616]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 65.16it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 64.06it/s, loss=0.614]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 65.78it/s, loss=0.616]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 64.88it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 65.00it/s, loss=0.614]
Epoch 

Best Metrics:
    Validation Set: RNN_5_2_epoch_46: {'acc': 0.7423788546255506, 'bal_acc': 0.6921701033557511, 'f1_0': 0.5881690140845071, 'f1_1': 0.8125641025641026, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6921701033557511, 'epoch': 46}
    Test Set: RNN_5_2_epoch_63: {'acc': 0.7380191693290735, 'bal_acc': 0.7054486047333097, 'f1_0': 0.6231617647058825, 'f1_1': 0.7992164544564151, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.7054486047333097, 'epoch': 63}

New best RNN model with N = 5 and n_layers = 2!
    Val: {'acc': 0.7423788546255506, 'bal_acc': 0.6921701033557511, 'f1_0': 0.5881690140845071, 'f1_1': 0.8125641025641026, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6921701033557511, 'epoch': 46}
    Test: {'acc': 0.7380191693290735, 'bal_acc': 0.7054486047333097, 'f1_0': 0.6231617647058825, 'f1_1': 0.7992164544564151, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.7054486047333097, 'epoch': 63}
Training LSTM with N = 5 and n_layers = 2


100%|██████████| 11302/11302 [00:00<00:00, 1008939.72it/s]
100%|██████████| 5675/5675 [00:00<00:00, 1029750.17it/s]
100%|██████████| 3130/3130 [00:00<00:00, 966017.04it/s]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 101.76it/s, loss=0.647]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 102.03it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 102.10it/s, loss=0.611]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 102.23it/s, loss=0.606]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 102.68it/s, loss=0.605]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 102.59it/s, loss=0.604]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 101.45it/s, loss=0.608]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 102.19it/s, loss=0.603]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 101.70it/s, loss=0.602]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 101.88it/s, loss=0.6]
Epoch [1/1]: 100%|██████████| 354/354 [00:03<00:00, 101.74it/s, loss=0

Best Metrics:
    Validation Set: LSTM_5_2_epoch_96: {'acc': 0.7277533039647577, 'bal_acc': 0.6913813420414274, 'f1_0': 0.6008783260139499, 'f1_1': 0.7934215804251905, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6913813420414273, 'epoch': 96}
    Test Set: LSTM_5_2_epoch_83: {'acc': 0.7424920127795527, 'bal_acc': 0.7042166213005867, 'f1_0': 0.6165556612749763, 'f1_1': 0.8061568061568061, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.7042166213005867, 'epoch': 83}

Training Transformer with N = 5 and n_layers = 2


100%|██████████| 11302/11302 [00:00<00:00, 997706.39it/s]
100%|██████████| 5675/5675 [00:00<00:00, 1073207.77it/s]
100%|██████████| 3130/3130 [00:00<00:00, 849389.98it/s]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 43.41it/s, loss=0.661]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 43.87it/s, loss=0.659]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 43.76it/s, loss=0.658]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 43.87it/s, loss=0.657]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 44.01it/s, loss=0.659]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 44.24it/s, loss=0.659]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 43.89it/s, loss=0.658]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 44.14it/s, loss=0.657]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 44.17it/s, loss=0.661]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 44.20it/s, loss=0.661]
Epoch [1/1]: 100%|██████████| 354/354 [00:08<00:00, 44.11it/s, loss=0.658]
Epoc

Best Metrics:
    Validation Set: Transformer_5_2_epoch_0: {'acc': 0.5524229074889868, 'bal_acc': 0.5591669448742013, 'f1_0': 0.49722882026920034, 'f1_1': 0.5966973642426167, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.5591669448742012, 'epoch': 0}
    Test Set: Transformer_5_2_epoch_1: {'acc': 0.5626198083067092, 'bal_acc': 0.5662297151287219, 'f1_0': 0.5030852994555355, 'f1_1': 0.6094151212553496, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.566229715128722, 'epoch': 1}

Training RNN with N = 5 and n_layers = 5


100%|██████████| 11302/11302 [00:00<00:00, 975913.53it/s]
100%|██████████| 5675/5675 [00:00<00:00, 81666.48it/s]
100%|██████████| 3130/3130 [00:00<00:00, 1003222.64it/s]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 34.41it/s, loss=0.628]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 33.66it/s, loss=0.612]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 34.07it/s, loss=0.612]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 34.06it/s, loss=0.612]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 34.17it/s, loss=0.617]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 34.00it/s, loss=0.611]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 33.94it/s, loss=0.613]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 34.04it/s, loss=0.628]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 34.12it/s, loss=0.651]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 34.00it/s, loss=0.641]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 33.90it/s, loss=0.65]
Epoch 

Best Metrics:
    Validation Set: RNN_5_5_epoch_81: {'acc': 0.7344493392070485, 'bal_acc': 0.6936278844041746, 'f1_0': 0.599734395750332, 'f1_1': 0.8013183915622939, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6936278844041746, 'epoch': 81}
    Test Set: RNN_5_5_epoch_75: {'acc': 0.7421725239616613, 'bal_acc': 0.7055732748768883, 'f1_0': 0.6198775317946302, 'f1_1': 0.8049311094996374, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.7055732748768883, 'epoch': 75}

New best RNN model with N = 5 and n_layers = 5!
    Val: {'acc': 0.7344493392070485, 'bal_acc': 0.6936278844041746, 'f1_0': 0.599734395750332, 'f1_1': 0.8013183915622939, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6936278844041746, 'epoch': 81}
    Test: {'acc': 0.7421725239616613, 'bal_acc': 0.7055732748768883, 'f1_0': 0.6198775317946302, 'f1_1': 0.8049311094996374, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.7055732748768883, 'epoch': 75}
Training LSTM with N = 5 and n_layers = 5


100%|██████████| 11302/11302 [00:00<00:00, 1019003.09it/s]
100%|██████████| 5675/5675 [00:00<00:00, 960366.16it/s]
100%|██████████| 3130/3130 [00:00<00:00, 866088.63it/s]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 65.25it/s, loss=0.664]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 65.10it/s, loss=0.629]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 65.17it/s, loss=0.617]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 65.08it/s, loss=0.614]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 61.98it/s, loss=0.645]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 64.41it/s, loss=0.634]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 63.75it/s, loss=0.627]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 63.42it/s, loss=0.612]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 64.47it/s, loss=0.611]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 62.87it/s, loss=0.607]
Epoch [1/1]: 100%|██████████| 354/354 [00:05<00:00, 64.14it/s, loss=0.605]
Epoc

Best Metrics:
    Validation Set: LSTM_5_5_epoch_67: {'acc': 0.7446696035242291, 'bal_acc': 0.6915239481838797, 'f1_0': 0.5839793281653748, 'f1_1': 0.8158128892843522, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6915239481838797, 'epoch': 67}
    Test Set: LSTM_5_5_epoch_80: {'acc': 0.7460063897763578, 'bal_acc': 0.7057628773869138, 'f1_0': 0.616867469879518, 'f1_1': 0.8100358422939068, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.7057628773869138, 'epoch': 80}

Training Transformer with N = 5 and n_layers = 5


100%|██████████| 11302/11302 [00:00<00:00, 989232.55it/s]
100%|██████████| 5675/5675 [00:00<00:00, 1052098.44it/s]
100%|██████████| 3130/3130 [00:00<00:00, 976507.85it/s]
Epoch [1/1]: 100%|██████████| 354/354 [00:17<00:00, 20.61it/s, loss=0.659]
Epoch [1/1]: 100%|██████████| 354/354 [00:17<00:00, 20.76it/s, loss=0.661]
Epoch [1/1]: 100%|██████████| 354/354 [00:16<00:00, 20.86it/s, loss=0.657]
Epoch [1/1]: 100%|██████████| 354/354 [00:17<00:00, 20.81it/s, loss=0.662]
Epoch [1/1]: 100%|██████████| 354/354 [00:16<00:00, 21.04it/s, loss=0.657]
Epoch [1/1]: 100%|██████████| 354/354 [00:17<00:00, 20.81it/s, loss=0.661]
Epoch [1/1]: 100%|██████████| 354/354 [00:16<00:00, 20.84it/s, loss=0.661]
Epoch [1/1]: 100%|██████████| 354/354 [00:16<00:00, 20.87it/s, loss=0.661]
Epoch [1/1]: 100%|██████████| 354/354 [00:18<00:00, 19.20it/s, loss=0.657]
Epoch [1/1]: 100%|██████████| 354/354 [00:18<00:00, 18.81it/s, loss=0.659]
Epoch [1/1]: 100%|██████████| 354/354 [00:18<00:00, 18.84it/s, loss=0.659]
Epoc

Best Metrics:
    Validation Set: Transformer_5_5_epoch_0: {'acc': 0.6227312775330397, 'bal_acc': 0.5, 'f1_0': 0.0, 'f1_1': 0.7675100445216637, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.5, 'epoch': 0}
    Test Set: Transformer_5_5_epoch_0: {'acc': 0.6191693290734824, 'bal_acc': 0.5, 'f1_0': 0.0, 'f1_1': 0.7647987371744277, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.5, 'epoch': 0}

Training RNN with N = 10 and n_layers = 1


100%|██████████| 7143/7143 [00:00<00:00, 546474.42it/s]
100%|██████████| 3577/3577 [00:00<00:00, 560567.38it/s]
100%|██████████| 1967/1967 [00:00<00:00, 489306.44it/s]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.08it/s, loss=0.612]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.00it/s, loss=0.612]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.28it/s, loss=0.613]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.70it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.41it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.23it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.67it/s, loss=0.604]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.20it/s, loss=0.601]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.52it/s, loss=0.588]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.73it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.56it/s, loss=0.596]
Epoch [1

Best Metrics:
    Validation Set: RNN_10_1_epoch_44: {'acc': 0.7081353089180877, 'bal_acc': 0.6159188151760993, 'f1_0': 0.4423076923076923, 'f1_1': 0.8023475956077244, 'sup_0': 1101, 'sup_1': 2476, 'auc': 0.6159188151760993, 'epoch': 44}
    Test Set: RNN_10_1_epoch_63: {'acc': 0.6832740213523132, 'bal_acc': 0.6037667820591839, 'f1_0': 0.43517679057116954, 'f1_1': 0.779936418226775, 'sup_0': 601, 'sup_1': 1366, 'auc': 0.6037667820591839, 'epoch': 63}

Training LSTM with N = 10 and n_layers = 1


100%|██████████| 7143/7143 [00:00<00:00, 523637.39it/s]
100%|██████████| 3577/3577 [00:00<00:00, 573597.85it/s]
100%|██████████| 1967/1967 [00:00<00:00, 491258.54it/s]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 89.90it/s, loss=0.628]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 92.03it/s, loss=0.619]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 93.99it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 93.36it/s, loss=0.605]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 92.39it/s, loss=0.612]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 91.23it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 91.74it/s, loss=0.611]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 91.64it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 91.84it/s, loss=0.608]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 91.98it/s, loss=0.611]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 91.14it/s, loss=0.61]
Epoch [1/1]

Best Metrics:
    Validation Set: LSTM_10_1_epoch_85: {'acc': 0.700587084148728, 'bal_acc': 0.6263546944399202, 'f1_0': 0.4711111111111112, 'f1_1': 0.7911873659582763, 'sup_0': 1101, 'sup_1': 2476, 'auc': 0.6263546944399202, 'epoch': 85}
    Test Set: LSTM_10_1_epoch_47: {'acc': 0.701067615658363, 'bal_acc': 0.6282257730527209, 'f1_0': 0.4740608228980322, 'f1_1': 0.7911931818181819, 'sup_0': 601, 'sup_1': 1366, 'auc': 0.6282257730527209, 'epoch': 47}

Training Transformer with N = 10 and n_layers = 1


100%|██████████| 7143/7143 [00:00<00:00, 537995.86it/s]
100%|██████████| 3577/3577 [00:00<00:00, 526717.65it/s]
100%|██████████| 1967/1967 [00:00<00:00, 533199.51it/s]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 49.98it/s, loss=0.612]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 52.81it/s, loss=0.613]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 54.06it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 53.80it/s, loss=0.612]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 53.84it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 54.61it/s, loss=0.608]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 54.12it/s, loss=0.608]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 54.14it/s, loss=0.611]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 54.45it/s, loss=0.608]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 54.01it/s, loss=0.608]
Epoch [1/1]: 100%|██████████| 224/224 [00:04<00:00, 54.47it/s, loss=0.608]
Epoch [1/

Best Metrics:
    Validation Set: Transformer_10_1_epoch_10: {'acc': 0.6678781101481689, 'bal_acc': 0.5351398860486648, 'f1_0': 0.2602739726027397, 'f1_1': 0.7858687815428983, 'sup_0': 1101, 'sup_1': 2476, 'auc': 0.5351398860486649, 'epoch': 10}
    Test Set: Transformer_10_1_epoch_10: {'acc': 0.6741230299949161, 'bal_acc': 0.5375411405588051, 'f1_0': 0.2589595375722543, 'f1_1': 0.7911371782339525, 'sup_0': 601, 'sup_1': 1366, 'auc': 0.5375411405588051, 'epoch': 10}

Training RNN with N = 10 and n_layers = 2


100%|██████████| 7143/7143 [00:00<00:00, 537802.71it/s]
100%|██████████| 3577/3577 [00:00<00:00, 46099.47it/s]
100%|██████████| 1967/1967 [00:00<00:00, 483768.97it/s]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 38.56it/s, loss=0.618]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 37.82it/s, loss=0.614]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 37.91it/s, loss=0.605]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 38.16it/s, loss=0.6]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 38.20it/s, loss=0.591]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 37.97it/s, loss=0.598]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 38.33it/s, loss=0.59]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 38.47it/s, loss=0.594]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 38.31it/s, loss=0.589]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 38.18it/s, loss=0.589]
Epoch [1/1]: 100%|██████████| 224/224 [00:05<00:00, 38.34it/s, loss=0.588]
Epoch [1/1]

Best Metrics:
    Validation Set: RNN_10_2_epoch_37: {'acc': 0.6885658372938216, 'bal_acc': 0.6118708722720863, 'f1_0': 0.44906033630069236, 'f1_1': 0.7829306313328137, 'sup_0': 1101, 'sup_1': 2476, 'auc': 0.6118708722720864, 'epoch': 37}
    Test Set: RNN_10_2_epoch_72: {'acc': 0.71021860701576, 'bal_acc': 0.6040639929059182, 'f1_0': 0.4111570247933884, 'f1_1': 0.8078219824679703, 'sup_0': 601, 'sup_1': 1366, 'auc': 0.6040639929059182, 'epoch': 72}

Training LSTM with N = 10 and n_layers = 2


100%|██████████| 7143/7143 [00:00<00:00, 541300.74it/s]
100%|██████████| 3577/3577 [00:00<00:00, 512976.56it/s]
100%|██████████| 1967/1967 [00:00<00:00, 26684.21it/s]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 75.57it/s, loss=0.623]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 77.34it/s, loss=0.609]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 77.24it/s, loss=0.605]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 76.65it/s, loss=0.616]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 76.92it/s, loss=0.603]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 76.89it/s, loss=0.603]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 77.17it/s, loss=0.601]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 76.52it/s, loss=0.605]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 77.35it/s, loss=0.601]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 76.81it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 224/224 [00:02<00:00, 76.59it/s, loss=0.605]
Epoch [1

Best Metrics:
    Validation Set: LSTM_10_2_epoch_96: {'acc': 0.7103718199608611, 'bal_acc': 0.6432581116593962, 'f1_0': 0.49903288201160545, 'f1_1': 0.7963035784506488, 'sup_0': 1101, 'sup_1': 2476, 'auc': 0.6432581116593962, 'epoch': 96}
    Test Set: LSTM_10_2_epoch_96: {'acc': 0.7097102186070158, 'bal_acc': 0.6377097224489199, 'f1_0': 0.4878923766816144, 'f1_1': 0.7974459028024121, 'sup_0': 601, 'sup_1': 1366, 'auc': 0.6377097224489199, 'epoch': 96}

Training Transformer with N = 10 and n_layers = 2


100%|██████████| 7143/7143 [00:00<00:00, 526999.36it/s]
100%|██████████| 3577/3577 [00:00<00:00, 570218.75it/s]
100%|██████████| 1967/1967 [00:00<00:00, 482947.72it/s]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.28it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.13it/s, loss=0.616]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.52it/s, loss=0.616]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.33it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.56it/s, loss=0.613]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.63it/s, loss=0.614]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.37it/s, loss=0.616]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.58it/s, loss=0.606]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.32it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.84it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 224/224 [00:06<00:00, 36.49it/s, loss=0.613]
Epoch [

Best Metrics:
    Validation Set: Transformer_10_2_epoch_11: {'acc': 0.6678781101481689, 'bal_acc': 0.5351398860486648, 'f1_0': 0.2602739726027397, 'f1_1': 0.7858687815428983, 'sup_0': 1101, 'sup_1': 2476, 'auc': 0.5351398860486649, 'epoch': 11}
    Test Set: Transformer_10_2_epoch_11: {'acc': 0.6741230299949161, 'bal_acc': 0.5375411405588051, 'f1_0': 0.2589595375722543, 'f1_1': 0.7911371782339525, 'sup_0': 601, 'sup_1': 1366, 'auc': 0.5375411405588051, 'epoch': 11}

Training RNN with N = 10 and n_layers = 5


100%|██████████| 7143/7143 [00:00<00:00, 555275.94it/s]
100%|██████████| 3577/3577 [00:00<00:00, 580949.68it/s]
100%|██████████| 1967/1967 [00:00<00:00, 489800.28it/s]
Epoch [1/1]: 100%|██████████| 224/224 [00:11<00:00, 18.88it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:11<00:00, 18.83it/s, loss=0.607]
Epoch [1/1]: 100%|██████████| 224/224 [00:11<00:00, 18.77it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 224/224 [00:11<00:00, 18.76it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 224/224 [00:11<00:00, 18.95it/s, loss=0.594]
Epoch [1/1]: 100%|██████████| 224/224 [00:11<00:00, 18.70it/s, loss=0.592]
Epoch [1/1]: 100%|██████████| 224/224 [00:12<00:00, 18.64it/s, loss=0.588]
Epoch [1/1]: 100%|██████████| 224/224 [00:11<00:00, 18.72it/s, loss=0.585]
Epoch [1/1]: 100%|██████████| 224/224 [00:12<00:00, 18.66it/s, loss=0.586]
Epoch [1/1]: 100%|██████████| 224/224 [00:12<00:00, 18.61it/s, loss=0.585]
Epoch [1/1]: 100%|██████████| 224/224 [00:12<00:00, 18.62it/s, loss=0.582]
Epoch [1

Best Metrics:
    Validation Set: RNN_10_5_epoch_35: {'acc': 0.698909700866648, 'bal_acc': 0.6178294368902408, 'f1_0': 0.45413076533198177, 'f1_1': 0.7921250723798495, 'sup_0': 1101, 'sup_1': 2476, 'auc': 0.6178294368902407, 'epoch': 35}
    Test Set: RNN_10_5_epoch_70: {'acc': 0.6980172852058973, 'bal_acc': 0.6120521434505205, 'f1_0': 0.44172932330827064, 'f1_1': 0.7930313588850175, 'sup_0': 601, 'sup_1': 1366, 'auc': 0.6120521434505206, 'epoch': 70}

Training LSTM with N = 10 and n_layers = 5


100%|██████████| 7143/7143 [00:00<00:00, 580584.72it/s]
100%|██████████| 3577/3577 [00:00<00:00, 580679.85it/s]
100%|██████████| 1967/1967 [00:00<00:00, 493108.36it/s]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 58.31it/s, loss=0.623]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 58.41it/s, loss=0.621]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.04it/s, loss=0.624]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 58.68it/s, loss=0.622]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 58.84it/s, loss=0.609]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 58.76it/s, loss=0.611]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 58.43it/s, loss=0.611]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.28it/s, loss=0.609]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 58.55it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 58.85it/s, loss=0.613]
Epoch [1/1]: 100%|██████████| 224/224 [00:03<00:00, 59.08it/s, loss=0.603]
Epoch [

Best Metrics:
    Validation Set: LSTM_10_5_epoch_95: {'acc': 0.6840928152082751, 'bal_acc': 0.6215017483004877, 'f1_0': 0.4719626168224299, 'f1_1': 0.7746310331072996, 'sup_0': 1101, 'sup_1': 2476, 'auc': 0.6215017483004877, 'epoch': 95}
    Test Set: LSTM_10_5_epoch_95: {'acc': 0.7020843924758515, 'bal_acc': 0.6406057010887174, 'f1_0': 0.4974271012006861, 'f1_1': 0.7882947976878613, 'sup_0': 601, 'sup_1': 1366, 'auc': 0.6406057010887174, 'epoch': 95}

Training Transformer with N = 10 and n_layers = 5


100%|██████████| 7143/7143 [00:00<00:00, 101192.68it/s]
100%|██████████| 3577/3577 [00:00<00:00, 534048.53it/s]
100%|██████████| 1967/1967 [00:00<00:00, 525657.60it/s]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 20.78it/s, loss=0.614]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 21.04it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 21.18it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 20.93it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 20.96it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 21.00it/s, loss=0.609]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 21.05it/s, loss=0.609]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 20.91it/s, loss=0.609]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 21.07it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 21.07it/s, loss=0.61]
Epoch [1/1]: 100%|██████████| 224/224 [00:10<00:00, 20.99it/s, loss=0.609]
Epoch [1/1]: 

Best Metrics:
    Validation Set: Transformer_10_5_epoch_0: {'acc': 0.6922001677383282, 'bal_acc': 0.5, 'f1_0': 0.0, 'f1_1': 0.8181067239385428, 'sup_0': 1101, 'sup_1': 2476, 'auc': 0.5, 'epoch': 0}
    Test Set: Transformer_10_5_epoch_0: {'acc': 0.6944585663446874, 'bal_acc': 0.5, 'f1_0': 0.0, 'f1_1': 0.8196819681968197, 'sup_0': 601, 'sup_1': 1366, 'auc': 0.5, 'epoch': 0}

Training RNN with N = 20 and n_layers = 1


100%|██████████| 3881/3881 [00:00<00:00, 318298.31it/s]
100%|██████████| 1937/1937 [00:00<00:00, 302425.81it/s]
100%|██████████| 1056/1056 [00:00<00:00, 280097.71it/s]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 42.64it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 43.81it/s, loss=0.598]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 44.63it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 43.73it/s, loss=0.596]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 43.60it/s, loss=0.592]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 44.33it/s, loss=0.589]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 43.88it/s, loss=0.588]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 43.51it/s, loss=0.587]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 43.80it/s, loss=0.59]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 44.05it/s, loss=0.586]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 43.90it/s, loss=0.591]
Epoch [1

Best Metrics:
    Validation Set: RNN_20_1_epoch_46: {'acc': 0.6902426432627775, 'bal_acc': 0.5659280386134269, 'f1_0': 0.33628318584070793, 'f1_1': 0.797979797979798, 'sup_0': 559, 'sup_1': 1378, 'auc': 0.5659280386134269, 'epoch': 46}
    Test Set: RNN_20_1_epoch_87: {'acc': 0.7007575757575758, 'bal_acc': 0.560542398777693, 'f1_0': 0.3039647577092511, 'f1_1': 0.8094089264173705, 'sup_0': 308, 'sup_1': 748, 'auc': 0.560542398777693, 'epoch': 87}

Training LSTM with N = 20 and n_layers = 1


100%|██████████| 3881/3881 [00:00<00:00, 316712.92it/s]
100%|██████████| 1937/1937 [00:00<00:00, 298777.83it/s]
100%|██████████| 1056/1056 [00:00<00:00, 255977.87it/s]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 85.19it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 85.96it/s, loss=0.598]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 87.18it/s, loss=0.623]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 86.72it/s, loss=0.633]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 86.20it/s, loss=0.634]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 86.51it/s, loss=0.591]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 86.12it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 86.10it/s, loss=0.586]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 86.97it/s, loss=0.627]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 86.93it/s, loss=0.631]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 86.72it/s, loss=0.593]
Epoch [

Best Metrics:
    Validation Set: LSTM_20_1_epoch_84: {'acc': 0.6618482188951987, 'bal_acc': 0.5885003544064535, 'f1_0': 0.41465594280607687, 'f1_1': 0.7622504537205081, 'sup_0': 559, 'sup_1': 1378, 'auc': 0.5885003544064535, 'epoch': 84}
    Test Set: LSTM_20_1_epoch_72: {'acc': 0.6912878787878788, 'bal_acc': 0.6140183346065698, 'f1_0': 0.44745762711864406, 'f1_1': 0.78580814717477, 'sup_0': 308, 'sup_1': 748, 'auc': 0.6140183346065698, 'epoch': 72}

Training Transformer with N = 20 and n_layers = 1


100%|██████████| 3881/3881 [00:00<00:00, 320643.21it/s]
100%|██████████| 1937/1937 [00:00<00:00, 289669.73it/s]
100%|██████████| 1056/1056 [00:00<00:00, 287665.46it/s]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 48.79it/s, loss=0.603]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 53.95it/s, loss=0.601]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 54.47it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 54.37it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 54.38it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 54.73it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 54.24it/s, loss=0.598]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 54.61it/s, loss=0.6]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 55.47it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 55.01it/s, loss=0.598]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 55.56it/s, loss=0.598]
Epoch [1/

Best Metrics:
    Validation Set: Transformer_20_1_epoch_66: {'acc': 0.6809499225606608, 'bal_acc': 0.5296266918689033, 'f1_0': 0.237037037037037, 'f1_1': 0.7983028720626631, 'sup_0': 559, 'sup_1': 1378, 'auc': 0.5296266918689033, 'epoch': 66}
    Test Set: Transformer_20_1_epoch_66: {'acc': 0.6856060606060606, 'bal_acc': 0.5297937356760886, 'f1_0': 0.22429906542056077, 'f1_1': 0.8028503562945367, 'sup_0': 308, 'sup_1': 748, 'auc': 0.5297937356760887, 'epoch': 66}

Training RNN with N = 20 and n_layers = 2


100%|██████████| 3881/3881 [00:00<00:00, 322395.95it/s]
100%|██████████| 1937/1937 [00:00<00:00, 228046.00it/s]
100%|██████████| 1056/1056 [00:00<00:00, 230111.44it/s]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 25.77it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 26.06it/s, loss=0.598]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 25.95it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 26.03it/s, loss=0.592]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 25.69it/s, loss=0.588]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 25.82it/s, loss=0.589]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 25.84it/s, loss=0.588]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 25.80it/s, loss=0.589]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 25.84it/s, loss=0.589]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 25.86it/s, loss=0.587]
Epoch [1/1]: 100%|██████████| 122/122 [00:04<00:00, 25.80it/s, loss=0.588]
Epoch [

Best Metrics:
    Validation Set: RNN_20_2_epoch_18: {'acc': 0.6448115642746515, 'bal_acc': 0.5749316501839539, 'f1_0': 0.39965095986038396, 'f1_1': 0.7478005865102639, 'sup_0': 559, 'sup_1': 1378, 'auc': 0.5749316501839539, 'epoch': 18}
    Test Set: RNN_20_2_epoch_49: {'acc': 0.6988636363636364, 'bal_acc': 0.5716195569136745, 'f1_0': 0.34024896265560167, 'f1_1': 0.8049079754601225, 'sup_0': 308, 'sup_1': 748, 'auc': 0.5716195569136746, 'epoch': 49}

Training LSTM with N = 20 and n_layers = 2


100%|██████████| 3881/3881 [00:00<00:00, 320794.86it/s]
100%|██████████| 1937/1937 [00:00<00:00, 300179.82it/s]
100%|██████████| 1056/1056 [00:00<00:00, 272029.54it/s]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 72.57it/s, loss=0.611]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 74.31it/s, loss=0.596]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 75.07it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 74.13it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 74.30it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 74.70it/s, loss=0.596]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 74.47it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 74.97it/s, loss=0.595]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 75.60it/s, loss=0.591]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 74.53it/s, loss=0.589]
Epoch [1/1]: 100%|██████████| 122/122 [00:01<00:00, 75.62it/s, loss=0.59]
Epoch [1

Best Metrics:
    Validation Set: LSTM_20_2_epoch_86: {'acc': 0.6737222509034589, 'bal_acc': 0.6005670503257164, 'f1_0': 0.4306306306306306, 'f1_1': 0.7713458755426919, 'sup_0': 559, 'sup_1': 1378, 'auc': 0.6005670503257164, 'epoch': 86}
    Test Set: LSTM_20_2_epoch_88: {'acc': 0.6922348484848485, 'bal_acc': 0.5965431627196333, 'f1_0': 0.41016333938294014, 'f1_1': 0.7918001281229982, 'sup_0': 308, 'sup_1': 748, 'auc': 0.5965431627196334, 'epoch': 88}

Training Transformer with N = 20 and n_layers = 2


100%|██████████| 3881/3881 [00:00<00:00, 321593.41it/s]
100%|██████████| 1937/1937 [00:00<00:00, 297770.37it/s]
100%|██████████| 1056/1056 [00:00<00:00, 274507.90it/s]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 37.64it/s, loss=0.598]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 37.81it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 37.87it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 38.22it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 38.07it/s, loss=0.6]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 37.77it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 38.27it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 38.00it/s, loss=0.6]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 38.14it/s, loss=0.598]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 38.06it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:03<00:00, 38.03it/s, loss=0.598]
Epoch [1/1]

Best Metrics:
    Validation Set: Transformer_20_2_epoch_0: {'acc': 0.7114093959731543, 'bal_acc': 0.5, 'f1_0': 0.0, 'f1_1': 0.8313725490196078, 'sup_0': 559, 'sup_1': 1378, 'auc': 0.5, 'epoch': 0}
    Test Set: Transformer_20_2_epoch_0: {'acc': 0.7083333333333334, 'bal_acc': 0.5, 'f1_0': 0.0, 'f1_1': 0.8292682926829268, 'sup_0': 308, 'sup_1': 748, 'auc': 0.5, 'epoch': 0}

Training RNN with N = 20 and n_layers = 5


100%|██████████| 3881/3881 [00:00<00:00, 327810.66it/s]
100%|██████████| 1937/1937 [00:00<00:00, 311075.81it/s]
100%|██████████| 1056/1056 [00:00<00:00, 272313.87it/s]
Epoch [1/1]: 100%|██████████| 122/122 [00:10<00:00, 12.17it/s, loss=0.6]
Epoch [1/1]: 100%|██████████| 122/122 [00:09<00:00, 12.20it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:09<00:00, 12.27it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:10<00:00, 12.13it/s, loss=0.594]
Epoch [1/1]: 100%|██████████| 122/122 [00:10<00:00, 12.13it/s, loss=0.595]
Epoch [1/1]: 100%|██████████| 122/122 [00:10<00:00, 12.04it/s, loss=0.591]
Epoch [1/1]: 100%|██████████| 122/122 [00:10<00:00, 12.11it/s, loss=0.592]
Epoch [1/1]: 100%|██████████| 122/122 [00:10<00:00, 12.07it/s, loss=0.595]
Epoch [1/1]: 100%|██████████| 122/122 [00:10<00:00, 12.03it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 122/122 [00:10<00:00, 12.06it/s, loss=0.587]
Epoch [1/1]: 100%|██████████| 122/122 [00:10<00:00, 12.06it/s, loss=0.587]
Epoch [1/

Best Metrics:
    Validation Set: RNN_20_5_epoch_64: {'acc': 0.6907589055240062, 'bal_acc': 0.6247679481553988, 'f1_0': 0.4666073018699911, 'f1_1': 0.782260996001454, 'sup_0': 559, 'sup_1': 1378, 'auc': 0.6247679481553988, 'epoch': 64}
    Test Set: RNN_20_5_epoch_75: {'acc': 0.7017045454545454, 'bal_acc': 0.6461993888464477, 'f1_0': 0.5007923930269413, 'f1_1': 0.787305874409183, 'sup_0': 308, 'sup_1': 748, 'auc': 0.6461993888464477, 'epoch': 75}

Training LSTM with N = 20 and n_layers = 5


100%|██████████| 3881/3881 [00:00<00:00, 54868.25it/s]
100%|██████████| 1937/1937 [00:00<00:00, 269661.67it/s]
100%|██████████| 1056/1056 [00:00<00:00, 259913.45it/s]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 50.25it/s, loss=0.617]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 51.29it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 51.49it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 51.08it/s, loss=0.598]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 50.91it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 50.82it/s, loss=0.596]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 50.83it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 50.96it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 50.40it/s, loss=0.606]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 50.63it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:02<00:00, 50.68it/s, loss=0.595]
Epoch [1

Best Metrics:
    Validation Set: LSTM_20_5_epoch_49: {'acc': 0.7000516262261228, 'bal_acc': 0.5393306780976812, 'f1_0': 0.23451910408432147, 'f1_1': 0.8134831460674158, 'sup_0': 559, 'sup_1': 1378, 'auc': 0.5393306780976812, 'epoch': 49}
    Test Set: LSTM_20_5_epoch_35: {'acc': 0.6884469696969697, 'bal_acc': 0.5356187929717342, 'f1_0': 0.24018475750577367, 'f1_1': 0.8040500297796308, 'sup_0': 308, 'sup_1': 748, 'auc': 0.5356187929717341, 'epoch': 35}

Training Transformer with N = 20 and n_layers = 5


100%|██████████| 3881/3881 [00:00<00:00, 304941.72it/s]
100%|██████████| 1937/1937 [00:00<00:00, 289618.10it/s]
100%|██████████| 1056/1056 [00:00<00:00, 261911.48it/s]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 19.81it/s, loss=0.597]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 19.68it/s, loss=0.594]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 19.86it/s, loss=0.594]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 19.94it/s, loss=0.603]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 19.98it/s, loss=0.603]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 20.03it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 20.00it/s, loss=0.593]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 20.19it/s, loss=0.599]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 20.19it/s, loss=0.603]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 20.18it/s, loss=0.603]
Epoch [1/1]: 100%|██████████| 122/122 [00:06<00:00, 20.11it/s, loss=0.592]
Epoch [

Best Metrics:
    Validation Set: Transformer_20_5_epoch_0: {'acc': 0.7114093959731543, 'bal_acc': 0.5, 'f1_0': 0.0, 'f1_1': 0.8313725490196078, 'sup_0': 559, 'sup_1': 1378, 'auc': 0.5, 'epoch': 0}
    Test Set: Transformer_20_5_epoch_0: {'acc': 0.7083333333333334, 'bal_acc': 0.5, 'f1_0': 0.0, 'f1_1': 0.8292682926829268, 'sup_0': 308, 'sup_1': 748, 'auc': 0.5, 'epoch': 0}






# Confidence Score Demo
We select one of our best models (RNN with N=5 and n_layers=5), and use it to both predict the correctness of a random student's answer as well as a confidence in this prediction (approximated by the raw sigmoid output).

In [4]:
hparams = {
    'batch_size': 32,
    'lr': 0.001,
    'epochs': 100,
    'input_size': 2
}

In [5]:
model_name = 'RNN'
N = 5
n_layers = 5

print(f"Training {model_name} with N = {N} and n_layers = {n_layers}")

train_data = load_and_process_data(f'data_outputs/train_fold_1_n_{N}.json', seq_len = N)
val_data = load_and_process_data(f'data_outputs/val_fold_1_n_{N}.json', seq_len = N)
test_data = load_and_process_data(f'data_outputs/test_n_{N}.json', seq_len = N)

train_dataset = StudentSequenceDataset(train_data, embedding_dim=0)
val_dataset = StudentSequenceDataset(val_data, embedding_dim=0)
test_dataset = StudentSequenceDataset(test_data, embedding_dim=0)

train_data_loader = DataLoader(train_dataset, batch_size=hparams['batch_size'], shuffle=True)
val_data_loader = DataLoader(val_dataset, batch_size=hparams['batch_size'], shuffle=False)
test_data_loader = DataLoader(test_dataset, batch_size=hparams['batch_size'], shuffle=False)

model = SimpleRNN(input_size=hparams['input_size'], num_layers=n_layers).to(device)

evaluator = ModelEvaluator({f'{model_name}_{N}_{n_layers}_epoch': model}, train_data_loader, val_data_loader, test_data_loader, device, epochs=hparams['epochs'], lr=hparams['lr'])
evaluator.train_and_evaluate()
val_metrics, test_metrics = evaluator.compute_best_metrics(save_dict=False)

Training RNN with N = 5 and n_layers = 5


100%|██████████| 11302/11302 [00:00<00:00, 907288.77it/s]
100%|██████████| 5675/5675 [00:00<00:00, 964647.42it/s]
100%|██████████| 3130/3130 [00:00<00:00, 960644.78it/s]
Epoch [1/1]: 100%|██████████| 354/354 [00:11<00:00, 31.70it/s, loss=0.651]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 32.89it/s, loss=0.634]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 33.43it/s, loss=0.618]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 33.42it/s, loss=0.615]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 33.22it/s, loss=0.634]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 33.15it/s, loss=0.629]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 32.94it/s, loss=0.613]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 33.89it/s, loss=0.604]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 33.52it/s, loss=0.601]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 34.07it/s, loss=0.598]
Epoch [1/1]: 100%|██████████| 354/354 [00:10<00:00, 33.33it/s, loss=0.595]
Epoch

Best Metrics:
    Validation Set: RNN_5_5_epoch_69: {'acc': 0.7309251101321586, 'bal_acc': 0.6920869715081122, 'f1_0': 0.5995279307631786, 'f1_1': 0.7973994958206182, 'sup_0': 2141, 'sup_1': 3534, 'auc': 0.6920869715081122, 'epoch': 69}
    Test Set: RNN_5_5_epoch_82: {'acc': 0.7408945686900958, 'bal_acc': 0.7014734452594178, 'f1_0': 0.6117759693633317, 'f1_1': 0.805562215296092, 'sup_0': 1192, 'sup_1': 1938, 'auc': 0.7014734452594178, 'epoch': 82}






In [10]:
labels, preds, confs = evaluator.predict(f'{model_name}_{N}_{n_layers}_epoch', test_data_loader, device, with_confidence=True)

# Print any 10 predictions and their confidences
for i in range(55, 66):
    print(f"True Label: {labels[i]}")
    print(f"Prediction: {preds[i]}")
    print(f"Confidence: {confs[i] * 100}%")
    if labels[i] != preds[i]:
        if confs[i] > 0.9:
            print("Verdict: Overconfident\n")
        else:
            print("Verdict: Balanced\n")
    
    if labels[i] == preds[i]:
        if confs[i] < 0.1:
            print("Verdict: Underconfident\n")
        else:
            print("Verdict: Balanced\n")

True Label: 1.0
Prediction: 1.0
Confidence: 99.99145269393921%
Verdict: Balanced

True Label: 0.0
Prediction: 0.0
Confidence: 0.0017738397218636237%
Verdict: Underconfident

True Label: 1.0
Prediction: 1.0
Confidence: 99.18718338012695%
Verdict: Balanced

True Label: 1.0
Prediction: 1.0
Confidence: 99.9940276145935%
Verdict: Balanced

True Label: 1.0
Prediction: 1.0
Confidence: 99.95296001434326%
Verdict: Balanced

True Label: 1.0
Prediction: 1.0
Confidence: 99.92734789848328%
Verdict: Balanced

True Label: 0.0
Prediction: 0.0
Confidence: 3.0384546789719025e-05%
Verdict: Underconfident

True Label: 1.0
Prediction: 1.0
Confidence: 99.97549653053284%
Verdict: Balanced

True Label: 0.0
Prediction: 1.0
Confidence: 99.97177720069885%
Verdict: Overconfident

True Label: 1.0
Prediction: 1.0
Confidence: 99.90384578704834%
Verdict: Balanced

True Label: 1.0
Prediction: 0.0
Confidence: 0.8072282187640667%
Verdict: Balanced

