# This code implements the training pipeline for various models

In [15]:
import torch
import torch.nn as nn
from torch.optim import Adam
from dataformatter import *
from models import *
import matplotlib.pyplot as plt
import os
import pdb

In [16]:
# Listing all the hyper-parameters that need to be set here
# Batch size that trainer() hands to get_batch_iterator().
BATCH_SZ = 32
# Architecture selector passed to get_model().
MODEL_TYPE = 'FC' # OPTIONS ARE : [FC, TRANSFORMER, LSTM]
# Learning rate given to the Adam optimizer.
LR = 1e-3
# Input file loaded through csv_to_data().
DATA_PATH = 'hawkeye_trace_belady_graph.csv' # This is the CSV FILE WE ARE TRYING TO ANALYZE
# Folder that graph_results() writes its PNG plots into.
SAVE_FLDR = 'results'
# Number of epochs passed to trainer() as num_epochs.
N_EPOCHS = 10

In [17]:
def run_epoch(model, optimizer, data_iterator, mode='train'):
    """Run one pass over ``data_iterator`` and return the epoch's average loss and accuracy.

    Args:
        model: Callable as ``model(batch)`` returning a ``(loss, acc)`` pair whose
            elements support ``.item()``. Must also provide ``train()`` and ``eval()``.
        optimizer: Object providing ``zero_grad()`` and ``step()``. Only touched
            when ``mode == 'train'``, so it may be ``None`` otherwise.
        data_iterator: Iterable of batches; every batch must support ``len()``.
        mode: ``'train'`` updates the model after every batch. Any other value
            (e.g. ``'val'``) only evaluates: the model is switched to eval mode
            and gradient tracking is disabled.

    Returns:
        Tuple ``(avg_loss, avg_acc)`` where ``avg_loss`` is the mean of the
        per-batch losses and ``avg_acc`` is the sum of the per-batch ``acc``
        values divided by the summed ``len(batch)``.
        NOTE(review): this presumes ``acc`` is a per-batch count of correct
        predictions and ``len(batch)`` is the number of examples -- confirm
        against the model and batch iterator implementations.

    Raises:
        ValueError: If ``data_iterator`` yields no batches.
    """
    is_training = mode == 'train'
    if is_training:
        model.train()
    else:
        model.eval()

    stats = []
    num_egs = 0
    # Gradients are only needed when we are going to call backward(); turning
    # them off for evaluation avoids building the autograd graph.
    with torch.set_grad_enabled(is_training):
        for batch in data_iterator:
            # We get the loss from passing the batch to the model.
            # Each model has its own way of dealing with the data.
            loss, acc = model(batch)
            stats.append([loss.item(), acc.item()])
            num_egs += len(batch)
            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

    if not stats:
        # Without this guard the 2-D indexing below fails with an opaque IndexError.
        raise ValueError("run_epoch received no batches from data_iterator")

    stats = np.array(stats)
    avg_loss = np.mean(stats[:, 0])
    avg_acc = stats[:, 1].sum() / num_egs
    return avg_loss, avg_acc

In [18]:
def graph_results(data, desc):
    """Plot loss and accuracy curves, save the figure as a PNG, and display it.

    Args:
        data: 2-D array indexable as ``data[:, 0]`` (drawn as 'Loss') and
            ``data[:, 1]`` (drawn as 'Accuracy'); ``trainer`` passes one row
            per epoch.
        desc: Figure title. With spaces replaced by underscores it is also the
            file name of the PNG written under ``SAVE_FLDR``.
    """
    # Use a dedicated figure so repeated calls never draw on top of an
    # earlier plot that is still the current pyplot figure.
    fig, ax = plt.subplots()
    epochs = range(len(data))
    ax.plot(epochs, data[:, 0], color='tab:red', label='Loss')
    ax.plot(epochs, data[:, 1], color='tab:blue', label='Accuracy')
    ax.set_title(desc)
    ax.set_xlabel('Epoch')
    ax.legend()

    # Make sure the output folder exists so saving does not depend on
    # another cell having been run first.
    os.makedirs(SAVE_FLDR, exist_ok=True)
    fig.savefig(os.path.join(SAVE_FLDR, "{}.png".format(desc.replace(' ', '_'))))
    plt.show()
    # Release the figure; otherwise figures accumulate across calls.
    plt.close(fig)

In [19]:
def trainer(model, optimizer, dataset, num_epochs=20, desc='Description of model'):
    """Train ``model`` for ``num_epochs`` epochs, plot the per-epoch stats, and return it.

    Args:
        model: Model forwarded to ``run_epoch``.
        optimizer: Optimizer forwarded to ``run_epoch``.
        dataset: Data handed to ``get_batch_iterator`` at the start of every epoch.
        num_epochs: Number of training epochs to run.
        desc: Title (and file-name stem) forwarded to ``graph_results``.

    Returns:
        The same ``model`` object that was passed in.
    """
    # Todo [all]
    # Figure out how to split the data into a train-val-test regime
    history = []
    for epoch_idx in range(num_epochs):
        # A fresh batch iterator is requested for every epoch.
        batches = get_batch_iterator(dataset, BATCH_SZ)
        avg_loss, avg_acc = run_epoch(model, optimizer, batches, mode='train')
        # Todo[all] would normally run the validation set at this point
        history.append((avg_loss, avg_acc))
        print('Epoch {} : Avg Loss = {} Avg Acc = {}'.format(epoch_idx, avg_loss, avg_acc))
    graph_results(np.array(history), desc)
    return model

In [20]:
# Create the output folder if needed; exist_ok avoids the separate
# check-then-create step.
os.makedirs(SAVE_FLDR, exist_ok=True)

model = get_model(MODEL_TYPE)
dataset = csv_to_data(DATA_PATH)
model.prep_for_data(dataset)
# Build the optimizer only after the model has been prepared for the data.
# NOTE(review): prep_for_data is defined outside this notebook; if it creates
# or replaces parameters, an optimizer constructed earlier would not track them.
optimizer = Adam(model.parameters(), lr=LR)

In [None]:
# Average-prediction baseline (computed in the following cell).

In [None]:
# Baseline for comparison: mean of the dataset's last column.
# NOTE(review): presumably the last column holds the binary label -- confirm
# against csv_to_data. The value printed is one minus that mean.
average_pred = np.mean(dataset[:, -1])
print('This is the average prediction : ', 1.0 - average_pred)

# Train for N_EPOCHS epochs and keep the returned model.
model = trainer(
    model=model,
    optimizer=optimizer,
    dataset=dataset,
    num_epochs=N_EPOCHS,
    desc='Basic MLP Model',
)

This is the average prediction :  0.28403348597324535
Epoch 0 : Avg Loss = 0.43677452632940367 Avg Acc = 0.8003451152912622
Epoch 1 : Avg Loss = 0.43725020329929093 Avg Acc = 0.7993021844660194
Epoch 2 : Avg Loss = 0.4374499780767896 Avg Acc = 0.7993969963592233
Epoch 3 : Avg Loss = 0.43640173587369396 Avg Acc = 0.7995866201456311
