In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch
from tqdm import tqdm
import full_iri_dataset_generator as iri
from training_loop import train_model

In [None]:
# Length of each input sequence. It is passed to iri.load_iri_datasets as
# `seq_length` and sizes the first FNN layer (SEQUENCE_LENGTH * 14 inputs).
SEQUENCE_LENGTH = 10

# Description

This is a test model used to work out a standardized training loop and dataset format.

## Preprocessing

In [None]:
# All three loads use the same files and the same sequence length, so the
# arguments are written down once and reused for every call.
iri_dataset_args = {
    "path": "../training_data/final_data.parquet",
    "construction_path": "../training_data/construction_data.parquet",
    "seq_length": SEQUENCE_LENGTH,
}

# One (train, test) pair per model trained later in the notebook.
train, test = iri.load_iri_datasets(**iri_dataset_args)
train2, test2 = iri.load_iri_datasets(**iri_dataset_args)
train3, test3 = iri.load_iri_datasets(**iri_dataset_args)

# Only the first pair is reported here.
print("Train size: ", len(train))
print("Test size: ", len(test))

## Model Definition

In [None]:
class FNN(nn.Module):
    """Fully connected network over a flattened input sequence.

    Every sample is flattened to ``seq_length * n_features`` values and passed
    through three ReLU hidden layers (4096 -> 512 -> 64) followed by a linear
    layer that produces 2 outputs.

    Args:
        seq_length: Length of each input sequence. ``None`` (the default) uses
            the notebook-level ``SEQUENCE_LENGTH``, which is what ``FNN()``
            has always done.
        n_features: Number of values per sequence element. Defaults to 14
            (presumably the dataset's feature count -- confirm against
            ``load_iri_datasets``).
    """

    def __init__(self, seq_length=None, n_features=14):
        super().__init__()
        if seq_length is None:
            seq_length = SEQUENCE_LENGTH
        # Keep the flattened width in one place so forward() always reshapes
        # to exactly what the first layer expects.
        self.in_features = seq_length * n_features
        self.f1 = nn.Linear(self.in_features, 4096)
        self.f2 = nn.Linear(4096, 512)
        # NOTE: f4 is applied before f3. The attribute names are left as they
        # are so the state_dict keys do not change.
        self.f4 = nn.Linear(512, 64)
        self.f3 = nn.Linear(64, 2)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, in_features)`` and return a ``(-1, 2)`` tensor."""
        # reshape (rather than view) also accepts non-contiguous input.
        x = x.reshape(-1, self.in_features)
        x = torch.relu(self.f1(x))
        x = torch.relu(self.f2(x))
        x = torch.relu(self.f4(x))
        # No activation on the last layer: its raw values are the outputs.
        return self.f3(x)


## Training

In [None]:
def _make_adam(net):
    """Build the Adam optimizer used for every run (learning rate 0.0001)."""
    return torch.optim.Adam(net.parameters(), lr=0.0001)


def _make_step_decay(opt):
    """Build a StepLR scheduler for `opt` with step_size=10 and gamma=0.75."""
    return torch.optim.lr_scheduler.StepLR(opt, step_size=10, gamma=0.75)


# Three separately constructed networks with the same architecture. All of
# them are created before any optimizer, scheduler or training run.
model1, model2, model3 = FNN(), FNN(), FNN()
loss = nn.MSELoss()

optimizer1, optimizer2, optimizer3 = (
    _make_adam(model1),
    _make_adam(model2),
    _make_adam(model3),
)
lr_scheduler1, lr_scheduler2, lr_scheduler3 = (
    _make_step_decay(optimizer1),
    _make_step_decay(optimizer2),
    _make_step_decay(optimizer3),
)

# Settings shared by all three training runs.
fit_options = dict(epochs=250, test_every_n=10, batch_size=512)

# Each network is trained on its own (train, test) pair, one after another.
first = train_model(model1, train, test, loss, optimizer1, lr_scheduler=lr_scheduler1, **fit_options)
second = train_model(model2, train2, test2, loss, optimizer2, lr_scheduler=lr_scheduler2, **fit_options)
third = train_model(model3, train3, test3, loss, optimizer3, lr_scheduler=lr_scheduler3, **fit_options)
        

In [None]:
# Report the last recorded value of each metric for every training run.
for tag, history in (("ver1", first), ("ver2", second), ("ver3", third)):
    last = {
        key: history[key][-1]
        for key in ("train_losses", "test_losses", "train_r2s", "test_r2s")
    }
    print(f"{tag}: trainloss: {last['train_losses']}, testloss: {last['test_losses']}, trainr2: {last['train_r2s']}, testr2: {last['test_r2s']}")

# Evaluation

In [None]:
# from torcheval.metrics import R2Score
from sklearn.metrics import r2_score
from torch.utils.data import DataLoader

def compute_r2_for(dataset, net=None, batch_size=256):
    """Return the R^2 score of a network's predictions over ``dataset``.

    Args:
        dataset: Dataset whose batches carry the inputs at index 0 and the
            targets at index 1.
        net: Network (any callable on a batch of inputs) to score. ``None``
            (the default) falls back to the notebook-level ``model``.
        batch_size: Batch size used while iterating over ``dataset``.

    Returns:
        The result of ``r2_score(targets, predictions)`` over the whole
        dataset.
    """
    if net is None:
        net = model
    predictions = []
    targets = []
    # Scoring never needs gradients. Disabling them here means the function
    # also works when the caller did not wrap the call in torch.no_grad().
    with torch.no_grad():
        for batch in DataLoader(dataset, batch_size=batch_size):
            inputs, goal = batch[0], batch[1]
            predictions.append(net(inputs))
            targets.append(goal)
    # Hand sklearn plain NumPy arrays instead of relying on implicit
    # tensor-to-array conversion.
    y_pred = torch.cat(predictions).detach().cpu().numpy()
    y_true = torch.cat(targets).detach().cpu().numpy()
    return r2_score(y_true, y_pred)

# Score every trained network on the split it was trained and tested with.
# The loop variable is deliberately named `model`: compute_r2_for, when called
# with only a dataset, scores the notebook-level `model`, so rebinding that
# name selects the network being scored. (The notebook defines no `model`
# anywhere else, only model1/model2/model3.)
for label, model, train_set, test_set in (
    ("ver1", model1, train, test),
    ("ver2", model2, train2, test2),
    ("ver3", model3, train3, test3),
):
    model.to("cpu")
    model.eval()
    with torch.no_grad():
        train_r2 = compute_r2_for(train_set)
        print(f"{label} R^2 for training data: {train_r2}")
        test_r2 = compute_r2_for(test_set)
        print(f"{label} R^2 for testing data: {test_r2}")

In [None]:
from torcheval.metrics import MeanSquaredError

def compute_mse_for(dataset, net=None, batch_size=256):
    """Return the mean squared error of a network's predictions over ``dataset``.

    Args:
        dataset: Dataset whose batches carry the inputs at index 0 and the
            targets at index 1.
        net: Network (any callable on a batch of inputs) to score. ``None``
            (the default) falls back to the notebook-level ``model``.
        batch_size: Batch size used while iterating over ``dataset``.

    Returns:
        The value of ``MeanSquaredError.compute()`` after every batch has
        been added to the metric.
    """
    if net is None:
        net = model
    mse = MeanSquaredError()
    # Scoring never needs gradients, so they are disabled here regardless of
    # what the caller does.
    with torch.no_grad():
        # The data is not shuffled: batch order is irrelevant to a mean, and
        # shuffling would draw from the global random number generator.
        for batch in DataLoader(dataset, batch_size=batch_size):
            inputs, goal = batch[0], batch[1]
            # torcheval metrics take (input, target): predictions go first.
            mse.update(net(inputs), goal)
    return mse.compute()

# Compute the MSE of every trained network on the split it was trained and
# tested with. The loop variable is deliberately named `model`:
# compute_mse_for, when called with only a dataset, scores the notebook-level
# `model`, so rebinding that name selects the network being scored. (The
# notebook defines no `model` anywhere else, only model1/model2/model3.)
for label, model, train_set, test_set in (
    ("ver1", model1, train, test),
    ("ver2", model2, train2, test2),
    ("ver3", model3, train3, test3),
):
    model.to("cpu")
    model.eval()
    with torch.no_grad():
        train_mse = compute_mse_for(train_set)
        print(f"{label} MSE for training data: {train_mse}")
        test_mse = compute_mse_for(test_set)
        print(f"{label} MSE for testing data: {test_mse}")