In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch
from tqdm import tqdm
import full_iri_dataset_generator as iri
from training_loop import train_model

In [2]:
# Sequence length used throughout the notebook: it is passed as `seq_length`
# to the dataset loader and sizes the model's input layer
# (SEQUENCE_LENGTH * 6 input features).
SEQUENCE_LENGTH = 10

# Description

This is a test model used to work out a standardized training loop and dataset format.

## Preprocessing

In [3]:
# Input files, relative to the notebook's working directory.
data_path = "../training_data/final_data.parquet"
construction_path = "../training_data/construction_data.parquet"

# Build the train/test datasets using the notebook-wide sequence length.
train, test = iri.load_iri_datasets(
    path=data_path,
    construction_path=construction_path,
    seq_length=SEQUENCE_LENGTH,
)

# Report how many samples ended up in each split.
print("Train size: ", len(train))
print("Test size: ", len(test))

                                                                         

Train size:  25554
Test size:  6389




## Model Definition

In [21]:
class FNN(nn.Module):
    """Fully connected network over a flattened input sequence.

    Every sample is flattened to ``seq_length * n_features`` values, passed
    through three ReLU-activated hidden layers (4096 -> 512 -> 64) and a final
    linear layer that produces two outputs.

    Args:
        seq_length: Sequence length used to size the input layer. When omitted,
            the notebook-level ``SEQUENCE_LENGTH`` is used, so ``FNN()`` keeps
            its original behaviour.
        n_features: Number of values per sequence element (default 6, the
            factor the input layer was originally sized with).
    """

    def __init__(self, seq_length=None, n_features=6):
        super().__init__()
        if seq_length is None:
            seq_length = SEQUENCE_LENGTH
        self.f1 = nn.Linear(seq_length * n_features, 4096)
        self.f2 = nn.Linear(4096, 512)
        # NOTE: f4 is applied *before* f3 in forward(); the attribute names are
        # kept as-is so existing state_dict keys remain loadable.
        self.f4 = nn.Linear(512, 64)
        self.f3 = nn.Linear(64, 2)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, in_features)`` and run it through the network.

        Returns:
            Tensor with two values per flattened sample.
        """
        # Flatten to the width the first layer was built with instead of a
        # hard-coded 60, so the model stays valid when the sequence length changes.
        x = x.view(-1, self.f1.in_features)
        x = torch.relu(self.f1(x))
        x = torch.relu(self.f2(x))
        x = torch.relu(self.f4(x))
        x = self.f3(x)
        return x


## Training

In [25]:
# Seed torch's global RNG before the model is built so that weight
# initialisation is repeatable across "Restart & Run All".
# NOTE(review): any shuffling inside train_model that draws from torch's global
# RNG should become repeatable too - confirm against training_loop.py.
SEED = 42
torch.manual_seed(SEED)

model = FNN()
loss = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
# Multiply the learning rate by 0.75 every 10 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.75)

train_model(
    model,
    train,
    test,
    loss,
    optimizer,
    epochs=250,
    test_every_n=10,
    batch_size=512,
    lr_scheduler=lr_scheduler,
)
        

Training Epoch:   0%|          | 0/250 [00:00<?, ?it/s]

Training Epoch: 100%|██████████| 250/250 [02:16<00:00,  1.83it/s, Train Loss=0.0148, Test Loss=0.0175]


# Evaluation

In [26]:
# from torcheval.metrics import R2Score
from sklearn.metrics import r2_score
from torch.utils.data import DataLoader

def compute_r2_for(dataset):
    """Return the R^2 score of the notebook-level ``model`` on ``dataset``.

    The dataset is iterated in batches of 256; element 0 of each batch is fed
    to the model and element 1 is used as the target. All predictions and
    targets are concatenated and scored with ``sklearn.metrics.r2_score``.

    Args:
        dataset: Anything ``DataLoader`` accepts whose batches are indexable as
            ``(inputs, target, ...)``.

    Returns:
        The value returned by ``r2_score(targets, predictions)``.
    """
    predictions = []
    targets = []
    loader = DataLoader(dataset, batch_size=256)
    # Disable autograd here rather than relying on the caller: outputs that
    # require grad cannot be converted to numpy for r2_score.
    with torch.no_grad():
        for batch in loader:
            inputs, goal = batch[0], batch[1]
            predictions.append(model(inputs))
            targets.append(goal)
    # Hand sklearn plain numpy arrays instead of relying on implicit conversion.
    y_pred = torch.cat(predictions).cpu().numpy()
    y_true = torch.cat(targets).cpu().numpy()
    return r2_score(y_true, y_pred)

# Score on the CPU with the model switched to evaluation mode.
model.to("cpu").eval()

with torch.no_grad():  # gradients are not needed for scoring
    train_r2 = compute_r2_for(train)
    print(f"R^2 for training data: {train_r2}")

    test_r2 = compute_r2_for(test)
    print(f"R^2 for testing data: {test_r2}")

R^2 for training data: 0.3243620628620961
R^2 for testing data: 0.24752439315843794


In [27]:
from torcheval.metrics import MeanSquaredError

def compute_mse_for(dataset):
    """Return the mean squared error of the notebook-level ``model`` on ``dataset``.

    The dataset is iterated in batches of 256; element 0 of each batch is fed
    to the model and element 1 is used as the target. Errors are accumulated
    with torcheval's ``MeanSquaredError`` metric.

    Args:
        dataset: Anything ``DataLoader`` accepts whose batches are indexable as
            ``(inputs, target, ...)``.

    Returns:
        The result of ``MeanSquaredError.compute()`` over the whole dataset.
    """
    mse = MeanSquaredError()
    # No shuffling: evaluation does not need it, and a fixed order keeps the
    # accumulated result repeatable and consistent with compute_r2_for.
    loader = DataLoader(dataset, batch_size=256)
    with torch.no_grad():  # do not rely on the caller to disable autograd
        for batch in loader:
            inputs, goal = batch[0], batch[1]
            # torcheval's signature is update(input, target): predictions first.
            mse.update(model(inputs), goal)
    return mse.compute()

# Score on the CPU with the model switched to evaluation mode.
model.to("cpu").eval()

with torch.no_grad():  # gradients are not needed for scoring
    train_mse = compute_mse_for(train)
    print(f"MSE for training data: {train_mse}")

    test_mse = compute_mse_for(test)
    print(f"MSE for testing data: {test_mse}")

MSE for training data: 0.014994087629020214
MSE for testing data: 0.017716534435749054
