In [32]:
import os
import glob
from typing import Tuple

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch import optim

In [33]:
# Training hyperparameters
EPOCHS: int = 200            # number of full passes over the training DataLoader
BATCH: int = 100             # batch_size handed to the DataLoader
LEARNING_RATE: float = 1e-3  # lr handed to the SGD optimizer

In [48]:
class NeuralNet(nn.Module):
    """Fully connected regression network with a single output value per row.

    The layer widths are ``in_features -> 32 -> 26 -> 14 -> 8 -> 1``. A ReLU
    follows every layer except the last, so the output is unbounded.

    Args:
        in_features: Width of each input row. Defaults to 36, the value that
            used to be hard-coded, so ``NeuralNet()`` behaves as before.
    """

    def __init__(self, in_features: int = 36):
        super().__init__()
        # Each nn.Linear is an affine operation: y = Wx + b
        self.lin1 = nn.Linear(in_features=in_features, out_features=32)
        self.lin2 = nn.Linear(32, 26)
        self.lin3 = nn.Linear(26, 14)
        self.lin4 = nn.Linear(14, 8)
        self.lin5 = nn.Linear(8, 1)

    def forward(self, x):
        """Run ``x`` through the five layers and return the final layer's output.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        x = F.relu(self.lin3(x))
        x = F.relu(self.lin4(x))
        # No activation on the output layer.
        x = self.lin5(x)
        return x

In [49]:
def get_data(_type: str) -> Tuple[torch.Tensor, torch.Tensor]:
    """Load every season CSV for one data split and return it as tensors.

    Files matching ``data/<_type>/*-data.csv`` are read (in sorted filename
    order, so the row order is reproducible) and stacked row-wise.

    Args:
        _type: Which split to load; one of ``'train'``, ``'dev'``, ``'test'``.

    Returns:
        A ``(features, scores)`` pair of tensors built from the selected
        feature columns and the ``'TEAM_PTS'`` columns respectively.

    Raises:
        RuntimeError: If ``_type`` is not a supported split name.
        FileNotFoundError: If no CSV file matches the split's glob pattern.
        AssertionError: If the feature and score columns together do not
            account for every column that was loaded.
    """
    if _type not in {'train', 'dev', 'test'}:
        msg = f"{_type} not supported. Try 'train', 'dev', or 'test'."
        raise RuntimeError(msg)

    data_path = os.path.join('data', _type, '*-data.csv')

    # Cast to float32 because otherwise we run into a type
    # mismatch error in PyTorch
    season_dfs = [
        pd.read_csv(
            file_name, index_col=[0, 1], header=[0, 1, 2], dtype='float32')
        for file_name in sorted(glob.glob(data_path))
    ]

    if not season_dfs:
        # Fail here with a clear message instead of a KeyError further down.
        raise FileNotFoundError(f"No files matching {data_path!r} were found.")

    # Concatenate once; growing a frame inside the loop is quadratic and
    # DataFrame.append no longer exists in pandas >= 2.0.
    df = pd.concat(season_dfs)

    # NOTE(review): 'this' and 'other' look like placeholder column names --
    # confirm them against the real CSV header.
    features = df[['this', 'other']]
    # `.columns` is read below, so this selection is expected to yield a
    # DataFrame (e.g. a top-level key of the multi-row header), not a Series.
    scores = df['TEAM_PTS']
    msg = 'Uh oh, you might be losing features!'
    assert len(features.columns) + len(scores.columns) == len(df.columns), msg

    features = torch.from_numpy(features.values)
    scores = torch.from_numpy(scores.values)

    return features, scores

In [50]:
def get_model():
    """Create a fresh NeuralNet together with an SGD optimizer for it.

    Returns:
        A ``(model, optimizer)`` tuple. The optimizer is ``optim.SGD`` over
        the model's parameters with ``lr=LEARNING_RATE``.
    """
    net = NeuralNet()
    sgd = optim.SGD(net.parameters(), lr=LEARNING_RATE)
    return net, sgd

In [None]:
x, y = get_data('train')

# Follows https://pytorch.org/tutorials/beginner/nn_tutorial.html#refactor-using-optim
model, opt = get_model()

train_ds = TensorDataset(x, y)
train_dl = DataLoader(train_ds, batch_size=BATCH)

# L1 loss between predictions and targets
loss_func = F.l1_loss

for _ in range(EPOCHS):
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        # Clear the gradients so they do not accumulate into the next batch
        opt.zero_grad()

# Report the loss over the whole training set. The previous version reused the
# loop variables `xb`/`yb`, so it only covered the last mini-batch and would
# fail if the loader were empty. no_grad avoids building an autograd graph
# for a value that is only printed.
with torch.no_grad():
    print(loss_func(model(x), y))