In [1]:
import os
import glob
from typing import Tuple, List, Union

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch import optim

In [2]:
def get_data(_type: str) -> Tuple[torch.tensor, torch.tensor]:
    """Retrieve X, Y data from the proper directory. You can
    specify whether you want it to be pulled from /train, /dev, 
    or /test, and all the *-data.csv files will be loaded in.
    """
    
    if _type not in {'train', 'dev', 'test'}:
        msg = f"{_type} not supported. Try 'train', 'dev', or 'test'."
        raise RuntimeError(msg)
    
    df = pd.DataFrame()
    
    data_path = os.path.join('data', _type, '*-data.csv')
    for fp in glob.glob(data_path):
        # Cast to float because othewise we run into a type
        # mismatch error in PyTorch
        season_df = pd.read_csv(
            fp, index_col=[0, 1], header=[0, 1, 2], dtype='float32')
        
        df = df.append(season_df)
    
    features = df[['this', 'other']]

    # Normalize features
    features = (features - features.mean()) / features.std()
    scores = df['TEAM_PTS']
    
    n_features = len(features.columns)
    n_output = len(scores.columns)
    msg = 'Uh oh, you might be losing features!'
    assert n_features + n_output == len(df.columns), msg
    
    features = torch.from_numpy(features.values)
    scores = torch.from_numpy(scores.values)
    
    return features, scores

In [3]:
def log_two_layers(n: int) -> List[Union[nn.Linear, nn.ReLU]]:
    """Given an input with n features, construct a series
    of neural network layers that decrease logarithmically.
    """
    shift_bit_length = lambda x: 1 << (x - 1).bit_length() - 1
    
    layers = []
    while n > 4:
        power_of_two = shift_bit_length(n)
        # I belive nn.Linear uses Xavier initialization
        layers.append( nn.Linear(n, power_of_two) )
        layers.append( nn.ReLU() )
        n = power_of_two
    # Now n == 4 and we add a final regression layer
    layers.append( nn.Linear(n, 1) )
    return layers

## Main Driver Logic

In [4]:
# Roughly follows https://pytorch.org/tutorials/beginner/nn_tutorial.html#

# Model hyperparameters
num_epochs = 200
batch_size = 100
learning_rate = 0.0001

# Load data as torch.tensors
x_train, y_train = get_data('train')
x_validate, y_validate = get_data('dev')

# Define our model layers by decreasing powers of two
model = nn.Sequential( *log_two_layers(x_train.shape[1]) )
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

validate_ds = TensorDataset(x_validate, y_validate)
validate_dl = DataLoader(validate_ds, batch_size=batch_size * 2)

# L1 loss is more robust to outliers
# loss_func = F.l1_loss
loss_func = F.mse_loss

for epoch in range(num_epochs):
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    model.eval()
    with torch.no_grad():
        validate_loss = sum(loss_func(model(xb), yb) for xb, yb in validate_dl)

    # Print epoch number and average validation loss
    print(epoch, validate_loss / len(validate_dl))

0 tensor(351.3893)
1 tensor(263.3325)
2 tensor(216.3497)
3 tensor(179.8292)
4 tensor(177.0203)
5 tensor(203.8855)
6 tensor(256.8966)
7 tensor(201.4151)
8 tensor(252.6625)
9 tensor(253.8376)
10 tensor(162.8323)
11 tensor(200.6913)
12 tensor(230.3293)
13 tensor(227.2189)
14 tensor(171.5022)
15 tensor(157.9652)
16 tensor(228.7696)
17 tensor(355.9074)
18 tensor(242.6539)
19 tensor(179.5560)
20 tensor(385.3568)
21 tensor(215.0043)
22 tensor(344.5336)
23 tensor(218.1063)
24 tensor(210.0494)
25 tensor(205.5783)
26 tensor(226.8335)
27 tensor(251.2827)
28 tensor(198.3258)
29 tensor(193.9536)
30 tensor(275.4183)
31 tensor(245.3987)
32 tensor(187.7682)
33 tensor(219.7996)
34 tensor(197.6419)
35 tensor(196.7668)
36 tensor(202.6770)
37 tensor(181.9888)
38 tensor(164.5212)
39 tensor(199.4467)
40 tensor(187.8617)
41 tensor(201.7645)
42 tensor(195.3200)
43 tensor(268.5247)
44 tensor(273.4436)
45 tensor(218.6918)
46 tensor(257.9191)
47 tensor(175.1298)
48 tensor(174.3317)
49 tensor(213.6090)
50 tensor(

In [5]:
x_test, y_test = get_data('test')

y_predicted = model(x_test)
y_predicted

tensor([[ 98.7893],
        [107.7839],
        [ 94.6973],
        ...,
        [ 99.2690],
        [116.1662],
        [ 98.9562]], grad_fn=<AddmmBackward>)