In [13]:
import os
import glob
from typing import Tuple, List, Union

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch import optim

In [14]:
def get_data(_type: str) -> Tuple[torch.tensor, torch.tensor]:
    """Retrieve X, Y data from the proper directory. You can
    specify whether you want it to be pulled from /train, /dev, 
    or /test, and all the *-data.csv files will be loaded in.
    """
    
    if _type not in {'train', 'dev', 'test'}:
        msg = f"{_type} not supported. Try 'train', 'dev', or 'test'."
        raise RuntimeError(msg)
    
    df = pd.DataFrame()
    
    data_path = os.path.join('data', _type, '*-data.csv')
    for fp in glob.glob(data_path):
        # Cast to float because othewise we run into a type
        # mismatch error in PyTorch
        season_df = pd.read_csv(
            fp, index_col=[0, 1], header=[0, 1, 2], dtype='float32')
        
        df = df.append(season_df)
    
    features = df[['this', 'other']]
    # Normalize features
    features = (features - features.mean()) / features.std()
    scores = df['TEAM_PTS']
    
    n_features = len(features.columns)
    n_output = len(scores.columns)
    msg = 'Uh oh, you might be losing features!'
    assert n_features + n_output == len(df.columns), msg
    
    features = torch.from_numpy(features.values)
    scores = torch.from_numpy(scores.values)
    
    return features, scores

In [15]:
def log_two_layers(n: int) -> List[Union[nn.Linear, nn.ReLU]]:
    """Given an input with n features, construct a series
    of neural network layers that decrease logarithmically.
    """
    shift_bit_length = lambda x: 1 << (x - 1).bit_length() - 1
    
    layers = []
    while n > 4:
        power_of_two = shift_bit_length(n)
        # I belive nn.Linear uses Xavier initialization
        layers.append( nn.Linear(n, power_of_two) )
        layers.append( nn.ReLU() )
        n = power_of_two
    # Now n == 4 and we add a final regression layer
    layers.append( nn.Linear(n, 1) )
    return layers

## Main Model Logic/Driver

In [16]:
# Roughly follows https://pytorch.org/tutorials/beginner/nn_tutorial.html#

# Model hyperparameters
num_epochs = 200
batch_size = 100
learning_rate = 0.001

# Load data as torch.tensors
x_train, y_train = get_data('train')
x_validate, y_validate = get_data('dev')

# Define our model layers by decreasing powers of two
model = nn.Sequential( *log_two_layers(x_train.shape[1]) )
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

validate_ds = TensorDataset(x_validate, y_validate)
validate_dl = DataLoader(validate_ds, batch_size=batch_size * 2)

# L1 loss is more robust to outliers
loss_func = F.l1_loss
# loss_func = F.mse_loss

for epoch in range(num_epochs):
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    model.eval()
    with torch.no_grad():
        validate_loss = sum(loss_func(model(xb), yb) for xb, yb in validate_dl)

    # Print epoch number and average validation loss
    print(epoch, validate_loss / len(validate_dl))

0 tensor(110.9003)
1 tensor(110.7854)
2 tensor(110.6723)
3 tensor(110.5602)
4 tensor(110.4483)
5 tensor(110.3447)
6 tensor(110.2436)
7 tensor(110.1409)
8 tensor(110.0362)
9 tensor(109.9290)
10 tensor(109.8186)
11 tensor(109.7044)
12 tensor(109.5853)
13 tensor(109.4604)
14 tensor(109.3280)
15 tensor(109.1866)
16 tensor(109.0337)
17 tensor(108.8666)
18 tensor(108.6813)
19 tensor(108.4729)
20 tensor(108.2345)
21 tensor(107.9568)
22 tensor(107.6261)
23 tensor(107.2223)
24 tensor(106.7130)
25 tensor(106.0435)
26 tensor(105.1097)
27 tensor(103.6829)
28 tensor(101.1135)
29 tensor(94.1324)
30 tensor(13.8273)
31 tensor(13.5772)
32 tensor(13.3429)
33 tensor(12.7905)
34 tensor(13.1376)
35 tensor(11.5102)
36 tensor(13.1918)
37 tensor(12.2769)
38 tensor(11.5787)
39 tensor(12.6068)
40 tensor(12.4990)
41 tensor(12.2365)
42 tensor(12.4181)
43 tensor(13.0125)
44 tensor(11.2466)
45 tensor(12.9829)
46 tensor(12.6054)
47 tensor(12.0182)
48 tensor(11.0825)
49 tensor(11.5698)
50 tensor(12.9474)
51 tensor(12

In [17]:
x_test, y_test = get_data('test')

y_predicted = model(x_test)
y_predicted

tensor([[ 85.4145],
        [115.0387],
        [102.1066],
        ...,
        [ 88.3742],
        [119.4960],
        [105.0972]], grad_fn=<AddmmBackward>)

In [18]:
evaluation_score = torch.sum(torch.abs(y_predicted - y_test))
evaluation_score

tensor(28258.2539, grad_fn=<SumBackward0>)

In [12]:
y_test

tensor([[113.],
        [125.],
        [102.],
        ...,
        [107.],
        [109.],
        [117.]])