# Applying Neural Networks to NBA Team Win Prediction

In [67]:
# import packages
import os
import numpy as np
import pandas as pd
import torch, torchvision
import torch.nn as nn
import torch.optim as optim 
import torch.nn.functional as F
import math

### Data Cleaning and Aggregation

In [68]:
# create a list to hold all dataset names
dataset_list = []

# adds all CSV files from the datasets folder to a list
for dataset in os.listdir('./win_predictor_data/'):
    if (dataset.endswith('.csv')):
        dataset_list.append(dataset)

# adds all datasets to a dictionary with the key being 'Year Type' 
datasets = {}
for dataset in dataset_list:
    data_file = pd.read_csv('./win_predictor_data/' + dataset)
    dataset_name = dataset[dataset.index('- ') + 1 : dataset.index('.')].strip()
    datasets[dataset_name] = data_file

In [69]:
# function to remove asterisks from team names
def remove_asterisk(team):
    if team.endswith("*"):
        return team[0:len(team)-1]
    return team

In [70]:
def create_training_data(datasets):
    """
    Creates the training data using the basketball datasets
    @param datasets: A dictionary with year name & stat types as key and value of a pandas dataframe 
                         containing all the data 
    @return the training data and training data labels 
    """
    # create dictionary to hold aggregated datasets by year containing all (agg_datasets['2015-2016'])
    agg_datasets = {}

    # All the unwanted columns that won't be used in our neural network 
    unwanted_data = ['Rk_x', 'Team', 'Age', 'PW', 'PL', 'MOV', 'SOS', 'SRS', 'L', 'Arena', 'Attend.', 'Attend./G', 'Rk_y']

    # run through each dataset and add to dictionary
    for dataset in sorted(datasets):

        # separate year
        year_label = dataset[0 : dataset.index(' ')]

        # remove asterisk from given dataset
        datasets[dataset]['Team'] = datasets[dataset]['Team'].apply(remove_asterisk)

        # add dataset to agg_datasets dictionary
        if year_label in agg_datasets:
            agg_datasets[year_label] = pd.merge(agg_datasets[year_label], datasets[dataset], on='Team')
        else:
            agg_datasets[year_label] = datasets[dataset]

    # Aggregate all the data into one data frame and drop the unwanted features 
    agg_all_sets = pd.concat([agg_datasets[year_label] for year_label in agg_datasets], ignore_index = True)
    agg_all_sets = agg_all_sets.drop(unwanted_data, axis = 1)


    # Convert the aggregated data into a training set 
    training_data = [agg_all_sets.loc[i,:] for i in range(len(agg_all_sets))]
    training_data = [team[1:] for team in training_data]

    # Get the training data labels (Wins) from the training_data 
    training_data_labels = [team[0] for team in training_data]

    return torch.tensor(training_data), torch.tensor(training_data_labels)

In [71]:
class Net(nn.Module):
    
    def __init__(self, num_examples, num_features):
        super(Net, self).__init__()
        hidden_size =  math.floor((2/3) * num_features) # 2/3 * 38 = 25
        self.l1 = nn.Linear(num_features, hidden_size) # 38, 25
        self.l2 = nn.Linear(hidden_size, hidden_size) # 25, 25
        self.l3 = nn.Linear(hidden_size, 1) # 25, 1
    
    def forward(self, x):
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = self.l3(x)
        return x

In [77]:
# Convert the training data into tensors 
training_data, training_data_labels = create_training_data(datasets)
net = Net(len(training_data), len(training_data[0]))
out = net(training_data.float())
out

tensor([[-0.2252],
        [-0.3540],
        [-0.0819],
        [ 0.0275],
        [-0.2229],
        [-0.1470],
        [-0.0116],
        [-0.0879],
        [ 0.0507],
        [ 0.1380],
        [ 0.0421],
        [ 0.0249],
        [ 0.1374],
        [ 0.2695],
        [ 0.1618],
        [ 0.1131],
        [ 0.1789],
        [ 0.0788],
        [ 0.1225],
        [ 0.0280],
        [ 0.2633],
        [ 0.1425],
        [ 0.3031],
        [ 0.1350],
        [ 0.2498],
        [ 0.1614],
        [ 0.5842],
        [ 0.4715],
        [ 0.4172],
        [ 0.5320],
        [-0.3303],
        [-0.2013],
        [ 0.0712],
        [-0.0828],
        [ 0.0800],
        [-0.2289],
        [ 0.0427],
        [-0.0035],
        [-0.0374],
        [ 0.2253],
        [ 0.0448],
        [ 0.0607],
        [ 0.2971],
        [ 0.1382],
        [ 0.1192],
        [ 0.0319],
        [ 0.0612],
        [-0.0085],
        [ 0.1104],
        [ 0.3335],
        [ 0.2568],
        [ 0.1178],
        [ 0.

In [78]:
# train the neural network
optimizer = torch.optim.SGD(net.parameters(), lr = 0.01) # stochastic gradient descent optimization function
criterion = nn.MSELoss() # mean squared error loss function
params = list(net.parameters()) # dimensions of the layers within the neural network

num_epochs = 5
for epoch in range(num_epochs):
    optimizer.zero_grad()
    out = net(train)
    
net.zero_grad()
out.backward(torch.randn(120, 1),retain_graph=True)
net.parameters

6
torch.Size([25, 38])


<bound method Module.parameters of Net(
  (l1): Linear(in_features=38, out_features=25, bias=True)
  (l2): Linear(in_features=25, out_features=25, bias=True)
  (l3): Linear(in_features=25, out_features=1, bias=True)
)>

In [74]:
target = training_data_labels.float()
target = target.view(-1, 1)
loss = criterion(out, target)

In [75]:
net.zero_grad()
criterion = nn.MSELoss()
loss.backward(retain_graph=True)

learning_rate = 0.01

optimizer.step()

In [76]:
for 

tensor([[-3.4856],
        [-3.6997],
        [-3.8999],
        [-3.3356],
        [-3.4191],
        [-3.5719],
        [-3.2878],
        [-3.5360],
        [-3.2956],
        [-3.4602],
        [-3.5641],
        [-3.7366],
        [-3.4206],
        [-3.4939],
        [-3.3873],
        [-3.2386],
        [-3.4685],
        [-3.6042],
        [-3.5117],
        [-3.5848],
        [-3.6501],
        [-3.5161],
        [-3.5415],
        [-3.7851],
        [-3.4437],
        [-3.6772],
        [-3.4404],
        [-3.4564],
        [-3.3511],
        [-3.2361],
        [-3.5050],
        [-3.5300],
        [-3.1970],
        [-3.4562],
        [-3.3547],
        [-3.5847],
        [-3.1710],
        [-3.1605],
        [-3.5531],
        [-3.7012],
        [-3.3401],
        [-3.3795],
        [-3.5180],
        [-3.5502],
        [-3.2611],
        [-3.4895],
        [-3.3966],
        [-3.4757],
        [-3.6244],
        [-3.4552],
        [-3.4400],
        [-3.3570],
        [-2.

ToDos
- Convert Data into Proper Format of Neural Net
- Research Neural Network architectures for predictions (watch tutorials/videos)
- set up PyTorch/Keras Framework to create neural network
- EDA (optional)