# Applying Neural Networks to NBA Team Win Prediction

In [46]:
# import packages
import os
import numpy as np
import pandas as pd
import torch, torchvision
import torch.nn as nn
import torch.optim as optim 
import torch.nn.functional as F
import math

### Data Cleaning and Aggregation

In [11]:
# collect the file name of every CSV found in the datasets folder
dataset_list = [
    entry for entry in os.listdir('./win_predictor_data/') if entry.endswith('.csv')
]

# load each CSV and store it under its 'Year Type' key (e.g. '2015-2016 Misc'),
# which is the part of the file name between the '- ' separator and the first '.'
datasets = {}
for csv_name in dataset_list:
    frame = pd.read_csv('./win_predictor_data/' + csv_name)
    key_start = csv_name.index('- ') + 1
    key_end = csv_name.index('.')
    dataset_name = csv_name[key_start:key_end].strip()
    print(dataset_name)
    datasets[dataset_name] = frame

2015-2016 Misc
2015-2016 Team
2016-2017 Misc
2016-2017 Team
2017-2018 Misc
2017-2018 Team
2018-2019 Misc
2018-2019 Team


In [4]:
def remove_asterisk(team):
    """
    Strip a single trailing asterisk from a team name.
    @param team: the team name as a string
    @return the name without its final '*' if it ended with one, otherwise the name unchanged
    """
    return team[:-1] if team.endswith("*") else team

In [79]:
def create_training_data(datasets):
    """
    Creates the training data using the basketball datasets.

    Datasets that share a year (the part of the key before the first space) are
    merged on the 'Team' column, the per-year frames are stacked, and the unwanted
    columns are dropped. The first remaining column is used as the label and every
    other remaining column is used as a feature.

    @param datasets: A dictionary with year name & stat types as key (e.g. '2015-2016 Misc')
                         and value of a pandas dataframe containing all the data.
                         The dataframes passed in are not modified.
    @return a tuple (training data, training data labels) of float64 tensors; the data
                         has one row per team-season and the labels hold the first
                         remaining column (expected to be wins, 'W' -- confirm if the
                         CSV layout changes)
    @raise ValueError if datasets is empty
    """
    if not datasets:
        raise ValueError("datasets must contain at least one dataframe")

    # All the unwanted columns that won't be used in our neural network
    unwanted_data = ['Rk_x', 'Team', 'Age', 'PW', 'PL', 'MOV', 'SOS', 'SRS', 'L', 'Arena', 'Attend.', 'Attend./G', 'Rk_y']

    # aggregated dataframe per year (agg_datasets['2015-2016'])
    agg_datasets = {}

    # sorted so that the merge order (and thus the column order) is deterministic
    for dataset in sorted(datasets):

        # separate year
        year_label = dataset[0 : dataset.index(' ')]

        # strip asterisks on a copy so the caller's dataframe is left untouched
        frame = datasets[dataset]
        frame = frame.assign(Team=frame['Team'].apply(remove_asterisk))

        # merge with any dataset already seen for this year, otherwise start the year
        if year_label in agg_datasets:
            agg_datasets[year_label] = pd.merge(agg_datasets[year_label], frame, on='Team')
        else:
            agg_datasets[year_label] = frame

    # Aggregate all the data into one data frame and drop the unwanted features
    agg_all_sets = pd.concat(list(agg_datasets.values()), ignore_index=True)
    agg_all_sets = agg_all_sets.drop(unwanted_data, axis=1)

    # Split label and features from the SAME frame: taking the label after the
    # first column has already been removed would return the first feature instead.
    label_column = agg_all_sets.iloc[:, 0]
    feature_columns = agg_all_sets.iloc[:, 1:]

    training_data = torch.tensor(feature_columns.to_numpy(dtype='float64'))
    training_data_labels = torch.tensor(label_column.to_numpy(dtype='float64'))

    return training_data, training_data_labels

    



In [82]:
class Net(nn.Module):
    """
    Fully connected regression network with two hidden layers and a single output.

    Both hidden layers have floor(2/3 * num_features) units (at least 1) and use
    ReLU activations; the output layer is linear.
    """

    def __init__(self, num_examples, num_features):
        """
        @param num_examples: number of training examples; kept so existing calls keep
                                 working, but it does not affect the layer sizes
        @param num_features: number of input features per example (width of the input layer)
        """
        super(Net, self).__init__()
        # guard against a zero-width hidden layer when num_features is very small
        hidden_size = max(1, math.floor((2/3)*num_features))
        # nn.Linear's first argument is the size of each input sample, i.e. the
        # number of features, not the number of rows in the training set
        self.l1 = nn.Linear(num_features, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """
        @param x: tensor whose last dimension has num_features entries
        @return tensor with the same leading dimensions as x and a last dimension of 1
        """
        # match the layers' parameter dtype so float64 inputs do not raise a dtype mismatch
        x = x.to(self.l1.weight.dtype)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = self.l3(x)
        return x
        

        
        

In [86]:
# Build the feature and label tensors from the loaded datasets
training_data, training_data_labels = create_training_data(datasets)
example_count = training_data.size(0)
feature_count = training_data.size(1)
print(feature_count, len(training_data_labels))

# Instantiate the network with the dimensions of the training data
net = Net(example_count, feature_count)

# out = net(training_data)
training_data

38 120


tensor([[114.5000, 103.8000,  10.7000,  ...,  15.2000,  20.7000, 114.9000],
        [110.3000,  99.0000,  11.3000,  ...,  13.1000,  17.5000, 103.5000],
        [113.1000, 105.6000,   7.5000,  ...,  15.9000,  20.6000, 110.2000],
        ...,
        [105.9000, 115.1000,  -9.2000,  ...,  15.6000,  23.6000, 107.5000],
        [104.5000, 113.7000,  -9.2000,  ...,  14.0000,  20.9000, 104.6000],
        [107.7000, 117.6000,  -9.9000,  ...,  13.5000,  20.0000, 104.5000]],
       dtype=torch.float64)

ToDos
- Convert the data into the proper format for the neural net
- Research neural network architectures for prediction (watch tutorials/videos)
- Set up the PyTorch/Keras framework to create the neural network
- EDA (optional)