In [105]:
import tensorflow as tf
from tensorflow.keras import utils, models, layers
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Our first step is to create our training and testing data from all the data we downloaded.

In [106]:
# Read the downloaded game records from disk and display the frame as a quick visual check.
df = pd.read_csv(filepath_or_buffer='output_data.csv')
df

Unnamed: 0,date,hometeam_slug,awayteam_slug,winner_is_home_team,hometeam_fieldGoalsMade,hometeam_fieldGoalsAttempted,hometeam_threePointersMade,hometeam_threePointersAttempted,hometeam_freeThrowsMade,hometeam_freeThrowsAttempted,...,awayteam_threePointersAttempted,awayteam_freeThrowsMade,awayteam_freeThrowsAttempted,awayteam_reboundsOffensive,awayteam_reboundsDefensive,awayteam_assists,awayteam_steals,awayteam_blocks,awayteam_foulsPersonal,awayteam_points
0,2010-01-02,blazers,warriors,True,36,74,5,15,28,34,...,12,18,24,11,25,14,6,1,25,89
1,2010-01-02,bulls,magic,True,37,87,3,5,24,27,...,37,13,20,16,32,19,4,7,23,93
2,2010-01-02,hornets,rockets,True,38,85,3,14,20,24,...,20,17,21,8,31,21,3,1,25,95
3,2010-01-02,suns,grizzlies,False,40,88,5,18,18,24,...,15,18,25,16,33,25,7,5,20,128
4,2010-01-02,kings,mavericks,False,37,92,6,18,11,13,...,9,24,28,4,32,21,11,4,15,99
5,2010-01-02,nets,cavaliers,False,29,75,3,10,25,32,...,14,26,33,16,36,19,4,5,24,94
6,2010-01-02,bucks,thunder,True,42,106,4,17,15,19,...,14,23,31,10,39,23,4,10,23,97
7,2010-01-02,jazz,nuggets,False,36,71,3,12,20,27,...,15,16,23,8,24,23,15,3,25,105
8,2010-01-02,celtics,raptors,True,36,70,6,16,25,31,...,15,18,25,5,23,22,6,4,27,96
9,2010-01-02,wizards,spurs,False,35,80,4,10,12,17,...,12,15,20,3,34,23,3,3,24,97


In [107]:
# Remove the identifier columns (game date and the two team slugs); only the box-score
# statistics and the label are kept for modelling.
# `errors='ignore'` makes this cell idempotent: re-running it after the columns are already
# gone is a no-op instead of raising KeyError.
df = df.drop(columns=['date', 'hometeam_slug', 'awayteam_slug'], errors='ignore')

In [108]:
# Number of columns left in the frame (features plus the label column).
df.shape[1]

27

Of these 27 columns, 27 - 1 = 26 are input features. The remaining column, `winner_is_home_team`, is the label we are trying to predict: whether the home team won.

In [109]:
# Separate the feature matrix (every column except the label) from the label vector.
# The data is partitioned exactly once, in the train/validation/test cell that follows.
# An additional 80/20 split used to live here; its result was always overwritten by that
# cell, and re-running it out of order produced a training set that overlapped the
# existing validation set.
# NOTE(review): the features look like end-of-game box-score statistics (e.g.
# awayteam_points), which would largely determine the label - confirm this is intended.
X = df.drop(columns='winner_is_home_team').values
y = df['winner_is_home_team'].values

In [123]:
# Fraction of the full data set assigned to each partition (the three values sum to 1).
train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.10

# Fixed seed so that the partition - and therefore every metric reported below - is
# reproducible across kernel restarts.
split_seed = 42

# First cut: 75% of the data becomes the training set, the remaining 25% is held out.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y,
    test_size=1 - train_ratio,
    shuffle=True,
    random_state=split_seed,
)

# Second cut: divide the held-out 25% so that validation ends up with 15% and
# test with 10% of the initial data set.
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout,
    test_size=test_ratio / (test_ratio + validation_ratio),
    shuffle=True,
    random_state=split_seed,
)

Before building our own model, we measure the predictive performance of a simple logistic regression baseline.

In [131]:
# Baseline: a single dense layer mapping the 26 input features to 2 class scores.
# The output activation is softmax so the two outputs form a probability distribution
# over the classes, which is what the sparse categorical cross-entropy loss used to train
# this model expects. Two-class softmax regression is equivalent to logistic regression;
# two independent sigmoid units are not, because their outputs do not sum to 1.
model_logistic_regression = models.Sequential([
    layers.InputLayer(input_shape=(26,)),
    layers.Dense(units=2, activation='softmax')
])

model_logistic_regression.summary()

Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_23 (Dense)            (None, 2)                 54        
                                                                 
Total params: 54 (216.00 Byte)
Trainable params: 54 (216.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [132]:
# Configure the baseline: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy reported after every epoch.
model_logistic_regression.compile(
    optimizer='adam',
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=['accuracy'],
)

# Fit on the training partition for 20 epochs, evaluating on the validation partition
# at the end of each epoch; the returned History object is kept for later inspection.
history_logistic_regression = model_logistic_regression.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Now we try our own Neural Network model.

In [133]:
# Feed-forward network: 26 input features -> stacked dense layers -> 2 class probabilities.
# Layers without an activation are purely linear; layers with 'relu' add the non-linearity.
# NOTE(review): a linear Dense layer feeding directly into another Dense layer adds
# parameters but no extra expressive power - consider giving every hidden layer an activation.
model1 = models.Sequential([
    layers.InputLayer(input_shape=(26,)),
    layers.Dense(units=128, use_bias=True),                     # linear
    layers.Dense(units=128, use_bias=True, activation='relu'),  # non-linear
    layers.Dense(units=256, use_bias=True),                     # linear
    layers.Dense(units=256, use_bias=True, activation='relu'),  # non-linear
    layers.Dense(units=256, use_bias=True),                     # linear
    layers.Dense(units=64, activation='relu'),                  # non-linear
    # Softmax turns the two output scores into class probabilities. The loss this model
    # is trained with is called with its default from_logits=False, i.e. it expects
    # probabilities; without an activation here it would receive raw logits.
    layers.Dense(units=2, activation='softmax')
])

model1.summary()

Model: "sequential_19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 128)               3456      
                                                                 
 dense_25 (Dense)            (None, 128)               16512     
                                                                 
 dense_26 (Dense)            (None, 256)               33024     
                                                                 
 dense_27 (Dense)            (None, 256)               65792     
                                                                 
 dense_28 (Dense)            (None, 256)               65792     
                                                                 
 dense_29 (Dense)            (None, 64)                16448     
                                                                 
 dense_30 (Dense)            (None, 2)               

In [134]:
# Same training setup as the baseline so the two models are directly comparable:
# Adam optimizer, sparse categorical cross-entropy loss, accuracy as the metric.
model1.compile(
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'],
)

# 20 epochs on the training partition, with the validation partition scored after
# each epoch; the History object is stored in history_1.
history_1 = model1.fit(
    X_train,
    y_train,
    epochs=20,
    validation_data=(X_val, y_val),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [1]:
from gui import create_model_gui

In [2]:
# Build a model through the helper imported from the local `gui` module. The call
# returns a pair: the model and a dictionary of the parameters that were chosen.
# NOTE(review): the captured output below contains an UnboundLocalError for
# 'hidden_layer_configurations' - presumably raised inside the gui module; verify there.
(model, params_dict) = create_model_gui()

# Display the chosen parameters as the cell output.
params_dict

UnboundLocalError: cannot access local variable 'hidden_layer_configurations' where it is not associated with a value

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_11 (Dense)            (None, 1)                 27        
                                                                 
 dense_12 (Dense)            (None, 2)                 4         
                                                                 
Total params: 31
Trainable params: 31
Non-trainable params: 0
_________________________________________________________________
