In [1]:
import tensorflow as tf
from tensorflow.keras import utils, models, layers
import numpy as np
import pandas as pd

Our first step is to create our training and validation data from all the data we downloaded.

In [4]:
# Read the per-game box-score table written by the download step.
df = pd.read_csv(filepath_or_buffer='output_data.csv')
# Bare last expression -> rich notebook display of the loaded frame.
df

Unnamed: 0,date,hometeam_slug,awayteam_slug,winner_is_home_team,hometeam_fieldGoalsMade,hometeam_fieldGoalsAttempted,hometeam_threePointersMade,hometeam_threePointersAttempted,hometeam_freeThrowsMade,hometeam_freeThrowsAttempted,...,awayteam_threePointersAttempted,awayteam_freeThrowsMade,awayteam_freeThrowsAttempted,awayteam_reboundsOffensive,awayteam_reboundsDefensive,awayteam_assists,awayteam_steals,awayteam_blocks,awayteam_foulsPersonal,awayteam_points
0,2010-01-02,blazers,warriors,True,36,74,5,15,28,34,...,12,18,24,11,25,14,6,1,25,89
1,2010-01-02,bulls,magic,True,37,87,3,5,24,27,...,37,13,20,16,32,19,4,7,23,93
2,2010-01-02,hornets,rockets,True,38,85,3,14,20,24,...,20,17,21,8,31,21,3,1,25,95
3,2010-01-02,suns,grizzlies,False,40,88,5,18,18,24,...,15,18,25,16,33,25,7,5,20,128
4,2010-01-02,kings,mavericks,False,37,92,6,18,11,13,...,9,24,28,4,32,21,11,4,15,99
5,2010-01-02,nets,cavaliers,False,29,75,3,10,25,32,...,14,26,33,16,36,19,4,5,24,94
6,2010-01-02,bucks,thunder,True,42,106,4,17,15,19,...,14,23,31,10,39,23,4,10,23,97
7,2010-01-02,jazz,nuggets,False,36,71,3,12,20,27,...,15,16,23,8,24,23,15,3,25,105
8,2010-01-02,celtics,raptors,True,36,70,6,16,25,31,...,15,18,25,5,23,22,6,4,27,96
9,2010-01-02,wizards,spurs,False,35,80,4,10,12,17,...,12,15,20,3,34,23,3,3,24,97


In [10]:
# Remove the identifying columns (game date and team slugs); only the label
# and the numeric box-score statistics remain.
df = df.drop(columns=['date', 'hometeam_slug', 'awayteam_slug'])

In [11]:
# Column names that are left after dropping the identifying columns.
df.columns.tolist()

['winner_is_home_team',
 'hometeam_fieldGoalsMade',
 'hometeam_fieldGoalsAttempted',
 'hometeam_threePointersMade',
 'hometeam_threePointersAttempted',
 'hometeam_freeThrowsMade',
 'hometeam_freeThrowsAttempted',
 'hometeam_reboundsOffensive',
 'hometeam_reboundsDefensive',
 'hometeam_assists',
 'hometeam_steals',
 'hometeam_blocks',
 'hometeam_foulsPersonal',
 'hometeam_points',
 'awayteam_fieldGoalsMade',
 'awayteam_fieldGoalsAttempted',
 'awayteam_threePointersMade',
 'awayteam_threePointersAttempted',
 'awayteam_freeThrowsMade',
 'awayteam_freeThrowsAttempted',
 'awayteam_reboundsOffensive',
 'awayteam_reboundsDefensive',
 'awayteam_assists',
 'awayteam_steals',
 'awayteam_blocks',
 'awayteam_foulsPersonal',
 'awayteam_points']

In [24]:
# Number of remaining columns (the label plus every statistic column).
len(df.columns)

27

In [7]:
# Selecting with a list keeps the result a one-column DataFrame (not a Series).
df.loc[:, ['awayteam_points']]

Unnamed: 0,awayteam_points
0,89
1,93
2,95
3,128
4,99
5,94
6,97
7,105
8,96
9,97


In [18]:
# Pair every column name with the one-column DataFrame holding its values.
key_values_pairs = [
    (column_name, df[[column_name]])
    for column_name in df.columns.values.tolist()
]
key_values_pairs

[('winner_is_home_team',
      winner_is_home_team
  0                  True
  1                  True
  2                  True
  3                 False
  4                 False
  5                 False
  6                  True
  7                 False
  8                  True
  9                 False
  10                 True
  11                False),
 ('hometeam_fieldGoalsMade',
      hometeam_fieldGoalsMade
  0                        36
  1                        37
  2                        38
  3                        40
  4                        37
  5                        29
  6                        42
  7                        36
  8                        36
  9                        35
  10                       48
  11                       36),
 ('hometeam_fieldGoalsAttempted',
      hometeam_fieldGoalsAttempted
  0                             74
  1                             87
  2                             85
  3                             88
  4  

In [22]:
def make_dataset(df):
    """Build a ``tf.data.Dataset`` of ``(features, label)`` pairs from ``df``.

    Args:
        df: DataFrame containing the boolean ``winner_is_home_team`` column
            and otherwise only numeric statistic columns. The models in this
            notebook declare ``input_shape=(26,)``, so they expect 26
            statistic columns.

    Returns:
        An unbatched ``tf.data.Dataset`` with one element per row of ``df``.
        Each element is ``(features, label)`` where ``features`` is a float32
        vector holding the statistic columns in ``df`` column order and
        ``label`` is an int32 scalar: 1 if the home team won, 0 otherwise.
    """
    label_column = "winner_is_home_team"

    # Build everything from the ``df`` argument rather than from a
    # notebook-level variable, so the function works for any frame it is
    # handed and does not depend on which cells were run beforehand.
    #
    # The features are packed into a single (n_rows, n_features) matrix: the
    # Sequential models in this notebook take one tensor of shape (26,) per
    # sample and cannot consume a dict of per-column tensors.
    features = df.drop(columns=[label_column]).to_numpy(dtype="float32")

    # Integer class ids (0 = away win, 1 = home win) are the label format
    # used with SparseCategoricalCrossentropy.
    labels = df[label_column].to_numpy(dtype="int32")

    # Batching is intentionally left to the caller so that shuffling and
    # splitting operate on individual games rather than on whole batches.
    return tf.data.Dataset.from_tensor_slices((features, labels))

SHUFFLE_SEED = 42  # fixed so the split is reproducible across kernel restarts

data = make_dataset(df)  # one dataset element per game
n_games = len(data)

# Shuffle exactly once. By default `shuffle` draws a fresh order on every
# pass over the dataset, so `take`/`skip` would select different games each
# epoch and validation games would leak into the training set.
data = data.shuffle(
    buffer_size=n_games,
    seed=SHUFFLE_SEED,
    reshuffle_each_iteration=False,
)

train_size = int(0.8 * n_games)  # 80% of the games are used for training
val_size = n_games - train_size  # the remainder (~20%) is used for validation

train = data.take(train_size)               # training set
val = data.skip(train_size).take(val_size)  # validation set (disjoint from train)

Before creating our own model, we first test the predictive performance of a simple logistic regression model.

In [23]:
# Baseline: a single Dense layer with sigmoid outputs on the 26 input features.
model_logistic_regression = models.Sequential()
model_logistic_regression.add(layers.InputLayer(input_shape=(26,)))
model_logistic_regression.add(layers.Dense(2, activation='sigmoid'))

model_logistic_regression.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 2)                 54        
                                                                 
Total params: 54 (216.00 Byte)
Trainable params: 54 (216.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


Now we try our own Neural Network model.

In [4]:
# Fully connected network: 26 input features -> 2 output units.
# Dense layers created without an `activation` apply only the affine map
# (weights + bias); the ones with 'relu' add the non-linearity.
model1 = models.Sequential()
model1.add(layers.InputLayer(input_shape=(26,)))
model1.add(layers.Dense(128, use_bias=True))                     # linear
model1.add(layers.Dense(128, use_bias=True, activation='relu'))  # non-linear
model1.add(layers.Dense(256, use_bias=True))                     # linear
model1.add(layers.Dense(256, use_bias=True, activation='relu'))  # non-linear
model1.add(layers.Dense(256, use_bias=True))                     # linear
model1.add(layers.Dense(64, activation='relu'))                  # non-linear
model1.add(layers.Dense(2))                                      # output layer, no activation

model1.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 128)               1792      
                                                                 
 dense_9 (Dense)             (None, 128)               16512     
                                                                 
 dense_10 (Dense)            (None, 256)               33024     
                                                                 
 dense_11 (Dense)            (None, 256)               65792     
                                                                 
 dense_12 (Dense)            (None, 256)               65792     
                                                                 
 dense_13 (Dense)            (None, 64)                16448     
                                                                 
 dense_14 (Dense)            (None, 2)                

In [None]:
BATCH_SIZE = 100  # samples per gradient step

# `train_dataset` / `validation_dataset` are not defined in any earlier cell;
# the split cell produces `train` and `val`. Define them here, batched,
# because `Model.fit` consumes a dataset one *batch* at a time and the model
# expects inputs of shape (batch, 26).
train_dataset = train.batch(BATCH_SIZE)
validation_dataset = val.batch(BATCH_SIZE)

# The last Dense layer of `model1` has no activation, so it emits raw logits;
# `from_logits=True` tells the loss to apply the softmax itself.
model1.compile(optimizer='adam', # optimizer type
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), # loss function
              metrics=['accuracy'])

# `history.history` keeps the per-epoch loss/accuracy for both splits.
history = model1.fit(train_dataset,
                     epochs=20,
                     validation_data=validation_dataset)

In [None]:
# NOTE(review): `INSERT_INPUT` is a placeholder that is not defined in any
# visible cell, so this cell raises NameError until it is replaced.
# Presumably it should be an array of shape (n_samples, 26), matching the
# InputLayer of `model1` -- TODO confirm column order against training data.
# The final Dense layer of `model1` has no activation, so the returned values
# are raw scores for the 2 output units, not probabilities.
model1.predict(INSERT_INPUT)