# Deep Learning
In this notebook, we will be using neural networks in order to try and predict individual games. We will be using the data that was collected from the sportsreference API from the summer of 2020.

# Imports
Import the necessary modules we will be utilizing

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from tqdm import tqdm
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

pd.set_option("display.max_rows", None, "display.max_columns", None)

# Getting Data Ready

In [2]:
# Load data.csv
dataframe = pd.read_csv('../assets/data/data.csv')
dataframe.head()

Unnamed: 0,away_assist_percentage,away_assists,away_block_percentage,away_blocks,away_defensive_rating,away_defensive_rebound_percentage,away_defensive_rebounds,away_effective_field_goal_percentage,away_field_goal_attempts,away_field_goal_percentage,away_field_goals,away_free_throw_attempt_rate,away_free_throw_attempts,away_free_throw_percentage,away_free_throws,away_losses,away_minutes_played,away_offensive_rating,away_offensive_rebound_percentage,away_offensive_rebounds,away_personal_fouls,away_points,away_ranking,away_steal_percentage,away_steals,away_three_point_attempt_rate,away_three_point_field_goal_attempts,away_three_point_field_goal_percentage,away_three_point_field_goals,away_total_rebound_percentage,away_total_rebounds,away_true_shooting_percentage,away_turnover_percentage,away_turnovers,away_two_point_field_goal_attempts,away_two_point_field_goal_percentage,away_two_point_field_goals,away_win_percentage,away_wins,date,home_assist_percentage,home_assists,home_block_percentage,home_blocks,home_defensive_rating,home_defensive_rebound_percentage,home_defensive_rebounds,home_effective_field_goal_percentage,home_field_goal_attempts,home_field_goal_percentage,home_field_goals,home_free_throw_attempt_rate,home_free_throw_attempts,home_free_throw_percentage,home_free_throws,home_losses,home_minutes_played,home_offensive_rating,home_offensive_rebound_percentage,home_offensive_rebounds,home_personal_fouls,home_points,home_ranking,home_steal_percentage,home_steals,home_three_point_attempt_rate,home_three_point_field_goal_attempts,home_three_point_field_goal_percentage,home_three_point_field_goals,home_total_rebound_percentage,home_total_rebounds,home_true_shooting_percentage,home_turnover_percentage,home_turnovers,home_two_point_field_goal_attempts,home_two_point_field_goal_percentage,home_two_point_field_goals,home_win_percentage,home_wins,location,losing_abbr,losing_name,pace,winner,winning_abbr,winning_name
0,25.0,3,6.3,3,115.4,69.2,27,0.286,49,0.245,12,0.367,18,0.611,11,0,200,50.0,18.9,7,17,39,,3.8,3,0.429,21,0.19,4,44.7,34,0.339,33.0,28,28,0.286,8,0.0,0,"November 5, 2019",55.6,20,7.1,2,50.0,81.1,30,0.52,75,0.48,36,0.227,17,0.706,12,0,200,115.4,30.8,12,21,90,,20.5,16,0.36,27,0.222,6,55.3,42,0.542,7.8,7,48,0.625,30,1.0,1,"Moody Coliseum , Abilene, Texas",Arlington Baptist\r\n\t\t\t,Arlington Baptist\r\n\t\t\t,78.3,Home,ABILENE-CHRISTIAN,Abilene Christian
1,43.3,13,6.3,2,107.5,65.2,15,0.531,65,0.462,30,0.308,20,0.7,14,1,225,103.8,8.8,3,26,83,,15.0,12,0.385,25,0.36,9,31.6,18,0.557,9.8,8,40,0.525,21,0.5,1,"November 10, 2019",55.6,15,12.5,5,103.8,91.2,31,0.596,52,0.519,27,0.615,32,0.75,24,1,225,107.5,34.8,8,17,86,,5.0,4,0.385,20,0.4,8,68.4,39,0.64,25.0,22,32,0.594,19,0.667,2,"Daskalakis Athletic Center, Philadelphia, Penn...",ABILENE-CHRISTIAN,Abilene Christian,71.4,Home,DREXEL,Drexel
2,39.1,9,3.1,1,94.5,70.0,21,0.52,51,0.451,23,0.451,23,0.87,20,1,200,100.0,18.5,5,22,73,,12.3,9,0.412,21,0.333,7,45.6,26,0.589,21.8,17,30,0.533,16,0.75,3,"November 16, 2019",45.0,9,0.0,0,100.0,81.5,22,0.45,50,0.4,20,0.66,33,0.727,24,2,200,94.5,30.0,9,20,69,,13.7,10,0.36,18,0.278,5,54.4,31,0.525,18.9,15,32,0.469,15,0.333,1,"Moody Coliseum , Abilene, Texas",ABILENE-CHRISTIAN,Abilene Christian,72.8,Away,PEPPERDINE,Pepperdine
3,72.7,16,3.6,1,112.5,69.6,16,0.432,59,0.373,22,0.153,9,0.778,7,3,200,90.6,29.0,9,17,58,,9.4,6,0.441,26,0.269,7,46.3,25,0.458,14.9,11,33,0.455,15,0.25,1,"November 18, 2019",51.9,14,12.1,4,90.6,71.0,22,0.6,50,0.54,27,0.3,15,0.8,12,3,200,112.5,30.4,7,13,72,,6.3,4,0.44,22,0.273,6,53.7,29,0.63,17.5,12,28,0.75,21,0.4,2,"Thomas & Mack Center, Las Vegas, Nevada",ABILENE-CHRISTIAN,Abilene Christian,63.7,Home,NEVADA-LAS-VEGAS,UNLV
4,43.8,7,2.4,1,121.6,55.6,20,0.433,45,0.356,16,0.467,21,0.905,19,0,200,78.4,17.2,5,23,58,,9.5,7,0.356,16,0.438,7,38.5,25,0.528,29.8,23,29,0.31,9,0.0,0,"November 21, 2019",66.7,22,10.3,3,78.4,82.8,24,0.545,67,0.493,33,0.343,23,0.739,17,3,200,121.6,44.4,16,17,90,,8.1,6,0.373,25,0.28,7,61.5,40,0.577,14.4,13,42,0.619,26,0.4,2,"Moody Coliseum , Abilene, Texas",Champion Christian\r\n\t\t\t,Champion Christian\r\n\t\t\t,74.0,Home,ABILENE-CHRISTIAN,Abilene Christian


In [3]:
# These fields aren't necessary to train the model
FIELDS_TO_DROP = ['date',
                  'location',
                  'away_defensive_rating',
                  'home_defensive_rating',
                  'away_defensive_rebound_percentage',
                  'home_defensive_rebound_percentage',
                  'losing_abbr',
                  'winner',
                  'winning_abbr',
                  'home_ranking',
                  'away_ranking',
                  'winning_name',
                  'losing_name',
                  'away_points',
                  'home_points']
data = dataframe.copy()
data['target'] = np.where(dataframe['winner'] == "Home", 0, 1)
data.drop(columns=FIELDS_TO_DROP, inplace=True)
data.dropna(inplace=True)
data.head()

Unnamed: 0,away_assist_percentage,away_assists,away_block_percentage,away_blocks,away_defensive_rebounds,away_effective_field_goal_percentage,away_field_goal_attempts,away_field_goal_percentage,away_field_goals,away_free_throw_attempt_rate,away_free_throw_attempts,away_free_throw_percentage,away_free_throws,away_losses,away_minutes_played,away_offensive_rating,away_offensive_rebound_percentage,away_offensive_rebounds,away_personal_fouls,away_steal_percentage,away_steals,away_three_point_attempt_rate,away_three_point_field_goal_attempts,away_three_point_field_goal_percentage,away_three_point_field_goals,away_total_rebound_percentage,away_total_rebounds,away_true_shooting_percentage,away_turnover_percentage,away_turnovers,away_two_point_field_goal_attempts,away_two_point_field_goal_percentage,away_two_point_field_goals,away_win_percentage,away_wins,home_assist_percentage,home_assists,home_block_percentage,home_blocks,home_defensive_rebounds,home_effective_field_goal_percentage,home_field_goal_attempts,home_field_goal_percentage,home_field_goals,home_free_throw_attempt_rate,home_free_throw_attempts,home_free_throw_percentage,home_free_throws,home_losses,home_minutes_played,home_offensive_rating,home_offensive_rebound_percentage,home_offensive_rebounds,home_personal_fouls,home_steal_percentage,home_steals,home_three_point_attempt_rate,home_three_point_field_goal_attempts,home_three_point_field_goal_percentage,home_three_point_field_goals,home_total_rebound_percentage,home_total_rebounds,home_true_shooting_percentage,home_turnover_percentage,home_turnovers,home_two_point_field_goal_attempts,home_two_point_field_goal_percentage,home_two_point_field_goals,home_win_percentage,home_wins,pace,target
0,25.0,3,6.3,3,27,0.286,49,0.245,12,0.367,18,0.611,11,0,200,50.0,18.9,7,17,3.8,3,0.429,21,0.19,4,44.7,34,0.339,33.0,28,28,0.286,8,0.0,0,55.6,20,7.1,2,30,0.52,75,0.48,36,0.227,17,0.706,12,0,200,115.4,30.8,12,21,20.5,16,0.36,27,0.222,6,55.3,42,0.542,7.8,7,48,0.625,30,1.0,1,78.3,0
1,43.3,13,6.3,2,15,0.531,65,0.462,30,0.308,20,0.7,14,1,225,103.8,8.8,3,26,15.0,12,0.385,25,0.36,9,31.6,18,0.557,9.8,8,40,0.525,21,0.5,1,55.6,15,12.5,5,31,0.596,52,0.519,27,0.615,32,0.75,24,1,225,107.5,34.8,8,17,5.0,4,0.385,20,0.4,8,68.4,39,0.64,25.0,22,32,0.594,19,0.667,2,71.4,0
2,39.1,9,3.1,1,21,0.52,51,0.451,23,0.451,23,0.87,20,1,200,100.0,18.5,5,22,12.3,9,0.412,21,0.333,7,45.6,26,0.589,21.8,17,30,0.533,16,0.75,3,45.0,9,0.0,0,22,0.45,50,0.4,20,0.66,33,0.727,24,2,200,94.5,30.0,9,20,13.7,10,0.36,18,0.278,5,54.4,31,0.525,18.9,15,32,0.469,15,0.333,1,72.8,1
3,72.7,16,3.6,1,16,0.432,59,0.373,22,0.153,9,0.778,7,3,200,90.6,29.0,9,17,9.4,6,0.441,26,0.269,7,46.3,25,0.458,14.9,11,33,0.455,15,0.25,1,51.9,14,12.1,4,22,0.6,50,0.54,27,0.3,15,0.8,12,3,200,112.5,30.4,7,13,6.3,4,0.44,22,0.273,6,53.7,29,0.63,17.5,12,28,0.75,21,0.4,2,63.7,0
4,43.8,7,2.4,1,20,0.433,45,0.356,16,0.467,21,0.905,19,0,200,78.4,17.2,5,23,9.5,7,0.356,16,0.438,7,38.5,25,0.528,29.8,23,29,0.31,9,0.0,0,66.7,22,10.3,3,24,0.545,67,0.493,33,0.343,23,0.739,17,3,200,121.6,44.4,16,17,8.1,6,0.373,25,0.28,7,61.5,40,0.577,14.4,13,42,0.619,26,0.4,2,74.0,0


Now, we have to split the data into training, validation, and test sets

In [4]:
train, test = train_test_split(data, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

7075 train examples
1769 validation examples
2212 test examples


Next, we need wrap the dataframes with `tf.data`, in order to shuffle and batch the data.

In [5]:
# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    dataframe = dataframe.copy()
    labels = dataframe.pop('target')
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    ds = ds.prefetch(batch_size)
    return ds

For each of the Numeric feature, we will use a Normalization() layer to make sure the mean of each feature is 0 and its standard deviation is 1.

In [6]:
def get_normalization_layer(name, dataset):
    # Create a Normalization layer for our feature.
    normalizer = preprocessing.Normalization()

    # Prepare a Dataset that only yields our feature.
    feature_ds = dataset.map(lambda x, y: x[name])

    # Learn the statistics of the data.
    normalizer.adapt(feature_ds)

    return normalizer

Create new input pipeline and make numeric columns for all of the features

In [7]:
batch_size = 32
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [8]:
all_inputs = []
encoded_features = []

# Just to get the training features
[(train_features, label_batch)] = train_ds.take(1)

# Numeric features.
for header in tqdm(list(train_features.keys()), unit='feautures'):
    numeric_col = tf.keras.Input(shape=(1,), name=header)
    normalization_layer = get_normalization_layer(header, train_ds)
    encoded_numeric_col = normalization_layer(numeric_col)
    all_inputs.append(numeric_col)
    encoded_features.append(encoded_numeric_col)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 71/71 [00:21<00:00,  3.35feautures/s]


# Compile the Neural Network

In [9]:
all_features = tf.keras.layers.concatenate(encoded_features)
x = tf.keras.layers.Dense(16, activation="relu")(all_features)
x = tf.keras.layers.Dense(16, activation="relu")(x)
x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(all_inputs, output)
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=["accuracy"])

# Train the Network

In [10]:
model.fit(train_ds, epochs=5, validation_data=val_ds)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1df1e6c77c0>

In [11]:
loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)

Accuracy 0.985985517501831


# Analysis
The results here, while they may look spectacular, are very misleading. The way that the current data is setup, we are only looking at the game stats and determining the winner based on that. If we have the number of 2PM, 3PM, and freethrows for both teams in the current game, it's trivial to find out who won the game, we can easily just calculate the number of points each team had. To make this better, we should restructure our data. In particular, for each sample (row), it should contain the **averages** for both teams **going into** the match up and then have a label of which team won after they played. This way, we don't know the game stats, but might be able to predict what they could be given the teams' averages.

# Take 2
Now we'll retrain a new neural network on the moving averages data that we generated.

In [12]:
data_ma = pd.read_csv('../assets/data/cleaned_data/cleaned_data.csv')
data_ma.head()

Unnamed: 0,date,location,home,away,winner,home_assist_percentage_SMA,home_assist_percentage_CMA,home_assist_percentage_EMA,home_assists_SMA,home_assists_CMA,home_assists_EMA,home_block_percentage_SMA,home_block_percentage_CMA,home_block_percentage_EMA,home_blocks_SMA,home_blocks_CMA,home_blocks_EMA,home_defensive_rating_SMA,home_defensive_rating_CMA,home_defensive_rating_EMA,home_defensive_rebound_percentage_SMA,home_defensive_rebound_percentage_CMA,home_defensive_rebound_percentage_EMA,home_defensive_rebounds_SMA,home_defensive_rebounds_CMA,home_defensive_rebounds_EMA,home_effective_field_goal_percentage_SMA,home_effective_field_goal_percentage_CMA,home_effective_field_goal_percentage_EMA,home_field_goal_attempts_SMA,home_field_goal_attempts_CMA,home_field_goal_attempts_EMA,home_field_goal_percentage_SMA,home_field_goal_percentage_CMA,home_field_goal_percentage_EMA,home_field_goals_SMA,home_field_goals_CMA,home_field_goals_EMA,home_free_throw_attempt_rate_SMA,home_free_throw_attempt_rate_CMA,home_free_throw_attempt_rate_EMA,home_free_throw_attempts_SMA,home_free_throw_attempts_CMA,home_free_throw_attempts_EMA,home_free_throw_percentage_SMA,home_free_throw_percentage_CMA,home_free_throw_percentage_EMA,home_free_throws_SMA,home_free_throws_CMA,home_free_throws_EMA,home_losses_SMA,home_losses_CMA,home_losses_EMA,home_minutes_played_SMA,home_minutes_played_CMA,home_minutes_played_EMA,home_offensive_rating_SMA,home_offensive_rating_CMA,home_offensive_rating_EMA,home_offensive_rebound_percentage_SMA,home_offensive_rebound_percentage_CMA,home_offensive_rebound_percentage_EMA,home_offensive_rebounds_SMA,home_offensive_rebounds_CMA,home_offensive_rebounds_EMA,home_personal_fouls_SMA,home_personal_fouls_CMA,home_personal_fouls_EMA,home_points_SMA,home_points_CMA,home_points_EMA,home_steal_percentage_SMA,home_steal_percentage_CMA,home_steal_percentage_EMA,home_steals_SMA,home_steals_CMA,home_steals_EMA,home_three_point_attempt_rate_SMA,home_three_point_attempt_rate_CMA,home_three_point_attempt_rate_EMA,home_three_point_field_goal_attempts_SMA,home_three_point_field_goal_attempts_CMA,home_three_point_field_goal_attempts_EMA,home_three_point_field_goal_percentage_SMA,home_three_point_field_goal_percentage_CMA,home_three_point_field_goal_percentage_EMA,home_three_point_field_goals_SMA,home_three_point_field_goals_CMA,home_three_point_field_goals_EMA,home_total_rebound_percentage_SMA,home_total_rebound_percentage_CMA,home_total_rebound_percentage_EMA,home_total_rebounds_SMA,home_total_rebounds_CMA,home_total_rebounds_EMA,home_true_shooting_percentage_SMA,home_true_shooting_percentage_CMA,home_true_shooting_percentage_EMA,home_turnover_percentage_SMA,home_turnover_percentage_CMA,home_turnover_percentage_EMA,home_turnovers_SMA,home_turnovers_CMA,home_turnovers_EMA,home_two_point_field_goal_attempts_SMA,home_two_point_field_goal_attempts_CMA,home_two_point_field_goal_attempts_EMA,home_two_point_field_goal_percentage_SMA,home_two_point_field_goal_percentage_CMA,home_two_point_field_goal_percentage_EMA,home_two_point_field_goals_SMA,home_two_point_field_goals_CMA,home_two_point_field_goals_EMA,home_win_percentage_SMA,home_win_percentage_CMA,home_win_percentage_EMA,home_wins_SMA,home_wins_CMA,home_wins_EMA,away_assist_percentage_SMA,away_assist_percentage_CMA,away_assist_percentage_EMA,away_assists_SMA,away_assists_CMA,away_assists_EMA,away_block_percentage_SMA,away_block_percentage_CMA,away_block_percentage_EMA,away_blocks_SMA,away_blocks_CMA,away_blocks_EMA,away_defensive_rating_SMA,away_defensive_rating_CMA,away_defensive_rating_EMA,away_defensive_rebound_percentage_SMA,away_defensive_rebound_percentage_CMA,away_defensive_rebound_percentage_EMA,away_defensive_rebounds_SMA,away_defensive_rebounds_CMA,away_defensive_rebounds_EMA,away_effective_field_goal_percentage_SMA,away_effective_field_goal_percentage_CMA,away_effective_field_goal_percentage_EMA,away_field_goal_attempts_SMA,away_field_goal_attempts_CMA,away_field_goal_attempts_EMA,away_field_goal_percentage_SMA,away_field_goal_percentage_CMA,away_field_goal_percentage_EMA,away_field_goals_SMA,away_field_goals_CMA,away_field_goals_EMA,away_free_throw_attempt_rate_SMA,away_free_throw_attempt_rate_CMA,away_free_throw_attempt_rate_EMA,away_free_throw_attempts_SMA,away_free_throw_attempts_CMA,away_free_throw_attempts_EMA,away_free_throw_percentage_SMA,away_free_throw_percentage_CMA,away_free_throw_percentage_EMA,away_free_throws_SMA,away_free_throws_CMA,away_free_throws_EMA,away_losses_SMA,away_losses_CMA,away_losses_EMA,away_minutes_played_SMA,away_minutes_played_CMA,away_minutes_played_EMA,away_offensive_rating_SMA,away_offensive_rating_CMA,away_offensive_rating_EMA,away_offensive_rebound_percentage_SMA,away_offensive_rebound_percentage_CMA,away_offensive_rebound_percentage_EMA,away_offensive_rebounds_SMA,away_offensive_rebounds_CMA,away_offensive_rebounds_EMA,away_personal_fouls_SMA,away_personal_fouls_CMA,away_personal_fouls_EMA,away_points_SMA,away_points_CMA,away_points_EMA,away_steal_percentage_SMA,away_steal_percentage_CMA,away_steal_percentage_EMA,away_steals_SMA,away_steals_CMA,away_steals_EMA,away_three_point_attempt_rate_SMA,away_three_point_attempt_rate_CMA,away_three_point_attempt_rate_EMA,away_three_point_field_goal_attempts_SMA,away_three_point_field_goal_attempts_CMA,away_three_point_field_goal_attempts_EMA,away_three_point_field_goal_percentage_SMA,away_three_point_field_goal_percentage_CMA,away_three_point_field_goal_percentage_EMA,away_three_point_field_goals_SMA,away_three_point_field_goals_CMA,away_three_point_field_goals_EMA,away_total_rebound_percentage_SMA,away_total_rebound_percentage_CMA,away_total_rebound_percentage_EMA,away_total_rebounds_SMA,away_total_rebounds_CMA,away_total_rebounds_EMA,away_true_shooting_percentage_SMA,away_true_shooting_percentage_CMA,away_true_shooting_percentage_EMA,away_turnover_percentage_SMA,away_turnover_percentage_CMA,away_turnover_percentage_EMA,away_turnovers_SMA,away_turnovers_CMA,away_turnovers_EMA,away_two_point_field_goal_attempts_SMA,away_two_point_field_goal_attempts_CMA,away_two_point_field_goal_attempts_EMA,away_two_point_field_goal_percentage_SMA,away_two_point_field_goal_percentage_CMA,away_two_point_field_goal_percentage_EMA,away_two_point_field_goals_SMA,away_two_point_field_goals_CMA,away_two_point_field_goals_EMA,away_win_percentage_SMA,away_win_percentage_CMA,away_win_percentage_EMA,away_wins_SMA,away_wins_CMA,away_wins_EMA
0,2019-11-17,"American Bank Center, Corpus Christi, Texas",North Dakota State,Stony Brook,Away,59.06,59.06,60.748148,13.6,13.6,13.012346,5.08,5.08,4.704938,1.8,1.8,1.580247,91.44,91.44,90.790123,83.66,83.66,82.006173,27.2,27.2,26.987654,0.4888,0.4888,0.477049,55.2,55.2,53.641975,0.4142,0.4142,0.398296,23.2,23.2,21.555556,0.3932,0.3932,0.370296,21.0,21.0,19.555556,0.7884,0.7884,0.800123,16.2,16.2,15.148148,1.0,1.0,1.0,199.8,199.8,199.802469,106.62,106.62,102.702469,26.12,26.12,23.65679,8.4,8.4,7.518519,13.2,13.2,13.407407,70.8,70.8,66.703704,7.38,7.38,7.507407,4.8,4.8,4.740741,0.471,0.471,0.493753,25.6,25.6,26.271605,0.3174,0.3174,0.318815,8.2,8.2,8.444444,55.1,55.1,53.501235,35.6,35.6,34.506173,0.5398,0.5398,0.527469,12.42,12.42,12.735802,9.2,9.2,9.185185,29.6,29.6,27.37037,0.4842,0.4842,0.463333,15.0,15.0,13.111111,0.5434,0.5434,0.581531,2.0,2.0,2.395062,49.0,49.0,49.120988,11.8,11.8,11.617284,15.36,15.36,13.42963,5.8,5.8,5.074074,87.66,87.66,86.544444,74.36,74.36,74.550617,25.8,25.8,25.592593,0.4646,0.4646,0.485864,60.8,60.8,58.802469,0.3926,0.3926,0.406642,23.6,23.6,23.37037,0.3672,0.3672,0.397914,21.0,21.0,21.740741,0.7182,0.7182,0.699975,15.2,15.2,15.37037,1.8,1.8,1.802469,205.0,205.0,204.938272,98.82,98.82,99.222222,31.38,31.38,28.197531,12.0,12.0,10.679012,14.0,14.0,13.481481,70.8,70.8,70.851852,8.86,8.86,8.940741,6.4,6.4,6.444444,0.4518,0.4518,0.451827,27.2,27.2,26.234568,0.3146,0.3146,0.343704,8.4,8.4,8.740741,53.1,53.1,52.322222,37.8,37.8,36.271605,0.506,0.506,0.522778,16.32,16.32,17.501235,13.4,13.4,14.148148,33.6,33.6,32.567901,0.4606,0.4606,0.460296,15.2,15.2,14.62963,0.2866,0.2866,0.360444,1.2,1.2,1.592593
1,2019-11-19,"A.G. Spanos Center, Stockton, California",Pacific,Coppin State,Home,46.56,45.466667,47.316461,12.0,11.666667,11.90535,8.22,8.183333,9.581481,2.2,2.166667,2.600823,94.42,90.55,86.714403,84.0,85.0,87.358436,24.4,24.833333,26.843621,0.5322,0.531667,0.524333,54.8,54.166667,54.08642,0.4756,0.478,0.472276,26.2,26.0,25.654321,0.3178,0.3465,0.355288,17.6,18.833333,19.345679,0.6628,0.652333,0.64284,11.2,11.833333,12.074074,1.6,1.333333,1.572016,200.0,200.0,200.0,109.78,108.9,108.503292,35.24,35.316667,32.730864,9.4,9.5,9.333333,15.2,16.666667,15.477366,69.8,69.666667,69.045267,11.28,11.166667,10.812757,7.2,7.166667,6.884774,0.3006,0.302833,0.286979,16.6,16.5,15.687243,0.3764,0.355333,0.364667,6.2,5.833333,5.662551,60.5,61.05,60.92428,33.8,34.333333,36.176955,0.551,0.550667,0.54351,12.6,13.016667,11.871193,9.0,9.333333,8.460905,38.2,37.666667,38.399177,0.5232,0.536,0.522975,20.0,20.166667,19.99177,0.5868,0.655667,0.660226,2.4,2.166667,2.691358,39.36,39.36,38.283951,9.0,9.0,9.259259,7.54,7.54,7.653086,2.6,2.6,2.617284,98.74,98.74,100.492593,72.74,72.74,69.771605,25.8,25.8,24.814815,0.407,0.407,0.425185,66.2,66.2,66.0,0.3474,0.3474,0.366716,22.8,22.8,24.098765,0.3176,0.3176,0.323642,20.6,20.6,21.08642,0.7126,0.7126,0.697506,14.8,14.8,14.82716,2.4,2.4,2.691358,200.0,200.0,200.0,84.1,84.1,87.290123,21.54,21.54,21.64321,9.2,9.2,8.950617,22.8,22.8,22.987654,68.2,68.2,70.666667,9.28,9.28,9.046914,7.6,7.6,7.407407,0.4754,0.4754,0.468247,31.8,31.8,31.111111,0.2632,0.2632,0.255556,7.8,7.8,7.641975,44.62,44.62,43.81358,35.0,35.0,33.765432,0.4496,0.4496,0.465407,16.48,16.48,16.412346,14.8,14.8,14.716049,34.4,34.4,34.888889,0.4368,0.4368,0.472333,15.0,15.0,16.45679,0.1566,0.1566,0.171556,0.6,0.6,0.703704
2,2019-11-20,"Daskalakis Athletic Center, Philadelphia, Penn...",Drexel,Bryant,Home,59.88,59.88,58.08642,14.4,14.4,13.345679,8.72,8.72,8.462963,3.6,3.6,3.469136,95.92,95.92,96.674074,80.28,80.28,76.877778,25.8,25.8,24.938272,0.515,0.515,0.502827,55.0,55.0,53.950617,0.4482,0.4482,0.438494,23.8,23.8,22.740741,0.407,0.407,0.438173,20.0,20.0,20.567901,0.6972,0.6972,0.717667,14.0,14.0,14.790123,1.6,1.6,1.888889,205.0,205.0,203.703704,91.6,91.6,88.416049,26.94,26.94,24.940741,8.8,8.8,8.395062,17.0,17.0,17.901235,68.8,68.8,67.098765,8.92,8.92,8.839506,6.6,6.6,6.617284,0.3904,0.3904,0.388099,21.8,21.8,21.333333,0.3468,0.3468,0.335617,7.2,7.2,6.82716,54.3,54.3,51.477778,34.6,34.6,33.333333,0.542,0.542,0.536617,23.14,23.14,24.403704,19.2,19.2,20.345679,33.2,33.2,32.617284,0.5124,0.5124,0.502358,16.6,16.6,15.91358,0.4134,0.4134,0.392642,1.4,1.4,1.506173,57.62,57.35,55.643621,17.8,17.166667,17.403292,11.3,9.416667,8.805761,4.4,3.666667,3.419753,82.64,86.0,83.51893,79.62,76.833333,78.460905,31.4,29.833333,31.119342,0.521,0.512167,0.505222,68.0,67.0,68.880658,0.4334,0.428333,0.427494,29.8,29.0,29.855967,0.2386,0.255333,0.249572,15.8,16.666667,16.740741,0.6914,0.679333,0.728457,10.8,11.166667,12.057613,2.0,1.833333,1.868313,200.0,200.0,200.0,103.66,103.05,105.719753,35.84,34.5,36.811934,13.4,12.833333,13.954733,19.0,19.833333,18.152263,82.2,80.333333,82.45679,11.32,11.55,11.466255,9.0,9.0,8.958848,0.5036,0.495,0.500111,34.0,33.0,34.201646,0.3486,0.338167,0.313041,11.8,11.166667,10.687243,58.46,56.233333,58.314815,44.8,42.666667,45.074074,0.54,0.532167,0.53163,17.82,17.083333,15.970782,16.0,15.166667,14.288066,34.0,34.0,34.679012,0.5162,0.5135,0.542395,18.0,17.833333,19.168724,0.42,0.35,0.46263,2.0,1.666667,2.395062
3,2019-11-21,"Winthrop Coliseum, Rock Hill, South Carolina",Winthrop,Tennessee Tech,Away,52.3,52.3,56.101235,15.0,15.0,16.728395,6.52,6.52,6.618519,2.2,2.2,2.333333,90.84,90.84,86.535802,74.1,74.1,74.764198,24.2,24.2,26.654321,0.527,0.527,0.541531,61.8,61.8,62.209877,0.455,0.455,0.467111,28.0,28.0,28.975309,0.2258,0.2258,0.260531,14.2,14.2,16.530864,0.4862,0.4862,0.509358,7.6,7.6,9.469136,1.2,1.2,1.358025,200.0,200.0,200.0,103.36,103.36,106.550617,33.0,33.0,36.482716,11.4,11.4,12.271605,18.2,18.2,18.222222,72.6,72.6,76.802469,7.4,7.4,8.318519,5.2,5.2,5.975309,0.4252,0.4252,0.430716,26.6,26.6,27.148148,0.325,0.325,0.331852,9.0,9.0,9.382716,52.44,52.44,55.488889,35.6,35.6,38.925926,0.5276,0.5276,0.544173,15.68,15.68,16.495062,12.6,12.6,13.666667,35.2,35.2,35.061728,0.549,0.549,0.571531,19.0,19.0,19.592593,0.6534,0.6534,0.65684,1.8,1.8,2.037037,43.14,43.65,37.239095,10.4,10.666667,8.0,6.32,6.166667,6.974074,2.2,2.166667,2.209877,98.64,100.033333,97.430041,71.52,70.933333,75.27284,23.0,22.0,21.288066,0.4242,0.439667,0.398642,56.8,57.0,55.374486,0.367,0.3805,0.343132,21.4,22.166667,19.222222,0.1558,0.147,0.127189,9.2,8.666667,7.271605,0.6868,0.6835,0.689782,6.6,6.166667,5.234568,3.0,2.666667,3.395062,205.0,204.166667,202.469136,79.94,81.633333,73.690123,27.74,27.95,28.074486,9.6,9.5,9.872428,13.2,14.5,13.329218,55.8,57.166667,49.683128,7.74,8.333333,9.016872,5.2,5.666667,5.921811,0.3856,0.396,0.42316,21.2,22.0,23.012346,0.315,0.313833,0.266218,6.4,6.666667,6.004115,48.5,48.15,49.236626,32.6,31.5,31.160494,0.4456,0.459,0.419004,22.36,22.616667,23.020988,17.2,17.5,17.320988,35.6,35.0,32.36214,0.3996,0.426833,0.396086,15.0,15.5,13.218107,0.29,0.241667,0.202959,1.0,0.833333,0.868313
4,2019-11-21,"Maples Pavilion, Stanford, California",Stanford,William & Mary,Home,52.62,52.62,54.062963,15.0,15.0,15.185185,8.92,8.92,8.966667,3.0,3.0,2.975309,82.8,82.8,83.458025,76.68,76.68,77.058025,24.8,24.8,24.382716,0.5796,0.5796,0.586593,54.2,54.2,53.45679,0.5156,0.5156,0.518963,28.0,28.0,27.765432,0.3804,0.3804,0.384519,19.8,19.8,19.950617,0.7644,0.7644,0.772049,14.4,14.4,14.851852,0.0,0.0,0.0,200.0,200.0,200.0,109.34,109.34,110.328395,21.32,21.32,23.088889,6.2,6.2,6.407407,14.6,14.6,14.271605,77.4,77.4,77.62963,12.76,12.76,12.88642,9.0,9.0,9.037037,0.3082,0.3082,0.304247,16.8,16.8,16.333333,0.414,0.414,0.440086,7.0,7.0,7.246914,50.3,50.3,51.52716,31.0,31.0,30.790123,0.6086,0.6086,0.617494,17.28,17.28,18.171605,13.2,13.2,13.888889,37.4,37.4,37.123457,0.5622,0.5622,0.552975,21.0,21.0,20.518519,1.0,1.0,1.0,3.0,3.0,3.395062,53.48,53.48,52.595062,15.0,15.0,14.703704,8.4,8.4,7.064198,4.0,4.0,3.506173,96.78,96.78,96.116049,77.36,77.36,78.709877,30.4,30.4,31.395062,0.5734,0.5734,0.572198,56.8,56.8,57.160494,0.4976,0.4976,0.492778,28.2,28.2,28.098765,0.2544,0.2544,0.222556,14.4,14.4,12.580247,0.7424,0.7424,0.735235,10.4,10.4,8.864198,0.2,0.2,0.333333,200.0,200.0,200.0,105.62,105.62,103.12716,21.88,21.88,19.211111,5.6,5.6,4.987654,15.2,15.2,16.160494,75.4,75.4,74.148148,5.58,5.58,4.751852,4.0,4.0,3.407407,0.3942,0.3942,0.420272,22.4,22.4,24.098765,0.382,0.382,0.380975,8.6,8.6,9.08642,54.42,54.42,53.922222,36.0,36.0,36.382716,0.5946,0.5946,0.588753,17.04,17.04,17.720988,13.0,13.0,13.530864,34.4,34.4,33.061728,0.5742,0.5742,0.578,19.6,19.6,19.012346,0.96,0.96,0.933333,2.8,2.8,3.061728


In [13]:
# These fields aren't necessary to train the model
DROPPING = ['date',
            'location',
            'home',
            'away']

ma = data_ma.copy()
ma['target'] = np.where(ma['winner'] == "Home", 0, 1)
ma.drop(columns=DROPPING, inplace=True)
ma.dropna(inplace=True)
ma.drop_duplicates(inplace=True)
ma.pop('winner')

target = ma.pop('target')
ma = ma.filter(regex="EMA", axis=1)
ma['target'] = target
ma.head()

Unnamed: 0,home_assist_percentage_EMA,home_assists_EMA,home_block_percentage_EMA,home_blocks_EMA,home_defensive_rating_EMA,home_defensive_rebound_percentage_EMA,home_defensive_rebounds_EMA,home_effective_field_goal_percentage_EMA,home_field_goal_attempts_EMA,home_field_goal_percentage_EMA,home_field_goals_EMA,home_free_throw_attempt_rate_EMA,home_free_throw_attempts_EMA,home_free_throw_percentage_EMA,home_free_throws_EMA,home_losses_EMA,home_minutes_played_EMA,home_offensive_rating_EMA,home_offensive_rebound_percentage_EMA,home_offensive_rebounds_EMA,home_personal_fouls_EMA,home_points_EMA,home_steal_percentage_EMA,home_steals_EMA,home_three_point_attempt_rate_EMA,home_three_point_field_goal_attempts_EMA,home_three_point_field_goal_percentage_EMA,home_three_point_field_goals_EMA,home_total_rebound_percentage_EMA,home_total_rebounds_EMA,home_true_shooting_percentage_EMA,home_turnover_percentage_EMA,home_turnovers_EMA,home_two_point_field_goal_attempts_EMA,home_two_point_field_goal_percentage_EMA,home_two_point_field_goals_EMA,home_win_percentage_EMA,home_wins_EMA,away_assist_percentage_EMA,away_assists_EMA,away_block_percentage_EMA,away_blocks_EMA,away_defensive_rating_EMA,away_defensive_rebound_percentage_EMA,away_defensive_rebounds_EMA,away_effective_field_goal_percentage_EMA,away_field_goal_attempts_EMA,away_field_goal_percentage_EMA,away_field_goals_EMA,away_free_throw_attempt_rate_EMA,away_free_throw_attempts_EMA,away_free_throw_percentage_EMA,away_free_throws_EMA,away_losses_EMA,away_minutes_played_EMA,away_offensive_rating_EMA,away_offensive_rebound_percentage_EMA,away_offensive_rebounds_EMA,away_personal_fouls_EMA,away_points_EMA,away_steal_percentage_EMA,away_steals_EMA,away_three_point_attempt_rate_EMA,away_three_point_field_goal_attempts_EMA,away_three_point_field_goal_percentage_EMA,away_three_point_field_goals_EMA,away_total_rebound_percentage_EMA,away_total_rebounds_EMA,away_true_shooting_percentage_EMA,away_turnover_percentage_EMA,away_turnovers_EMA,away_two_point_field_goal_attempts_EMA,away_two_point_field_goal_percentage_EMA,away_two_point_field_goals_EMA,away_win_percentage_EMA,away_wins_EMA,target
0,60.748148,13.012346,4.704938,1.580247,90.790123,82.006173,26.987654,0.477049,53.641975,0.398296,21.555556,0.370296,19.555556,0.800123,15.148148,1.0,199.802469,102.702469,23.65679,7.518519,13.407407,66.703704,7.507407,4.740741,0.493753,26.271605,0.318815,8.444444,53.501235,34.506173,0.527469,12.735802,9.185185,27.37037,0.463333,13.111111,0.581531,2.395062,49.120988,11.617284,13.42963,5.074074,86.544444,74.550617,25.592593,0.485864,58.802469,0.406642,23.37037,0.397914,21.740741,0.699975,15.37037,1.802469,204.938272,99.222222,28.197531,10.679012,13.481481,70.851852,8.940741,6.444444,0.451827,26.234568,0.343704,8.740741,52.322222,36.271605,0.522778,17.501235,14.148148,32.567901,0.460296,14.62963,0.360444,1.592593,1
1,47.316461,11.90535,9.581481,2.600823,86.714403,87.358436,26.843621,0.524333,54.08642,0.472276,25.654321,0.355288,19.345679,0.64284,12.074074,1.572016,200.0,108.503292,32.730864,9.333333,15.477366,69.045267,10.812757,6.884774,0.286979,15.687243,0.364667,5.662551,60.92428,36.176955,0.54351,11.871193,8.460905,38.399177,0.522975,19.99177,0.660226,2.691358,38.283951,9.259259,7.653086,2.617284,100.492593,69.771605,24.814815,0.425185,66.0,0.366716,24.098765,0.323642,21.08642,0.697506,14.82716,2.691358,200.0,87.290123,21.64321,8.950617,22.987654,70.666667,9.046914,7.407407,0.468247,31.111111,0.255556,7.641975,43.81358,33.765432,0.465407,16.412346,14.716049,34.888889,0.472333,16.45679,0.171556,0.703704,0
2,58.08642,13.345679,8.462963,3.469136,96.674074,76.877778,24.938272,0.502827,53.950617,0.438494,22.740741,0.438173,20.567901,0.717667,14.790123,1.888889,203.703704,88.416049,24.940741,8.395062,17.901235,67.098765,8.839506,6.617284,0.388099,21.333333,0.335617,6.82716,51.477778,33.333333,0.536617,24.403704,20.345679,32.617284,0.502358,15.91358,0.392642,1.506173,55.643621,17.403292,8.805761,3.419753,83.51893,78.460905,31.119342,0.505222,68.880658,0.427494,29.855967,0.249572,16.740741,0.728457,12.057613,1.868313,200.0,105.719753,36.811934,13.954733,18.152263,82.45679,11.466255,8.958848,0.500111,34.201646,0.313041,10.687243,58.314815,45.074074,0.53163,15.970782,14.288066,34.679012,0.542395,19.168724,0.46263,2.395062,0
3,56.101235,16.728395,6.618519,2.333333,86.535802,74.764198,26.654321,0.541531,62.209877,0.467111,28.975309,0.260531,16.530864,0.509358,9.469136,1.358025,200.0,106.550617,36.482716,12.271605,18.222222,76.802469,8.318519,5.975309,0.430716,27.148148,0.331852,9.382716,55.488889,38.925926,0.544173,16.495062,13.666667,35.061728,0.571531,19.592593,0.65684,2.037037,37.239095,8.0,6.974074,2.209877,97.430041,75.27284,21.288066,0.398642,55.374486,0.343132,19.222222,0.127189,7.271605,0.689782,5.234568,3.395062,202.469136,73.690123,28.074486,9.872428,13.329218,49.683128,9.016872,5.921811,0.42316,23.012346,0.266218,6.004115,49.236626,31.160494,0.419004,23.020988,17.320988,32.36214,0.396086,13.218107,0.202959,0.868313,1
4,54.062963,15.185185,8.966667,2.975309,83.458025,77.058025,24.382716,0.586593,53.45679,0.518963,27.765432,0.384519,19.950617,0.772049,14.851852,0.0,200.0,110.328395,23.088889,6.407407,14.271605,77.62963,12.88642,9.037037,0.304247,16.333333,0.440086,7.246914,51.52716,30.790123,0.617494,18.171605,13.888889,37.123457,0.552975,20.518519,1.0,3.395062,52.595062,14.703704,7.064198,3.506173,96.116049,78.709877,31.395062,0.572198,57.160494,0.492778,28.098765,0.222556,12.580247,0.735235,8.864198,0.333333,200.0,103.12716,19.211111,4.987654,16.160494,74.148148,4.751852,3.407407,0.420272,24.098765,0.380975,9.08642,53.922222,36.382716,0.588753,17.720988,13.530864,33.061728,0.578,19.012346,0.933333,3.061728,0


In [14]:
train_ma, test_ma = train_test_split(ma, test_size=0.2)
train_ma, val_ma = train_test_split(train_ma, test_size=0.2)
print(len(train_ma), 'train examples')
print(len(val_ma), 'validation examples')
print(len(test_ma), 'test examples')

2248 train examples
562 validation examples
703 test examples


In [15]:
batch_size = 32
train_ma_ds = df_to_dataset(train_ma, batch_size=batch_size)
val_ma_ds = df_to_dataset(val_ma, shuffle=False, batch_size=batch_size)
test_ma_ds = df_to_dataset(test_ma, shuffle=False, batch_size=batch_size)

In [16]:
all_ma_inputs = []
encoded_ma_features = []

# Just to get the training features
[(train_ma_features, label_ma_batch)] = train_ma_ds.take(1)

# Numeric features.
for header in tqdm(list(train_ma_features.keys()), unit="features"):
    numeric_col = tf.keras.Input(shape=(1,), name=header)
    normalization_layer = get_normalization_layer(header, train_ma_ds)
    encoded_numeric_col = normalization_layer(numeric_col)
    all_ma_inputs.append(numeric_col)
    encoded_ma_features.append(encoded_numeric_col)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:11<00:00,  6.77features/s]


In [17]:
all_ma_features = tf.keras.layers.concatenate(encoded_ma_features)
x = tf.keras.layers.Dense(32, activation="relu")(all_ma_features)
x = tf.keras.layers.Dense(32, activation="relu")(x)
x = tf.keras.layers.Dropout(0.5)(x)
output_ma = tf.keras.layers.Dense(1)(x)
model_ma = tf.keras.Model(all_ma_inputs, output_ma)
model_ma.compile(optimizer='adam',
                 loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                 metrics=["accuracy"])

In [18]:
model_ma.fit(train_ma_ds, epochs=20, validation_data=val_ma_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1df2fb83d00>

In [19]:
loss_ma, accuracy_ma = model_ma.evaluate(test_ma_ds)
print("Accuracy", accuracy_ma)

Accuracy 0.6799430847167969


# Analysis on the New Model
Overall, a 68% accuracy, especially when it comes to sports predictions is quite impressive. This mean that the model will correctly determine the winner of a game two out of three times. It's unreasonable to expect a very high accuracy due to the randomness in sports, however this is a really great start.

# Using the Model for Singular Inputs

In [20]:
sample = test_ma.drop(columns=['target']).to_dict("list")

input_dict = {name: tf.convert_to_tensor([value[0]]) for name, value in sample.items()}
predictions = model_ma.predict(input_dict)
prob = tf.nn.sigmoid(predictions[0])

print("For this particular sample, there is %.1f percent probability of the away team winning." % (100 * prob))

For this particular sample, there is 23.4 percent probability of the away team winning.
