In [2]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

Using TensorFlow backend.


In [15]:
# Load the shot data; the first CSV column becomes the row index.
dataset = pd.read_csv('data/fullDF.csv', index_col=0)
# Remove the columns that are excluded from the analysis.
dataset = dataset.drop(columns=['GRID_TYPE', 'PLAYER_ID', 'SHOT_ATTEMPTED_FLAG', 'TEAM_ID'])
# Keep an independent snapshot of the un-encoded frame. A plain assignment would
# only create a second name for the same object, so any in-place change to
# `dataset` would silently change the "copy" as well.
dataset_copy = dataset.copy()

In [16]:
# Columns of the full feature set that are expanded into indicator columns.
categorical_cols = ['ACTION_TYPE', 'HTM', 'PERIOD', 'PLAYER_NAME', 'QUARTER', 'SHOT_TYPE', 'SHOT_ZONE_AREA',
                    'SHOT_ZONE_BASIC', 'SHOT_ZONE_RANGE', 'TEAM_NAME', 'VTM']


def onehot_encode(data, columns=None):
    """Return a copy of ``data`` with categorical columns one-hot encoded.

    Each encoded column is removed and replaced by indicator columns named
    ``<column>_<value>``, appended after the remaining columns in the order
    given by ``columns``. The input frame is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing every column listed in ``columns``.
    columns : list of str, optional
        Columns to encode. Defaults to the module-level ``categorical_cols``.

    Returns
    -------
    pandas.DataFrame
        New frame with the same rows and index as ``data``.

    Raises
    ------
    KeyError
        If a requested column is missing from ``data``.
    """
    if columns is None:
        columns = categorical_cols
    # A single get_dummies call concatenates the indicator columns positionally.
    # Joining each block back on the index (the previous approach) multiplies
    # rows whenever index labels repeat.
    return pd.get_dummies(data, columns=list(columns))

# Encode from the un-encoded snapshot rather than from `dataset` itself, so that
# re-running this cell does not fail on categorical columns that a previous run
# already replaced. `dataset_copy` still holds the raw columns.
dataset = onehot_encode(dataset_copy)

In [17]:
# One-column frame holding each column's correlation with SHOT_MADE_FLAG,
# built in a single chained expression.
correlation_df = (
    dataset.corr()
    .loc[:, ['SHOT_MADE_FLAG']]
    .rename(columns={"SHOT_MADE_FLAG": "correlation_with_making_shot"})
)
correlation_df

Unnamed: 0,correlation_with_making_shot
EVENTTIME,0.012396
LOC_X,0.007481
LOC_Y,-0.036743
MINUTES_REMAINING,0.011510
SECONDS_REMAINING,0.010951
...,...
VTM_SAC,0.004892
VTM_SAS,0.003040
VTM_TOR,-0.004145
VTM_UTA,0.000401


In [18]:
# Order rows by the correlation column, largest value first.
correlation_df = correlation_df.sort_values(
    'correlation_with_making_shot',
    ascending=False,
)

In [19]:
# Print every row whose correlation value is larger than 0.03 in magnitude.
for feature_name, row in correlation_df.iterrows():
    # `row` is a Series indexed by column label, so the first value is read
    # through .iloc. Integer keys via `row[0]` are deprecated in newer pandas
    # for label-indexed Series.
    corr = row.iloc[0]
    if abs(corr) > 0.03:
        print(feature_name, row)
        print()

SHOT_MADE_FLAG correlation_with_making_shot    1.0
Name: SHOT_MADE_FLAG, dtype: float64

SHOT_ZONE_BASIC_Restricted Area correlation_with_making_shot    0.199887
Name: SHOT_ZONE_BASIC_Restricted Area, dtype: float64

SHOT_ZONE_RANGE_Less Than 8 ft. correlation_with_making_shot    0.167723
Name: SHOT_ZONE_RANGE_Less Than 8 ft., dtype: float64

SHOT_ZONE_AREA_Center(C) correlation_with_making_shot    0.133036
Name: SHOT_ZONE_AREA_Center(C), dtype: float64

PLAYER_AVG correlation_with_making_shot    0.120922
Name: PLAYER_AVG, dtype: float64

SHOT_TYPE_2PT Field Goal correlation_with_making_shot    0.12018
Name: SHOT_TYPE_2PT Field Goal, dtype: float64

ACTION_TYPE_Dunk Shot correlation_with_making_shot    0.10812
Name: ACTION_TYPE_Dunk Shot, dtype: float64

ACTION_TYPE_Cutting Layup Shot correlation_with_making_shot    0.085503
Name: ACTION_TYPE_Cutting Layup Shot, dtype: float64

ACTION_TYPE_Cutting Dunk Shot correlation_with_making_shot    0.082773
Name: ACTION_TYPE_Cutting Dunk Shot, d

Features most strongly correlated (positively or negatively) with making the shot:

Shot Zone

Player Average

Shot Type

Action Type

Shot Distance

LOC_Y

In [23]:
# Target column plus the selected un-encoded feature columns, taken from the
# snapshot made before one-hot encoding.
dataset_imp_features_only = dataset_copy.loc[
    :,
    [
        'SHOT_MADE_FLAG',
        'SHOT_ZONE_BASIC',
        'SHOT_ZONE_AREA',
        'SHOT_ZONE_RANGE',
        'PLAYER_AVG',
        'SHOT_TYPE',
        'ACTION_TYPE',
        'SHOT_DISTANCE',
        'LOC_Y',
    ],
]
dataset_imp_features_only

Unnamed: 0,SHOT_MADE_FLAG,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,PLAYER_AVG,SHOT_TYPE,ACTION_TYPE,SHOT_DISTANCE,LOC_Y
2732,0,Above the Break 3,Left Side Center(LC),24+ ft.,0.510288,3PT Field Goal,Jump Shot,25,125.2763
12130,1,Restricted Area,Center(C),Less Than 8 ft.,0.357466,2PT Field Goal,Running Dunk Shot,0,-5.5908
74191,1,Restricted Area,Center(C),Less Than 8 ft.,0.508380,2PT Field Goal,Hook Shot,2,78.5379
19621,0,Restricted Area,Center(C),Less Than 8 ft.,0.452555,2PT Field Goal,Layup Shot,1,238.1948
66474,0,Above the Break 3,Right Side Center(RC),24+ ft.,0.244444,3PT Field Goal,Jump Shot,25,139.8303
...,...,...,...,...,...,...,...,...,...
70944,0,Mid-Range,Right Side Center(RC),16-24 ft.,0.538462,2PT Field Goal,Jump Shot,18,217.1834
76301,0,Above the Break 3,Right Side Center(RC),24+ ft.,0.487013,3PT Field Goal,Running Pull-Up Jump Shot,25,206.2505
48761,0,In The Paint (Non-RA),Center(C),8-16 ft.,0.448649,2PT Field Goal,Jump Shot,12,246.5670
2308,0,Restricted Area,Center(C),Less Than 8 ft.,0.504292,2PT Field Goal,Layup Shot,1,50.1469


In [24]:
# Hold out 20% of the fully encoded frame for testing. A fixed random_state
# makes the train/test partition repeatable across kernel restarts.
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)
# Separate the target column from the features; the targets stay one-column frames.
x_train = train_data.drop(columns=['SHOT_MADE_FLAG'])
y_train = train_data[['SHOT_MADE_FLAG']]
x_test = test_data.drop(columns=['SHOT_MADE_FLAG'])
y_test = test_data[['SHOT_MADE_FLAG']]

# Categorical columns present in the reduced feature set.
categorical_cols1 = ['ACTION_TYPE', 'SHOT_TYPE', 'SHOT_ZONE_AREA', 'SHOT_ZONE_BASIC', 'SHOT_ZONE_RANGE']


def onehot_encode1(data, columns=None):
    """Return a copy of ``data`` with the reduced set of categoricals one-hot encoded.

    Each encoded column is removed and replaced by indicator columns named
    ``<column>_<value>``, appended after the remaining columns in the order
    given by ``columns``. The input frame is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing every column listed in ``columns``.
    columns : list of str, optional
        Columns to encode. Defaults to the module-level ``categorical_cols1``.

    Returns
    -------
    pandas.DataFrame
        New frame with the same rows and index as ``data``.

    Raises
    ------
    KeyError
        If a requested column is missing from ``data``.
    """
    if columns is None:
        columns = categorical_cols1
    # One positional concatenation instead of a per-column join on the index,
    # which would multiply rows whenever index labels repeat.
    return pd.get_dummies(data, columns=list(columns))

# Replace the categorical columns of the reduced frame with indicator columns.
# NOTE: this rebinds the name to the encoded frame, so the cell that builds the
# un-encoded selection must be re-run before this one is executed again.
dataset_imp_features_only = onehot_encode1(dataset_imp_features_only)

# Hold out 20% of the reduced frame for testing. A fixed random_state makes the
# train/test partition repeatable across kernel restarts.
train_data1, test_data1 = train_test_split(dataset_imp_features_only, test_size=0.2, random_state=42)
x_train1 = train_data1.drop(columns=['SHOT_MADE_FLAG'])
y_train1 = train_data1[['SHOT_MADE_FLAG']]
x_test1 = test_data1.drop(columns=['SHOT_MADE_FLAG'])
y_test1 = test_data1[['SHOT_MADE_FLAG']]

In [25]:
# Only a cell's final expression is rendered automatically, so the first preview
# is shown explicitly with display() (available in the IPython kernel namespace).
display(x_train.head())
x_train1.head()

Unnamed: 0,PLAYER_AVG,SHOT_DISTANCE,LOC_Y,ACTION_TYPE_Alley Oop Dunk Shot,ACTION_TYPE_Alley Oop Layup shot,ACTION_TYPE_Cutting Dunk Shot,ACTION_TYPE_Cutting Finger Roll Layup Shot,ACTION_TYPE_Cutting Layup Shot,ACTION_TYPE_Driving Bank Hook Shot,ACTION_TYPE_Driving Bank shot,...,SHOT_ZONE_BASIC_In The Paint (Non-RA),SHOT_ZONE_BASIC_Left Corner 3,SHOT_ZONE_BASIC_Mid-Range,SHOT_ZONE_BASIC_Restricted Area,SHOT_ZONE_BASIC_Right Corner 3,SHOT_ZONE_RANGE_16-24 ft.,SHOT_ZONE_RANGE_24+ ft.,SHOT_ZONE_RANGE_8-16 ft.,SHOT_ZONE_RANGE_Back Court Shot,SHOT_ZONE_RANGE_Less Than 8 ft.
39471,0.45122,17,88.0143,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
926,0.527869,8,2.2868,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
54252,0.41443,9,284.6756,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
76642,0.503464,5,58.3177,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
71131,0.487013,22,31.9727,0,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,0


In [27]:
# Binary classifier over the full feature set: three ReLU hidden layers
# (1000, 500 and 100 units) followed by a single sigmoid output unit.
model = Sequential([
    Dense(1000, input_dim=x_train.shape[1], activation='relu'),
    Dense(500, activation='relu'),
    Dense(100, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train for 80 epochs in mini-batches of 1024 rows; the returned History object
# is the cell's displayed value.
model.fit(x_train, y_train, batch_size=1024, epochs=80)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80


Epoch 79/80
Epoch 80/80


<keras.callbacks.callbacks.History at 0x7fb8f8d30828>

In [28]:
# `predict_classes` was removed from later Keras releases. For a single sigmoid
# output it is equivalent to thresholding the predicted probability at 0.5,
# which works on both old and new versions.
y_test_pred = (model.predict(x_test) > 0.5).astype("int32")

In [29]:
# Fraction of held-out rows whose predicted label matches the true label,
# rounded to three decimals.
full_model_accuracy = round(accuracy_score(y_test, y_test_pred), 3)
print(f"accuracy: {full_model_accuracy}")

accuracy: 0.61


In [26]:
# Smaller binary classifier trained on the reduced feature set: three ReLU
# hidden layers (150, 75 and 10 units) followed by a single sigmoid output unit.
model1 = Sequential()
model1.add(Dense(150, input_dim=len(x_train1.columns), activation='relu'))
model1.add(Dense(75, activation='relu'))
model1.add(Dense(10, activation='relu'))
model1.add(Dense(1, activation='sigmoid'))

model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model1.fit(x_train1, y_train1, epochs=80, batch_size=1024)

# `predict_classes` was removed from later Keras releases. For a single sigmoid
# output it is equivalent to thresholding the predicted probability at 0.5.
y_test_pred1 = (model1.predict(x_test1) > 0.5).astype("int32")
print(f"accuracy (only using most correlated features): {round(accuracy_score(y_test1, y_test_pred1), 3)}")

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80


Epoch 79/80
Epoch 80/80
accuracy (only using most correlated features): 0.645


In [100]:
# Shallower classifier over the full feature set: two ReLU hidden layers
# (1000 and 100 units) followed by a single sigmoid output unit.
# NOTE: this rebinds `model1`, replacing the reduced-feature model that an
# earlier cell stored under the same name.
model1 = Sequential([
    Dense(1000, input_dim=x_train.shape[1], activation='relu'),
    Dense(100, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train for 120 epochs in mini-batches of 1024 rows; the returned History object
# is the cell's displayed value.
model1.fit(x_train, y_train, batch_size=1024, epochs=120)

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78

<keras.callbacks.callbacks.History at 0x7f87ee5fd208>

In [101]:
# `predict_classes` was removed from later Keras releases. For a single sigmoid
# output it is equivalent to thresholding the predicted probability at 0.5.
y_test_pred_m1 = (model1.predict(x_test) > 0.5).astype("int32")
print(f"model 1 accuracy: {round(accuracy_score(y_test, y_test_pred_m1), 3)}")

model 1 accuracy: 0.607
