In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

In [37]:
print("Loading in Data...")

shotFile = "Basketball Data/NBAPlayerTrackingData_2014-17/2016-17_nba_shot_log.csv"
shot_df = pd.read_csv(shotFile)

print("Done!")

Loading in Data...
Done!


In [38]:
# define learning algorithm inputs and outputs
X = shot_df[['SHOT_DIST', 'TOUCH_TIME', 'CLOSE_DEF_DIST', 'PTS_TYPE']]
Y = shot_df["PTS"]

In [39]:
print("Prepare Data for Learning...")
print()

print("Vector Dimensions:")
print("X:", X.shape)
print("Y:", Y.shape)
print()

"""
# normalize data
for column in list(X):
    values = X[column]
    mean = np.mean(values)
    std = np.std(values)
    X[column] = (X[column] - mean) / std
"""

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

print("Done!")

Prepare Data for Learning...

Vector Dimensions:
X: (222661, 4)
Y: (222661,)

Done!


In [40]:
print("Machine Learning Underway...")
print()

bestScore = 0
bestModel = None
NNInfra = (8, 11)

print("Grid Searching for Optimal # of Layers / Units...")
for layers in range(1, 3):
    for units in range(1, 3):
        model = MLPRegressor(hidden_layer_sizes=(layers, units), alpha=0.001, random_state=20,
                              batch_size='auto', learning_rate='constant', learning_rate_init=0.01)
        model.fit(X_train, y_train)
        currentScore = model.score(X_test, y_test)
        print(str(layers) + " layers & " + str(units) + " units yields error =", str(currentScore))
        if currentScore > bestScore:
            bestScore = currentScore
            bestModel = model
            NNInfra = (layers, units)

bestModel = MLPRegressor(hidden_layer_sizes=NNInfra, alpha=0.001, random_state=20,
                     batch_size='auto', learning_rate='constant', learning_rate_init=0.01)
bestModel.fit(X_train, y_train)
predicted_Y = bestModel.predict(X)

print("Error =", bestModel.score(X_test, y_test))
print()

print("Done!")
print()

Machine Learning Underway...

Grid Searching for Optimal # of Layers / Units...
1 layers & 1 units yields error = 0.0234009918178
1 layers & 2 units yields error = 0.0187449934856
2 layers & 1 units yields error = 0.0266120705257
2 layers & 2 units yields error = 0.0327631216686
Error = 0.0327631216686

Done!



In [44]:
# Feature Sequence: ['SHOT_DIST', 'TOUCH_TIME', 'CLOSE_DEF_DIST', 'PTS_TYPE']
bestModel.predict([[20, 10, 10, 2]])

array([ 0.82785283])