In [None]:
import pandas as pd
import pandas as pd
import numpy as np
from numpy import array
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_randfloat

# Path of the cleaned NFL dataset, resolved relative to the notebook's working directory.
NFL_CSV_PATH = "NFL_data_super_cleaned.csv"

# Read the full CSV into a DataFrame using pandas' default parsing options.
nfl = pd.read_csv(NFL_CSV_PATH)

In [None]:
def _label_then_onehot(values):
    """Integer-encode ``values`` and expand the codes into a dense one-hot matrix.

    Parameters
    ----------
    values : array-like of shape (n_samples,)
        Raw category labels.

    Returns
    -------
    tuple
        ``(label_encoder, codes, onehot_encoder, onehot)`` where ``codes`` is the
        ``(n_samples, 1)`` integer column fed to the one-hot encoder and ``onehot``
        is the dense ``(n_samples, n_categories)`` indicator matrix. Column ``i`` of
        ``onehot`` corresponds to ``label_encoder.classes_[i]``.
    """
    label_encoder = LabelEncoder()
    # OneHotEncoder expects a 2-D input, hence the single-column reshape.
    codes = label_encoder.fit_transform(values).reshape(-1, 1)
    try:
        # scikit-learn >= 1.2 spells the dense-output switch `sparse_output`.
        onehot_encoder = OneHotEncoder(sparse_output=False)
    except TypeError:
        # Older releases only know the original `sparse` keyword.
        onehot_encoder = OneHotEncoder(sparse=False)
    onehot = onehot_encoder.fit_transform(codes)
    return label_encoder, codes, onehot_encoder, onehot


# Target variable: play_type -> integer codes -> one-hot rows.
play = array(nfl["play_type"])
play_encoder, play_encoded, play_onehot_encoder, play_onehot_encoded = _label_then_onehot(play)

# Store each row's one-hot vector in the frame as a Python list.
# NOTE(review): this overwrites the raw labels in `nfl`, so a second run of this
# cell would encode the lists rather than the original labels; reload `nfl` first.
nfl["play_type"] = play_onehot_encoded.tolist()

# Possession team: same pipeline, kept as a DataFrame so it can be joined to the features.
posteam = array(nfl["posteam"])
posteam_encoder, posteam_encoded, posteam_onehot_encoder, _posteam_matrix = _label_then_onehot(posteam)
posteam_onehot_encoded = pd.DataFrame(_posteam_matrix)

# Preview only the first rows instead of rendering the whole frame.
nfl.head()

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Replace the raw posteam column with its one-hot expansion (index-aligned join).
nfl1 = nfl.drop(columns = ['posteam'])
nfl2 = posteam_onehot_encoded.join(nfl1)

# Label the one-hot columns from the fitted encoder instead of a hard-coded team
# list: column i of the one-hot matrix corresponds to posteam_encoder.classes_[i],
# so the labels stay correct whatever set of team codes the data contains.
team_columns = [str(team) for team in posteam_encoder.classes_]

# Names for the remaining columns, in the order they appear in nfl1.
feature_columns = ['yardline_100', 'quarter_seconds_remaining', 'qtr', 'down',
                   'goal_to_go', 'ydstogo', 'score_margin', 'play_type']

# pandas raises ValueError here if the number of names does not match the frame width.
nfl2.columns = team_columns + feature_columns

# Preview only the first rows instead of rendering the whole frame.
nfl2.head()


In [None]:
# Numeric game-state columns rescaled to [0, 1]; the one-hot team columns are already 0/1.
scaled_columns = ['yardline_100', 'quarter_seconds_remaining', 'qtr', 'down',
                  'goal_to_go', 'ydstogo', 'score_margin']

# Work on a copy so nfl2 keeps the unscaled values.
nfl3 = nfl2.copy()

# MinMaxScaler scales each column independently, so one fit over all columns gives
# the same values as fitting column by column, and leaves `scaler` holding the
# range of every scaled column rather than only the last one.
# NOTE(review): the scaler is fitted on every row before the train/test split, so
# held-out rows contribute to the min/max used for scaling.
scaler = MinMaxScaler()
nfl3[scaled_columns] = scaler.fit_transform(nfl3[scaled_columns])

# Preview only the first rows instead of rendering the whole frame.
nfl3.head()

In [None]:
# Feature matrix: every column of the scaled frame except the play_type column.
X = nfl3.drop("play_type", axis = 1)
# Target: the dense one-hot play_type matrix.
# NOTE(review): because y is a 2-D indicator matrix, scikit-learn fits this as a
# multilabel problem, so predict() thresholds each output independently and may
# return rows with zero or several ones. Later cells rely on the 2-D shape.
y = play_onehot_encoded

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import classification_report

# Hold out 10% of the rows. The fixed seed makes the split, and every metric
# computed from it, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# SGD-trained MLP with logistic activations and two hidden layers (40 then 3 units).
clf = MLPClassifier(
    solver='sgd',
    random_state=42,
    activation='logistic',
    learning_rate_init=0.3,
    batch_size=100,
    hidden_layer_sizes=(40, 3),
    max_iter=500,
)

clf.fit(X_train, y_train)

# Thresholded indicator predictions for the held-out rows.
y_pred = clf.predict(X_test)

In [None]:
# Per-class scores for the held-out rows, shape (n_samples, n_classes).
prob = clf.predict_proba(X_test)

# Turn each row into a one-hot vector marking its highest-scoring class.
# argmax picks the first maximum, so every row gets exactly one 1 even on ties.
# Building a fresh array leaves `prob` untouched and avoids loop variables that
# would overwrite the global target `y`.
newy_pred = np.zeros_like(prob)
newy_pred[np.arange(len(prob)), prob.argmax(axis=1)] = 1

In [None]:
# Generate a confusion matrix (rows: true class index, columns: predicted class index).
# The predicted class is taken from newy_pred, the one-hot of the highest-scoring
# class, so it matches the MSE and classification report below. Taking argmax of
# the thresholded y_pred would count every all-zero prediction row as class 0.
print(confusion_matrix(y_test.argmax(axis=1), newy_pred.argmax(axis=1)))

# NOTE(review): with indicator-matrix targets, score() reports subset accuracy of
# the thresholded predictions, which can be lower than the argmax-based accuracy.
print("Accuracy:", clf.score(X_test, y_test))

# Mean squared error between the true and predicted one-hot matrices.
mse = mean_squared_error(y_test, newy_pred)
print('MSE:', mse)

# Per-class precision / recall / F1 on the one-hot predictions.
print(classification_report(y_test, newy_pred))

In [None]:
# 5-fold cross-validation of the MLP configuration used for `clf`.
acc_per_fold = []
loss_per_fold = []

kfold = KFold(n_splits=5)

for fold_num, (train, test) in enumerate(kfold.split(X, play_onehot_encoded), start=1):
    # Row positions for this fold; X is a DataFrame, the target a NumPy array.
    X_fold_train, y_fold_train = X.iloc[train], play_onehot_encoded[train]
    X_fold_test, y_fold_test = X.iloc[test], play_onehot_encoded[test]

    # Fresh, unfitted model per fold with the same hyperparameters as `clf`.
    model = MLPClassifier(solver = 'sgd', random_state = 42, activation = 'logistic', learning_rate_init = 0.3, batch_size = 100, hidden_layer_sizes = (40, 3), max_iter = 500)
    model.fit(X_fold_train, y_fold_train)
    score = model.score(X_fold_test, y_fold_test)

    # Evaluate THIS fold's model on THIS fold's held-out rows: one-hot of the
    # highest-scoring class per row, compared against the fold's true targets.
    fold_prob = model.predict_proba(X_fold_test)
    fold_pred = np.zeros_like(fold_prob)
    fold_pred[np.arange(len(fold_prob)), fold_prob.argmax(axis=1)] = 1
    mse = mean_squared_error(y_fold_test, fold_pred)

    print(f'Fold {fold_num} - Accuracy: {score:.3f}; Loss: {mse:.3f}')
    acc_per_fold.append(score)
    loss_per_fold.append(mse)

# np.mean is used because no bare `mean` is imported in this notebook.
print('Average Accuracy: %.3f' % (np.mean(acc_per_fold)))
print('Average Loss: %.3f' % (np.mean(loss_per_fold)))

In [None]:
# Randomised hyperparameter search over the architecture and initial learning rate.
hypertuner = RandomizedSearchCV(
    estimator=clf,
    param_distributions={
        # Candidate architectures must be given as a list of tuples.
        'hidden_layer_sizes': [(3, 40), (40,)],
        # scipy's uniform(loc, scale) spans [loc, loc + scale]; scale=0.2 gives [0.1, 0.3].
        'learning_rate_init': sp_randfloat(loc=0.1, scale=0.2),
    },
    cv=5,
    return_train_score=False,
    n_jobs=5,
    # Fixed seed so the sampled candidates are the same on every run.
    random_state=42,
)

# Search on the training split only, so X_test / y_test remain unseen data for
# the final evaluation of the refitted best estimator.
hypertuner.fit(X_train, y_train)

In [None]:
# Summarise the search: mean cross-validated score of the winning candidate
# and the parameter values that produced it.
best_cv_score = hypertuner.best_score_
best_cv_params = hypertuner.best_params_
print('Best Score: %s' % best_cv_score)
print('Best Hyperparameters: %s' % best_cv_params)

In [None]:
# Estimator refitted by the search with the best parameter combination.
bestModel = hypertuner.best_estimator_

# Score it on the held-out split.
# NOTE(review): this is only an unbiased estimate if the search was fitted on data
# that excludes the X_test rows; confirm against the hypertuner.fit call.
held_out_accuracy = bestModel.score(X_test, y_test)
print("Accuracy:", held_out_accuracy)