In [1]:
# import libraries and helpful functions 

import pandas as pd 
import numpy as np
import sklearn.model_selection as model_selection
from sklearn import metrics
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV

from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report 
from sklearn.neural_network import MLPClassifier

In [2]:
team_file = 'teams_final.csv'
teams = pd.read_csv(team_file)

In [3]:
X = teams.drop(['team',
                     'sum',
                     'playoff_Wildcard', 
                     'playoff_Division',
                     'playoff_Conference', 
                     'playoff_Superbowl',
                     'division_win', 
                     'conference_win'], axis=1)
X = preprocessing.scale(X)
y = teams['playoff_Conference']

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, 
                                                                    train_size=0.8, 
                                                                    test_size=0.2, 
                                                                    random_state=11)

## Support Vector Machines

In [4]:
## Create a SVM instance with no tuning
## once we get the predictions, round them to 0 or 1, take the absolute value, 
# and turn them to integers to create confusion matrix

clf = svm.SVR()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
y_pred = np.around(y_pred)
y_pred = np.abs(y_pred)
y_pred = y_pred.astype('int') 

print(clf)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test,y_pred))


SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
Accuracy: 0.6447368421052632
[[42  8]
 [19  7]]
              precision    recall  f1-score   support

           0       0.69      0.84      0.76        50
           1       0.47      0.27      0.34        26

    accuracy                           0.64        76
   macro avg       0.58      0.55      0.55        76
weighted avg       0.61      0.64      0.61        76



In [5]:
## this function gives us the recommended C and gamma parameters based on the kernel
## placed within the grid search definiti8on

def svc_param_selection(X, y, nfolds):
    Cs = [0.001, 0.01, 0.1, 1, 10]
    gammas = [0.001, 0.01, 0.1, 1]
    param_grid = {'C': Cs, 'gamma' : gammas}
    grid_search = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv=nfolds)
    grid_search.fit(X, y)
    grid_search.best_params_
    return grid_search.best_params_

print(svc_param_selection(X, y, 64))

{'C': 10, 'gamma': 0.001}


In [21]:
## Next, SVM with the recommended parameters, same kernel

clf = svm.SVR(C=10, kernel='rbf')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
y_pred = np.around(y_pred)
y_pred = np.abs(y_pred)
y_pred = y_pred.astype('int') 

print(clf)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test,y_pred))

SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
Accuracy: 0.631578947368421
[[40 10]
 [18  8]]
              precision    recall  f1-score   support

           0       0.69      0.80      0.74        50
           1       0.44      0.31      0.36        26

    accuracy                           0.63        76
   macro avg       0.57      0.55      0.55        76
weighted avg       0.61      0.63      0.61        76



In [22]:
# Using a linear kernel instead, we see a small improvement in our precision
# when classifying Conference participants

wclf = svm.SVC(kernel='linear')
wclf.fit(X_train, y_train)
y_pred = wclf.predict(X_test)
y_pred = np.around(y_pred)
y_pred = np.abs(y_pred)
y_pred = y_pred.astype('int') 
print(wclf)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test,y_pred))

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
Accuracy: 0.6842105263157895
[[43  7]
 [17  9]]
              precision    recall  f1-score   support

           0       0.72      0.86      0.78        50
           1       0.56      0.35      0.43        26

    accuracy                           0.68        76
   macro avg       0.64      0.60      0.61        76
weighted avg       0.66      0.68      0.66        76



In [10]:
## Get all of the mathematical elements of our SVM

# print('w = ',clf.coef_)
print('b = ',clf.intercept_)
# print('Indices of support vectors = ', clf.support_)
# print('Support vectors = ', clf.support_vectors_)
print('Number of support vectors for each class = ', clf.n_support_)
# print('Coefficients of the support vector in the decision function = ', np.abs(clf.dual_coef_))

b =  [0.27570435]
Number of support vectors for each class =  [263]


In [20]:
## Get the 'perfect' spread of our dependent variable
print(confusion_matrix(y_test,y_test))

[[50  0]
 [ 0 26]]


## Neural Nets

In [12]:
## Create neural net with no parameters except random state

mlp = MLPClassifier(random_state=1137)
mlp.fit(X_train,y_train)
predictions = mlp.predict(X_test)
print(confusion_matrix(y_test,predictions))
print("Accuracy:",metrics.accuracy_score(y_test, predictions))
print(classification_report(y_test,predictions))
print(mlp)

[[39 11]
 [18  8]]
Accuracy: 0.618421052631579
              precision    recall  f1-score   support

           0       0.68      0.78      0.73        50
           1       0.42      0.31      0.36        26

    accuracy                           0.62        76
   macro avg       0.55      0.54      0.54        76
weighted avg       0.59      0.62      0.60        76

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1137, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)




In [13]:
## Create NN with default solver but with specified layer architecture chosen after some initial experimentation.

mlp = MLPClassifier(hidden_layer_sizes=(8,6),max_iter=1500, random_state=1137)
mlp.fit(X_train,y_train)
predictions = mlp.predict(X_test)
print(confusion_matrix(y_test,predictions))
print("Accuracy:",metrics.accuracy_score(y_test, predictions))
print(classification_report(y_test,predictions))
print(mlp)

[[32 18]
 [13 13]]
Accuracy: 0.5921052631578947
              precision    recall  f1-score   support

           0       0.71      0.64      0.67        50
           1       0.42      0.50      0.46        26

    accuracy                           0.59        76
   macro avg       0.57      0.57      0.56        76
weighted avg       0.61      0.59      0.60        76

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(8, 6), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=1500,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1137, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)


In [14]:
## Use the same architecture, but the gradient descent solver

mlp = MLPClassifier(hidden_layer_sizes=(8,6), solver='sgd', max_iter=5000, random_state=1137)
mlp.fit(X_train,y_train)
predictions = mlp.predict(X_test)
print(confusion_matrix(y_test,predictions))
print("Accuracy:",metrics.accuracy_score(y_test, predictions))
print(classification_report(y_test,predictions))
print(mlp)

[[40 10]
 [15 11]]
Accuracy: 0.6710526315789473
              precision    recall  f1-score   support

           0       0.73      0.80      0.76        50
           1       0.52      0.42      0.47        26

    accuracy                           0.67        76
   macro avg       0.63      0.61      0.61        76
weighted avg       0.66      0.67      0.66        76

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(8, 6), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=5000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1137, shuffle=True, solver='sgd',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)


In [15]:
## Use yet another solver designed for smaller data sets, the lbfgs algorithm.

mlp = MLPClassifier(hidden_layer_sizes=(8,6), solver='lbfgs', max_iter=5000, random_state=1137)
mlp.fit(X_train,y_train)
predictions = mlp.predict(X_test)
print(confusion_matrix(y_test,predictions))
print("Accuracy:",metrics.accuracy_score(y_test, predictions))
print(classification_report(y_test,predictions))
print(mlp)

[[34 16]
 [11 15]]
Accuracy: 0.6447368421052632
              precision    recall  f1-score   support

           0       0.76      0.68      0.72        50
           1       0.48      0.58      0.53        26

    accuracy                           0.64        76
   macro avg       0.62      0.63      0.62        76
weighted avg       0.66      0.64      0.65        76

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(8, 6), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=5000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1137, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)


In [16]:
## Using the lbfgs algorithm, but simplifying the architecture significantly.

mlp = MLPClassifier(hidden_layer_sizes=(1), solver='lbfgs', max_iter=5000, random_state=1137, early_stopping=True,
                   activation='logistic')
mlp.fit(X_train,y_train)
predictions = mlp.predict(X_test)
print(confusion_matrix(y_test,predictions))
print("Accuracy:",metrics.accuracy_score(y_test, predictions))
print(classification_report(y_test,predictions))
print(mlp)

[[40 10]
 [14 12]]
Accuracy: 0.6842105263157895
              precision    recall  f1-score   support

           0       0.74      0.80      0.77        50
           1       0.55      0.46      0.50        26

    accuracy                           0.68        76
   macro avg       0.64      0.63      0.63        76
weighted avg       0.67      0.68      0.68        76

MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
              beta_1=0.9, beta_2=0.999, early_stopping=True, epsilon=1e-08,
              hidden_layer_sizes=1, learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=5000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1137, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)
