# SVM Model

In [5]:
# load packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
from scipy.signal import savgol_filter
import statsmodels.api as sm
import pymc3 as pm
import statsmodels.api as sm
from statsmodels.tools import add_constant
from itertools import combinations
# settings for seaborn plotting style
sns.set(color_codes=True)
# settings for seaborn plot sizes
sns.set(rc={'figure.figsize':(12,6)})
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# import logistic regression from sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [6]:
# Load Data
df = pd.read_pickle('nfl_df_averages.pkl')

In [7]:
# create X that is df columns with 'Avg' in them
X = df[[col for col in df.columns if 'Avg' in col]]
# remove home_homeAvg and home_awayAvg
X = X.drop(['home_homeAvg', 'home_awayAvg'], axis=1)

y = df['winner']

# split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# do a grid search to find the best parameters of the SVM model
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1, 1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['linear']}
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)
grid.fit(X_train, y_train)

# print the best parameters
print(grid.best_params_)

# report classification accuracy of the best model
print(grid.best_estimator_.score(X_test, y_test))

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.559 total time=   1.2s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.559 total time=   1.2s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.559 total time=   1.2s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.559 total time=   1.2s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.560 total time=   1.2s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.559 total time=   1.2s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.559 total time=   1.2s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.559 total time=   1.2s
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.559 total time=   1.2s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.560 total time=   1.2s
[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.559 total time=   1.2s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf

In [9]:
# create an SVM model
svm = SVC(kernel='linear', C=1, random_state=621)

# fit the model to the training data
svm.fit(X_train, y_train)

# make predictions
predictions = svm.predict(X_test)

# evaluate the model
print(confusion_matrix(y_test, predictions))
print(accuracy_score(y_test, predictions))

[[244 204]
 [173 451]]
0.648320895522388
