### Generate SVM Model

This notebook trains an SVM model on the dataset `NFL_data_super_cleaned.csv`, then saves the trained model to `finalized_svm_model.sav` and the scaler fitted on the training data to `scaler.pkl`.

In [3]:

import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

import pickle



# Load the cleaned data set and discard every row that has a missing value.
nfl1 = pd.read_csv("NFL_data_super_cleaned.csv").dropna()
nfl1_nulls = nfl1.isnull()
print(nfl1_nulls.values.any(), nfl1_nulls.sum().sum())

# The full data set (roughly 350,000 rows) is more than we need, so draw a
# reproducible random subset of 150,000 rows (seed 21).
sample = nfl1.sample(n=150000, random_state=21)
sample_nulls = sample.isnull()
print(sample_nulls.values.any(), sample_nulls.sum().sum())
# One-hot encode the categorical variables.
# Other candidate columns, not used yet:
# posteam_type, defteam, side_of_field, game_date (drop), time (convert?), yrdline (convert?)

cat_columns = ["posteam", "qtr"]
encoder = preprocessing.OneHotEncoder()
cat_array = encoder.fit_transform(sample[cat_columns]).toarray()
cat_labels = encoder.get_feature_names_out(cat_columns)

# Build the encoded frame on sample's own index. `sample` keeps the original
# (shuffled) row labels, and pandas aligns on index labels when a Series is
# assigned as a column. With a default 0..n-1 index the assignments below would
# attach values to the wrong rows and fill the rest with NaN.
cat_onehot_encoded = pd.DataFrame(cat_array, columns=cat_labels, index=sample.index)

# Add back the continuous variables, followed by the target column.
continuous_columns = [
    "yardline_100",
    "quarter_seconds_remaining",
    "down",
    "goal_to_go",
    "ydstogo",
    "score_margin",
]
for column in continuous_columns + ["play_type"]:
    cat_onehot_encoded[column] = sample[column]

# Safety net: `sample` was drawn from a NaN-free frame, so this should drop nothing.
cat_onehot_encoded.dropna(inplace=True)

print(cat_onehot_encoded.isnull().values.any(), cat_onehot_encoded.isnull().sum().sum())

# Separate the feature matrix from the play_type labels.
x = cat_onehot_encoded.drop(columns=["play_type"]).to_numpy()
y = cat_onehot_encoded["play_type"].to_numpy()

# Hold out 20% of the rows for testing; seed 21 keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=21
)

# Standardise the features using statistics learned from the training rows only.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Linear-kernel SVM. 'ovo' selects the one-vs-one decision function shape, which
# compares each play_type against each other play_type (the alternative is 'ovr').
clf_ovo = SVC(kernel="linear", decision_function_shape="ovo")
clf_ovo.fit(X_train_scaled, y_train)

# SAVE the model to disk.
# The `with` block closes the file handle (flushing the pickle to disk) even if
# pickling raises, instead of leaving an open handle for the garbage collector.
filename = 'finalized_svm_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(clf_ovo, model_file)

# SAVE the scaler so new inputs can be transformed the same way as the training data.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)


False 0
False 0
False 0
