## SVM Model

### Library and Data Importation

In [1]:
#import sys
#!{sys.executable} -m pip install cvxpy
import cvxpy as cp
import pandas as pd
import numpy as np
from numpy import array
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder 
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

#import data set
nfl1 = pd.read_csv("NFL_data_cleaned.csv")
#drop columns that currently cannot be used: game_date, time, yrdln
nfl1 = nfl1.drop(["game_date", "time", "yrdln"], axis=1)
display(nfl1)

nfl1 = nfl1[nfl1.play_type != "kickoff"]
nfl1 = nfl1[nfl1.play_type != "no_play"]
nfl1['play_type'] = nfl1['play_type'].replace({'extra_point': 'kick'})
nfl1['play_type'] = nfl1['play_type'].replace({'field_goal': 'kick'})
nfl1['play_type'] = nfl1['play_type'].replace({'punt': 'kick'})
nfl1 = nfl1.drop(["play_id", "game_id", "half_seconds_remaining", "game_half", "quarter_end", "drive", "sp", "goal_to_go", "ydsnet", "yards_gained"], axis = 1)
nfl1.dropna(inplace=True)
display(nfl1)
print(nfl1.isnull().values.any(), nfl1.isnull().sum().sum())

Unnamed: 0,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,quarter_seconds_remaining,...,quarter_end,drive,sp,qtr,down,goal_to_go,ydstogo,ydsnet,play_type,yards_gained
0,46,2009091000,PIT,TEN,PIT,home,TEN,TEN,30.0,900.0,...,0,1,0,1,,0.0,0,0,kickoff,0.0
1,68,2009091000,PIT,TEN,PIT,home,TEN,PIT,58.0,893.0,...,0,1,0,1,1.0,0.0,10,5,pass,5.0
2,92,2009091000,PIT,TEN,PIT,home,TEN,PIT,53.0,856.0,...,0,1,0,1,2.0,0.0,5,2,run,-3.0
3,113,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,815.0,...,0,1,0,1,3.0,0.0,8,2,pass,0.0
4,139,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,807.0,...,0,1,0,1,4.0,0.0,8,2,punt,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449366,4059,2018121700,CAR,NO,CAR,home,NO,CAR,66.0,63.0,...,0,20,0,4,2.0,0.0,10,19,pass,0.0
449367,4081,2018121700,CAR,NO,CAR,home,NO,CAR,66.0,58.0,...,0,20,0,4,3.0,0.0,10,19,pass,5.0
449368,4106,2018121700,CAR,NO,CAR,home,NO,CAR,61.0,38.0,...,0,20,0,4,4.0,0.0,5,19,pass,0.0
449369,4128,2018121700,CAR,NO,NO,away,CAR,CAR,39.0,35.0,...,0,21,0,4,1.0,0.0,10,-1,qb_kneel,-1.0


Unnamed: 0,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,quarter_seconds_remaining,game_seconds_remaining,qtr,down,ydstogo,play_type
1,PIT,TEN,PIT,home,TEN,PIT,58.0,893.0,3593.0,1,1.0,10,pass
2,PIT,TEN,PIT,home,TEN,PIT,53.0,856.0,3556.0,1,2.0,5,run
3,PIT,TEN,PIT,home,TEN,PIT,56.0,815.0,3515.0,1,3.0,8,pass
4,PIT,TEN,PIT,home,TEN,PIT,56.0,807.0,3507.0,1,4.0,8,kick
5,PIT,TEN,TEN,away,PIT,TEN,98.0,796.0,3496.0,1,1.0,10,run
...,...,...,...,...,...,...,...,...,...,...,...,...,...
449365,CAR,NO,CAR,home,NO,CAR,66.0,64.0,64.0,4,1.0,10,qb_spike
449366,CAR,NO,CAR,home,NO,CAR,66.0,63.0,63.0,4,2.0,10,pass
449367,CAR,NO,CAR,home,NO,CAR,66.0,58.0,58.0,4,3.0,10,pass
449368,CAR,NO,CAR,home,NO,CAR,61.0,38.0,38.0,4,4.0,5,pass


False 0


### Data Preprocessing

In [19]:
#Our 350,000 samples seem like a little too much, so sample about 10,000 rows
sample = nfl1.sample(n=100000, random_state=21, axis=0)
print(sample.isnull().values.any(), sample.isnull().sum().sum())
display(sample)
#one-hot encode the categorical variables
#posteam_type, defteam, side_of_field, game_date (drop), time (convert?), yrdline (convert?)

cat_columns = ["home_team", "away_team", "posteam" , "posteam_type", "defteam", "side_of_field", "qtr"]
#one-hot encode categorical variables
encoder = preprocessing.OneHotEncoder()
cat_array = encoder.fit_transform(sample[cat_columns]).toarray()
cat_labels = encoder.get_feature_names_out(cat_columns)
cat_onehot_encoded = pd.DataFrame(cat_array, columns=cat_labels)

#Add back the continuous variables
cat_onehot_encoded["yardline_100"] = sample["yardline_100"]
cat_onehot_encoded["quarter_seconds_remaining"] = sample["quarter_seconds_remaining"]
cat_onehot_encoded["game_seconds_remaining"] = sample["game_seconds_remaining"]
cat_onehot_encoded["down"] = sample["down"]
cat_onehot_encoded["ydstogo"] = sample["ydstogo"]

cat_onehot_encoded["play_type"] = sample["play_type"]
cat_onehot_encoded.dropna(inplace=True)


display(cat_onehot_encoded)
print(cat_onehot_encoded.isnull().values.any(), cat_onehot_encoded.isnull().sum().sum())
#print(onehot_encoded.isnull().values.any())
#split data into training and testing sets
#seed: 21, train/test ratio: 0.2 test, 0.8 train



x, y = cat_onehot_encoded.drop(["play_type"], axis=1), cat_onehot_encoded["play_type"]
#Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state = 21)



False 0


Unnamed: 0,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,quarter_seconds_remaining,game_seconds_remaining,qtr,down,ydstogo,play_type
27320,OAK,CIN,CIN,away,OAK,CIN,67.0,298.0,298.0,4,1.0,10,run
276674,TEN,IND,IND,away,TEN,TEN,25.0,467.0,467.0,4,1.0,10,pass
210367,ARI,IND,ARI,home,IND,ARI,51.0,101.0,1901.0,2,2.0,10,pass
242713,DET,NO,NO,away,DET,NO,63.0,39.0,1839.0,2,1.0,10,pass
71203,NYJ,HOU,NYJ,home,HOU,NYJ,79.0,70.0,2770.0,1,3.0,8,pass
...,...,...,...,...,...,...,...,...,...,...,...,...,...
344999,SEA,PHI,PHI,away,SEA,PHI,63.0,798.0,3498.0,1,1.0,10,run
67705,BUF,DET,BUF,home,DET,DET,32.0,128.0,1028.0,3,1.0,10,run
131490,NO,ATL,NO,home,ATL,ATL,25.0,35.0,1835.0,2,3.0,8,run
218032,CLE,CHI,CLE,home,CHI,CLE,72.0,691.0,2491.0,2,1.0,10,run


Unnamed: 0,home_team_ARI,home_team_ATL,home_team_BAL,home_team_BUF,home_team_CAR,home_team_CHI,home_team_CIN,home_team_CLE,home_team_DAL,home_team_DEN,...,qtr_2,qtr_3,qtr_4,qtr_5,yardline_100,quarter_seconds_remaining,game_seconds_remaining,down,ydstogo,play_type
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,53.0,856.0,3556.0,2.0,5.0,run
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,56.0,815.0,3515.0,3.0,8.0,pass
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,43.0,684.0,3384.0,1.0,10.0,pass
15,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,89.0,500.0,3200.0,1.0,10.0,run
16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,42.0,477.0,3177.0,1.0,10.0,pass
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99967,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,2.0,659.0,2459.0,2.0,2.0,run
99984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,46.0,301.0,2101.0,1.0,5.0,run
99987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,36.0,230.0,2030.0,2.0,10.0,run
99988,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,181.0,1981.0,1.0,6.0,run


False 0


### Multi-class SVM Model Implementation

In [20]:
#compares each play_type to the other possible play_type

scaler = preprocessing.StandardScaler()
scaler.fit(X_train)

clf_ovo = SVC(kernel='linear', decision_function_shape='ovo') # The other is ovr

clf_ovo.fit(scaler.transform(X_train), np.asarray(y_train))

print(classification_report(y_test, clf_ovo.predict(scaler.transform(X_test))))

home_team_ARI                   1.0
home_team_ATL                   1.0
home_team_BAL                   1.0
home_team_BUF                   1.0
home_team_CAR                   1.0
                              ...  
yardline_100                   99.0
quarter_seconds_remaining     900.0
game_seconds_remaining       3600.0
down                            4.0
ydstogo                        37.0
Length: 188, dtype: float64
              precision    recall  f1-score   support

        kick       0.88      0.98      0.93       397
        pass       0.64      0.71      0.67      2296
    qb_kneel       0.00      0.00      0.00        53
    qb_spike       0.00      0.00      0.00         9
         run       0.57      0.49      0.53      1703

    accuracy                           0.64      4458
   macro avg       0.42      0.44      0.43      4458
weighted avg       0.63      0.64      0.63      4458



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
