In [1]:
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import seaborn as sns
from imblearn.under_sampling import TomekLinks
from sklearn import linear_model
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import confusion_matrix,plot_confusion_matrix, cohen_kappa_score, accuracy_score, classification_report, fbeta_score, make_scorer
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVR
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

pd.set_option('display.max_columns', None)


In [2]:
def clean_features(data):
    """Drop unused columns and incomplete rows, then apply the standard names.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw frame containing the 'Ties', 'Opponent Ties', 'Home' and 'test'
        columns plus 31 further columns, which are renamed by position.

    Returns
    -------
    pandas.DataFrame
        A new frame without the four unused columns and without any row that
        has a missing value in the remaining columns.
    """
    unused_columns = ['Ties', 'Opponent Ties', 'Home', 'test']
    standard_names = [
        'Date', 'Team', 'Opponent', 'Score', 'Opponent Score',
        'Playoff Game?', 'Odds Open', 'Line Open', 'Win %', 'Total DVOA',
        'Offense DVOA', 'Defense DVOA', 'Special Teams DVOA', 'PassOffense',
        'RushOffense', 'PassDefense', 'RushDefense', 'Opponent Win %',
        'Opponent Total DVOA', 'Opponent Offense DVOA', 'Opponent Defense DVOA',
        'Opponent Special Teams DVOA', 'Opponent PassOffense',
        'Opponent RushOffense', 'Opponent PassDefense',
        'Opponent RushDefense', 'Wins', 'Losses', 'Opponent Wins',
        'Opponent Losses', 'Cover?',
    ]

    # Columns are dropped first, so a missing value that only occurs in one of
    # the unused columns does not remove the row.
    cleaned = data.drop(columns=unused_columns).dropna()
    cleaned.columns = standard_names
    return cleaned

In [3]:
def split_data(data):
    """Separate the cleaned game data into model features and targets.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with the column names assigned by ``clean_features``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``features_final`` holds the numeric feature columns followed by the
        dummy-encoded object columns. ``targets`` holds 'Score',
        'Opponent Score', 'Odds Open', 'Line Open' and 'Cover?'.
        Note that 'Line Open' is present in both frames.
    """
    targets=data[['Score','Opponent Score','Odds Open','Line Open','Cover?']]
    features = data.drop(['Score','Opponent Score' ,'Odds Open', 'Wins' ,'Losses','Opponent Wins' ,'Opponent Losses' ,'Team', 'Date', 'Opponent','Cover?'],axis=1)

    # Coerce the win percentages to floats so they are picked up as numeric.
    features["Win %"] = pd.to_numeric(features["Win %"], downcast="float")
    features["Opponent Win %"] = pd.to_numeric(features["Opponent Win %"], downcast="float")

    numerical=features.select_dtypes(np.number)
    # The `np.object` alias was removed in NumPy 1.24; the builtin `object`
    # selects the same columns on every NumPy version.
    categorical=features.select_dtypes(object)
    dumcat=pd.get_dummies(categorical)
    features_final=pd.concat([numerical, dumcat],axis=1)
    return features_final,targets

In [25]:
def _print_regression_metrics(y_true, y_pred, n_features):
    """Print MSE, RMSE, R2 and adjusted R2 for one set of predictions."""
    mse = mean_squared_error(y_true, y_pred)
    print("The MSE value is: ",round(mse,4))

    rmse = math.sqrt(mse)
    print("The RMSE value is: ",round(rmse,4))

    r2 = r2_score(y_true, y_pred)
    print("The R2 value is: ",round(r2,4))

    n_obs = len(y_true)
    residual_dof = n_obs - n_features - 1
    # Adjusted R2 is undefined without residual degrees of freedom; report NaN
    # instead of dividing by zero.
    if residual_dof == 0:
        adj_r2 = float("nan")
    else:
        adj_r2 = 1-((1-r2)*(n_obs-1)/residual_dof)
    print("The R_adj^{2} value  is: ",round(adj_r2,4))


def _print_cover_report(title, actual, predicted):
    """Print accuracy, Cohen's kappa and the classification report for cover calls."""
    print(title)
    if len(actual) == 0:
        # Nothing to score: skip the sklearn metrics rather than calling them
        # with zero samples.
        print("No games to evaluate.")
        return
    print("The accuracy of the model on test set is: %4.2f " % accuracy_score(actual, predicted))
    print("The Kappa of your model is: %4.2f" % (cohen_kappa_score(actual, predicted)))
    print("------ Classification_Report------ ")
    print(classification_report(actual, predicted))
    print(" =================================================================== ")


def run_model(model,X,Y,scaled,safe_cover_margin=5,safe_miss_margin=3):
    """Fit ``model`` on both scores and evaluate the derived cover predictions.

    The data is split 70/30 with a fixed seed. ``model`` is fitted first on
    'Score' and then on 'Opponent Score'; regression metrics are printed for
    each. The predictions are then adjusted by a matchup heuristic, turned into
    cover / no-cover calls against 'Line Open', and compared with the actual
    outcome, both for every test game and for the "safe bet" subset.

    Parameters
    ----------
    model : estimator
        Object with ``fit`` (returning the fitted estimator) and ``predict``.
        The same object is fitted twice, so after the call it is left fitted
        on 'Opponent Score'.
    X : pandas.DataFrame
        Features. Must contain 'Line Open', 'PassOffense', 'RushOffense',
        'PassDefense', 'RushDefense' and their 'Opponent ...' counterparts.
    Y : pandas.DataFrame
        Targets with 'Score' and 'Opponent Score' columns.
    scaled : str
        "yes" to standardise the features with a scaler fitted on the training
        split, "no" to use them as they are.
    safe_cover_margin : float, optional
        A game is a safe bet when the predicted line-adjusted score beats the
        predicted opponent score by more than this margin.
    safe_miss_margin : float, optional
        A game is also a safe bet when the predicted line-adjusted score plus
        this margin is still below the predicted opponent score.

    Returns
    -------
    tuple
        ``(final, X_train_scaled, y_train)`` where ``final`` is the unscaled
        test features joined with the test targets plus the columns
        'predictions', 'opp predictions', 'Cover?', 'safebet?' and
        'Pred_Cover?'.

    Raises
    ------
    ValueError
        If ``scaled`` is neither "yes" nor "no".
    """
    if scaled not in ("yes", "no"):
        raise ValueError('scaled must be "yes" or "no", got %r' % (scaled,))

    # Split on the Y argument; reading a notebook-level `y` here would make the
    # result depend on hidden kernel state.
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=100)

    if scaled=="yes":
        scaler_x = StandardScaler().fit(X_train)
        X_train_scaled = pd.DataFrame(scaler_x.transform(X_train),columns=X.columns)
        X_test_scaled = pd.DataFrame(scaler_x.transform(X_test),columns=X.columns)
    else:
        X_train_scaled=X_train
        X_test_scaled=X_test

    n_features = X_test_scaled.shape[1]

    # The estimator is reused, so each set of predictions has to be taken
    # before the next fit overwrites the learned parameters.
    predictions = model.fit(X_train_scaled, y_train['Score']).predict(X_test_scaled)
    _print_regression_metrics(y_test['Score'], predictions, n_features)

    predictions2 = model.fit(X_train_scaled, y_train['Opponent Score']).predict(X_test_scaled)
    _print_regression_metrics(y_test['Opponent Score'], predictions2, n_features)

    final=pd.concat([X_test, y_test],axis=1)

    # Matchup heuristic (applied after the regression metrics are printed): add
    # one point per offence/defence pairing whose summed ratings are positive.
    pass_edge = (final['PassOffense'] + final['Opponent PassDefense']) > 0
    rush_edge = (final['RushOffense'] + final['Opponent RushDefense']) > 0
    opp_pass_edge = (final['Opponent PassOffense'] + final['PassDefense']) > 0
    opp_rush_edge = (final['Opponent RushOffense'] + final['RushDefense']) > 0

    final['predictions'] = (predictions
                            + pass_edge.to_numpy().astype(int)
                            + rush_edge.to_numpy().astype(int))
    final['opp predictions'] = (predictions2
                                + opp_pass_edge.to_numpy().astype(int)
                                + opp_rush_edge.to_numpy().astype(int))

    # Actual outcome: the team covers when its score plus the line beats the opponent.
    final['Cover?'] = np.where(
        (final['Score'] + final['Line Open']) > final['Opponent Score'], 'Yes', 'No')

    # Predicted outcome, and whether the predicted margin is wide enough either way.
    line_adjusted = final['predictions'] + final['Line Open']
    clear_cover = line_adjusted > (final['opp predictions'] + safe_cover_margin)
    clear_miss = (final['predictions'] + (final['Line Open'] + safe_miss_margin)) < final['opp predictions']
    final['safebet?'] = np.where(clear_cover | clear_miss, 'Yes', 'No')
    final['Pred_Cover?'] = np.where(line_adjusted > final['opp predictions'], 'Yes', 'No')

    _print_cover_report("Model Results", final['Cover?'], final['Pred_Cover?'])

    safe_rows = final[final['safebet?'] == 'Yes']
    _print_cover_report("Safe Model Results", safe_rows['Cover?'], safe_rows['Pred_Cover?'])
    return final,X_train_scaled,y_train


In [36]:
def jointmodels(df1,df2):
    """Evaluate cover predictions on the games where two models agree.

    The 'Pred_Cover?' columns of both frames are compared row by row, by
    position rather than by index label. Accuracy, Cohen's kappa and a
    classification report are printed for the rows on which they match.

    Side effect: a 'combo' column ('Yes' where the two predictions match,
    'No' otherwise) is written onto ``df1`` itself, not onto a copy.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames with a 'Pred_Cover?' column. ``df1`` must also have 'Cover?'.
        Both must have the same number of rows.

    Returns
    -------
    pandas.Series
        The 'Cover?' values of ``df1`` for the rows where both models agree.

    Raises
    ------
    ValueError
        If the two frames differ in length, since a positional comparison
        would then pair up unrelated rows.
    """
    if len(df1) != len(df2):
        raise ValueError(
            "df1 and df2 must have the same number of rows (got %d and %d)"
            % (len(df1), len(df2)))

    agreement = ['Yes' if first == second else 'No'
                 for first, second in zip(df1['Pred_Cover?'], df2['Pred_Cover?'])]
    df1['combo']=agreement
    agreed=df1[df1['combo'] == 'Yes']

    print("Model Results")
    if agreed.empty:
        # Nothing to score: skip the sklearn metrics rather than calling them
        # with zero samples.
        print("The two models never agree; nothing to evaluate.")
        return agreed['Cover?']

    print("The accuracy of the model on test set is: %4.2f " % accuracy_score(agreed['Cover?'], agreed['Pred_Cover?']))
    print("The Kappa of your model is: %4.2f" % (cohen_kappa_score(agreed['Cover?'], agreed['Pred_Cover?'])))
    print("------ Classification_Report------ ")
    print(classification_report(agreed['Cover?'], agreed['Pred_Cover?']))
    print(" =================================================================== ")
    return agreed['Cover?']

In [6]:
# Location of the raw lines CSV. The default is machine-specific, so set the
# LINES_CSV environment variable to run the notebook elsewhere.
DATA_PATH = os.environ.get("LINES_CSV", r"C:\Users\Nickolaus Smith\Desktop\Final Project\lines4.csv")

data=pd.read_csv(DATA_PATH)
# Keep every row whose 'Home' value is not 'N'.
data = data[data.Home != 'N']
data=clean_features(data)
features_final,targets=split_data(data)
# Leave these columns out of the model inputs.
features_final=features_final.drop(['Offense DVOA','Defense DVOA','Opponent Offense DVOA','Opponent Defense DVOA','Win %', 'Opponent Win %'],axis=1)



In [7]:
# Model inputs are the prepared features; the targets are the two final scores.
X = features_final
y = targets.loc[:, ['Score', 'Opponent Score']]

In [26]:
# Random-forest run on standardised features; keep the evaluated test frame
# together with the training split that run_model returns.
RF_final2, X_train_scaled, y_train = run_model(
    RandomForestRegressor(
        max_depth=None,
        random_state=10,
        min_samples_split=3,
        min_samples_leaf=3,
    ),
    X,
    y,
    "yes",
)

The MSE value is:  95.9993
The RMSE value is:  9.7979
The R2 value is:  0.0592
The R_adj^{2} value  is:  0.0374
The MSE value is:  81.9506
The RMSE value is:  9.0527
The R2 value is:  0.133
The R_adj^{2} value  is:  0.1129
Model Results
The accuracy of the model on test set is: 0.56 
The Kappa of your model is: 0.11
<function cohen_kappa_score at 0x0000029988A34A60>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.60      0.61      0.60       364
         Yes       0.51      0.50      0.51       300

    accuracy                           0.56       664
   macro avg       0.55      0.55      0.55       664
weighted avg       0.56      0.56      0.56       664

Safe Model Results
The accuracy of the model on test set is: 0.58 
The Kappa of your model is: 0.06
<function cohen_kappa_score at 0x0000029988A34A60>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.61   

In [16]:
# Linear support-vector regression on standardised features; only the
# evaluated test frame (SVR_final) is of interest here.
SVR_final, z, q = run_model(
    make_pipeline(LinearSVR(random_state=0, tol=1e-5)),
    X,
    y,
    "yes",
)

The MSE value is:  91.3627
The RMSE value is:  9.5584
The R2 value is:  0.1046
The R_adj^{2} value  is:  0.0839
The MSE value is:  80.3968
The RMSE value is:  8.9664
The R2 value is:  0.1494
The R_adj^{2} value  is:  0.1297
Model Results
The accuracy of the model on test set is: 0.51 
The Kappa of your model is: 0.01
<function cohen_kappa_score at 0x0000029988A34A60>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.56      0.51      0.53       364
         Yes       0.46      0.50      0.48       300

    accuracy                           0.51       664
   macro avg       0.51      0.51      0.51       664
weighted avg       0.51      0.51      0.51       664

Safe Model Results
The accuracy of the model on test set is: 0.64 
The Kappa of your model is: 0.24
<function cohen_kappa_score at 0x0000029988A34A60>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.68  

In [21]:
# Ordinary least-squares baseline on standardised features; only the
# evaluated test frame (LR_final) is of interest here.
LR_final, z, q = run_model(
    linear_model.LinearRegression(),
    X,
    y,
    "yes",
)

The MSE value is:  92.3073
The RMSE value is:  9.6077
The R2 value is:  0.0954
The R_adj^{2} value  is:  0.0744
The MSE value is:  80.4546
The RMSE value is:  8.9697
The R2 value is:  0.1488
The R_adj^{2} value  is:  0.1291
Model Results
The accuracy of the model on test set is: 0.52 
The Kappa of your model is: 0.04
<function cohen_kappa_score at 0x0000029988A34A60>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.57      0.55      0.56       364
         Yes       0.47      0.49      0.48       300

    accuracy                           0.52       664
   macro avg       0.52      0.52      0.52       664
weighted avg       0.53      0.52      0.52       664

Safe Model Results
The accuracy of the model on test set is: 0.56 
The Kappa of your model is: -0.05
<function cohen_kappa_score at 0x0000029988A34A60>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.58 

In [37]:
# Score only the games on which the random forest and the linear regression
# make the same cover call.
JOINT = jointmodels(df1=RF_final2, df2=LR_final)

Model Results
The accuracy of the model on test set is: 0.56 
The Kappa of your model is: 0.12
<function cohen_kappa_score at 0x0000029988A34A60>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.59      0.62      0.60       237
         Yes       0.53      0.50      0.51       204

    accuracy                           0.56       441
   macro avg       0.56      0.56      0.56       441
weighted avg       0.56      0.56      0.56       441



In [46]:
# Show the random-forest safe bets straight from RF_final2. The `test`
# variable is only assigned in a later cell, so referencing it here fails on
# Restart & Run All.
RF_final2[RF_final2['safebet?'] == 'Yes']

Unnamed: 0,Line Open,Total DVOA,Special Teams DVOA,PassOffense,RushOffense,PassDefense,RushDefense,Opponent Total DVOA,Opponent Special Teams DVOA,Opponent PassOffense,Opponent RushOffense,Opponent PassDefense,Opponent RushDefense,Playoff Game?_N,Playoff Game?_Y,Score,Opponent Score,predictions,opp predictions,Cover?,safebet?,Pred_Cover?,combo
4248,4.0,-0.075,-0.056,-0.141,0.127,0.007,-0.042,0.164,-0.015,0.282,0.153,0.048,-0.12,1,0,28,38,19.646513,27.163455,No,Yes,No,Yes
3973,8.5,-0.419,0.062,-0.418,-0.139,0.217,-0.003,0.114,0.034,-0.038,0.184,0.052,-0.284,1,0,7,27,13.72048,28.025206,No,Yes,No,Yes
2883,-3.0,-0.316,-0.024,-0.196,-0.105,0.045,0.074,-0.184,-0.025,-0.013,-0.103,0.142,-0.137,1,0,7,24,19.556092,20.898841,No,Yes,No,Yes
2379,3.0,0.09,-0.043,0.347,0.117,0.054,-0.017,0.384,0.057,0.403,0.119,-0.152,-0.122,0,1,14,24,21.841267,28.471698,No,Yes,No,Yes
1198,-3.0,-0.195,-0.065,-0.165,-0.23,-0.066,-0.101,0.076,0.004,0.347,-0.008,0.175,-0.095,0,1,27,14,22.454753,23.589482,Yes,Yes,No,Yes
3616,7.0,-0.347,0.05,-0.334,-0.166,0.057,0.094,-0.057,0.015,0.232,-0.025,0.15,0.101,1,0,27,16,15.543439,27.28914,Yes,Yes,No,Yes
721,-3.5,-0.264,-0.033,-0.361,-0.263,-0.121,-0.139,-0.231,0.059,-0.241,-0.195,0.066,-0.051,1,0,23,27,16.702169,17.94195,No,Yes,No,Yes
2556,1.0,-0.138,-0.007,0.044,-0.138,0.143,-0.13,0.109,0.111,0.12,0.08,0.081,0.001,1,0,13,16,21.02322,25.731773,No,Yes,No,Yes
3240,3.0,0.059,-0.017,0.237,0.312,0.323,-0.056,0.126,-0.199,0.42,0.035,-0.093,-0.171,1,0,23,29,27.015253,24.860687,No,Yes,Yes,No
4067,3.0,-0.513,0.049,-0.525,-0.147,0.254,-0.029,-0.371,-0.041,-0.192,-0.135,0.194,-0.014,1,0,9,13,14.860324,21.661712,No,Yes,No,Yes


In [55]:
# Random-forest test games that were flagged as safe bets.
test = RF_final2.loc[RF_final2['safebet?'].eq('Yes')]

In [56]:
# Linear-regression test games that were flagged as safe bets.
LR_final.loc[LR_final['safebet?'].eq('Yes')]

Unnamed: 0,Line Open,Total DVOA,Special Teams DVOA,PassOffense,RushOffense,PassDefense,RushDefense,Opponent Total DVOA,Opponent Special Teams DVOA,Opponent PassOffense,Opponent RushOffense,Opponent PassDefense,Opponent RushDefense,Playoff Game?_N,Playoff Game?_Y,Score,Opponent Score,predictions,opp predictions,Cover?,safebet?,Pred_Cover?,combo
4248,4.0,-0.075,-0.056,-0.141,0.127,0.007,-0.042,0.164,-0.015,0.282,0.153,0.048,-0.12,1,0,28,38,19.646513,27.163455,No,Yes,No,Yes
3973,8.5,-0.419,0.062,-0.418,-0.139,0.217,-0.003,0.114,0.034,-0.038,0.184,0.052,-0.284,1,0,7,27,13.72048,28.025206,No,Yes,No,Yes
2883,-3.0,-0.316,-0.024,-0.196,-0.105,0.045,0.074,-0.184,-0.025,-0.013,-0.103,0.142,-0.137,1,0,7,24,19.556092,20.898841,No,Yes,No,Yes
2379,3.0,0.09,-0.043,0.347,0.117,0.054,-0.017,0.384,0.057,0.403,0.119,-0.152,-0.122,0,1,14,24,21.841267,28.471698,No,Yes,No,Yes
1198,-3.0,-0.195,-0.065,-0.165,-0.23,-0.066,-0.101,0.076,0.004,0.347,-0.008,0.175,-0.095,0,1,27,14,22.454753,23.589482,Yes,Yes,No,Yes
3616,7.0,-0.347,0.05,-0.334,-0.166,0.057,0.094,-0.057,0.015,0.232,-0.025,0.15,0.101,1,0,27,16,15.543439,27.28914,Yes,Yes,No,Yes
721,-3.5,-0.264,-0.033,-0.361,-0.263,-0.121,-0.139,-0.231,0.059,-0.241,-0.195,0.066,-0.051,1,0,23,27,16.702169,17.94195,No,Yes,No,Yes
2556,1.0,-0.138,-0.007,0.044,-0.138,0.143,-0.13,0.109,0.111,0.12,0.08,0.081,0.001,1,0,13,16,21.02322,25.731773,No,Yes,No,Yes
3240,3.0,0.059,-0.017,0.237,0.312,0.323,-0.056,0.126,-0.199,0.42,0.035,-0.093,-0.171,1,0,23,29,27.015253,24.860687,No,Yes,Yes,No
4067,3.0,-0.513,0.049,-0.525,-0.147,0.254,-0.029,-0.371,-0.041,-0.192,-0.135,0.194,-0.014,1,0,9,13,14.860324,21.661712,No,Yes,No,Yes


In [58]:
# Size of the safe-bet subset on which the model predicts a cover.
test.loc[test['Pred_Cover?'].eq('Yes')].shape


(40, 23)