In [1]:
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.metrics import confusion_matrix,plot_confusion_matrix, cohen_kappa_score, accuracy_score, classification_report, fbeta_score, make_scorer
from sklearn.metrics import mean_squared_error, r2_score
import math
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
pd.set_option('display.max_columns', None)
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.svm import LinearSVR
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error
import seaborn as sns
from imblearn.under_sampling import TomekLinks


In [2]:
def clean_features(data):
    
    data=data.drop(columns=['Ties', 'Opponent Ties','Home','test'])
    data=data.dropna()
    data.columns=['Date', 'Team', 'Opponent', 'Score', 'Opponent Score',
       'Playoff Game?', 'Odds Open', 'Line Open', 'Win %', 'Total DVOA',
       'Offense DVOA', 'Defense DVOA', 'Special Teams DVOA', 'PassOffense',
       'RushOffense', 'PassDefense', 'RushDefense', 'Opponent Win %',
       'Opponent Total DVOA', 'Opponent Offense DVOA', 'Opponent Defense DVOA',
       'Opponent Special Teams DVOA', 'Opponent PassOffense',
       'Opponent RushOffense', 'Opponent PassDefense',
       'Opponent RushDefense', 'Wins', 'Losses', 'Opponent Wins',
       'Opponent Losses', 'Cover?']
    return data

In [3]:
def split_data(data):
    targets=data[['Score','Opponent Score','Odds Open','Line Open','Cover?']]
    features = data.drop(['Score','Opponent Score' ,'Odds Open', 'Wins' ,'Losses','Opponent Wins' ,'Opponent Losses' ,'Team', 'Date', 'Opponent','Cover?'],axis=1)
    features["Win %"] = pd.to_numeric(features["Win %"], downcast="float")
    features["Opponent Win %"] = pd.to_numeric(features["Opponent Win %"], downcast="float")
    numerical=features.select_dtypes(np.number)
    categorical=features.select_dtypes(np.object)
    dumcat=pd.get_dummies(categorical)
    features_final=pd.concat([numerical, dumcat],axis=1)
    return features_final,targets

In [16]:
def run_model(model,X,Y,scaled):

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100)
    
    
    if scaled=="yes":
        scaler_x = StandardScaler().fit(X_train)
        X_train_scaled = pd.DataFrame(scaler_x.transform(X_train),columns=X.columns)
        X_test_scaled = pd.DataFrame(scaler_x.transform(X_test),columns=X.columns)
    elif scaled=="no":
        X_train_scaled=X_train
        X_test_scaled=X_test
    pm = model
    regression = pm.fit(X_train_scaled, y_train['Score'])
    predictions = regression.predict(X_test_scaled)
    predictions_df = pd.DataFrame(predictions,columns=["pred_scores"])
    mse = mean_squared_error(y_test['Score'], predictions)
    print("The MSE value is: ",round(mse,4))
    
    rmse = math.sqrt(mse)
    print("The RMSE value is: ",round(rmse,4))
    
    r2 = r2_score(y_test['Score'], predictions)
    print("The R2 value is: ",round(r2,4))
    
    N = len(X_test_scaled)
    p = X_test_scaled.shape[1]
    adj_r2 = 1-((1-r2)*(N-1)/(N-p-1))
    print("The R_adj^{2} value  is: ",round(adj_r2,4))

    regression2 = pm.fit(X_train_scaled, y_train['Opponent Score'])
    predictions2 = regression2.predict(X_test_scaled)
    predictions_df2 = pd.DataFrame(predictions2,columns=["Opp_pred_scores"])
    
    
    mse = mean_squared_error(y_test['Opponent Score'], predictions2)
    print("The MSE value is: ",round(mse,4))
    
    rmse = math.sqrt(mse)
    print("The RMSE value is: ",round(rmse,4))
    
    r2 = r2_score(y_test['Opponent Score'], predictions2)
    print("The R2 value is: ",round(r2,4))
    
    N = len(X_test_scaled)
    p = X_test_scaled.shape[1]
    adj_r2 = 1-((1-r2)*(N-1)/(N-p-1))
    print("The R_adj^{2} value  is: ",round(adj_r2,4))
    
    final=pd.concat([X_test, y_test],axis=1)
    passoff=list(final['PassOffense'])
    rushoff=list(final['RushOffense'])
    opppassdef=list(final['Opponent PassDefense'])
    opprushdef=list(final['Opponent RushDefense'])
    opppassoff=list(final['Opponent PassOffense'])
    opprushoff=list(final['Opponent RushOffense'])
    passdef=list(final['PassDefense'])
    rushdef=list(final['RushDefense'])
    for i in range (len(passoff)):
        if (passoff[i]+opppassdef[i])>0:
            predictions[i]=predictions[i]+1
    for i in range (len(passoff)):
        if (rushoff[i]+opprushdef[i])>0:
            predictions[i]=predictions[i]+1
    for i in range (len(passoff)):
        if (opppassoff[i]+passdef[i])>0:
            predictions2[i]=predictions2[i]+1
    for i in range (len(rushoff)):
        if (opprushoff[i]+rushdef[i])>0:
            predictions2[i]=predictions2[i]+1
    
    final['predictions']=predictions
    final['opp predictions']=predictions2
    
    finalscore=list(final['Score'])
    finalline=list(final['Line Open'])
    finalopp=list(final['Opponent Score'])
    finalcover=[]
    for i in range(len(finalscore)):
        if (finalscore[i]+finalline[i])*1.0>finalopp[i]:
            finalcover.append('Yes')
        else:
            finalcover.append('No')
    final['Cover?']=finalcover
    
    predfinalscore=list(final['predictions'])
    predfinalline=list(final['Line Open'])
    predfinalopp=list(final['opp predictions'])
    predfinalcover=[]

      
    for i in range(len(predfinalscore)):
        if (predfinalscore[i])+predfinalline[i]>(predfinalopp[i]):
            predfinalcover.append('Yes')
        else:
            predfinalcover.append('No')
    
    safebet=[]
    for i in range(len(finalscore)):
        if (predfinalscore[i]+predfinalline[i])>(predfinalopp[i]+3.5):
            safebet.append('Yes')
        elif (predfinalscore[i]+(predfinalline[i]+5))<predfinalopp[i]:   
            safebet.append('Yes')
        else:
            safebet.append('No')
    final['safebet?']=safebet

    final['Pred_Cover?']=predfinalcover  
    final['Cover?']=finalcover
    
    safepredfinalscore=list(final['predictions'][final['safebet?'] == 'Yes'])
    safepredfinalline=list(final['Line Open'][final['safebet?'] == 'Yes'])
    safepredfinalopp=list(final['opp predictions'][final['safebet?'] == 'Yes'])
    safepredfinalcover=[]

      
    for i in range(len(safepredfinalscore)):
        if (safepredfinalscore[i])+safepredfinalline[i]>(safepredfinalopp[i]):
            safepredfinalcover.append('Yes')
        else:
            safepredfinalcover.append('No')
    
    print("Model Results")
    print("The accuracy of the model on test set is: %4.2f " % accuracy_score(final['Cover?'], predfinalcover))
    print("The Kappa of your model is: %4.2f" % (cohen_kappa_score(final['Cover?'], predfinalcover)))
    print(cohen_kappa_score)
    print("------ Classification_Report------ ")
    print(classification_report(final['Cover?'], predfinalcover))
    print(" =================================================================== ")
    


    print("Safe Model Results")
    print("The accuracy of the model on test set is: %4.2f " % accuracy_score(final['Cover?'][final['safebet?'] == 'Yes'], safepredfinalcover))
    print("The Kappa of your model is: %4.2f" % (cohen_kappa_score(final['Cover?'][final['safebet?'] == 'Yes'], safepredfinalcover)))
    print(cohen_kappa_score)
    print("------ Classification_Report------ ")
    print(classification_report(final['Cover?'][final['safebet?'] == 'Yes'], safepredfinalcover))
    print(" =================================================================== ")
    return final,X_train_scaled,y_train


In [22]:
def jointmodels(df1,df2) :   
    rfpred=list(df1['Pred_Cover?'])
    LRpred=list(df2['Pred_Cover?'])
    combo=[]
    for i in range(len(rfpred)):
        if rfpred[i]==LRpred[i]:
            combo.append('Yes')
        else:   
            combo.append('No')
    combodf=df1
    combodf['combo']=combo
    combodf2=combodf[combodf['combo'] == 'Yes']
    ombodfsafe=combodf2[combodf2['safebet?'] == 'Yes']

    print("Model Results")
    print("The accuracy of the model on test set is: %4.2f " % accuracy_score(combodf2['Cover?'], combodf2['Pred_Cover?']))
    print("The Kappa of your model is: %4.2f" % (cohen_kappa_score(combodf2['Cover?'], combodf2['Pred_Cover?'])))
    print(cohen_kappa_score)
    print("------ Classification_Report------ ")
    print(classification_report(combodf2['Cover?'], combodf2['Pred_Cover?']))
    print(" =================================================================== ")

In [7]:
data=pd.read_csv(r"C:\Users\Nickolaus Smith\Desktop\Final Project\lines4.csv")
data = data[data.Home != 'Y']
data=clean_features(data)
features_final,targets=split_data(data)
features_final=features_final.drop(['Offense DVOA','Defense DVOA','Opponent Offense DVOA','Opponent Defense DVOA','Win %', 'Opponent Win %'],axis=1)



In [8]:
X=features_final
y=targets[['Score','Opponent Score']]

In [17]:
RF_final2,X_train_scaled,y_train=run_model(RandomForestRegressor(max_depth=None, random_state=10,min_samples_split=3,min_samples_leaf=3),X,y,"yes")

The MSE value is:  81.9234
The RMSE value is:  9.0512
The R2 value is:  0.1332
The R_adj^{2} value  is:  0.1132
The MSE value is:  96.0524
The RMSE value is:  9.8006
The R2 value is:  0.0587
The R_adj^{2} value  is:  0.0369
Model Results
The accuracy of the model on test set is: 0.54 
The Kappa of your model is: 0.08
<function cohen_kappa_score at 0x00000194DCC74AF0>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.52      0.48      0.50       316
         Yes       0.56      0.60      0.58       348

    accuracy                           0.54       664
   macro avg       0.54      0.54      0.54       664
weighted avg       0.54      0.54      0.54       664

Safe Model Results
The accuracy of the model on test set is: 0.61 
The Kappa of your model is: 0.15
<function cohen_kappa_score at 0x00000194DCC74AF0>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.56  

In [15]:
SVR_final,z,q=run_model(make_pipeline(LinearSVR(random_state=0, tol=1e-5)),X,y,"yes")

The MSE value is:  80.3968
The RMSE value is:  8.9664
The R2 value is:  0.1494
The R_adj^{2} value  is:  0.1297
The MSE value is:  91.3627
The RMSE value is:  9.5584
The R2 value is:  0.1046
The R_adj^{2} value  is:  0.0839
Model Results
The accuracy of the model on test set is: 0.51 
The Kappa of your model is: 0.01
<function cohen_kappa_score at 0x00000194DCC74AF0>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.48      0.50      0.49       316
         Yes       0.53      0.51      0.52       348

    accuracy                           0.51       664
   macro avg       0.51      0.51      0.51       664
weighted avg       0.51      0.51      0.51       664

Safe Model Results
The accuracy of the model on test set is: 0.64 
The Kappa of your model is: 0.24
<function cohen_kappa_score at 0x00000194DCC74AF0>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.57  

In [23]:
jointmodels(RF_final2,SVR_final)

Model Results
The accuracy of the model on test set is: 0.54 
The Kappa of your model is: 0.08
<function cohen_kappa_score at 0x00000194DCC74AF0>
------ Classification_Report------ 
              precision    recall  f1-score   support

          No       0.53      0.49      0.51       212
         Yes       0.55      0.59      0.57       222

    accuracy                           0.54       434
   macro avg       0.54      0.54      0.54       434
weighted avg       0.54      0.54      0.54       434



In [25]:
SVR_final

Unnamed: 0,Line Open,Total DVOA,Special Teams DVOA,PassOffense,RushOffense,PassDefense,RushDefense,Opponent Total DVOA,Opponent Special Teams DVOA,Opponent PassOffense,Opponent RushOffense,Opponent PassDefense,Opponent RushDefense,Playoff Game?_N,Playoff Game?_Y,Score,Opponent Score,predictions,opp predictions,Cover?,safebet?,Pred_Cover?
826,7.0,0.101,-0.043,0.042,0.087,-0.112,-0.096,0.227,-0.076,0.509,0.046,0.009,-0.141,1,0,23,22,21.120465,27.979219,Yes,No,Yes
2607,-3.0,0.313,0.017,0.452,-0.145,-0.014,-0.183,0.027,-0.079,0.323,0.020,0.256,-0.197,1,0,24,20,28.556732,25.169724,Yes,No,Yes
4118,3.0,-0.155,-0.077,0.202,-0.107,0.244,0.012,-0.152,-0.054,-0.076,-0.209,-0.084,-0.101,1,0,27,11,21.911327,23.135036,Yes,No,Yes
4264,-4.0,0.164,-0.015,0.282,0.153,0.048,-0.120,-0.075,-0.056,-0.141,0.127,0.007,-0.042,1,0,38,28,26.592800,18.060606,Yes,Yes,Yes
3598,2.0,0.041,-0.041,0.155,0.066,-0.025,-0.065,0.288,0.012,0.149,0.051,-0.159,-0.266,1,0,6,44,20.438373,22.229424,No,No,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2332,-1.0,-0.057,-0.039,0.254,0.116,0.277,-0.044,0.126,-0.030,0.367,0.138,0.232,-0.132,1,0,27,13,25.506104,25.562990,Yes,No,No
3039,3.5,-0.021,0.016,0.116,-0.130,0.031,-0.038,0.065,-0.014,0.165,-0.067,0.050,-0.197,1,0,34,27,21.052627,24.362504,Yes,No,Yes
1103,2.5,0.010,-0.098,0.342,-0.170,-0.074,-0.010,0.065,0.053,0.123,0.062,0.077,0.024,1,0,28,6,22.513266,25.541665,Yes,No,No
2836,-2.0,-0.145,-0.077,0.137,0.194,0.220,0.137,-0.338,-0.029,-0.279,-0.138,-0.019,0.053,1,0,38,19,24.065652,16.897129,Yes,Yes,Yes


In [27]:
SVR_final[SVR_final['safebet?'] == 'Yes'].shape

(42, 22)

In [28]:
RF_final2[RF_final2['safebet?'] == 'Yes'].shape

(155, 23)

In [29]:
SVR_final.shape


(664, 22)