In [7]:
import pandas as pd
import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.tree import DecisionTreeClassifier
from sklearn.impute import SimpleImputer

In [3]:
df = pd.read_csv('../nss_capstone/df_rebuild.csv').drop(columns = 'Unnamed: 0').fillna('')

In [4]:
df = df.apply(pd.to_numeric, errors='coerce')

In [5]:
df.columns

Index(['game_id', 'Date', 'VH', 'Team', 'Final', 'ML', 'ML_pct', 'Open Total',
       'Close Total', 'Open Spread', 'Close Spread', 'W/L_fav', 'W/L_dog',
       'ATS_fav', 'W/L_H_fav', 'W/L_V_fav', 'W/L_H_dog', 'W/L_V_dog',
       'ATS_H_fav', 'ATS_V_fav', 'ATS_H_dog', 'ATS_V_dog', 'Push'],
      dtype='object')

                                    ***********Random Forest******************

In [76]:
df = pd.read_csv('../nss_capstone/df_rebuild.csv')

variables = ['ML_pct',
             'Open Total',
            'Open Spread',
            'Close Total',
             'Close Spread']

X = df[variables]
y = df['W/L_bool']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

imputer = SimpleImputer(strategy='mean')

imputer.fit(X_train)

X_train_imputed = imputer.transform(X_train)
X_test_imputed = imputer.transform(X_test)

base_classifier = RandomForestClassifier()

calibrated_classifier = CalibratedClassifierCV(base_classifier, method='sigmoid', cv=5)

calibrated_classifier.fit(X_train_imputed, y_train)

probabilities = calibrated_classifier.predict_proba(X_test_imputed)

In [77]:
df_prob = pd.DataFrame(probabilities, columns=['Class 0 Probability', 'Class 1 Probability'])

In [78]:
df_prob

Unnamed: 0,Class 0 Probability,Class 1 Probability
0,0.592742,0.407258
1,0.603934,0.396066
2,0.746338,0.253662
3,0.327117,0.672883
4,0.686804,0.313196
...,...,...
7190,0.626295,0.373705
7191,0.628944,0.371056
7192,0.553616,0.446384
7193,0.643144,0.356856


In [79]:
df_val = pd.read_csv('../nss_capstone/df_val.csv').drop(columns='Unnamed: 0').fillna(0)

X_val_imputed = imputer.transform(df_val[variables])

probabilities = calibrated_classifier.predict_proba(X_val_imputed)

In [80]:
df_prob_val = pd.DataFrame(probabilities, columns=['Class 0 Probability', 'Class 1 Probability'])

In [81]:
df_prob_val.head(13)

Unnamed: 0,Class 0 Probability,Class 1 Probability
0,0.530163,0.469837
1,0.497032,0.502968
2,0.546788,0.453212
3,0.724295,0.275705
4,0.528382,0.471618
5,0.570039,0.429961
6,0.537592,0.462408
7,0.692377,0.307623
8,0.52918,0.47082
9,0.617364,0.382636


In [82]:
#df.loc[df['game_id'] == 17985]

In [17]:
a = pd.read_csv('../nss_capstone/df_22_23.csv').drop(columns='Unnamed: 0').fillna(0)


In [66]:
a[[ 'game_id', 'Team','ML', 'W/L_bool']].head(16)

Unnamed: 0,game_id,Team,ML,W/L_bool
0,1,Philadelphia,135,0
1,1,Boston,-155,1
2,2,LALakers,260,0
3,2,GoldenState,-310,1
4,3,Washington,-135,1
5,3,Indiana,115,0
6,4,Orlando,155,0
7,4,Detroit,-175,1
8,5,NewYork,165,0
9,5,Memphis,-185,1


                                ***************Gradient Boost************************

In [None]:
#df = pd.read_csv('../nss_capstone/df_rebuild.csv').drop(columns = 'Unnamed: 0').fillna('')

In [24]:
df = df.apply(pd.to_numeric, errors='coerce')

In [83]:


variables = ['ML_pct',
#             'Open Total',
#             'Open Spread',
            'Close Total',
            'Close Spread']

X = df[variables]
y = df['W/L_bool']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

imputer = SimpleImputer(strategy='mean')

imputer.fit(X_train)

X_train_imputed = imputer.transform(X_train)
X_test_imputed = imputer.transform(X_test)

base_classifier = GradientBoostingClassifier()

calibrated_classifier = CalibratedClassifierCV(base_classifier, method='sigmoid', cv=5)

calibrated_classifier.fit(X_train_imputed, y_train)

probabilities = calibrated_classifier.predict_proba(X_test_imputed)

In [84]:
df_prob = pd.DataFrame(probabilities, columns=['Class 0 Probability', 'Class 1 Probability'])

In [85]:
df_prob

Unnamed: 0,Class 0 Probability,Class 1 Probability
0,0.585916,0.414084
1,0.739254,0.260746
2,0.772610,0.227390
3,0.119879,0.880121
4,0.868386,0.131614
...,...,...
7190,0.650915,0.349085
7191,0.682900,0.317100
7192,0.586441,0.413559
7193,0.804945,0.195055


In [86]:
df_val = pd.read_csv('../nss_capstone/df_val.csv').drop(columns='Unnamed: 0').fillna(0)

X_val_imputed = imputer.transform(df_val[variables])

probabilities = calibrated_classifier.predict_proba(X_val_imputed)

In [87]:
df_prob_val = pd.DataFrame(probabilities, columns=['Class 0 Probability', 'Class 1 Probability'])

In [88]:
df_prob_val.head(13)

Unnamed: 0,Class 0 Probability,Class 1 Probability
0,0.361134,0.638866
1,0.572906,0.427094
2,0.393114,0.606886
3,0.771913,0.228087
4,0.287318,0.712682
5,0.56012,0.43988
6,0.417255,0.582745
7,0.643683,0.356317
8,0.390939,0.609061
9,0.672608,0.327392


In [32]:
val = pd.read_csv('../nss_capstone/df_22_23.csv').drop(columns='Unnamed: 0').fillna(0)

In [37]:
val[[ 'game_id', 'Team','ML', 'W/L_bool']].head(16)

Unnamed: 0,game_id,Team,ML,W/L_bool
0,1,Philadelphia,135,0
1,1,Boston,-155,1
2,2,LALakers,260,0
3,2,GoldenState,-310,1
4,3,Washington,-135,1
5,3,Indiana,115,0
6,4,Orlando,155,0
7,4,Detroit,-175,1
8,5,NewYork,165,0
9,5,Memphis,-185,1


                        *******************NAIVE BAYES***********************

In [89]:

variables = ['ML_pct',
#             'Open Total',
#             'Open Spread',
            'Close Total',
            'Close Spread']

X = df[variables]
y = df['W/L_bool']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

imputer = SimpleImputer(strategy='mean')

imputer.fit(X_train)

X_train_imputed = imputer.transform(X_train)
X_test_imputed = imputer.transform(X_test)

base_classifier = GaussianNB()

calibrated_classifier = CalibratedClassifierCV(base_classifier, method='sigmoid', cv=5)

calibrated_classifier.fit(X_train_imputed, y_train)

probabilities = calibrated_classifier.predict_proba(X_test_imputed)

In [90]:
df_prob = pd.DataFrame(probabilities, columns=['Class 0 Probability', 'Class 1 Probability'])

In [91]:
df_prob

Unnamed: 0,Class 0 Probability,Class 1 Probability
0,0.688711,0.311289
1,0.763661,0.236339
2,0.729117,0.270883
3,0.244127,0.755873
4,0.764080,0.235920
...,...,...
7190,0.690116,0.309884
7191,0.699117,0.300883
7192,0.753524,0.246476
7193,0.745158,0.254842


In [92]:
df_val = pd.read_csv('../nss_capstone/df_val.csv').drop(columns='Unnamed: 0').fillna(0)

X_val_imputed = imputer.transform(df_val[variables])

probabilities = calibrated_classifier.predict_proba(X_val_imputed)

In [93]:
df_prob_val = pd.DataFrame(probabilities, columns=['Class 0 Probability', 'Class 1 Probability'])

In [94]:
df_prob_val

Unnamed: 0,Class 0 Probability,Class 1 Probability
0,0.771188,0.228812
1,0.656538,0.343462
2,0.771188,0.228812
3,0.758739,0.241261
4,0.771188,0.228812
...,...,...
1323,0.764189,0.235811
1324,0.771188,0.228812
1325,0.770463,0.229537
1326,0.771188,0.228812
