# imports

In [1]:
import pandas
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from src.constants import *
from src.utility import *
from src.targets import *
from src.rolling_data import RollingData
from src.per_game_model import PerGameModel
from src.init_DFs.per_game import PerGameInit
from src.init_DFs.next_game import NextGameInit

# Create one instance of each project helper for use in the notebook.
# These classes are defined under src/ and their constructors are not visible
# here, so any work done at construction time is unknown from this cell.
pgModel = PerGameModel()
initPG = PerGameInit()
initNextGame = NextGameInit()

# configure

In [None]:
# Run configuration for the notebook.
season = "20_22"      # season folder and file-name suffix used in the CSV path below
situation = "5on5"    # situation tag embedded in the CSV file name below
rolling_num = 4       # NOTE(review): presumably the rolling-window size -- confirm against RollingData

# Target settings; not consumed in this cell (presumably used by later cells).
target = 'next_reg_win'
target_cols = ['reg_win']
target_operations = []

include_placebo = False
include_null_targets = False
null_target_value = 2

# Load the per-game, per-team table for the configured season and situation.
# The notebook imports the library as `pandas` (no `pd` alias is bound by any
# visible import), so call it by that name rather than relying on a star
# import to supply `pd`.
df = pandas.read_csv(CSV_DB_PATH + f"{season}/PER_GAME_BY_TEAM_{situation}_{season}.csv")


In [None]:
# Compare several scikit-learn classifiers with 10-fold cross-validation on
# the per-game CSV, print mean/std accuracy per model, and draw a box plot of
# the per-fold scores.

# load dataset
# NOTE(review): absolute, user-specific path; consider deriving it from a shared
# constant so the notebook runs on other machines.
csv_path = "C:/Users/Michael Beebe/OneDrive - University of North Carolina at Chapel Hill/Desktop/bet-nhl-data/"
db_env = "PER_GAME_BY_TEAM"
season = "all"  # NOTE: rebinds the `season` value set in the configuration cell
sit = "5on5"
csv_file = f"{db_env}_{sit}_{season}"

dataframe = pandas.read_csv(csv_path + f"{season}/{csv_file}.csv")
array = dataframe.values
# NOTE(review): the "first 8 columns are features, column 8 is the label" split
# looks carried over from the Pima Indians diabetes tutorial (8 features plus a
# class column) that this cell was adapted from. Confirm that these positions
# are the intended features/target in the PER_GAME_BY_TEAM table.
X = array[:, 0:8]
Y = array[:, 8]

# prepare configuration for cross validation test harness
seed = 7

# prepare models: (short label, unfitted estimator) pairs
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
# random_state only has an effect when shuffle=True; recent scikit-learn
# releases raise ValueError if a seed is passed with shuffle left at False.
# With an integer seed the splitter yields the same folds on every call, so
# one instance is built up front and shared by all models.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
for name, model in models:
    cv_results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = f"{name}: {cv_results.mean():f} ({cv_results.std():f})"
    print(msg)

# boxplot algorithm comparison: one box per model, labelled in evaluation order
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
ax.boxplot(results)
ax.set_xticklabels(names)
plt.show()