In [None]:
import json
import os

import numpy as np
import pandas as pd
import requests
import seaborn as sn
import statsmodels.api as sm
from sklearn import metrics
from sklearn.model_selection import train_test_split

import app

### Predictors

In [None]:
# Fetch the 2021 schedule through the imported `app` module and display it.
schedule = app.get_schedule(year=2021)
schedule

In [None]:
# Restrict the schedule to rows whose game_time is exactly 'Final'.
schedule = schedule.loc[schedule['game_time'] == 'Final']

# Keep only the distinct dates whose month field sorts after '05'; games
# early in the season are ignored because there is not enough data yet.
# The comparison is made on strings, so it relies on zero-padded months
# (assumes 'YYYY-MM-DD'-style date strings -- TODO confirm).
game_dates = [
    game_date
    for game_date in schedule['game_date'].unique()
    if game_date.split('-')[1] > '05'
]
game_dates

In [None]:
# Ask the service on localhost:5000 for the table data of every retained game
# date in one request; the dates are sent as a single comma-separated value.
date_param = ','.join(game_dates)
df = pd.read_json('http://localhost:5000/loadTableData?hitMin=10&date={}'.format(date_param))
df

In [None]:
# Columns that are not wanted downstream. Only those actually present in the
# frame are dropped, so a missing label never raises.
unwanted_columns = {'batter', 'game_pk', 'probability', 'B', 'name', 'team', 'G_weighted', 'H_weighted'}
df = df.drop(columns=[col for col in df.columns if col in unwanted_columns])

# Collapse `hit` into a binary target: 1 when the value is positive, else 0.
df['hit'] = (df['hit'] > 0).astype('int64')
df.columns

In [None]:
# Keep rows with H_total of at least 30 and an `order` value above -1.
meets_h_total = df['H_total'] >= 30
has_order = df['order'] > -1
df = df.loc[meets_h_total & has_order]
df

### Model

In [None]:
# Columns used as explanatory variables for the model.
predictors = [
    'H_per_BF_vs_B_Hand',
    'H_per_PA_vs_BP',
    'H_per_PA_vs_SP_Hand',
    'hit_bullpen',
    'hit_pct_total',
    'hit_pct_weighted',
    'order',
    'xBA_bullpen',
    'xH_per_G_total',
    'xH_per_G_weighted',
    'x_hit_pct_total',
    'x_hit_pct_weighted',
]
X = df.loc[:, predictors]
y = df.loc[:, 'hit']

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Count how many training rows fall into each value of the target.
y_train.value_counts()

In [None]:
# Display the training rows that have a missing value in at least one predictor.
rows_with_missing = X_train.isna().any(axis=1)
X_train.loc[rows_with_missing]

In [None]:
# Fit a logistic regression of the binary target on the training predictors
# (cast to float), using statsmodels' default fit settings, then show the
# fitted summary table.
# NOTE(review): sm.Logit uses the design matrix exactly as given and does not
# add an intercept column on its own -- confirm that fitting without a
# constant term is intended.
logit_spec = sm.Logit(endog=y_train, exog=X_train.astype(float))
model = logit_spec.fit()
model.summary()

In [None]:
# Score the held-out rows with the fitted model.
y_pred = model.predict(X_test.astype(float))

# Attach the predicted probability to a copy of the test predictors (the
# original X_test is left untouched) and list the rows from highest to lowest.
X_test_copy = X_test.assign(probability=y_pred)
X_test_copy.sort_values(by='probability', ascending=False)

In [None]:
# Probability cutoff: predictions at or above this value are counted as class 1
# when the confusion matrix is built.
threshold = 0.6

In [None]:
# Turn predicted probabilities into 0/1 labels: 1 when the probability reaches
# the cutoff, otherwise 0.
y_pred_label = (y_pred >= threshold).astype('int64')

# Cross-tabulate actual outcomes against predicted labels and draw the counts
# as an annotated heatmap.
confusion_matrix = pd.crosstab(
    y_test,
    y_pred_label,
    rownames=['Actual'],
    colnames=['Predicted'],
)
sn.heatmap(confusion_matrix, annot=True, cmap='Blues', fmt='g')

In [None]:
# Persist the fitted model as a pickle file.
# The destination defaults to the location this notebook has always written
# to, but can be overridden with the BTS_MODEL_PATH environment variable so the
# notebook also runs on machines where that absolute, user-specific path does
# not exist.
model_path = os.environ.get(
    'BTS_MODEL_PATH',
    '/Users/peterberryman/Desktop/bts_advisor/log_reg_model.pickle',
)
model.save(model_path)