In [None]:
#from pybaseball import statcast, statcast_batter, playerid_lookup
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_score, KFold
import xgboost

import pybaseball
from pybaseball import statcast, statcast_batter, playerid_lookup, spraychart, statcast_sprint_speed, statcast_running_splits


# Scheme

Step 1:

    If pitch type is FF/SI/FC:
    - Use FB model scheme
        - Predict Contact/SwStr Event, probabilities 
        - Predict Foul/Fair Event, probabilities
        - Predict In-Play LA/EV bucket, probabilities

    If pitch type is CH/FS:
    - Use OS model scheme
        - Predict Contact/SwStr Event, probabilities 
        - Predict Foul/Fair Event, probabilities
        - Predict In-Play LA/EV bucket, probabilities

    Else:
    - Use BrBall model scheme
        - Predict Contact/SwStr Event, probabilities 
        - Predict Foul/Fair Event, probabilities
        - Predict In-Play LA/EV bucket, probabilities


Step 2:
    
    Find context-neutral run values (RVs) of fouls, swinging strikes, and balls in play (all buckets).
    
    With the predicted probabilities and the context-neutral RVs of fouls, swinging strikes, and balls in play (all buckets):
        - Multiply each outcome's probability by its RV (found in the neutral_run_vals table)

## Run Values

In [None]:
# Load the training data that the run-value tables below are computed from.
# low_memory=False makes pandas read the file in one pass instead of chunks.
all_data = pd.read_csv(
    'TrainDataNonClustered.csv',
    low_memory=False,
)

In [None]:
# Collapse swinging and called strikes into a single 'strike' label so that
# both are averaged together when run values are grouped by description.
all_data['description'] = all_data['description'].replace(
    ['swinging_strike', 'called_strike'], 'strike')

In [None]:
def _mean_run_value_by(frame, key):
    """Return the mean ``delta_run_exp`` for each ``key`` group, rounded to 3 decimals.

    The result is a two-column frame: the grouping column and ``delta_run_exp``.
    """
    return frame.groupby([key])['delta_run_exp'].mean().round(3).reset_index()


# Context-neutral run values per pitch description. The 'hit_into_play' row is
# dropped because balls in play are valued by the bucket tables below instead.
rvs = _mean_run_value_by(all_data, 'description')
non_bip_rvs = rvs.loc[rvs['description'] != 'hit_into_play']

# Context-neutral run values per LA_EV bucket; the key column is renamed to
# 'description' so this table has the same columns as non_bip_rvs.
LA_EV_rvs = _mean_run_value_by(all_data, 'LA_EV').rename(
    columns={'LA_EV': 'description'})

# Context-neutral run values per Spray_Bucket, with the same column rename.
Spray_rvs = _mean_run_value_by(all_data, 'Spray_Bucket').rename(
    columns={'Spray_Bucket': 'description'})

In [None]:
# Stack the three run-value tables into one frame with a fresh 0..n-1 index.
all_rvs = pd.concat([non_bip_rvs, LA_EV_rvs, Spray_rvs], ignore_index=True)

# Force the run value of a foul to 0, overriding its computed mean.
all_rvs['delta_run_exp'] = all_rvs['delta_run_exp'].mask(
    all_rvs['description'] == 'foul', 0)
all_rvs

In [None]:
# Persist the combined run-value table; the row index is not written.
all_rvs.to_csv(
    'RunValues.csv',
    index=False,
)

# Training

## Fastballs

In [None]:
# Load the fastball training frames: contact, foul, and two in-play sets.
fb_stf_con = pd.read_csv(
    'Data/Models/FB Models/Stuff_FB_Contact.csv', low_memory=False)
fb_stf_foul = pd.read_csv(
    'Data/Models/FB Models/Stuff_FB_Foul.csv', low_memory=False)
fb_stf_inplay1 = pd.read_csv(
    'Data/Models/FB Models/Stuff_FB_InPlay1.csv', low_memory=False)
fb_stf_inplay2 = pd.read_csv(
    'Data/Models/FB Models/Stuff_FB_InPlay2.csv', low_memory=False)

In [None]:
# Split each fastball frame into a feature matrix (X: every column except the
# target) and a target vector (y: the target column alone).

# Contact model: target is Contact_Event.
fb_stf_con_X = fb_stf_con.drop(columns='Contact_Event')
fb_stf_con_y = fb_stf_con['Contact_Event']

# Foul model: target is Foul_Event.
fb_stf_foul_X = fb_stf_foul.drop(columns='Foul_Event')
fb_stf_foul_y = fb_stf_foul['Foul_Event']

# In-play model: target is the LA_EV bucket. Only fb_stf_inplay1 is split in this cell.
fb_stf_bip1_X = fb_stf_inplay1.drop(columns='LA_EV')
fb_stf_bip1_y = fb_stf_inplay1['LA_EV']

## Offspeed

In [None]:
# Load the offspeed training frames: contact, foul, and in-play.
os_contact = pd.read_csv(
    'Data/Models/Contact_Models/Stuff_OS_Contact.csv', low_memory=False)
os_foul = pd.read_csv(
    'Data/Models/Foul_Models/Stuff_OS_Foul.csv', low_memory=False)
os_inplay = pd.read_csv(
    'Data/Models/In_Play_Models/Stuff_OS_InPlay.csv', low_memory=False)

In [None]:
# Split each offspeed frame into a feature matrix (X: every column except the
# target) and a target vector (y: the target column alone).

# Contact model: target is Contact_Event.
os_con_X = os_contact.drop(columns='Contact_Event')
os_con_y = os_contact['Contact_Event']

# Foul model: target is Foul_Event.
os_foul_X = os_foul.drop(columns='Foul_Event')
os_foul_y = os_foul['Foul_Event']

# In-play model: target is the LA_EV bucket.
os_bip_X = os_inplay.drop(columns='LA_EV')
os_bip_y = os_inplay['LA_EV']

## Breaking Balls

In [None]:
# Load the breaking-ball training frames: contact, foul, and in-play.
brba_contact = pd.read_csv(
    'Data/Models/Contact_Models/Stuff_BrBall_Contact.csv', low_memory=False)
brba_foul = pd.read_csv(
    'Data/Models/Foul_Models/Stuff_BrBall_Foul.csv', low_memory=False)
brba_inplay = pd.read_csv(
    'Data/Models/In_Play_Models/Stuff_BrBall_InPlay.csv', low_memory=False)

In [None]:
# Split each breaking-ball frame into a feature matrix (X: every column except
# the target) and a target vector (y: the target column alone).

# Contact model: target is Contact_Event.
brba_con_X = brba_contact.drop(columns='Contact_Event')
brba_con_y = brba_contact['Contact_Event']

# Foul model: target is Foul_Event.
brba_foul_X = brba_foul.drop(columns='Foul_Event')
brba_foul_y = brba_foul['Foul_Event']

# In-play model: target is the LA_EV bucket.
brba_bip_X = brba_inplay.drop(columns='LA_EV')
brba_bip_y = brba_inplay['LA_EV']