In [1]:
!pip install pybaseball



In [2]:
import pandas as pd
import numpy as np
import pybaseball as pyb

In [3]:
# Pull Statcast data for the date window into a data frame (slow, chunked query).
statcast_data = pyb.statcast(start_dt='2022-04-07', end_dt='2022-06-07')

# Keep only the columns used later and discard rows with any missing value.
relevant_columns = ['stand', 'launch_angle', 'launch_speed', 'hc_x', 'hc_y', 'events']
data = statcast_data[relevant_columns].dropna()

# 'ba' is a per-row hit indicator: it starts at 0 and is set to 1 for hit events.
hit_events = ['single', 'double', 'triple', 'home_run']
data['ba'] = 0
data.loc[data['events'].isin(hit_events), 'ba'] = 1.0

# Remove the sac_fly, sac_bunt and field_error rows entirely.
excluded_events = ['sac_fly', 'sac_bunt', 'field_error']
data = data[~data['events'].isin(excluded_events)]


This is a large query, it may take a moment to complete


100%|██████████████████████████████████████████████████████████████████████████████████| 62/62 [00:38<00:00,  1.63it/s]


In [4]:
# hc_x and hc_y represent the location where the batted ball was fielded.
# The angle is measured around the reference point (HOME_X, HOME_Y) of that
# coordinate system.
# NOTE(review): presumably this point is home plate -- confirm against the
# Statcast coordinate documentation.
HOME_X = 125.42
HOME_Y = 198.27

# Work on local arrays instead of overwriting data.hc_x / data.hc_y: an
# in-place shift is applied again every time this cell is re-run, which
# silently changes the coordinates and every angle derived from them.
x_offset = data['hc_x'].to_numpy(dtype=float) - HOME_X
y_offset = HOME_Y - data['hc_y'].to_numpy(dtype=float)

# arctan2(x, y) equals arctan(x / y) whenever y_offset > 0, but it also keeps
# the correct quadrant when y_offset < 0 and avoids the inf/NaN that the plain
# division produces when y_offset == 0.
# The 0.75 scale factor is kept unchanged from the original formula.
angle_deg = np.round(np.arctan2(x_offset, y_offset) * 180 / np.pi * 0.75)

# spray angle is dependent on the hitter's handedness: flip the sign for
# left-handed batters so both sides share one sign convention.
is_left_handed = (data['stand'] == 'L').to_numpy()
spray_angle = pd.Series(np.where(is_left_handed, -angle_deg, angle_deg), index=data.index)
data['spray_angle'] = spray_angle

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Modelling table: drop the raw coordinates and the event label, and encode the
# batter side numerically (R -> 0, L -> 1).
rf_data = (
    data
    .drop(columns=['hc_x', 'hc_y', 'events'])
    .assign(stand=lambda frame: frame['stand'].map({'R': 0, 'L': 1}))
)
print(rf_data.tail(10))

# The hit indicator is the target; every remaining column is a feature.
y = rf_data['ba']
X = rf_data.drop(columns=['ba'])

      stand  launch_angle  launch_speed  ba  spray_angle
468       1            21         105.3   1        -12.0
902       0            -2          96.7   0        -10.0
930       1            18          95.8   0         -3.0
1242      0           -10          85.4   1         35.0
896       1             4          85.8   0        -25.0
1509      0           -26          80.2   0         -2.0
2029      1           -53          88.7   0         -7.0
1005      0            64          81.5   0         36.0
1389      0           -34          80.7   0         -9.0
1701      1            15          70.0   1         21.0


In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4,
)

In [8]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [9]:
# Alternative scikit-learn classifiers kept alongside the XGBoost model.
# Both train with randomness, so a fixed random_state makes their results
# repeatable; 4 matches the seed already used for train_test_split.
rf = RandomForestClassifier(class_weight='balanced', random_state=4)
gb = GradientBoostingClassifier(random_state=4)

# Uncomment to train and evaluate the random forest instead of XGBoost.
#rf.fit(X_train, y_train)
#y_pred = rf.predict(X_test)

In [10]:
pip install xgboost

Note: you may need to restart the kernel to use updated packages.


In [11]:
import xgboost as xgb

# Train an XGBoost classifier with its default settings on the training split,
# then predict the hit indicator for the held-out rows.
xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)

In [12]:
from sklearn.metrics import classification_report

In [13]:
# Per-class precision, recall and F1 of the predictions on the held-out split.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.87      0.90      0.88      5538
           1       0.78      0.71      0.75      2711

    accuracy                           0.84      8249
   macro avg       0.82      0.81      0.81      8249
weighted avg       0.84      0.84      0.84      8249



In [14]:
!pip install seaborn 



In [15]:
!pip install ipywidgets --upgrade



In [16]:
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import interact
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap


In [17]:
# Accent colour shared by the slider handles, the button and the angle plots.
cs = '#f79168'
ss = {'handle_color': cs}

# Input controls for one hypothetical batted ball.
ev = widgets.IntSlider(description='Exit Velocity', min=10, max=125, style=ss)
la = widgets.IntSlider(description='Launch Angle', min=-90, max=90, style=ss)
sa = widgets.IntSlider(description='Spray Angle', min=-45, max=45, style=ss)
stand = widgets.RadioButtons(options=['Right Handed Hitter', 'Left Handed Hitter'])

# Output area that receives the figures, and the button that triggers a prediction.
output = widgets.Output()
enter = widgets.Button(description='Predict Outcome')
enter.style.button_color = cs

# Lay the controls out in one row and show them above the output area.
hbox = widgets.HBox([ev, la, sa, stand, enter])
hbox.layout.margin = 'auto'
display(hbox, output)

def plot_angles(ball, outcome):
    """Show the prediction text next to polar views of the two angles.

    Parameters
    ----------
    ball : 2-D array-like
        Only the first row is read. Entry 0 is the batter-side code (0 selects
        the right-handed layout), entry 1 the launch angle in degrees and
        entry 3 the spray angle in degrees. Entry 2 is not drawn.
    outcome : str
        Text centred in the first panel.
    """
    row = ball[0]
    batter_side = row[0]
    launch_rad = np.radians(row[1])
    spray_rad = np.radians(row[3])

    fig, panels = plt.subplots(1, 3, figsize=(15, 5), subplot_kw={'projection': 'polar'})
    text_ax, launch_ax, spray_ax = panels

    # Panel 1: prediction text only, with the axes hidden.
    text_ax.text(0, 0, outcome, fontsize=20, ha='center', va='center')
    text_ax.axis('off')

    # Panel 2: launch angle, zero pointing east, limited to -90..90 degrees.
    launch_ax.plot([launch_rad, launch_rad], [0, 1], linewidth=4, color=cs)
    launch_ax.set_theta_zero_location('E')
    launch_ax.set_xlim(-np.pi / 2, np.pi / 2)
    launch_ax.set_title('Launch Angle')

    # Panel 3: spray angle, zero pointing north, limited to -45..45 degrees.
    # For batter-side code 0 the angular direction is reversed (clockwise).
    spray_ax.plot([spray_rad, spray_rad], [0, 1], linewidth=4, color=cs)
    if batter_side == 0:
        spray_ax.set_theta_direction(-1)
        spray_title = 'Spray Angle RHH'
    else:
        spray_title = 'Spray Angle LHH'
    spray_ax.set_title(spray_title)
    spray_ax.set_theta_zero_location('N')
    spray_ax.set_xlim(-np.pi / 4, np.pi / 4)

    plt.show()

def plot_graphs(graph_ball):
    """Scatter the projected ball against every ball in ``rf_data``.

    Three panels are drawn: exit velocity vs launch angle, exit velocity vs
    spray angle, and launch angle vs spray angle. Points are coloured by
    outcome (Out, Hit, or the projected ball itself).

    Parameters
    ----------
    graph_ball : 2-D array-like
        Row(s) in the column order
        ['stand', 'launch_angle', 'launch_speed', 'ba', 'spray_angle'].
        A ``ba`` value of 2 marks the row as the projected ball.
    """
    outcome_map = {0: 'Out', 1: 'Hit', 2: 'Projected Ball'}

    # Pin each label to its colour and fix the hue order. With a plain colour
    # list, seaborn pairs colours with labels in order of first appearance, so
    # the meaning of each colour would depend on which outcome comes first in
    # the data (and would shift if a class were absent).
    palette = {'Out': 'black', 'Hit': '#a9b0ba', 'Projected Ball': '#f79168'}
    hue_order = list(outcome_map.values())

    input_df = pd.DataFrame(
        graph_ball,
        columns=['stand', 'launch_angle', 'launch_speed', 'ba', 'spray_angle'],
    )
    # The projected ball is appended last, after all rows of rf_data.
    all_balls = pd.concat([rf_data, input_df], ignore_index=True)
    outcomes = all_balls.ba.map(outcome_map)

    # (x column, y column, x label, y label, title, legend location) per panel.
    panels = [
        ('launch_angle', 'launch_speed', 'Launch Angle', 'Exit Velocity',
         'Outcome by Exit Velo & Launch Angle', 'lower left'),
        ('spray_angle', 'launch_speed', 'Spray Angle', 'Exit Velocity',
         'Outcome by Exit Velo & Spray Angle', 'lower left'),
        ('spray_angle', 'launch_angle', 'Spray Angle', 'Launch Angle',
         'Outcome by Spray Angle & Launch Angle', 'upper left'),
    ]

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    for ax, (x_col, y_col, x_label, y_label, title, legend_loc) in zip(axes, panels):
        sns.scatterplot(
            x=x_col,
            y=y_col,
            data=all_balls,
            hue=outcomes,
            hue_order=hue_order,
            palette=palette,
            ax=ax,
        )
        ax.set_title(title)
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        ax.legend(title='', loc=legend_loc, ncol=3)

    plt.tight_layout()
    plt.show()

def clicked(_):
    """Button callback: predict the outcome for the current widget values and plot it.

    The single argument (passed by ``on_click``) is ignored. Everything is
    rendered inside the ``output`` widget, which is cleared first.
    """
    with output:
        output.clear_output()

        # Encode the batter side the same way as the training data (R -> 0, L -> 1).
        side_code = 1 if stand.value == 'Left Handed Hitter' else 0
        exit_velo = ev.value
        launch_deg = la.value
        spray_deg = sa.value

        # One-row feature matrix in the column order the model was trained on.
        ball = np.array([[side_code, launch_deg, exit_velo, spray_deg]])
        feature_names = ['stand', 'launch_angle', 'launch_speed', 'spray_angle']
        balls = pd.DataFrame(ball, columns=feature_names)

        #prediction = rf.predict(balls)
        prediction = xgb_model.predict(balls)
        outcome = 'Prediction: Out' if prediction == 0 else 'Prediction: Hit'
        plot_angles(ball, outcome)

        # Same ball with 2 in the 'ba' position so plot_graphs labels it as the projected ball.
        graph_ball = np.array([[side_code, launch_deg, exit_velo, 2, spray_deg]])
        plot_graphs(graph_ball)


# Run the callback whenever the "Predict Outcome" button is pressed.
enter.on_click(clicked)

HBox(children=(IntSlider(value=10, description='Exit Velocity', max=125, min=10, style=SliderStyle(handle_colo…

Output()