In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from pybaseball import statcast #https://github.com/jldbc/pybaseball
from pybaseball import  playerid_lookup
from pybaseball import  statcast_batter

### Inspiration:
1. https://github.com/jwilsonds/swing_probability_model/blob/master/votto%20swing%20probability.ipynb 

In [6]:
# Search the player register by name (here: Juan Soto).
# The `key_mlbam` column of the result is the id used by the Statcast queries below.
last_name, first_name = 'soto', 'juan'
playerid_lookup(last_name, first_name)

Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last
0,soto,juan,665742,sotoj001,sotoju01,20123,2018.0,2024.0


In [7]:
# Pull every pitch thrown to Juan Soto within the one-year window below.
soto_mlbam_id = 665742  # `key_mlbam` value from the lookup result
window_start, window_end = '2023-04-21', '2024-04-21'
soto_stats = statcast_batter(window_start, window_end, soto_mlbam_id)
soto_stats

Gathering Player Data


Unnamed: 0,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,player_name,batter,pitcher,events,description,...,fld_score,post_away_score,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment,spin_axis,delta_home_win_exp,delta_run_exp
0,CU,2024-04-21,76.7,-0.78,6.51,"Soto, Juan",665742,650644,field_out,hit_into_play,...,1,1,1,1,1,Infield shade,Standard,43.0,-0.029,-0.392
1,SI,2024-04-21,91.1,-1.13,6.24,"Soto, Juan",665742,650644,,ball,...,1,1,1,1,1,Infield shade,Standard,209.0,0.000,0.116
2,SI,2024-04-21,92.2,-1.14,6.23,"Soto, Juan",665742,650644,,called_strike,...,1,1,1,1,1,Infield shade,Standard,210.0,0.000,-0.060
3,FF,2024-04-21,91.2,-0.93,6.45,"Soto, Juan",665742,650644,,ball,...,1,1,1,1,1,Infield shade,Standard,206.0,0.000,0.065
4,FS,2024-04-21,84.7,-1.18,6.23,"Soto, Juan",665742,650644,,ball,...,1,1,1,1,1,Infield shade,Standard,232.0,0.000,0.040
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3118,FC,2023-04-21,88.5,-2.93,5.83,"Soto, Juan",665742,668678,,called_strike,...,0,0,0,0,0,Infield shade,Standard,185.0,0.000,-0.038
3119,KC,2023-04-21,84.7,-2.93,5.86,"Soto, Juan",665742,668678,field_out,hit_into_play,...,0,0,0,0,0,Infield shade,Standard,44.0,0.015,-0.196
3120,CH,2023-04-21,85.0,-2.97,5.81,"Soto, Juan",665742,668678,,ball,...,0,0,0,0,0,Infield shade,Standard,228.0,0.000,0.034
3121,CH,2023-04-21,87.7,-3.09,5.80,"Soto, Juan",665742,668678,,called_strike,...,0,0,0,0,0,Infield shade,Standard,221.0,0.000,-0.031


Useful site to understand what the columns mean: https://baseballsavant.mlb.com/csv-docs

In [10]:
# List the distinct pitch outcomes so we can decide which ones count as swings.
pd.unique(soto_stats['description'])

array(['hit_into_play', 'ball', 'called_strike', 'foul', 'blocked_ball',
       'swinging_strike', 'foul_tip', 'hit_by_pitch', 'missed_bunt',
       'foul_bunt', 'swinging_strike_blocked'], dtype=object)

#### Problem statement: Out of all swings, predict which ones will be a successful hit (i.e. single, double, triple, home run)
- Therefore, we only care about the descriptions 'hit_into_play', 'foul', 'swinging_strike', 'foul_tip', 'missed_bunt', 'foul_bunt', and 'swinging_strike_blocked'

In [22]:
# Keep only the pitches Soto offered at (bunt attempts included). The remaining
# descriptions ('ball', 'called_strike', 'blocked_ball', 'hit_by_pitch') are dropped.
swing_descriptions = [
    'hit_into_play',
    'foul',
    'swinging_strike',
    'foul_tip',
    'missed_bunt',
    'foul_bunt',
    'swinging_strike_blocked',
]
soto_stats = soto_stats[soto_stats['description'].isin(swing_descriptions)]

soto_stats

Unnamed: 0,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,player_name,batter,pitcher,events,description,...,fld_score,post_away_score,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment,spin_axis,delta_home_win_exp,delta_run_exp
0,CU,2024-04-21,76.7,-0.78,6.51,"Soto, Juan",665742,650644,field_out,hit_into_play,...,1,1,1,1,1,Infield shade,Standard,43.0,-0.029,-0.392
5,FC,2024-04-21,88.7,-0.96,6.42,"Soto, Juan",665742,650644,field_out,hit_into_play,...,0,0,1,1,0,Infield shade,Standard,154.0,-0.043,-0.504
6,SI,2024-04-21,92.3,-1.01,6.38,"Soto, Juan",665742,650644,,foul,...,0,0,1,1,0,Infield shade,Standard,215.0,0.000,-0.146
8,FF,2024-04-21,92.0,-0.84,6.41,"Soto, Juan",665742,650644,,foul,...,0,0,1,1,0,Infield shade,Standard,208.0,0.000,-0.147
12,FC,2024-04-21,90.2,-2.35,5.68,"Soto, Juan",665742,541640,,foul,...,4,4,5,5,4,Infield shade,Standard,196.0,0.000,-0.026
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3102,FS,2023-04-22,86.2,-1.70,5.98,"Soto, Juan",665742,543518,single,hit_into_play,...,3,4,3,4,3,Infield shade,Standard,217.0,-0.033,0.398
3105,FF,2023-04-22,92.8,-1.70,5.55,"Soto, Juan",665742,518876,field_out,hit_into_play,...,0,1,0,1,0,Infield shade,Standard,211.0,0.014,-0.162
3109,FF,2023-04-21,95.2,-2.79,5.93,"Soto, Juan",665742,668678,,foul,...,2,0,2,0,2,Infield shade,Standard,208.0,0.000,0.000
3112,CH,2023-04-21,84.4,-3.12,5.75,"Soto, Juan",665742,668678,,foul,...,2,0,2,0,2,Infield shade,Standard,233.0,0.000,-0.050


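#### Events we count as a successful swing: 'single', 'double', 'triple', 'home_run', 'fielders_choice' (note: 'fielders_choice' is counted here in addition to the four hit types named in the problem statement)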

In [26]:
# Which of the swings ended in an event counted as a success?
# The selection below covers single, double, triple, home run and fielder's choice.
# NOTE(review): 'fielders_choice' is not one of the hit types named in the problem
# statement (single, double, triple, home run) — confirm it is meant to be counted.
successful_events = ['single', 'double', 'triple', 'home_run', 'fielders_choice']
soto_stats[soto_stats['events'].isin(successful_events)]

Unnamed: 0,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,player_name,batter,pitcher,events,description,...,fld_score,post_away_score,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment,spin_axis,delta_home_win_exp,delta_run_exp
25,FC,2024-04-20,89.5,-2.22,5.79,"Soto, Juan",665742,621107,double,hit_into_play,...,0,0,0,0,0,Infield shade,Standard,209.0,0.037,0.241
30,CU,2024-04-20,79.4,-2.24,6.09,"Soto, Juan",665742,621107,single,hit_into_play,...,0,0,0,0,0,Infield shade,Standard,66.0,0.024,0.301
40,FF,2024-04-19,91.9,-0.07,6.51,"Soto, Juan",665742,606965,home_run,hit_into_play,...,1,1,5,5,1,Infield shade,Standard,213.0,0.149,2.300
46,FF,2024-04-17,96.5,-2.20,5.61,"Soto, Juan",665742,592332,double,hit_into_play,...,2,1,2,1,2,Standard,Standard,216.0,-0.115,1.046
48,SL,2024-04-17,85.2,-2.40,5.58,"Soto, Juan",665742,592332,single,hit_into_play,...,2,0,2,0,2,Standard,Standard,202.0,-0.042,0.416
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2997,SI,2023-04-29,94.3,-1.84,5.70,"Soto, Juan",665742,596001,home_run,hit_into_play,...,8,8,6,6,8,,,244.0,0.091,0.933
3023,CU,2023-04-27,81.6,-1.40,5.97,"Soto, Juan",665742,624522,single,hit_into_play,...,5,1,5,1,5,Infield shade,Standard,31.0,-0.022,0.453
3086,SI,2023-04-23,95.8,-2.03,5.51,"Soto, Juan",665742,686753,single,hit_into_play,...,0,0,0,0,0,Infield shade,Standard,229.0,-0.086,0.633
3094,CU,2023-04-22,82.3,-1.63,5.64,"Soto, Juan",665742,518876,single,hit_into_play,...,3,2,3,2,3,Infield shade,Standard,58.0,-0.087,0.733


In [29]:
# Share of swings (in percent) that ended in a successful event.
# Computed from the data instead of hand-copying the row counts (186 and 1139)
# from the outputs above, so the figure stays correct if the date range or the
# filters change. `soto_stats` is the swings-only frame at this point.
successful_events = ['single', 'double', 'triple', 'home_run', 'fielders_choice']
n_swings = len(soto_stats)
n_successful = int(soto_stats['events'].isin(successful_events).sum())
n_successful / n_swings * 100

16.330114135206323

#### 186 of 1139 swings were successful: 16.33% of Juan Soto's swings between 2023-04-21 and 2024-04-21 were successful.