# Imports

In [45]:
import warnings

import numpy as np
import pandas as pd

# Never truncate the column list when a dataframe is displayed
pd.options.display.max_columns = None

# Read the 2020 data file and show its (rows, columns) shape
mlb2020 = pd.read_csv('data/2020mlb.csv')
mlb2020.shape

(279660, 90)

For a full explanation of every column in the data, see the [Baseball Savant CSV documentation](https://baseballsavant.mlb.com/csv-docs).

In [None]:
# Read the 2019 data file into its own dataframe
mlb2019 = pd.read_csv('data/2019mlb.csv')

# Cleaning

---
The cleaning steps are demonstrated on the `mlb2020` dataset, but the same steps will be applied to past seasons as well.

In [68]:
def renaming_columns(mlb):
    '''
    Give the pitcher-name and fielder columns readable names.

    mlb: dataframe whose columns should be renamed

    The dataframe is modified in place and the same object is returned.
    Columns listed below that are absent from `mlb` are simply skipped.
    '''
    readable_names = {
        'player_name': 'pitcher_name',
        'fielder_2': 'Catcher',
        'fielder_3': 'FirstBasemen',
        'fielder_4': 'SecondBasemen',
        'fielder_5': 'ThirdBasemen',
        'fielder_6': 'ShortStop',
        'fielder_7': 'LeftField',
        'fielder_8': 'CenterField',
        'fielder_9': 'RightField',
    }
    # One pass over the column labels instead of three separate renames
    mlb.rename(columns=readable_names, inplace=True)
    return mlb

In [46]:
# Relabel 'player_name' as 'pitcher_name'.
# Every season's dataframe has to go through this same rename.
mlb2020.rename(columns=dict(player_name='pitcher_name'), inplace=True)

In [47]:
# 'fielder_2' was confirmed to hold the catchers
mlb2020.rename(columns=dict(fielder_2='Catcher'), inplace=True)

In [48]:
# Give the remaining fielder columns (3 through 9) their position names
mlb2020.rename(
    columns=dict(
        fielder_3='FirstBasemen',
        fielder_4='SecondBasemen',
        fielder_5='ThirdBasemen',
        fielder_6='ShortStop',
        fielder_7='LeftField',
        fielder_8='CenterField',
        fielder_9='RightField',
    ),
    inplace=True,
)

In [49]:
# Create Batter Name
def batter_name(des):
    '''
    Pull the batter's name out of a play description.

    des: value from the `des` column,
         e.g. 'Willy Adames called out on strikes.'

    Returns the first two space-separated words of `des` (the whole text if
    it has fewer), or np.nan when `des` is not a string, such as a missing
    value.
    '''
    # Check the type explicitly instead of wrapping the split in a bare
    # `except:`, which would also swallow unrelated errors such as
    # KeyboardInterrupt.
    if not isinstance(des, str):
        return np.nan
    # NOTE(review): assumes the description opens with a two-word batter name;
    # longer names would be cut short -- confirm against the data.
    return ' '.join(des.split(' ', 2)[:2])
# Rows with no description come back as NaN and are filled from the nearest
# preceding row that has a name.
# The filled result is assigned back to the column: calling
# ffill(inplace=True) on `mlb2020['batter_name']` is a chained in-place
# operation, which under pandas Copy-on-Write acts on a temporary and leaves
# the NaNs in the dataframe.
mlb2020['batter_name'] = mlb2020['des'].map(batter_name).ffill()
mlb2020.head()

Unnamed: 0,index,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,pitcher_name,batter,pitcher,events,description,spin_dir,spin_rate_deprecated,break_angle_deprecated,break_length_deprecated,zone,des,game_type,stand,p_throws,home_team,away_team,type,hit_location,bb_type,balls,strikes,game_year,pfx_x,pfx_z,plate_x,plate_z,on_3b,on_2b,on_1b,outs_when_up,inning,inning_topbot,hc_x,hc_y,tfs_deprecated,tfs_zulu_deprecated,Catcher,umpire,sv_id,vx0,vy0,vz0,ax,ay,az,sz_top,sz_bot,hit_distance_sc,launch_speed,launch_angle,effective_speed,release_spin_rate,release_extension,game_pk,pitcher.1,fielder_2.1,FirstBasemen,SecondBasemen,ThirdBasemen,ShortStop,LeftField,CenterField,RightField,release_pos_y,estimated_ba_using_speedangle,estimated_woba_using_speedangle,woba_value,woba_denom,babip_value,iso_value,launch_speed_angle,at_bat_number,pitch_number,pitch_name,home_score,away_score,bat_score,fld_score,post_away_score,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment,batter_name
0,0,FF,2020-10-27,96.7,1.58,5.99,Julio Urias,642715.0,628711.0,strikeout,called_strike,,,,,4.0,Willy Adames called out on strikes.,W,R,L,LAD,TB,S,2.0,,0.0,2.0,2020.0,0.2,1.6,-0.53,2.29,,,,2.0,9.0,Top,,,,,605131.0,,,-5.950264,-140.490456,-7.897391,3.772,32.321911,-8.981441,3.5,1.69,,,,95.4,2615.0,5.7,635886.0,628711.0,605131.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.82,,,0.0,1.0,0.0,0.0,,65.0,3.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Willy Adames
1,1,FF,2020-10-27,94.1,2.91,5.45,Julio Urias,642715.0,628711.0,,called_strike,,,,,1.0,,W,R,L,LAD,TB,S,,,0.0,1.0,2020.0,0.8,1.3,-0.55,3.03,,,,2.0,9.0,Top,,,,,605131.0,,,-10.560246,-136.599519,-3.429867,11.723598,29.18381,-15.237217,3.5,1.69,,,,93.4,2470.0,5.9,635886.0,628711.0,605131.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.59,,,,,,,,65.0,2.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Willy Adames
2,2,FF,2020-10-27,94.9,1.77,6.02,Julio Urias,642715.0,628711.0,,swinging_strike,,,,,2.0,,W,R,L,LAD,TB,S,,,0.0,0.0,2020.0,0.2,1.5,-0.04,3.32,,,,2.0,9.0,Top,,,,,605131.0,,,-5.199252,-138.098234,-4.63797,4.158758,30.838499,-12.535677,3.5,1.69,,,,94.0,2397.0,5.7,635886.0,628711.0,605131.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.76,,,,,,,,65.0,1.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Willy Adames
3,3,FF,2020-10-27,94.4,1.66,5.93,Julio Urias,670712.0,628711.0,strikeout,called_strike,,,,,4.0,Mike Brosseau called out on strikes.,W,R,L,LAD,TB,S,2.0,,3.0,2.0,2020.0,0.3,1.5,-0.37,2.15,,,,1.0,9.0,Top,,,,,605131.0,,,-5.843595,-137.294295,-7.414897,4.754147,30.016237,-11.778755,3.34,1.53,,,,93.7,2508.0,5.9,635886.0,628711.0,605131.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.6,,,0.0,1.0,0.0,0.0,,64.0,6.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Mike Brosseau
4,4,CU,2020-10-27,81.4,1.46,6.06,Julio Urias,670712.0,628711.0,,ball,,,,,13.0,,W,R,L,LAD,TB,B,,,2.0,2.0,2020.0,-1.7,-0.2,-0.14,0.96,,,,1.0,9.0,Top,,,,,605131.0,,,-0.383207,-118.44781,-4.454166,-15.553576,24.451936,-32.892744,3.34,1.53,,,,80.2,3031.0,5.7,635886.0,628711.0,605131.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.84,,,,,,,,64.0,5.0,Curveball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Mike Brosseau


In [50]:
# Fixed seed so the same five rows are shown on every Restart & Run All
mlb2020.sample(5, random_state=42)

Unnamed: 0,index,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,pitcher_name,batter,pitcher,events,description,spin_dir,spin_rate_deprecated,break_angle_deprecated,break_length_deprecated,zone,des,game_type,stand,p_throws,home_team,away_team,type,hit_location,bb_type,balls,strikes,game_year,pfx_x,pfx_z,plate_x,plate_z,on_3b,on_2b,on_1b,outs_when_up,inning,inning_topbot,hc_x,hc_y,tfs_deprecated,tfs_zulu_deprecated,Catcher,umpire,sv_id,vx0,vy0,vz0,ax,ay,az,sz_top,sz_bot,hit_distance_sc,launch_speed,launch_angle,effective_speed,release_spin_rate,release_extension,game_pk,pitcher.1,fielder_2.1,FirstBasemen,SecondBasemen,ThirdBasemen,ShortStop,LeftField,CenterField,RightField,release_pos_y,estimated_ba_using_speedangle,estimated_woba_using_speedangle,woba_value,woba_denom,babip_value,iso_value,launch_speed_angle,at_bat_number,pitch_number,pitch_name,home_score,away_score,bat_score,fld_score,post_away_score,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment,batter_name
92899,22715,SI,2020-09-09,93.9,-1.57,5.75,Jorge Lopez,605204.0,605347.0,,ball,,,,,13.0,,R,R,R,NYM,BAL,B,,,1.0,0.0,2020.0,-1.4,0.8,-0.13,1.08,,,,2.0,1.0,Bot,,,,,642082.0,,,6.745938,-136.536555,-8.186714,-18.507555,26.716948,-20.582476,3.62,1.68,,,,94.2,1796.0,6.2,631212.0,605347.0,642082.0,642162.0,593643.0,547004.0,578428.0,663624.0,656775.0,621466.0,54.25,,,,,,,,10.0,2.0,Sinker,0.0,2.0,0.0,2.0,2.0,0.0,0.0,2.0,Standard,Standard,J.D. Davis
12639,3342,CU,2020-09-30,74.1,1.28,6.15,Max Fried,592206.0,608331.0,,swinging_strike,,,,,8.0,,F,R,L,ATL,CIN,S,,,0.0,0.0,2020.0,-0.8,-1.4,-0.2,1.74,,,,1.0,6.0,Top,,,,,518595.0,,,-1.623186,-107.801645,0.791686,-5.887085,20.58641,-43.614964,3.7,1.79,,,,73.4,2665.0,6.0,635914.0,608331.0,518595.0,518692.0,645277.0,663586.0,621020.0,594807.0,660670.0,455976.0,54.49,,,,,,,,38.0,1.0,Curveball,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Standard,Standard,Nick Castellanos
53087,6744,FF,2020-09-19,91.7,2.15,6.28,Patrick Corbin,542583.0,571578.0,,foul,,,,,1.0,,R,R,L,MIA,WSH,S,,,1.0,1.0,2020.0,0.6,1.3,-0.77,3.26,,,516782.0,1.0,5.0,Bot,,,,,543228.0,,,-8.788956,-133.194722,-4.568928,9.563427,30.309089,-15.731353,3.49,1.6,141.0,58.7,59.0,91.7,2217.0,6.5,631317.0,571578.0,543228.0,452678.0,671277.0,666198.0,607208.0,665742.0,645302.0,664057.0,53.98,,,,,,,,43.0,3.0,4-Seam Fastball,5.0,2.0,5.0,2.0,2.0,5.0,5.0,2.0,Infield shift,Standard,Jesus Aguilar
96994,3356,FF,2020-09-08,88.3,0.91,5.81,Mike Minor,664702.0,501985.0,,called_strike,,,,,8.0,,R,R,L,OAK,HOU,S,,,0.0,0.0,2020.0,-0.2,1.2,0.18,1.64,,,,0.0,4.0,Top,,,,,641680.0,,,-1.414931,-128.597931,-6.632923,-1.600369,24.161748,-17.490839,3.31,1.58,,,,88.1,2342.0,5.9,631160.0,501985.0,641680.0,621566.0,643393.0,600303.0,605353.0,543257.0,657656.0,592192.0,54.62,,,,,,,,26.0,1.0,4-Seam Fastball,4.0,0.0,0.0,4.0,0.0,4.0,0.0,4.0,Standard,Standard,Myles Straw
95220,20325,FF,2020-09-09,95.8,2.19,5.77,Caleb Ferguson,668942.0,657571.0,,ball,,,,,11.0,,R,L,L,ARI,LAD,B,,,0.0,2.0,2020.0,0.8,1.2,-0.89,3.32,,500871.0,,0.0,8.0,Bot,,,,,669257.0,,,-9.944003,-139.202699,-3.600667,12.603348,32.045509,-15.709962,3.44,1.57,,,,95.9,2183.0,6.5,630221.0,657571.0,669257.0,641355.0,666158.0,571970.0,608369.0,571771.0,572041.0,605141.0,54.0,,,,,,,,66.0,3.0,4-Seam Fastball,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,Standard,Standard,Josh Rojas


In [51]:
mlb2020['hit_location'].unique() # Which infielder/outfielder position (1-9) the ball was hit to

array([ 2., nan,  9.,  4.,  5.,  8.,  7.,  3.,  6.,  1.])

In [52]:
mlb2020['bb_type'].unique() # Batted-ball type: fly ball, ground ball, line drive or popup

array([nan, 'fly_ball', 'ground_ball', 'line_drive', 'popup'],
      dtype=object)

In [53]:
# Distinct zone codes present in the data (their meaning is in the data dictionary)
mlb2020['zone'].unique()

array([ 4.,  1.,  2., 13., 11.,  5., 12.,  6.,  8.,  3., 14.,  9.,  7.,
       nan])

In [54]:
# Drop the columns chosen for removal: the deprecated spin/break/tfs fields,
# 'umpire' and 'sv_id', and the '.1' copies of the pitcher and fielder_2
# columns.
# NOTE(review): in the rows displayed above these columns are empty or repeat
# another column -- confirm that holds for the whole season.
#
# errors='ignore' keeps the cell re-runnable: without it a second run raises
# KeyError because the columns are already gone.
mlb2020.drop(columns=['spin_dir', 'spin_rate_deprecated',
       'break_angle_deprecated', 'break_length_deprecated','tfs_deprecated', 'tfs_zulu_deprecated',
        'umpire', 'sv_id', 'pitcher.1', 'fielder_2.1'], inplace=True, errors='ignore')

In [55]:
# Dtypes and non-null counts of the columns left after the drop
mlb2020.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 279660 entries, 0 to 279659
Data columns (total 81 columns):
 #   Column                           Non-Null Count   Dtype  
---  ------                           --------------   -----  
 0   index                            279660 non-null  int64  
 1   pitch_type                       279089 non-null  object 
 2   game_date                        279660 non-null  object 
 3   release_speed                    279652 non-null  float64
 4   release_pos_x                    279652 non-null  float64
 5   release_pos_z                    279652 non-null  float64
 6   pitcher_name                     279660 non-null  object 
 7   batter                           279660 non-null  float64
 8   pitcher                          279660 non-null  float64
 9   events                           70437 non-null   object 
 10  description                      279660 non-null  object 
 11  zone                             279652 non-null  float64
 12  de

In [56]:
# Collapse each pitch's `description` into one of four outcome classes:
# 'strike', 'ball', 'out' or 'ob' (on base).
#
# Good for creating a model to determine if a ball is a strike or not.
# Any description that is not listed below is mapped to NaN.
mlb2020['strike_attempt'] = mlb2020['description'].map({
    # strikes
    'called_strike': 'strike',
    'swinging_strike': 'strike',
    'swinging_strike_blocked': 'strike',
    'foul': 'strike',  # False in out_via_description
    'foul_bunt': 'strike',
    'foul_tip': 'strike',
    'bunt_foul_tip': 'strike',
    'missed_bunt': 'strike',
    # balls
    'ball': 'ball',
    'blocked_ball': 'ball',
    # A pitchout is a pitch thrown wide on purpose, i.e. a ball; it was
    # previously labelled 'out', apparently because of the name.
    'pitchout': 'ball',
    # ball put in play / batter reaches base
    'hit_into_play': 'out',
    # NOTE(review): a run scoring does not by itself mean the batter reached
    # base -- confirm 'ob' is the intended label here.
    'hit_into_play_score': 'ob',
    'hit_into_play_no_out': 'ob',
    'hit_by_pitch': 'ob',
})

In [57]:
# Spot-check the outcome label of one row near the end of the dataframe.
# (hc_x and hc_y are the hit coordinates.)
mlb2020['strike_attempt'].iloc[279655]

'out'

In [58]:
# Names of the columns holding the pitch's velocity (vx0, vy0, vz0)
# and acceleration (ax, ay, az)
vel_accel = ['vx0', 'vy0', 'vz0', 'ax', 'ay', 'az']

In [59]:
# Column names available after the renames, the drop and the added columns
mlb2020.columns

Index(['index', 'pitch_type', 'game_date', 'release_speed', 'release_pos_x',
       'release_pos_z', 'pitcher_name', 'batter', 'pitcher', 'events',
       'description', 'zone', 'des', 'game_type', 'stand', 'p_throws',
       'home_team', 'away_team', 'type', 'hit_location', 'bb_type', 'balls',
       'strikes', 'game_year', 'pfx_x', 'pfx_z', 'plate_x', 'plate_z', 'on_3b',
       'on_2b', 'on_1b', 'outs_when_up', 'inning', 'inning_topbot', 'hc_x',
       'hc_y', 'Catcher', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az', 'sz_top',
       'sz_bot', 'hit_distance_sc', 'launch_speed', 'launch_angle',
       'effective_speed', 'release_spin_rate', 'release_extension', 'game_pk',
       'FirstBasemen', 'SecondBasemen', 'ThirdBasemen', 'ShortStop',
       'LeftField', 'CenterField', 'RightField', 'release_pos_y',
       'estimated_ba_using_speedangle', 'estimated_woba_using_speedangle',
       'woba_value', 'woba_denom', 'babip_value', 'iso_value',
       'launch_speed_angle', 'at_bat_number', 'pitch_

If I were to go with trying to figure out whether a pitch was a `strike`, `ball`, `out`, or `on-base`, I would select these features.

```python
features = ['pitch_type', 'release_speed', 'release_pos_x', 'release_pos_z', 'stand', 'p_throws', 'balls', 'strikes',
            'pfx_x', 'pfx_z', 'plate_x', 'plate_z', 'on_3b', 'on_2b', 'on_1b', 'inning', 'inning_topbot',
            'Catcher', 'effective_speed', 'release_spin_rate', 'pitcher', 'FirstBasemen', 'SecondBasemen', 'ThirdBasemen', 'ShortStop',
            'LeftField', 'CenterField', 'RightField', 'at_bat_number', 'pitch_number', 'pitch_name',
            'bat_score', 'fld_score', 'post_bat_score', 'post_fld_score', 'if_fielding_alignment', 'of_fielding_alignment']

target = mlb2020['strike_attempt']
```

If I wanted to predict the next pitch of a sequence based on factors such as lineup or current at-bat, I would need these features, most likely.

```python
features = [...]  # TODO: feature list still to be decided

target = mlb2020[['pitch_name', 'release_speed']] # anything after pitch name is debatable
```

# Feature Engineering 

In [60]:
# Get Pitcher Name Stats
def pitcher_stats(name, df):
    '''
    Select every row of a season that belongs to one pitcher.

    name: name of pitcher as a string
    df: dataframe of the season you want to check

    Returns the rows whose 'pitcher_name' equals `name`. If nothing matches,
    the (empty) dataframe is still returned and a warning is issued. If `df`
    has no 'pitcher_name' column, the message string is returned instead, as
    before.
    '''
    not_found = 'Did not play this season or you misspelled his name.'
    try:
        rows = df[df['pitcher_name'] == name]
    except KeyError:
        # Only a missing column is handled here; callers already receive the
        # message string in that case, so that return is kept.
        return not_found
    if rows.empty:
        # An unknown name does not raise, it just matches nothing, so the
        # message has to be surfaced here rather than in the except branch.
        warnings.warn(not_found, stacklevel=2)
    return rows

# Get Batter Stats
def batter_stats(name, df):
    '''
    Select every row of a season that belongs to one batter.

    name: name of batter as a string
    df: dataframe of the season you want to check

    Returns the rows whose 'batter_name' equals `name`. If nothing matches,
    the (empty) dataframe is still returned and a warning is issued. If `df`
    has no 'batter_name' column, the message string is returned instead, as
    before.
    '''
    not_found = 'Did not play this season or you misspelled his name.'
    try:
        rows = df[df['batter_name'] == name]
    except KeyError:
        # Only a missing column is handled here; callers already receive the
        # message string in that case, so that return is kept.
        return not_found
    if rows.empty:
        # An unknown name does not raise, it just matches nothing, so the
        # message has to be surfaced here rather than in the except branch.
        warnings.warn(not_found, stacklevel=2)
    return rows

In [61]:
# Pitch rows for two example pitchers
julio_urias = pitcher_stats(name='Julio Urias', df=mlb2020)
yu_darvish = pitcher_stats(name='Yu Darvish', df=mlb2020)

# Pitch rows for three example batters
justin_turner = batter_stats(name='Justin Turner', df=mlb2020)
joey_gallo = batter_stats(name='Joey Gallo', df=mlb2020)
freddy = batter_stats(name='Freddie Freeman', df=mlb2020)

In [62]:
# First ten rows where the pitcher is Julio Urias
julio_urias.head(10)

Unnamed: 0,index,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,pitcher_name,batter,pitcher,events,description,zone,des,game_type,stand,p_throws,home_team,away_team,type,hit_location,bb_type,balls,strikes,game_year,pfx_x,pfx_z,plate_x,plate_z,on_3b,on_2b,on_1b,outs_when_up,inning,inning_topbot,hc_x,hc_y,Catcher,vx0,vy0,vz0,ax,ay,az,sz_top,sz_bot,hit_distance_sc,launch_speed,launch_angle,effective_speed,release_spin_rate,release_extension,game_pk,FirstBasemen,SecondBasemen,ThirdBasemen,ShortStop,LeftField,CenterField,RightField,release_pos_y,estimated_ba_using_speedangle,estimated_woba_using_speedangle,woba_value,woba_denom,babip_value,iso_value,launch_speed_angle,at_bat_number,pitch_number,pitch_name,home_score,away_score,bat_score,fld_score,post_away_score,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment,batter_name,strike_attempt
0,0,FF,2020-10-27,96.7,1.58,5.99,Julio Urias,642715.0,628711.0,strikeout,called_strike,4.0,Willy Adames called out on strikes.,W,R,L,LAD,TB,S,2.0,,0.0,2.0,2020.0,0.2,1.6,-0.53,2.29,,,,2.0,9.0,Top,,,605131.0,-5.950264,-140.490456,-7.897391,3.772,32.321911,-8.981441,3.5,1.69,,,,95.4,2615.0,5.7,635886.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.82,,,0.0,1.0,0.0,0.0,,65.0,3.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Willy Adames,strike
1,1,FF,2020-10-27,94.1,2.91,5.45,Julio Urias,642715.0,628711.0,,called_strike,1.0,,W,R,L,LAD,TB,S,,,0.0,1.0,2020.0,0.8,1.3,-0.55,3.03,,,,2.0,9.0,Top,,,605131.0,-10.560246,-136.599519,-3.429867,11.723598,29.18381,-15.237217,3.5,1.69,,,,93.4,2470.0,5.9,635886.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.59,,,,,,,,65.0,2.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Willy Adames,strike
2,2,FF,2020-10-27,94.9,1.77,6.02,Julio Urias,642715.0,628711.0,,swinging_strike,2.0,,W,R,L,LAD,TB,S,,,0.0,0.0,2020.0,0.2,1.5,-0.04,3.32,,,,2.0,9.0,Top,,,605131.0,-5.199252,-138.098234,-4.63797,4.158758,30.838499,-12.535677,3.5,1.69,,,,94.0,2397.0,5.7,635886.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.76,,,,,,,,65.0,1.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Willy Adames,strike
3,3,FF,2020-10-27,94.4,1.66,5.93,Julio Urias,670712.0,628711.0,strikeout,called_strike,4.0,Mike Brosseau called out on strikes.,W,R,L,LAD,TB,S,2.0,,3.0,2.0,2020.0,0.3,1.5,-0.37,2.15,,,,1.0,9.0,Top,,,605131.0,-5.843595,-137.294295,-7.414897,4.754147,30.016237,-11.778755,3.34,1.53,,,,93.7,2508.0,5.9,635886.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.6,,,0.0,1.0,0.0,0.0,,64.0,6.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Mike Brosseau,strike
4,4,CU,2020-10-27,81.4,1.46,6.06,Julio Urias,670712.0,628711.0,,ball,13.0,,W,R,L,LAD,TB,B,,,2.0,2.0,2020.0,-1.7,-0.2,-0.14,0.96,,,,1.0,9.0,Top,,,605131.0,-0.383207,-118.44781,-4.454166,-15.553576,24.451936,-32.892744,3.34,1.53,,,,80.2,3031.0,5.7,635886.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.84,,,,,,,,64.0,5.0,Curveball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Mike Brosseau,ball
5,5,FF,2020-10-27,95.6,1.81,5.95,Julio Urias,670712.0,628711.0,,ball,11.0,,W,R,L,LAD,TB,B,,,1.0,2.0,2020.0,0.3,1.4,-0.47,3.94,,,,1.0,9.0,Top,,,605131.0,-6.600812,-139.135295,-2.905682,5.33565,30.811825,-12.976271,3.34,1.53,,,,94.9,2437.0,5.8,635886.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.66,,,,,,,,64.0,4.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Mike Brosseau,ball
6,6,CU,2020-10-27,80.6,1.5,6.11,Julio Urias,670712.0,628711.0,,foul,13.0,,W,R,L,LAD,TB,S,,,1.0,1.0,2020.0,-1.6,-0.1,-0.97,2.06,,,,1.0,9.0,Top,,,605131.0,-2.476312,-117.286244,-2.102737,-13.916321,24.086999,-32.781095,3.34,1.53,9.0,92.8,-16.0,79.3,2971.0,5.6,635886.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.94,,,,,,,,64.0,3.0,Curveball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Infield shift,Standard,Mike Brosseau,strike
7,7,FF,2020-10-27,94.7,1.82,5.95,Julio Urias,670712.0,628711.0,,foul,5.0,,W,R,L,LAD,TB,S,,,1.0,0.0,2020.0,0.5,1.4,0.08,2.59,,,,1.0,9.0,Top,,,605131.0,-5.607907,-137.809921,-6.197171,7.630847,29.808875,-13.053227,3.34,1.53,239.0,77.3,29.0,93.9,2478.0,5.8,635886.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.72,,,,,,,,64.0,2.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Infield shift,Standard,Mike Brosseau,strike
8,8,FF,2020-10-27,94.3,1.67,6.11,Julio Urias,670712.0,628711.0,,ball,12.0,,W,R,L,LAD,TB,B,,,0.0,0.0,2020.0,0.5,1.4,1.18,3.52,,,,1.0,9.0,Top,,,605131.0,-2.392348,-137.363478,-4.225068,7.109486,29.169727,-13.246297,3.34,1.53,,,,93.6,2581.0,5.7,635886.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.79,,,,,,,,64.0,1.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Infield shift,Standard,Mike Brosseau,ball
9,9,FF,2020-10-27,95.2,1.72,6.09,Julio Urias,622534.0,628711.0,field_out,hit_into_play,6.0,Manuel Margot flies out to right fielder Mooki...,W,R,L,LAD,TB,X,9.0,fly_ball,1.0,2.0,2020.0,0.4,1.5,0.4,2.56,,,,0.0,9.0,Top,191.34,99.03,605131.0,-4.306764,-138.539023,-6.867868,6.166594,30.824839,-11.731952,3.25,1.53,284.0,83.4,44.0,94.0,2450.0,5.6,635886.0,571970.0,571771.0,621458.0,608369.0,621035.0,641355.0,605141.0,54.92,0.029,0.04,0.0,1.0,0.0,0.0,3.0,63.0,4.0,4-Seam Fastball,3.0,1.0,1.0,3.0,1.0,3.0,1.0,3.0,Standard,Standard,Manuel Margot,out


In [63]:
# Share of each strike_attempt class among Julio Urias's pitches
julio_urias['strike_attempt'].value_counts(normalize=True)

strike    0.496779
ball      0.332528
out       0.122383
ob        0.048309
Name: strike_attempt, dtype: float64

In [64]:
# The same pitches broken down by the raw description values
julio_urias['description'].value_counts(normalize=True)

ball                       0.318841
foul                       0.194847
called_strike              0.159420
swinging_strike            0.123994
hit_into_play              0.122383
hit_into_play_no_out       0.033011
hit_into_play_score        0.015298
blocked_ball               0.013688
foul_tip                   0.012882
swinging_strike_blocked    0.005636
Name: description, dtype: float64

In [65]:
# Share of each strike_attempt class among rows attributed to batter Joey Gallo
joey_gallo['strike_attempt'].value_counts(normalize=True)

strike    0.474737
ball      0.401053
out       0.080000
ob        0.044211
Name: strike_attempt, dtype: float64

In [66]:
# Share of each strike_attempt class among rows attributed to batter Freddie Freeman
freddy['strike_attempt'].value_counts(normalize=True)

strike    0.414018
ball      0.408313
out       0.103504
ob        0.074165
Name: strike_attempt, dtype: float64

In [67]:
# Share of each strike_attempt class among rows attributed to batter Justin Turner
justin_turner['strike_attempt'].value_counts(normalize=True)

strike    0.429150
ball      0.384615
out       0.110324
ob        0.075911
Name: strike_attempt, dtype: float64