### How often do batters swing at 3-0 pitches?  How has this changed over time?

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

import boxball_loader as bbl

In [2]:
def get_threeoh_outcomes(pa):
    """Tally what happened on the pitch thrown after three straight balls.

    Parameters
    ----------
    pa : DataFrame
        Must have a ``pitch_seq_tx`` column of strings (no missing values;
        the per-character filtering below would fail on a non-string).

    Returns
    -------
    Series
        ``value_counts()`` of a boolean flag: ``True`` when the fourth
        letter of the sequence is one of ``X``, ``S``, ``F``, ``T``, ``M``,
        ``False`` otherwise.  A key is absent when its count would be zero.
        Sequences with only three letters are counted under ``False``.
    """
    # NOTE(review): presumably the Retrosheet pitch codes for outcomes that
    # involve a swing (in play, swinging strike, foul, foul tip, missed
    # bunt) -- confirm against the Retrosheet event-file spec.
    swing_codes = ['X', 'S', 'F', 'T', 'M']

    # Strip everything that is not a letter, so digits and punctuation
    # embedded in the sequence do not shift the pitch positions.
    letters = pa['pitch_seq_tx'].apply(
        lambda seq: ''.join(ch for ch in seq if str.isalpha(ch))
    )

    # Sequences whose first three letters are all 'B' reached a 3-0 count.
    reached_threeoh = letters.str.contains('^BBB')
    fourth_pitch = letters[reached_threeoh].str.get(3)

    swung = fourth_pitch.isin(swing_codes)
    return swung.value_counts()

In [3]:
def compute_data(df, yr):
    """Summarise 3-0 swing behaviour for a single season.

    Parameters
    ----------
    df : DataFrame
        Event data with at least a ``yr`` column and the ``pitch_seq_tx``
        column read by ``get_threeoh_outcomes``.
    yr : int
        Season to summarise; rows are selected with ``df['yr'] == yr``.

    Returns
    -------
    dict
        ``yr``           -- the season passed in.
        ``pa``           -- number of rows for that season.
        ``pa_pitch``     -- rows remaining after dropping any row with a
                            missing value.
        ``pa_3_0``       -- rows counted by ``get_threeoh_outcomes``.
        ``pa_3_0_swing`` -- how many of those were flagged ``True``
                            (0 when there were none).
    """
    pa = df[df['yr'] == yr]

    # Drop incomplete rows once and reuse the result for both the count and
    # the 3-0 tally.
    # NOTE(review): dropna() removes rows with a NaN in *any* column, not
    # just pitch_seq_tx -- confirm that is intended.
    pa_with_pitches = pa.dropna()
    outcomes = get_threeoh_outcomes(pa_with_pitches)

    # value_counts() omits keys with a zero count, so a season with no 3-0
    # swings (or no 3-0 counts at all) has no True entry; treat that as 0
    # instead of raising KeyError.
    swings = outcomes.loc[True] if True in outcomes.index else 0

    return {
        'yr': yr,
        'pa': len(pa),
        'pa_pitch': len(pa_with_pitches),
        'pa_3_0': outcomes.sum(),
        'pa_3_0_swing': swings,
    }


In [4]:
# Load the event data for the era named Eras.PitchCount, asking for the
# pitch-sequence column that the 3-0 analysis reads.
# NOTE(review): presumably this era covers the seasons with recorded pitch
# sequences, and the loader also supplies the 'yr' column used below --
# confirm against boxball_loader.
era = bbl.Eras.PitchCount
pa = bbl.load_event_data(era, ['pitch_seq_tx'])


In [5]:
# One summary row per season.  Iterating a bare set carries no ordering
# guarantee, and later cells show these rows by position only, so sort the
# seasons to keep the table chronological.
outcomes = pd.DataFrame([compute_data(pa, yr) for yr in sorted(set(pa['yr']))])
# Share of 3-0 counts on which the next pitch was swung at.
outcomes['swing_30_freq'] = outcomes['pa_3_0_swing']/outcomes['pa_3_0']
outcomes

Unnamed: 0,yr,pa,pa_pitch,pa_3_0,pa_3_0_swing,swing_30_freq
0,1988,159380,156883,7296,725,0.09937
1,1989,160033,155302,7365,657,0.089206
2,1990,160316,147577,7187,662,0.092111
3,1991,160746,160744,7865,666,0.084679
4,1992,160545,160487,7485,637,0.085104
5,1993,174564,168861,8082,827,0.102326
6,1994,124483,119143,5861,594,0.101348
7,1995,156703,146326,7089,749,0.105657
8,1996,177261,158903,7648,921,0.120424
9,1997,175541,163080,7970,940,0.117942


In [6]:
# Plain-text dump of the whole table, one line per season, without the
# positional index column.
print(outcomes.to_string(index=False))

  yr     pa  pa_pitch  pa_3_0  pa_3_0_swing  swing_30_freq
1988 159380    156883    7296           725       0.099370
1989 160033    155302    7365           657       0.089206
1990 160316    147577    7187           662       0.092111
1991 160746    160744    7865           666       0.084679
1992 160545    160487    7485           637       0.085104
1993 174564    168861    8082           827       0.102326
1994 124483    119143    5861           594       0.101348
1995 156703    146326    7089           749       0.105657
1996 177261    158903    7648           921       0.120424
1997 175541    163080    7970           940       0.117942
1998 188280    176341    8454           968       0.114502
1999 189692    186635   10097          1021       0.101119
2000 190261    190261    9924           885       0.089178
2001 186976    186976    8204           830       0.101170
2002 186615    186615    8421           724       0.085976
2003 187449    187449    8346           595       0.0712

In [7]:
# Scatter plot of the per-season 3-0 swing rate (y) against the season (x).
px.scatter(outcomes, x='yr', y='swing_30_freq', title="Frequency of swinging on 3-0 counts")

In [8]:
# Roughly how many 3-0 swings happen per day across all plate appearances in
# the data (a season-wide total, not a per-player figure): each season's 3-0
# swing count divided by 180.
# NOTE(review): 180 is presumably an approximate number of days in a regular
# season -- confirm; a shortened season would be understated by this fixed
# divisor.  The result is indexed by row position, not by season.
outcomes['pa_3_0_swing']/180

0     4.027778
1     3.650000
2     3.677778
3     3.700000
4     3.538889
5     4.594444
6     3.300000
7     4.161111
8     5.116667
9     5.222222
10    5.377778
11    5.672222
12    4.916667
13    4.611111
14    4.022222
15    3.305556
16    3.683333
17    3.250000
18    2.900000
19    3.105556
20    3.305556
21    2.794444
22    2.805556
23    3.377778
24    3.361111
25    3.594444
26    3.700000
27    3.500000
28    4.088889
29    4.483333
30    4.488889
31    4.638889
32    1.666667
Name: pa_3_0_swing, dtype: float64