In [None]:
import scipy.stats as scs
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the aces_2020.csv table, then carve out one frame per pitcher by
# matching on the numeric `pitcher` id column.
df = pd.read_csv('aces_2020.csv')

cole = df.loc[df['pitcher'] == 543037]
degrom = df.loc[df['pitcher'] == 594798]

In [None]:
# Quick look at the smallest release_speed across the whole table.
df['release_speed'].min()

In [None]:
# Group each pitcher's rows by pitch type so per-pitch-type statistics can be
# computed and compared.
c = cole.groupby('pitch_type')
d = degrom.groupby('pitch_type')

ps = ['CH', 'CU', 'FF', 'SL']
avg_cats = ['release_speed', 'release_spin_rate', 'pfx_x',
            'pfx_z', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az']

# Per pitch type, total the `ps` columns and place both pitchers side by side.
# Columns are selected *before* reducing so that unrelated (possibly text)
# columns are never summed.
# NOTE(review): `ps` is used here as COLUMN labels of the grouped frame. If the
# CSV has no columns named CH/CU/FF/SL this raises KeyError, and `ps` was
# probably meant as pitch_type row labels instead -- confirm against the data.
agg = pd.concat([c[ps].sum().sum(axis=1),
                 d[ps].sum().sum(axis=1)], axis=1).rename(columns={0: "Cole", 1: "deGrom"})

# Mean of each tracked metric per pitch type (index: pitch_type, columns:
# avg_cats). Selecting avg_cats first matters: since pandas 2.0 groupby
# reductions default to numeric_only=False, so averaging every column and
# selecting afterwards raises TypeError as soon as a text column is present.
cole_mean = c[avg_cats].mean()
degr_mean = d[avg_cats].mean()

In [None]:
# Cole's mean release_spin_rate for pitch_type 'FF', read from the
# per-pitch-type means table.
ff_mean_spin = cole_mean.loc['FF', 'release_spin_rate']

In [None]:
# One frame per (pitcher, pitch_type) pair. Each line filters both pitchers on
# the same pitch_type code and unpacks the results in (deGrom, Cole) order.
degr_ff, cole_ff = (frame[frame['pitch_type'] == 'FF'] for frame in (degrom, cole))

degr_sl, cole_sl = (frame[frame['pitch_type'] == 'SL'] for frame in (degrom, cole))

degr_cu, cole_cu = (frame[frame['pitch_type'] == 'CU'] for frame in (degrom, cole))

degr_ch, cole_ch = (frame[frame['pitch_type'] == 'CH'] for frame in (degrom, cole))

In [None]:
# Hypothesis test of fastball (pitch_type 'FF') spin rate, Cole vs. deGrom.
# The overlaid histograms share one set of bin edges for a visual comparison;
# values outside the bin range are not drawn.
fig, ax = plt.subplots(figsize=(7,5))
x = np.linspace(2000, 3000, 25)
ax.hist(cole_ff.release_spin_rate, alpha=.6, bins=x, label='Cole')
ax.hist(degr_ff.release_spin_rate, alpha=.6, bins=x, label='deGrom')
ax.set(title='FF release_spin_rate', xlabel='release_spin_rate', ylabel='count')
ax.legend()

ff_mean_spin_co = cole_mean.release_spin_rate['FF']
ff_mean_spin_de = degr_mean.release_spin_rate['FF']
# equal_var=False selects Welch's t-test (no equal-variance assumption).
# NOTE(review): ttest_ind propagates NaN by default; if this column can contain
# missing values, consider nan_policy='omit'.
ff_spin_res = scs.ttest_ind(cole_ff.release_spin_rate, degr_ff.release_spin_rate, equal_var=False)
# Show the test result computed above together with both means. (Previously
# this displayed an undefined name, `result`, which fails on a fresh kernel.)
(ff_spin_res, f'Cole: {ff_mean_spin_co}', f'deGrom: {ff_mean_spin_de}')

In [None]:
# Hypothesis test of fastball (pitch_type 'FF') release speed, Cole vs. deGrom.
# The overlaid histograms share one set of bin edges for a visual comparison;
# values outside the bin range are not drawn.
fig, ax = plt.subplots(figsize=(7,5))
x = np.linspace(80, 102, 50)
ax.hist(cole_ff.release_speed, alpha=.6, bins=x, label='Cole')
ax.hist(degr_ff.release_speed, alpha=.6, bins=x, label='deGrom')
ax.set(title='FF release_speed', xlabel='release_speed', ylabel='count')
ax.legend()

ff_mean_speed_co = cole_mean.release_speed['FF']
ff_mean_speed_de = degr_mean.release_speed['FF']
# Stored under its own name so the spin-rate test result (`ff_spin_res`) is
# no longer overwritten by the speed test.
# equal_var=False selects Welch's t-test (no equal-variance assumption).
# NOTE(review): ttest_ind propagates NaN by default; if this column can contain
# missing values, consider nan_policy='omit'.
ff_speed_res = scs.ttest_ind(cole_ff.release_speed, degr_ff.release_speed, equal_var=False)
# Show the speed test with the speed means. (Previously this displayed an
# undefined `result` alongside the spin-rate means.)
(ff_speed_res, f'Cole: {ff_mean_speed_co}', f'deGrom: {ff_mean_speed_de}')

In [None]:
# Hypothesis test of fastball (pitch_type 'FF') break, measured by the pfx_x
# column, Cole vs. deGrom. The overlaid histograms share one set of bin edges;
# values outside the bin range are not drawn.
fig, ax = plt.subplots(figsize=(7,5))
x = np.linspace(-2, 0, 25)
ax.hist(cole_ff.pfx_x, alpha=.6, bins=x, label='Cole')
ax.hist(degr_ff.pfx_x, alpha=.6, bins=x, label='deGrom')
ax.set(title='FF pfx_x', xlabel='pfx_x', ylabel='count')
ax.legend()

# Only Cole's mean is reported alongside the test here.
ff_mean_break = cole_mean.pfx_x['FF']
# equal_var=False selects Welch's t-test (no equal-variance assumption).
# NOTE(review): ttest_ind propagates NaN by default; if this column can contain
# missing values, consider nan_policy='omit'.
ff_break_res = scs.ttest_ind(cole_ff.pfx_x, degr_ff.pfx_x, equal_var=False)
# Show the result computed above. (Previously this displayed an undefined
# name, `result`.)
(ff_break_res, ff_mean_break)

In [None]:
# Hypothesis test of fastball (pitch_type 'FF') rise, measured by the pfx_z
# column, Cole vs. deGrom. The overlaid histograms share one set of bin edges;
# values outside the bin range are not drawn.
fig, ax = plt.subplots(figsize=(7,5))
x = np.linspace(.5, 2.5, 25)
ax.hist(cole_ff.pfx_z, alpha=.6, bins=x, label='Cole')
ax.hist(degr_ff.pfx_z, alpha=.6, bins=x, label='deGrom')
ax.set(title='FF pfx_z', xlabel='pfx_z', ylabel='count')
ax.legend()

# Only Cole's mean is reported alongside the test here.
ff_mean_break2 = cole_mean.pfx_z['FF']
# equal_var=False selects Welch's t-test (no equal-variance assumption).
# NOTE(review): ttest_ind propagates NaN by default; if this column can contain
# missing values, consider nan_policy='omit'.
ff_break2_res = scs.ttest_ind(cole_ff.pfx_z, degr_ff.pfx_z, equal_var=False)
# Show the result computed above. (Previously this displayed an undefined
# name, `result`.)
(ff_break2_res, ff_mean_break2)

In [None]:
# Hypothesis test of slider (pitch_type 'SL') spin rate, Cole vs. deGrom.
# The overlaid histograms share one set of bin edges for a visual comparison;
# values outside the bin range are not drawn.
fig, ax = plt.subplots(figsize=(7,5))
x = np.linspace(2000, 3000, 25)
ax.hist(cole_sl.release_spin_rate, alpha=.6, bins=x, label='Cole')
ax.hist(degr_sl.release_spin_rate, alpha=.6, bins=x, label='deGrom')
ax.set(title='SL release_spin_rate', xlabel='release_spin_rate', ylabel='count')
ax.legend()

# Only Cole's mean is reported alongside the test here.
sl_mean_spin = cole_mean.release_spin_rate['SL']
# equal_var=False selects Welch's t-test (no equal-variance assumption).
# NOTE(review): ttest_ind propagates NaN by default; if this column can contain
# missing values, consider nan_policy='omit'.
sl_spin_res = scs.ttest_ind(cole_sl.release_spin_rate, degr_sl.release_spin_rate, equal_var=False)
# Show the result computed above. (Previously this displayed an undefined
# name, `result`.)
(sl_spin_res, sl_mean_spin)

In [None]:
# Hypothesis test of slider (pitch_type 'SL') release speed, Cole vs. deGrom.
# The overlaid histograms share one set of bin edges for a visual comparison;
# values outside the bin range are not drawn.
fig, ax = plt.subplots(figsize=(7,5))
x = np.linspace(83, 98, 25)
ax.hist(cole_sl.release_speed, alpha=.6, bins=x, label='Cole')
ax.hist(degr_sl.release_speed, alpha=.6, bins=x, label='deGrom')
ax.set(title='SL release_speed', xlabel='release_speed', ylabel='count')
ax.legend()

# Speed values get their own names so the slider spin-rate mean and test
# result (`sl_mean_spin`, `sl_spin_res`) are no longer overwritten.
# Only Cole's mean is reported alongside the test here.
sl_mean_speed = cole_mean.release_speed['SL']
# equal_var=False selects Welch's t-test (no equal-variance assumption).
# NOTE(review): ttest_ind propagates NaN by default; if this column can contain
# missing values, consider nan_policy='omit'.
sl_speed_res = scs.ttest_ind(cole_sl.release_speed, degr_sl.release_speed, equal_var=False)
# Show the result computed above. (Previously this displayed an undefined
# name, `result`.)
(sl_speed_res, sl_mean_speed)

In [None]:
# Hypothesis test of slider (pitch_type 'SL') break, measured by the pfx_x
# column, Cole vs. deGrom. The overlaid histograms share one set of bin edges;
# values outside the bin range are not drawn.
fig, ax = plt.subplots(figsize=(7,5))
x = np.linspace(-.2, 1, 25)
ax.hist(cole_sl.pfx_x, alpha=.6, bins=x, label='Cole')
ax.hist(degr_sl.pfx_x, alpha=.6, bins=x, label='deGrom')
ax.set(title='SL pfx_x', xlabel='pfx_x', ylabel='count')
ax.legend()

# Only Cole's mean is reported alongside the test here.
sl_mean_break = cole_mean.pfx_x['SL']
# equal_var=False selects Welch's t-test (no equal-variance assumption).
# NOTE(review): ttest_ind propagates NaN by default; if this column can contain
# missing values, consider nan_policy='omit'.
sl_break_res = scs.ttest_ind(cole_sl.pfx_x, degr_sl.pfx_x, equal_var=False)
# Show the result computed above. (Previously this displayed an undefined
# name, `result`.)
(sl_break_res, sl_mean_break)

In [None]:
# Hypothesis test of slider (pitch_type 'SL') drop, measured by the pfx_z
# column, Cole vs. deGrom. The overlaid histograms share one set of bin edges;
# values outside the bin range are not drawn.
fig, ax = plt.subplots(figsize=(7,5))
x = np.linspace(-.5, 1.5, 25)
ax.hist(cole_sl.pfx_z, alpha=.6, bins=x, label='Cole')
ax.hist(degr_sl.pfx_z, alpha=.6, bins=x, label='deGrom')
ax.set(title='SL pfx_z', xlabel='pfx_z', ylabel='count')
ax.legend()

# Only Cole's mean is reported alongside the test here.
sl_mean_break2 = cole_mean.pfx_z['SL']
# equal_var=False selects Welch's t-test (no equal-variance assumption).
# NOTE(review): ttest_ind propagates NaN by default; if this column can contain
# missing values, consider nan_policy='omit'.
sl_break2_res = scs.ttest_ind(cole_sl.pfx_z, degr_sl.pfx_z, equal_var=False)
# Show the result computed above. (Previously this displayed an undefined
# name, `result`.)
(sl_break2_res, sl_mean_break2)