In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Read the 2020 pitch log once, then carve out one frame per pitcher id
# (the ids are paired with Gerrit Cole / Jacob deGrom by the variable names
# and plot titles used further down).
df = pd.read_csv('aces_2020.csv')
cole = df.loc[df['pitcher'] == 543037]
degrom = df.loc[df['pitcher'] == 594798]

In [None]:
# Sanity-check the load with a short preview instead of rendering the whole frame.
df.head()

In [None]:
# Column dtypes and non-null counts for Cole's pitches; a quick way to spot
# columns with missing values before plotting them.
cole.info()

In [None]:
# Same dtype / non-null summary for deGrom's pitches, for comparison with Cole's.
degrom.info()

In [None]:
# Per-pitcher groupings by pitch type; reused for both the counts and the means.
c = cole.groupby('pitch_type')
d = degrom.groupby('pitch_type')

# Pitch-type codes. NOTE(review): the sums below assume the frame also carries
# one numeric column per code (e.g. 0/1 indicators) -- confirm against the CSV.
ps = ['CH', 'CU', 'FF', 'SL']
avg_cats = ['release_speed', 'release_spin_rate', 'pfx_x',
            'pfx_z', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az']

# Select the columns *before* aggregating: aggregating the whole frame first
# also reduces string columns such as player_name, which is wasted work for
# 'sum' and a TypeError for 'mean' on pandas >= 2.0.
# Result: rows = pitch type, columns = pitcher.
agg = pd.concat(
    {
        'Cole': c[ps].sum().sum(axis=1),
        'deGrom': d[ps].sum().sum(axis=1),
    },
    axis=1,
)

# Mean of each tracked metric per pitch type, one table per pitcher.
cole_mean = c[avg_cats].mean()
degr_mean = d[avg_cats].mean()

In [None]:
# Peek at the release-speed column of the combined frame (both pitchers).
df['release_speed']

In [None]:
# Stacked horizontal bars: one bar per pitcher, one segment per pitch type.
# (This figure used to be produced by two identical cells that both wrote
# 'stacked_bar.png'; a single cell is enough.)
fig, ax = plt.subplots(figsize=(12, 3))

# Translate codes to display names on the index so the legend is derived from
# the data rather than from a hard-coded positional label list; codes missing
# from the mapping keep their raw code as the label.
pitch_labels = {'CH': 'Changeup', 'CU': 'Curveball', 'FF': 'Fastball', 'SL': 'Slider'}
agg.rename(index=pitch_labels).T.plot(kind='barh', stacked=True, ax=ax)

ax.legend(bbox_to_anchor=(1, 1))
ax.set_title('Frequency of various pitch types')
ax.set_xlabel('total pitches thrown in 2020')
fig.tight_layout()
fig.savefig('stacked_bar.png')

In [None]:
# Release-speed distribution per pitcher, all pitch types pooled.
fig, ax = plt.subplots(figsize=(5, 8))
ax.set_title('Release speed: Cole vs. deGrom')
ax.set_ylabel('Release speeds (all pitches)')
# Matplotlib's boxplot does not skip NaN: a single missing value turns the
# whole box into NaN (i.e. nothing is drawn), so drop missing readings first.
ax.boxplot([p.release_speed.dropna() for p in (cole, degrom)],
           labels=['Cole', 'deGrom']);


In [None]:
# Horizontal-movement distribution per pitcher, all pitch types pooled.
fig, ax = plt.subplots(figsize=(5, 8))
ax.set_title('Horizontal movement: Cole vs. deGrom')
ax.set_ylabel('Horizontal movement (all pitches)')
# Matplotlib's boxplot does not skip NaN: a single missing value turns the
# whole box into NaN (i.e. nothing is drawn), so drop missing readings first.
ax.boxplot([p.pfx_x.dropna() for p in (cole, degrom)],
           labels=['Cole', 'deGrom']);

In [None]:
# Vertical-movement distribution per pitcher, all pitch types pooled.
fig, ax = plt.subplots(figsize=(5, 8))
ax.set_title('Vertical movement: Cole vs. deGrom')
ax.set_ylabel('Vertical movement (all pitches)')
# Matplotlib's boxplot does not skip NaN: a single missing value turns the
# whole box into NaN (i.e. nothing is drawn), so drop missing readings first.
ax.boxplot([p.pfx_z.dropna() for p in (cole, degrom)],
           labels=['Cole', 'deGrom']);

In [None]:
# Draw a nested boxplot: release speed for each pitch type, with one box per
# pitcher inside every pitch-type group.
# The previous call passed a DataFrame as `x` and a non-existent "value"
# column as `y`, which seaborn cannot interpret; x / y / hue must each name a
# column of `data`.
ax = sns.boxplot(data=df, x='pitch_type', y='release_speed', hue='player_name',
                 linewidth=2.5)
ax.set_title('Release speed by pitch type and pitcher');

In [None]:
# One grid of per-column histograms for each pitcher: Cole in the default
# color, deGrom in orange. Each DataFrame.hist call opens its own figure, so
# the layout is tightened right after each one.
for pitches, extra_style in ((cole, {}), (degrom, {'color': 'orange'})):
    pitches.hist(figsize=(10, 7), grid=False, **extra_style)
    plt.tight_layout()

In [None]:
fig, ax = plt.subplots(figsize=(12,3))
sns.set_theme(style='ticks')
sns.color_palette('hls', 8)
ax.set_title('Gerrit Cole: speeds by type of pitch')
ax.set_xlabel('Speed out of hand (mph)')
sns.kdeplot(cole.release_speed, hue=cole.pitch_type, fill=True, alpha=.5, linewidth=2)
#Make sure legend labels are in order
ax.legend(['Curveball','Changeup','Slider','Fastball'], bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.savefig('cole_speed_density.png')

In [None]:
fig, ax = plt.subplots(figsize=(12,3))
sns.set_theme(style='ticks')
sns.color_palette('hls', 8)
ax.set_title('Jacob deGrom: speeds by type of pitch')
ax.set_xlabel('Speed out of hand')
sns.kdeplot(degrom.release_speed, hue=degrom.pitch_type, fill=True, alpha=.5,linewidth=2)
#Make sure legend labels are in order
ax.legend(['Curveball','Changeup', 'Fastball','Slider'], bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.savefig('degrom_speed_density.png')

In [None]:
fig, ax = plt.subplots(figsize=(12,3))
sns.set_theme(style='ticks')
sns.color_palette('hls', 8)
ax.set_title('Gerrit Cole: spin rates by type of pitch')
ax.set_xlabel('Spin rate out of hand')
sns.kdeplot(cole.release_spin_rate, hue=cole.pitch_type, fill=True, alpha=.5, linewidth=2)
#Make sure legend labels are in order
ax.legend(['Curveball','Changeup','Slider','Fastball'], bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.savefig('cole_spin_density.png')

In [None]:
fig, ax = plt.subplots(figsize=(12,3))
sns.set_theme(style='ticks')
sns.color_palette('hls', 8)
ax.set_title('Jacob deGrom: spin rates by type of pitch')
ax.set_xlabel('Spin rate out of hand')
sns.kdeplot(degrom.release_spin_rate, hue=degrom.pitch_type, fill=True, alpha=.5, clip=(1230, 3100), linewidth=2)
#Make sure legend labels are in order
ax.legend(['Curveball','Changeup', 'Fastball','Slider'], bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.savefig('degrom_spin_density.png')

In [None]:
fig, ax = plt.subplots(figsize=(12,3))
sns.set_theme(style='ticks')
sns.color_palette('hls', 8)
ax.set_title('Gerrit Cole: horizontal movement')
ax.set_xlabel('Horizontal movement (in.) of the pitch between the release point and home plate.')
sns.kdeplot(cole.pfx_x, hue=cole.pitch_type, fill=True, alpha=.5, linewidth=2)
#Make sure legend labels are in order
ax.legend(['Curveball','Changeup','Slider','Fastball'], bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.savefig('cole_lateral_density.png')

In [None]:
fig, ax = plt.subplots(figsize=(12,3))
sns.set_theme(style='ticks')
sns.color_palette('hls', 8)
ax.set_title('Jacob deGrom: horizontal movement')
ax.set_xlabel('Horizontal movement (in.) of the pitch between the release point and home plate.')
sns.kdeplot(degrom.pfx_x, hue=degrom.pitch_type, fill=True, alpha=.5, linewidth=2)
#Make sure legend labels are in order
ax.legend(['Curveball','Changeup', 'Fastball','Slider'], bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.savefig('degrom_lateral_density.png')

In [None]:
# Speed vs. spin rate for both pitchers, colored by pitcher.
# (A bare sns.color_palette(...) call only returns a palette and has no effect
# on the plot, so it is not called here; pass palette=... to pairplot to
# change the colors.)
sns.set_theme(style='ticks')
sns.pairplot(df[['player_name', 'release_speed', 'release_spin_rate']], hue='player_name');

In [None]:
# Pairwise relationships between speed, spin and movement for Cole's pitches,
# colored by pitch type.
# (A bare sns.color_palette(...) call only returns a palette and has no effect
# on the plot, so it is not called here; pass palette=... to pairplot to
# change the colors.)
sns.set_theme(style='ticks')
sns.pairplot(cole[['pitch_type', 'release_speed', 'release_spin_rate', 'pfx_x', 'pfx_z']], hue='pitch_type');

In [None]:
# Pairwise relationships between speed, spin and movement for deGrom's
# pitches, colored by pitch type.
# (A bare sns.color_palette(...) call only returns a palette and has no effect
# on the plot, so it is not called here; pass palette=... to pairplot to
# change the colors.)
sns.set_theme(style='ticks')
sns.pairplot(degrom[['pitch_type', 'release_speed', 'release_spin_rate', 'pfx_x', 'pfx_z']], hue='pitch_type');

In [None]:
# Grid of scatter plots: spin rate against release speed, one row per pitcher
# id and one column per pitch type.
g = sns.FacetGrid(data=df, row='pitcher', col='pitch_type').map(
    sns.scatterplot, 'release_speed', 'release_spin_rate'
)

In [None]:
# Grid of release-speed histograms, one row per pitcher id and one column per
# pitch type.
h = sns.FacetGrid(data=df, row='pitcher', col='pitch_type').map(
    plt.hist, 'release_speed'
)