In [51]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 
 

In [52]:
# Reads the CSV holding Jake Arrieta's pitch data into the dataframe `df`.
# NOTE(review): the path is absolute and machine-specific, so this cell only runs where that file exists.

arrieta_csv = '/Users/mason_cotter/Desktop/python_projects/baseball_ds/Jake_Arrieta_CyYoung.csv'
df = pd.read_csv(arrieta_csv)


In [53]:
# Total number of starts: the number of distinct game_pk values in the data

distinct_games = df['game_pk'].unique()
games_played = len(distinct_games)
print('Number of games played:', games_played)

Number of games played: 33


In [54]:
# Total number of pitches thrown: every row of the pitch_type column is counted, missing values included

num_pitch = df['pitch_type'].shape[0]
print('Number of pitches thrown:', num_pitch)

Number of pitches thrown: 3438


In [55]:
# Distinct pitch names that appear in the data (a missing name shows up as nan)

pitch_types = df['pitch_name'].unique()
print('Unique pitch types:', pitch_types)

Unique pitch types: ['Slider' 'Sinker' 'Curveball' '4-Seam Fastball' 'Changeup'
 'Intentional Ball' nan]


In [56]:
# Builds a dataframe with the name of each pitch and that pitch's average release speed.

speed_by_pitch = df.groupby('pitch_name')['release_speed']
pitch_data = speed_by_pitch.mean().reset_index()
print(pitch_data)



         pitch_name  release_speed
0   4-Seam Fastball      95.067638
1          Changeup      89.489308
2         Curveball      81.246388
3  Intentional Ball      82.966667
4            Sinker      95.345991
5            Slider      90.807078


In [57]:
# Builds pitch_usage: the fraction of all pitches accounted for by each pitch name.
# The column labels produced by reset_index() are not the intended ones; they are replaced in the next cell.

usage_share = df['pitch_name'].value_counts(normalize=True)
pitch_usage = usage_share.reset_index()
print(pitch_usage)

              index  pitch_name
0            Sinker    0.330615
1            Slider    0.288086
2   4-Seam Fastball    0.180017
3         Curveball    0.153219
4          Changeup    0.046315
5  Intentional Ball    0.001748


In [58]:
# Gives pitch_usage its intended column labels

usage_labels = ['pitch_name', 'pitch_frequency']
pitch_usage.columns = usage_labels
print(pitch_usage.head())



        pitch_name  pitch_frequency
0           Sinker         0.330615
1           Slider         0.288086
2  4-Seam Fastball         0.180017
3        Curveball         0.153219
4         Changeup         0.046315


In [59]:
# Brings the pitch_frequency column into pitch_data.
# No join key is given, so the merge uses the columns the two frames have in common.

pitch_data = pitch_data.merge(pitch_usage)
print(pitch_data)


         pitch_name  release_speed  pitch_frequency
0   4-Seam Fastball      95.067638         0.180017
1          Changeup      89.489308         0.046315
2         Curveball      81.246388         0.153219
3  Intentional Ball      82.966667         0.001748
4            Sinker      95.345991         0.330615
5            Slider      90.807078         0.288086


In [60]:
# Counts the pitches thrown to each side of the plate from the batter's stance column ('stand').
# Summing a boolean mask scans the column once per side without a Python-level loop.

# Total pitches thrown to left-handed batters
num_left = int((df['stand'] == 'L').sum())
print('Number of pitches thrown to left-handed batters:', num_left)

# Total pitches thrown to right-handed batters
num_right = int((df['stand'] == 'R').sum())
print('Number of pitches thrown to right-handed batters:', num_right)

Number of pitches thrown to left-handed batters: 1695
Number of pitches thrown to right-handed batters: 1743


In [61]:
def pitch_L(x, data=None):
    '''
    Counts how many pitches of a particular type were thrown to left-handed batters.

    Parameters
    ----------
    x : object
        Pitch name to count. It is converted with ``str`` and compared with
        the ``pitch_name`` column (for example ``'Slider'``).
    data : pandas.DataFrame, optional
        Frame with ``pitch_name`` and ``stand`` columns. When omitted, the
        notebook-level ``df`` is used.

    Returns
    -------
    int
        Number of rows whose ``pitch_name`` equals ``x`` and whose ``stand``
        is ``'L'``.
    '''
    frame = df if data is None else data
    # A boolean mask replaces the per-row ``df[col][i]`` lookups: those are
    # label lookups, so they fail on any frame whose index is not 0..n-1
    # (e.g. after filtering), and they loop in Python over every row.
    matches = (frame['pitch_name'] == str(x)) & (frame['stand'] == 'L')
    return int(matches.sum())

def pitch_R(x, data=None):
    '''
    Counts how many pitches of a particular type were thrown to right-handed batters.

    Parameters
    ----------
    x : object
        Pitch name to count. It is converted with ``str`` and compared with
        the ``pitch_name`` column (for example ``'Slider'``).
    data : pandas.DataFrame, optional
        Frame with ``pitch_name`` and ``stand`` columns. When omitted, the
        notebook-level ``df`` is used.

    Returns
    -------
    int
        Number of rows whose ``pitch_name`` equals ``x`` and whose ``stand``
        is ``'R'``.
    '''
    frame = df if data is None else data
    # A boolean mask replaces the per-row ``df[col][i]`` lookups: those are
    # label lookups, so they fail on any frame whose index is not 0..n-1
    # (e.g. after filtering), and they loop in Python over every row.
    matches = (frame['pitch_name'] == str(x)) & (frame['stand'] == 'R')
    return int(matches.sum())

In [62]:
# Totals for each pitch type thrown to right-handed batters, computed with pitch_R

FF_R = pitch_R('4-Seam Fastball')
Change_R = pitch_R("Changeup")
Curve_R = pitch_R("Curveball")
IBall_R = pitch_R("Intentional Ball")
Sink_R = pitch_R("Sinker")
Slide_R = pitch_R("Slider")

# Print one total per line, in the order the pitches are listed above
for right_total in (FF_R, Change_R, Curve_R, IBall_R, Sink_R, Slide_R):
    print(right_total)

337
27
247
6
578
545


In [63]:
# Totals for each pitch type thrown to left-handed batters, computed with pitch_L

FF_L = pitch_L("4-Seam Fastball")
Change_L = pitch_L("Changeup")
Curve_L = pitch_L("Curveball")
IBall_L = pitch_L("Intentional Ball")
Sink_L = pitch_L("Sinker")
Slide_L = pitch_L("Slider")

# Print one total per line, in the order the pitches are listed above
for left_total in (FF_L, Change_L, Curve_L, IBall_L, Sink_L, Slide_L):
    print(left_total)

281
132
279
0
557
444


In [64]:
# Collects the pitch_L and pitch_R totals into a two-column dataframe.
# Both lists follow the same pitch order: 4-seam, changeup, curveball, intentional ball, sinker, slider.
vs_left = [FF_L, Change_L, Curve_L, IBall_L, Sink_L, Slide_L]
vs_right = [FF_R, Change_R, Curve_R, IBall_R, Sink_R, Slide_R]
pitch_LR = {"VS L": vs_left, "VS R": vs_right}
pitch_LR_1 = pd.DataFrame(pitch_LR)
print(pitch_LR_1)

   VS L  VS R
0   281   337
1   132    27
2   279   247
3     0     6
4   557   578
5   444   545


In [65]:
# Appends the VS L / VS R totals to pitch_data.
# The frames are matched on their row index, so this relies on pitch_LR_1 listing
# the pitches in the same order as the rows of pitch_data.

pitch_data = pitch_data.merge(pitch_LR_1, left_index=True, right_index=True)
print(pitch_data)

         pitch_name  release_speed  pitch_frequency  VS L  VS R
0   4-Seam Fastball      95.067638         0.180017   281   337
1          Changeup      89.489308         0.046315   132    27
2         Curveball      81.246388         0.153219   279   247
3  Intentional Ball      82.966667         0.001748     0     6
4            Sinker      95.345991         0.330615   557   578
5            Slider      90.807078         0.288086   444   545


In [66]:
# Adds the number of pitches of each type to pitch_data and puts the columns in display order

counts_by_name = df.value_counts('pitch_name').reset_index()
pitch_count = pd.DataFrame(counts_by_name)
pitch_count.columns = ['pitch_name', 'pitch_count']

pitch_data = pitch_data.merge(pitch_count, left_on='pitch_name', right_on='pitch_name')

column_order = ['pitch_name', 'pitch_count', 'pitch_frequency', 'release_speed', 'VS L', 'VS R']
pitch_data = pitch_data.reindex(columns=column_order)

print(pitch_data)



         pitch_name  pitch_count  pitch_frequency  release_speed  VS L  VS R
0   4-Seam Fastball          618         0.180017      95.067638   281   337
1          Changeup          159         0.046315      89.489308   132    27
2         Curveball          526         0.153219      81.246388   279   247
3  Intentional Ball            6         0.001748      82.966667     0     6
4            Sinker         1135         0.330615      95.345991   557   578
5            Slider          989         0.288086      90.807078   444   545


In [67]:
# adding number of hits per pitch 

def hits(x, data=None):
    '''
    Counts the pitches of a particular type whose ``type`` value is ``'X'``.

    NOTE(review): in Statcast exports ``type == 'X'`` appears to mark any ball
    put in play (outs included), not only hits -- confirm against the Baseball
    Savant CSV documentation before reporting this number as hits.

    Parameters
    ----------
    x : object
        Pitch name to count. It is converted with ``str`` and compared with
        the ``pitch_name`` column (for example ``'Slider'``).
    data : pandas.DataFrame, optional
        Frame with ``pitch_name`` and ``type`` columns. When omitted, the
        notebook-level ``df`` is used.

    Returns
    -------
    int
        Number of rows whose ``pitch_name`` equals ``x`` and whose ``type``
        is ``'X'``.
    '''
    frame = df if data is None else data
    # A boolean mask replaces the per-row ``df[col][i]`` lookups: those are
    # label lookups, so they fail on any frame whose index is not 0..n-1
    # (e.g. after filtering), and they loop in Python over every row.
    matches = (frame['pitch_name'] == str(x)) & (frame['type'] == 'X')
    return int(matches.sum())

# Per-pitch totals from hits(); the individual variable names are kept so that
# any other cell referring to them keeps working.
hits_4 = hits('4-Seam Fastball')
hits_change = hits('Changeup')
hits_curve = hits('Curveball')
hits_IB = hits('Intentional Ball')
hits_sinker = hits('Sinker')
hits_slider = hits('Slider')

hits_by_pitch = {
    '4-Seam Fastball': hits_4,
    'Changeup': hits_change,
    'Curveball': hits_curve,
    'Intentional Ball': hits_IB,
    'Sinker': hits_sinker,
    'Slider': hits_slider,
}

# Look each row's total up by its pitch name instead of assigning a list that
# has to be in the same order as the rows; an unknown pitch name raises KeyError
# rather than silently attaching a total to the wrong pitch.
pitch_data['hits'] = [hits_by_pitch[name] for name in pitch_data['pitch_name']]

print(pitch_data)

         pitch_name  pitch_count  pitch_frequency  release_speed  VS L  VS R  \
0   4-Seam Fastball          618         0.180017      95.067638   281   337   
1          Changeup          159         0.046315      89.489308   132    27   
2         Curveball          526         0.153219      81.246388   279   247   
3  Intentional Ball            6         0.001748      82.966667     0     6   
4            Sinker         1135         0.330615      95.345991   557   578   
5            Slider          989         0.288086      90.807078   444   545   

   hits  
0    75  
1    23  
2    78  
3     0  
4   184  
5   219  


In [68]:
# Adds each pitch's average effective speed to pitch_data.
# effective_velo is a Series indexed by pitch_name, which is what the right-hand merge key refers to.
speed_groups = df.groupby('pitch_name')['effective_speed']
effective_velo = speed_groups.mean()
pitch_data = pitch_data.merge(effective_velo, left_on='pitch_name', right_on='pitch_name')
print(pitch_data)

         pitch_name  pitch_count  pitch_frequency  release_speed  VS L  VS R  \
0   4-Seam Fastball          618         0.180017      95.067638   281   337   
1          Changeup          159         0.046315      89.489308   132    27   
2         Curveball          526         0.153219      81.246388   279   247   
3  Intentional Ball            6         0.001748      82.966667     0     6   
4            Sinker         1135         0.330615      95.345991   557   578   
5            Slider          989         0.288086      90.807078   444   545   

   hits  effective_speed  
0    75        94.553909  
1    23        88.956329  
2    78        79.682218  
3     0        81.750000  
4   184        94.628533  
5   219        90.095219  


In [69]:
# Adds each pitch's average release spin rate to pitch_data.
# spin_rate is a Series indexed by pitch_name, which is what the right-hand merge key refers to.
spin_groups = df.groupby('pitch_name')['release_spin_rate']
spin_rate = spin_groups.mean()
pitch_data = pitch_data.merge(spin_rate, left_on='pitch_name', right_on='pitch_name')
print(pitch_data)

         pitch_name  pitch_count  pitch_frequency  release_speed  VS L  VS R  \
0   4-Seam Fastball          618         0.180017      95.067638   281   337   
1          Changeup          159         0.046315      89.489308   132    27   
2         Curveball          526         0.153219      81.246388   279   247   
3  Intentional Ball            6         0.001748      82.966667     0     6   
4            Sinker         1135         0.330615      95.345991   557   578   
5            Slider          989         0.288086      90.807078   444   545   

   hits  effective_speed  release_spin_rate  
0    75        94.553909        2292.386401  
1    23        88.956329        1943.621795  
2    78        79.682218        2688.809249  
3     0        81.750000        2012.333333  
4   184        94.628533        2261.964865  
5   219        90.095219        2222.204724  


In [70]:
# Gives the summary columns their final names.
# Renaming by name (old -> new) keeps every label attached to the right data even if the
# column order changes; assigning a positional list to .columns would silently mislabel them.
# Columns that are not listed (pitch_name, VS L, VS R, hits) keep their names.
final_names = {
    'pitch_count': 'count',
    'pitch_frequency': 'pitch_freq',
    'release_speed': 'avg_release_velo',
    'effective_speed': 'avg_effective_speed',
    'release_spin_rate': 'avg_release_spin',
}
pitch_data = pitch_data.rename(columns=final_names)
print(pitch_data)

         pitch_name  count  pitch_freq  avg_release_velo  VS L  VS R  hits  \
0   4-Seam Fastball    618    0.180017         95.067638   281   337    75   
1          Changeup    159    0.046315         89.489308   132    27    23   
2         Curveball    526    0.153219         81.246388   279   247    78   
3  Intentional Ball      6    0.001748         82.966667     0     6     0   
4            Sinker   1135    0.330615         95.345991   557   578   184   
5            Slider    989    0.288086         90.807078   444   545   219   

   avg_effective_speed  avg_release_spin  
0            94.553909       2292.386401  
1            88.956329       1943.621795  
2            79.682218       2688.809249  
3            81.750000       2012.333333  
4            94.628533       2261.964865  
5            90.095219       2222.204724  
