The code below merges the 2020 spin direction data provided by [Bill Petti](https://twitter.com/BillPetti) with the 2020 Baseball Savant data.

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import math
# Show (up to) 999 columns when a DataFrame is displayed, so wide frames are not truncated.
pd.set_option('display.max_columns', 999)

In [None]:
# Load the 2020 Statcast pitch-by-pitch export.
pitch_stat = pd.read_csv('/kaggle/input/mlb-statcast-data/Statcast_2020.csv')

# Keep only rows with a possible count (at most 3 balls and 2 strikes); rows
# with a missing count are dropped too because the comparisons are False for NaN.
legal_count = (pitch_stat['balls'] <= 3) & (pitch_stat['strikes'] <= 2)

# .copy() makes the filtered frame independent of the raw one, so adding
# columns to it later does not raise pandas' SettingWithCopyWarning.
pitch_stat = pitch_stat.loc[legal_count].copy()

Calculate the inferred pitch data using [Alan Nathan](https://twitter.com/pobguy)'s calculator

In [None]:
# Constants used by the inferred-movement / inferred-spin calculation.
GRAVITY_FT_S2 = 32.174    # gravitational acceleration in ft/s^2
FT_S_PER_MPH = 1.467      # ft/s per mph (release_speed is compared in mph)
PLATE_FRONT_FT = 17 / 12  # 17 inches in feet; presumably the front edge of home plate
K_AIR = 5.153E-03         # divides drag/lift acceleration by vbar^2; presumably the
                          # air-density constant from the calculator -- confirm
# NOTE(review): the release point is taken as 60 ft minus the extension. Some
# published versions of this calculation are believed to use 60.5 ft -- confirm.
RELEASE_REF_FT = 60


def add_inferred_pitch_metrics(pitches):
    """Return a copy of ``pitches`` with inferred movement and spin columns added.

    The input frame is not modified. It must contain the columns
    ``release_extension``, ``release_speed``, ``release_spin_rate``,
    ``release_pos_x``, ``release_pos_z``, ``plate_x``, ``plate_z``,
    ``vx0``, ``vy0``, ``vz0``, ``ax``, ``ay``, ``az``, ``description`` and
    ``estimated_woba_using_speedangle``.

    Columns added, in this order: ``yR, tR, vxR, vyR, vzR, dv0, tf, x_mvt,
    z_mvt, vxbar, vybar, vzbar, vbar, adrag, Cd, amagx, amagy, amagz, amag,
    Mx, Mz, Cl, S, spinT, spinTX, spinTY, spinTZ, spin_check, phi, spin_eff,
    flag, flag_2``.

    Parameters
    ----------
    pitches : pandas.DataFrame
        One row per pitch.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus the ones listed above.
    """
    # Work on a private copy so the caller's frame is untouched and the cell
    # can be re-run without SettingWithCopyWarning.
    df = pitches.copy()
    g = GRAVITY_FT_S2

    # --- State at release ------------------------------------------------
    # Constant-acceleration kinematics in y, solved for the time at which the
    # ball is at yR relative to the y = 50 reference of vx0/vy0/vz0.
    df['yR'] = RELEASE_REF_FT - df['release_extension']
    df['tR'] = (-df['vy0'] - (df['vy0']**2 - 2*df['ay']*(50 - df['yR']))**0.5) / df['ay']
    df['vxR'] = df['vx0'] + df['ax']*df['tR']
    df['vyR'] = df['vy0'] + df['ay']*df['tR']
    df['vzR'] = df['vz0'] + df['az']*df['tR']
    # Difference between the reported release speed and the reconstructed one.
    df['dv0'] = df['release_speed'] - (df['vxR']**2 + df['vyR']**2 + df['vzR']**2)**0.5 / FT_S_PER_MPH
    # Flight time from yR to y = PLATE_FRONT_FT.
    df['tf'] = (-df['vyR'] - (df['vyR']**2 - 2*df['ay']*(df['yR'] - PLATE_FRONT_FT))**0.5) / df['ay']

    # --- Movement: deviation from a straight line (z also removes gravity) -
    df['x_mvt'] = (df['plate_x'] - df['release_pos_x']
                   - (df['vxR']/df['vyR'])*(PLATE_FRONT_FT - df['yR']))
    df['z_mvt'] = (df['plate_z'] - df['release_pos_z']
                   - (df['vzR']/df['vyR'])*(PLATE_FRONT_FT - df['yR'])
                   + 0.5*g*df['tf']**2)

    # --- Average velocity over the flight --------------------------------
    for axis in 'xyz':
        df[f'v{axis}bar'] = (2*df[f'v{axis}R'] + df[f'a{axis}']*df['tf'])/2
    df['vbar'] = (df['vxbar']**2 + df['vybar']**2 + df['vzbar']**2)**0.5

    # --- Drag: non-gravity acceleration projected onto the average velocity
    df['adrag'] = -(df['ax']*df['vxbar'] + df['ay']*df['vybar'] + (df['az'] + g)*df['vzbar'])/df['vbar']
    df['Cd'] = df['adrag']/(K_AIR*df['vbar']**2)

    # --- Remaining acceleration once gravity and drag are removed --------
    df['amagx'] = df['ax'] + df['adrag']*df['vxbar']/df['vbar']
    df['amagy'] = df['ay'] + df['adrag']*df['vybar']/df['vbar']
    df['amagz'] = df['az'] + df['adrag']*df['vzbar']/df['vbar'] + g
    df['amag'] = (df['amagx']**2 + df['amagy']**2 + df['amagz']**2)**0.5

    # Displacement produced by that acceleration over the flight; the factor
    # 12 presumably converts feet to inches.
    df['Mx'] = 0.5*df['amagx']*df['tf']**2*12
    df['Mz'] = 0.5*df['amagz']*df['tf']**2*12

    # --- Lift coefficient -> spin factor -> spin -------------------------
    # 0.4, 2.32 and 78.92 are kept exactly as in the original calculation;
    # presumably an empirical Cl(S) fit and a conversion to rpm -- confirm.
    df['Cl'] = df['amag']/(K_AIR*df['vbar']**2)
    df['S'] = 0.4*df['Cl']/(1 - 2.32*df['Cl'])
    df['spinT'] = 78.92*df['S']*df['vbar']

    # Spin components point along (vbar x amag), scaled to magnitude spinT.
    norm = df['amag']*df['vbar']
    df['spinTX'] = df['spinT']*(df['vybar']*df['amagz'] - df['vzbar']*df['amagy'])/norm
    df['spinTY'] = df['spinT']*(df['vzbar']*df['amagx'] - df['vxbar']*df['amagz'])/norm
    df['spinTZ'] = df['spinT']*(df['vxbar']*df['amagy'] - df['vybar']*df['amagx'])/norm

    # Length of the component vector minus spinT (a consistency check).
    df['spin_check'] = (df['spinTX']**2 + df['spinTY']**2 + df['spinTZ']**2)**0.5 - df['spinT']

    # --- Direction of the x/z acceleration in degrees, mapped to [0, 360) -
    phi = np.arctan2(df['amagz'], df['amagx'])*180/np.pi
    df['phi'] = phi + np.where(df['amagz'] < 0, 360, 0)
    df['spin_eff'] = df['spinT']/df['release_spin_rate']

    # --- Indicator columns (missing values compare False -> 0) -----------
    df['flag'] = df['description'].eq('swinging_strike').astype(int)
    df['flag_2'] = df['estimated_woba_using_speedangle'].ge(0).astype(int)

    return df


pitch_stat = add_inferred_pitch_metrics(pitch_stat)

Read the spin direction data and merge it with the Baseball Savant data

In [None]:
# Columns shared by both tables that are used to line up rows.
# NOTE(review): presumably these identify a single pitch; if a key combination
# repeats on either side the inner join will duplicate rows -- verify.
SPIN_MERGE_KEYS = ['batter', 'pitcher', 'game_pk', 'pitch_number', 'inning']

spin_dir = pd.read_csv('/kaggle/input/spin-direction-2020/spin_direction_pbp.csv')

# Inner join: only rows present in both tables are kept.
pitch_stat = pitch_stat.merge(spin_dir, how='inner', on=SPIN_MERGE_KEYS)

In [None]:
# Preview the first five rows of the merged frame.
pitch_stat.head()

As an example, we want to see whether our calculation matches the calculation by [Barton Smith](https://twitter.com/NotRealCertain) for Max Scherzer's changeup

![Image](https://i.imgur.com/bRHIrFN.jpg)

In [None]:
# Changeups ('CH') that have a description and a positive release speed.
is_changeup = pitch_stat['pitch_type'].eq('CH')
has_description = pitch_stat['description'].notna()
has_speed = pitch_stat['release_speed'].gt(0)

changeup_df = pitch_stat[is_changeup & has_description & has_speed]

In [None]:
# Pitcher id selected for the Max Scherzer example plotted below.
SCHERZER_ID = 453286

# Identity line (inferred == observed) used as the visual reference.
x = np.arange(360)
y = np.arange(360)

# Select the pitcher's changeups once instead of re-filtering per column.
scherzer_ch = changeup_df[changeup_df['pitcher'] == SCHERZER_ID]

# The inferred axis is phi shifted by 90 degrees. phi lies in [0, 360), so the
# shifted value can reach 450; wrap it back into [0, 360) so pitches with
# phi >= 270 are drawn instead of falling outside the axis limits.
inferred_axis = (scherzer_ch['phi'] + 90) % 360

fig, ax = plt.subplots()
ax.scatter(inferred_axis, scherzer_ch['release_spin_direction'])
ax.plot(x, y)
ax.set_xlim(0, 360)
ax.set_ylim(0, 360)
ax.set_xlabel('Inferred Axis')
ax.set_ylabel('Observed Axis')
ax.set_title('Max Scherzer Changeup 2020')
plt.show()

We can see that the code above reproduces the graph pretty well