In [63]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Rectangle
import numpy as np
from scipy.stats import rankdata 
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
import matplotlib.lines 
from matplotlib.lines import Line2D
from scipy.stats import gaussian_kde
import pickle as pkl

In [64]:
df = pd.read_csv('mlb pitch.csv')

In [65]:
#group all fastballs and sinkers together and name them 'Fastball'
#df.loc[df['TaggedPitchType'].str.contains('Sinker'), 'TaggedPitchType'] = 'Fastball'

df.loc[df['TaggedPitchType'].str.contains('FastBall'), 'TaggedPitchType'] = 'Fastball'

df.loc[df['TaggedPitchType'].str.contains('FourSeamFastBall'), 'TaggedPitchType'] = 'Fastball'
df.loc[df['TaggedPitchType'].str.contains('TwoSeamFastBall'), 'TaggedPitchType'] = 'Fastball'

In [66]:
pd.set_option('display.max_columns', None)
dft = df[['Pitcher','TaggedPitchType','PitchCall','RelSpeed', 'SpinRate','RelHeight','Extension','InducedVertBreak','HorzBreak','PlateLocSide','PlateLocHeight','ExitSpeed']]

In [67]:
#function calls for name and converts it
def get_pitcher_name():
    first_name = input("Enter the pitchers first name: ")
    last_name = input("Enter the pitchers last name: ")
    pitcher_name = last_name + ", " + first_name
    return pitcher_name

In [68]:
def get_pitch_heatmap(dft,pitcher_name, pitch_type):
    if pitch_type == "Breaking Ball":
        pitch_df = dft.loc[(df['Pitcher'] == pitcher_name) & (dft['TaggedPitchType'].isin(["Slider", "Curveball"]))]
    elif pitch_type == "Fastball":
        pitch_df = dft.loc[(df['Pitcher'] == pitcher_name) & (dft['TaggedPitchType'].isin(["Fastball","Four-Seam", "Sinker","Cutter" ]))]
    elif pitch_type == "Offspeed":
        pitch_df = dft.loc[(dft['Pitcher'] == pitcher_name) & (dft['TaggedPitchType'].isin(["ChangeUp", "Splitter"]))]
    x = -pitch_df['PlateLocSide']
    y = pitch_df['PlateLocHeight']
    k = gaussian_kde(np.vstack([x, y]))
    xi, yi = np.mgrid[-2:2:100j, 0:5:100j]
    zi = k(np.vstack([xi.flatten(), yi.flatten()]))


    return xi, yi, zi.reshape(xi.shape)

    



In [69]:
#create a pdf report for each pitcher
def create_pdf_report_2(dft, pitcher_name):
    #call pitcher's name
    pitcher_data = dft[dft['Pitcher'] == pitcher_name]
    first_name, last_name = pitcher_name.split(', ')
    formatted_pitcher_name = f"{last_name} {first_name}"
    #top 10 velo's
    velo = pitcher_data[pitcher_data['Pitcher'] == pitcher_name]
    velo = velo.sort_values(by = 'RelSpeed', ascending = False)
    #remove pitchcall, platelocheight, platelocside column
    velo = velo.drop(columns = ['PlateLocHeight', 'PlateLocSide', 'PitchCall','ExitSpeed'])
    velo = velo.round(2)
    velo = velo.head(10)
    #pitch type averages
    pitch_type_averages = pitcher_data.groupby('TaggedPitchType')[['RelSpeed', 'SpinRate','RelHeight','Extension','InducedVertBreak', 'HorzBreak']].mean()
    pitch_type_averages = pitch_type_averages.round(2)
    #add taggepitchtype column to the front
    pitch_type_averages = pitch_type_averages.reset_index()
    
    #calculate the strike percentage for each pitch type
    total_pitches = pitcher_data.groupby('TaggedPitchType').size()
    strikes = pitcher_data[pitcher_data['PitchCall'].isin(['StrikeCalled', 'StrikeSwinging','FoulBall','InPlay'])].groupby('TaggedPitchType').size()
    strike_percentages = (strikes / total_pitches) * 100
    strike_percentages = strike_percentages.fillna(0)
    #create a dataframe with strike% and taggepitchtype as the index
    strike_percentages = strike_percentages.reset_index()
    strike_percentages.columns = ['TaggedPitchType', 'Strike%']
    strike_percentages = strike_percentages.round(2)
    
    
    #calculate whiff and csw
    total_swings = pitcher_data[pitcher_data['PitchCall'].isin(['StrikeSwinging','FoulBall','InPlay'])].groupby('TaggedPitchType').size()
    whiffs = pitcher_data[pitcher_data['PitchCall'] == 'StrikeSwinging'].groupby('TaggedPitchType').size()
    called = pitcher_data[pitcher_data['PitchCall'] == 'StrikeCalled'].groupby('TaggedPitchType').size()
    count = pitcher_data[pitcher_data['PitchCall'].isin(['BallCalled', 'StrikeCalled', 'FoulBall', 'InPlay',
        'StrikeSwinging', 'BallinDirt', 'HitByPitch', 'Undefined'])].groupby('TaggedPitchType').size()
    csw = ((whiffs + called) / count) * 100
    whiff_percentages = (whiffs / total_swings) * 100
    whiff_percentages = whiff_percentages.fillna(0)
    #create a dataframe with whiff% and csw% and taggepitchtype as the index
    whiff_csw = pd.concat([whiff_percentages, csw], axis = 1)
    whiff_csw.columns = ['Whiff%', 'CSW%']
    whiff_csw = whiff_csw.round(2)
   #add taggedpitchtype as the index
    whiff_csw = whiff_csw.reset_index()


    #count the number of InPlay for each pitch type
    inplay_count = pitcher_data[pitcher_data['PitchCall'] == 'InPlay'].groupby('TaggedPitchType').size()
    inplay_count = inplay_count.reset_index()
    inplay_count.columns = ['TaggedPitchType', 'InPlay Count']
    #count the number of InPlay with an exit speed greater than 95mph
    inplay_count_95 = pitcher_data[(pitcher_data['PitchCall'] == 'InPlay') & (pitcher_data['ExitSpeed'] >= 95)].groupby('TaggedPitchType').size()
    inplay_count_95 = inplay_count_95.reset_index()
    inplay_count_95.columns = ['TaggedPitchType', 'InPlay Count 95+']
    #calculate the percentage of inplay with an exit speed greater than 95mph
    inplay_count_95['InPlay Count 95+%'] = (inplay_count_95['InPlay Count 95+'] / inplay_count['InPlay Count']) * 100
    #merge inplay_count and inplay_count_95
    inplay_count = inplay_count.merge(inplay_count_95, on = 'TaggedPitchType', how = 'outer')
  
    inplay_count = inplay_count.fillna(0)
    #merge inplay_count and whiff_csw
    whiff_csw = whiff_csw.merge(inplay_count, on = 'TaggedPitchType', how = 'outer')
    
    whiff_csw = whiff_csw.fillna(0)
    #round to 1 decimal places
    whiff_csw = whiff_csw.round(1)
  
    whiff_csw = whiff_csw.drop(columns = ['InPlay Count 95+', 'InPlay Count'])
    whiff_csw = whiff_csw.rename(columns = {'InPlay Count 95+%': 'HardHit%'})
    
    

    

    #create a piechart with pitch type percentages
    pitch_type_percentages = pitcher_data.groupby('TaggedPitchType').size()
    pitch_type_percentages = pitch_type_percentages.reset_index()
    pitch_type_percentages.columns = ['TaggedPitchType', 'Count']
    pitch_type_percentages = pitch_type_percentages.sort_values(by = 'Count', ascending = False)
    pitch_type_percentages = pitch_type_percentages.reset_index(drop = True)
    pitch_type_percentages['Percentage'] = (pitch_type_percentages['Count'] / pitch_type_percentages['Count'].sum()) * 100
    pitch_type_percentages = pitch_type_percentages.round(2)
    pitch_type_percentages = pitch_type_percentages.head(5)
    #create a pie chart
    

    #plot pitch movement
    pitch_colors = {
        "FourSeamFastBall": "red",
        "TwoSeamFastBall": "blue",
        "Sinker": "blue",
        "Cutter": "violet",
        "Fastball": "black",
        "Curveball": "green",
        "Knuckle Curve": "pink",
        "Slider": "orange",
        "ChangeUp": "purple",
        "Splitter": "beige",
        "Knuckleball": "gold"
    }

    pitch_types = pitcher_data['TaggedPitchType'].unique()

    for pitch_type in pitch_types:
        subset = pitcher_data[pitcher_data['TaggedPitchType'] == pitch_type]
        plt.scatter(subset['HorzBreak'], subset['InducedVertBreak'], color=pitch_colors[pitch_type], label=pitch_type, alpha=0.5)

    plt.axvline(x=0, color='black', linestyle='--')
    plt.axhline(y=0, color='black', linestyle='--')

    plt.ylabel('Induced Vertical Break')
    plt.xlabel('Horizontal Break')
    plt.title(' Pitch Movement')
    plt.xlim(-30, 30)
    plt.ylim(-30, 30)
    plt.xticks(np.arange(-20, 21, 5))
    plt.yticks(np.arange(-20, 21, 5))
    plt.gca().set_aspect('equal', adjustable='box')
    plt.legend(title='Pitch Type')
    #move the legend outside the plot
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
    plt.tight_layout()
  
    plt.savefig('pitch_movement.png')
    plt.close()

    

    

    dft = dft.replace([np.inf, -np.inf], np.nan)
    #if NaN, take mean
    dft = dft.fillna(df.mean())
    #heatmaps
    fastball_heatmap = get_pitch_heatmap(dft, pitcher_name, 'Fastball')
    breakingball_heatmap = get_pitch_heatmap(dft, pitcher_name, 'Breaking Ball')
    dft['PlateLocSide'].replace([np.inf, -np.inf], np.nan, inplace=True)
    dft['PlateLocSide'].fillna(dft['PlateLocSide'].median(), inplace=True)
    

    dft['PlateLocHeight'].replace([np.inf, -np.inf], np.nan, inplace=True)
    dft['PlateLocHeight'].fillna(dft['PlateLocHeight'].median(), inplace=True)
    offspeed_heatmap = get_pitch_heatmap(dft, pitcher_name, 'Offspeed')
    
    #create pdf
    pdf = PdfPages(f'{formatted_pitcher_name}.pdf')
    fig = plt.figure(figsize=(8.5,11))
    gs = GridSpec(4, 1, figure=fig)
    fig.patch.set_facecolor('aliceblue')
    #xkcd:light blue
    #add title to top of page
    fig.suptitle(f'{formatted_pitcher_name} 2023', fontsize=25)
    ax9 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[1, 0])
    ax3 = fig.add_subplot(gs[2, 0])
    ax4 = fig.add_subplot(gs[3, 0])
    #ax1.axis('off')
    ax2.axis('off')
    ax3.axis('off')
    ax4.axis('off')
    #ax1.set_title( 'Top 10 Fastball Velo\'s')
    #ax1.table(cellText=velo.values, colLabels=velo.columns, loc='center')
    #ax2.set_title('Pitch Type Averages')
    #ax2.set_title('Pitch Type Averages')
    #increase titile size
    ax2.title.set_size(20)
    ax2.table(cellText=pitch_type_averages.values, colLabels=pitch_type_averages.columns, loc='center')

    #ax3.set_title('Whiff% and CSW%')
    ax3.table(cellText=whiff_csw.values, colLabels=whiff_csw.columns, loc='center')
            

    #make ax3 smaller
    ax3.set_position([0.60, 0.32, 0.35, 0.2])

    #move ax4 to the bottom of the page

    ax4.set_title('Fastball', fontsize=16)
    ax4.imshow(fastball_heatmap[2], cmap='Oranges', interpolation='nearest', extent=[-2, 2, 0, 5], origin='lower', aspect='auto')
    ax4.set_aspect('equal')
    ax4.set_position([0.05, 0.05, 0.35, 0.25])
    #add a rectangle to the plot of the heatmap to show the strike zone
    ax4.add_patch(Rectangle((-0.83, 1.5), 1.66, 2.1, linewidth=1, edgecolor='w', facecolor='none'))
    ax4.invert_yaxis()
    ax4.imshow(np.rot90(fastball_heatmap[2],3), cmap='inferno', interpolation='nearest', extent=[-2, 2, 0, 5,], origin='lower', aspect='auto')
    ax4.set_aspect('equal')

    ax5 = fig.add_subplot(gs[3, 0])
    ax5.axis('off')
    ax5.set_title('Breaking Ball', fontsize=16)
    ax5.imshow(breakingball_heatmap[2], cmap='Oranges', interpolation='nearest', extent=[-2, 2, 0, 5], origin='lower', aspect='auto')
    ax5.set_aspect('equal')
    ax5.set_position([0.35, 0.05, 0.35, 0.25])
    ax5.add_patch(Rectangle((-0.83, 1.5), 1.66, 2.1, linewidth=1, edgecolor='w', facecolor='none'))
    ax5.invert_yaxis()
    ax5.imshow(np.rot90(breakingball_heatmap[2],3), cmap='inferno', interpolation='nearest', extent=[-2, 2, 0, 5,], origin='lower', aspect='auto')
    ax5.set_aspect('equal')

    ax6 = fig.add_subplot(gs[3, 0])
    ax6.axis('off')
    ax6.set_title('Offspeed', fontsize=16)
    ax6.imshow(offspeed_heatmap[2], cmap='Oranges', interpolation='nearest', extent=[-2, 2, 0, 5], origin='lower', aspect='auto')
    ax6.set_aspect('equal')
    ax6.set_position([0.65, 0.05, 0.35, 0.25])
    ax6.add_patch(Rectangle((-0.83, 1.5), 1.66, 2.1, linewidth=1, edgecolor='w', facecolor='none'))
    ax6.invert_yaxis()
    ax6.imshow(np.rot90(offspeed_heatmap[2],3), cmap='inferno', interpolation='nearest', extent=[-2, 2, 0, 5,], origin='lower', aspect='auto')
    ax6.set_aspect('equal')

    #add a pie chart to the bottom of the page
    ax7 = fig.add_subplot(gs[3, 0])
    ax7.axis('off')
    ax7.pie(pitch_type_percentages['Percentage'], labels=pitch_type_percentages['TaggedPitchType'], autopct='%1.1f%%', shadow=True, startangle=90)
    ax7.set_position([0.05, 0.30, 0.25, 0.25])

    #add strike percentage to the bottom of the page
    ax8 = fig.add_subplot(gs[3, 0])
    ax8.axis('off')
    ax8.table(cellText=strike_percentages.values, colLabels=strike_percentages.columns, loc='center')
    ax8.set_position([0.37, 0.32, 0.2, 0.2])

    ax9.axis('off')
    ax9.imshow(plt.imread('pitch_movement.png'))

    ax9.set_position([0.27, 0.55, 0.5, 0.5])

    pdf.savefig(fig)
    plt.close()
    pdf.close()


    
    

In [70]:
create_pdf_report_2(dft, get_pitcher_name())

  dft = dft.fillna(df.mean())
