# Scatterplot Example Using Pitch Breaks
## Full and Condensed Versions of Scatterplots
### The condensed version is my favorite

In [None]:
# RUN THIS FIRST
import pybaseball as pb
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from plotly.subplots import make_subplots

In [None]:
#RUN THIS SECOND
# functions 
## you still need to RUN this cell, but you can skip reading it if you don't care what's under the hood

def pull_edit_data(pfirst, plast, date_start, date_end, game_types, save=False):
    """Pull one pitcher's Statcast pitches and reshape them for break plots.

    Args:
        pfirst: Pitcher's first name (lower-cased before the ID lookup).
        plast: Pitcher's last name (lower-cased before the ID lookup).
        date_start: Start of the date range, passed to ``pb.statcast_pitcher``.
        date_end: End of the date range, passed to ``pb.statcast_pitcher``.
        game_types: List-like of ``game_type`` codes to keep, e.g. ``['R']``.
        save: When True, also write the result to
            ``statcast_data_{pfirst}_{plast}_{date_start}_to_{date_end}.csv``.

    Returns:
        A DataFrame with the columns Pitcher, MLB_AM_ID, Date, Pitch,
        Velocity, Spin, pfx_x, pfx_z, IVB and HB. Rows with a missing value
        in any of the pulled columns are dropped and the index is reset.

    Raises:
        ValueError: If the name lookup returns no player.
    """
    # Cache pybaseball responses so repeated runs don't re-download the same data.
    pb.cache.enable()

    lookup = pb.playerid_lookup(plast.lower(), pfirst.lower()).reset_index(drop=True)
    if lookup.empty:
        # Without this check the failure surfaces as an opaque "KeyError: 0".
        raise ValueError(
            f"No player found for '{pfirst} {plast}'. "
            "Check the spelling against Baseball Savant."
        )
    # When several players match the name, the first row of the lookup is used.
    key_mlbam = lookup.at[0, 'key_mlbam']
    data = pb.statcast_pitcher(date_start, date_end, key_mlbam)

    keep_columns = ['player_name', 'pitcher', 'game_date', 'pitch_type',
                    'release_speed', 'release_spin_rate', 'pfx_x', 'pfx_z']
    wanted_games = data['game_type'].isin(game_types)
    data_specific = data.loc[wanted_games, keep_columns].dropna().reset_index(drop=True)

    # Scale the movement columns by 12 (presumably feet -> inches; confirm
    # against the Statcast docs) and flip the sign of the horizontal one.
    data_specific['IVB'] = data_specific['pfx_z'] * 12
    data_specific['HB'] = data_specific['pfx_x'] * -12
    data_specific = data_specific.rename(columns={
        'player_name': 'Pitcher',
        'pitcher': 'MLB_AM_ID',
        'game_date': 'Date',
        'pitch_type': 'Pitch',
        'release_speed': 'Velocity',
        'release_spin_rate': 'Spin',
    })

    if save:
        data_specific.to_csv(
            f'statcast_data_{pfirst}_{plast}_{date_start}_to_{date_end}.csv',
            index=False,
        )
    return data_specific

# Not used below, but left in as an example of how to pull every pitch thrown in a date range.
# If you want ALL Statcast columns instead of the trimmed set, see the notes inside the function.
def pull_all_edit_data(date_start, date_end, game_types, save=False, all_columns=False):
    """Pull every Statcast pitch in a date range and reshape it for break plots.

    Args:
        date_start: Start of the date range, passed to ``pb.statcast``.
        date_end: End of the date range, passed to ``pb.statcast``.
        game_types: List-like of ``game_type`` codes to keep, e.g. ``['R']``.
        save: When True, also write the result to
            ``statcast_data_{date_start}_to_{date_end}.csv``.
        all_columns: When False (default) keep only the eight columns used for
            break plots and drop rows with a missing value in any of them.
            When True keep every column returned by ``pb.statcast`` and do not
            drop rows (this replaces the old "comment/uncomment" toggle).

    Returns:
        A DataFrame filtered to ``game_types`` with added ``IVB`` and ``HB``
        columns and the renamed columns Pitcher, MLB_AM_ID, Date, Pitch,
        Velocity and Spin.
    """
    # Cache pybaseball responses so repeated runs don't re-download the same data.
    pb.cache.enable()
    data = pb.statcast(date_start, date_end)

    # NOTE: the original called ``.isis(...)``, which does not exist on a
    # pandas Series; ``.isin(...)`` is the membership test that was intended.
    wanted_games = data['game_type'].isin(game_types)
    if all_columns:
        data_specific = data.loc[wanted_games].reset_index(drop=True)
    else:
        keep_columns = ['player_name', 'pitcher', 'game_date', 'pitch_type',
                        'release_speed', 'release_spin_rate', 'pfx_x', 'pfx_z']
        data_specific = data.loc[wanted_games, keep_columns].dropna().reset_index(drop=True)

    # Scale the movement columns by 12 (presumably feet -> inches; confirm
    # against the Statcast docs) and flip the sign of the horizontal one.
    data_specific['IVB'] = data_specific['pfx_z'] * 12
    data_specific['HB'] = data_specific['pfx_x'] * -12
    data_specific = data_specific.rename(columns={
        'player_name': 'Pitcher',
        'pitcher': 'MLB_AM_ID',
        'game_date': 'Date',
        'pitch_type': 'Pitch',
        'release_speed': 'Velocity',
        'release_spin_rate': 'Spin',
    })

    if save:
        data_specific.to_csv(f'statcast_data_{date_start}_to_{date_end}.csv', index=False)
    return data_specific
    

In [None]:
### Run This Third

def agg_func(frame):
    """Summarise pitch movement per pitch type.

    Args:
        frame: DataFrame with at least the columns ``Pitch``, ``HB`` and ``IVB``.

    Returns:
        A DataFrame with one row per pitch type (sorted by ``Pitch``) and the
        columns ``Pitch``, ``NumPitches``, ``HB``, ``IVB``, ``StdHB`` and
        ``StdIVB``. ``HB`` and ``IVB`` are group means rounded to one decimal;
        the ``Std*`` columns are pandas' sample standard deviation (ddof=1),
        so a pitch type with a single row gets NaN there.
    """
    # String aliases are used instead of np.mean / np.std: pandas deprecated
    # NumPy callables in groupby.agg, and calling np.std directly would switch
    # the result from ddof=1 to ddof=0.
    frame_agg = (
        frame.groupby(by=['Pitch'])
        .agg(
            NumPitches=('HB', 'count'),
            HB=('HB', 'mean'),
            IVB=('IVB', 'mean'),
            StdHB=('HB', 'std'),
            StdIVB=('IVB', 'std'),
        )
        .reset_index(drop=False)
    )

    # Only the plotted averages are rounded; the spreads keep full precision.
    return frame_agg.round({'HB': 1, 'IVB': 1})
    
    
def plotting_function(data, size=1000, bar=1, agg=True):
    """Draw a horizontal-vs-vertical break scatter plot for one pitcher.

    Args:
        data: DataFrame with the columns ``Pitcher``, ``Pitch``, ``HB`` and
            ``IVB``. If a ``Date`` column is present, its year(s) are used in
            the plot title.
        size: Value used for both the width and the height of the figure.
        bar: Multiplier applied to the per-pitch standard deviations to size
            the error bars (condensed plot only).
        agg: True draws the condensed plot (one averaged marker with error
            bars per pitch type); False draws every pitch.

    Raises:
        ValueError: If ``data`` has no rows.
    """
    if data.empty:
        raise ValueError("No pitches to plot: the data frame is empty.")

    agg_title = "Condensed" if agg else "Full"
    # iloc instead of .at[0, ...] so a frame whose index does not start at 0 still works.
    pitcher = data['Pitcher'].iloc[0]
    fig = make_subplots(rows=1, cols=1)

    # Colour per pitch-type code; pitch types missing here are skipped with a message.
    color_map = {
        'FS': 'rgb(166,86,40)',
        'FF': 'rgb(228,26,28)',
        'SL': 'rgb(220,180,47)',
        'CH': 'rgb(77,175,74)',
        'CU': 'rgb(55,126,255)',
        'FC': 'rgb(153,153,153)',
        'KC': '#1616A7',
        'SI': 'rgb(255,127,0)',
        'KN': 'rgb(152,78,163)',
        'CS': 'rgb(250,250,250)',
    }

    if agg:
        plot_data = agg_func(data)
        marker_symbol, marker_size = "circle", 20
    else:
        # Nothing below mutates the frame, so the caller's data can be used as-is.
        plot_data = data
        marker_symbol, marker_size = "circle-x", 8

    for p in pd.unique(plot_data['Pitch']):
        # Only the colour lookup is allowed to "miss"; any other KeyError
        # (e.g. a missing column) should surface instead of being mislabelled.
        color = color_map.get(p)
        if color is None:
            print(f"No pitch {p} in color map. Skipping it this run, but if you want to add it in and set the color, go for it." )
            continue

        to_plot = plot_data.loc[plot_data['Pitch'] == p]
        trace_kwargs = dict(
            x=to_plot['HB'],
            y=to_plot['IVB'],
            showlegend=True,
            legendgroup="avg-data",
            name=p,
            legendgrouptitle_text="Pitch",
            mode="markers",
            marker=dict(color=color, symbol=marker_symbol, opacity=0.8,
                        size=marker_size,
                        line=dict(width=0.5, color='DarkSlateGrey')),
        )
        if agg:
            trace_kwargs['error_y'] = _error_bar(bar * to_plot['StdIVB'])
            trace_kwargs['error_x'] = _error_bar(bar * to_plot['StdHB'])
        fig.add_trace(go.Scatter(**trace_kwargs), row=1, col=1)

    fig.update_layout(title=f"{pitcher} Break Plots{_season_label(data)}: {agg_title}",
                      width=size,
                      height=size)

    # Symmetric, square axes sized from the raw (un-aggregated) data plus one unit of padding.
    max_xy = max(data['HB'].abs().max(), data['IVB'].abs().max()) + 1
    fig.update_xaxes(range=[-1 * max_xy, max_xy])
    fig.update_yaxes(range=[-1 * max_xy, max_xy], scaleanchor="x", scaleratio=1)

    fig.show()


def _error_bar(values):
    """Return the plotly error-bar settings used for the condensed plot."""
    return dict(type='data', array=values, visible=True, thickness=3, width=10)


def _season_label(data):
    """Return ' for YYYY' or ' for YYYY-YYYY' from data['Date'], or '' if unavailable."""
    if 'Date' not in data.columns:
        return ""
    years = pd.to_datetime(data['Date'], errors='coerce').dt.year.dropna()
    if years.empty:
        return ""
    first, last = int(years.min()), int(years.max())
    return f" for {first}" if first == last else f" for {first}-{last}"



In [None]:
### Run This Fourth

def exeggcute(player_name_first, player_name_last, date_start, date_end, game_types=None, save=False, cond=True, bar=1, size=1000):
    """Pull one pitcher's data and draw the break plot.

    Args:
        player_name_first: Pitcher's first name.
        player_name_last: Pitcher's last name.
        date_start: Start of the date range for the data pull.
        date_end: End of the date range for the data pull.
        game_types: List-like of game-type codes to keep; defaults to ``['R']``.
        save: Forwarded to ``pull_edit_data``; True also writes the pull to CSV.
        cond: True for the condensed plot, False for the full plot.
        bar: Error-bar multiplier forwarded to ``plotting_function``.
        size: Figure size forwarded to ``plotting_function``.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if game_types is None:
        game_types = ['R']
    data = pull_edit_data(player_name_first, player_name_last, date_start, date_end, game_types, save)
    plotting_function(data, size=size, bar=bar, agg=cond)
    

In [None]:
### Finally, edit the settings below and run this cell once you like your choices

########## CHANGE THIS ########
# Pitcher name, spelled the way it appears on Statcast / Baseball Savant
player_name_first = "Reid"
player_name_last = "Detmers"

########## CHANGE THIS ########
# Date range, formatted "YYYY-MM-DD"
# (offseason dates are skipped automatically)
date_start = "2022-01-01"
date_end = "2022-12-31"

########## CHANGE THIS ########
# Which kinds of games to include:
#   'W' = WS, 'L' = LCS, 'D' = LDS, 'F' = Wild Card,
#   'R' = Regular Season, 'S' = Spring training
# Edit the list to pull only certain rounds, the regular season, etc.
# For example ['W','L','D','F'] grabs playoff games only.
game_types = ["R"]

# Save the pulled data to a csv for future use? True or False
save = False

# Condensed scatter (True) or full scatter (False)?
cond = True

# Only matters when cond is True: how big should the bars be?
# 1 - 2 works best, 0 - 5 is reasonable (any number technically works,
# but why would you want infinitely big bars?). Personally I prefer 1.25.
bar = 2

# How big should the graph be?
size = 650

# Run it
exeggcute(
    player_name_first=player_name_first,
    player_name_last=player_name_last,
    date_start=date_start,
    date_end=date_end,
    game_types=game_types,
    save=save,
    cond=cond,
    bar=bar,
    size=size,
)