# NCAA College Baseball Player Projection System (Exp. Weighted Mean)

In [1]:
#Import Relevant Packages
import numpy as np 
import pandas as pd

In [2]:
#Load In All Parsed 2018 and 2019
pa_logs_2018 = pd.read_csv(r"C:\Users\Peter Majors\%ML_PATH%\datasets\all_parsed_data_prefit_2018.csv")
pa_logs_2019 = pd.read_csv(r"C:\Users\Peter Majors\%ML_PATH%\datasets\all_parsed_data_prefit_2019.csv")

In [3]:
#Create List of Values From 'result' Column
outcomes = ['flied', 'struck', 'walked', 'grounded', 'doubled', 'out', 'hit', 
            'homered', 'lined', 'popped', 'reached', 'singled', 'tripled', 'walked']

new_column_names = ['FB', 'K', 'BB', 'GB', '2B', 'Out', 'H', 'HR', 'LD', 'PO', 'RE', '1B', '3B']

In [4]:
#For Loop To Create Columns Based On 'result' Column
for i, j in zip(outcomes, new_column_names):
    pa_logs_2019[j] = np.where(pa_logs_2019['result'] == i, 1, 0)

In [5]:
#Count of 'result' Column Values To Confirm They Make Sense
pa_logs_2019['result'].value_counts()

struck      79674
grounded    68711
singled     62927
flied       57856
walked      39622
out         17474
reached     16976
doubled     16475
popped      14084
hit         10516
lined       10241
homered      7198
tripled      2127
Name: result, dtype: int64

In [6]:
#Only Select Columns of Interest
pa_logs_2019 = pa_logs_2019[['GameId', 'GameDate', 'result', 'batter_id', 'batter_name', 'BattingTeam', 'pitcher_id',
                             'pitcher_name', 'PitchingTeam', 'FB', 'K', 'BB', 'GB', '2B', 'Out', 'H', 'HR', 'LD', 'PO', 
                             'RE', '1B', '3B']]

In [7]:
#Caclculate New Columns Based On New Column Names
pa_logs_2019['PA'] = pa_logs_2019['FB'] + pa_logs_2019['K'] + pa_logs_2019['BB'] + pa_logs_2019['GB'] + + pa_logs_2019['H'] + pa_logs_2019['RE'] + pa_logs_2019['2B'] + pa_logs_2019['Out'] + pa_logs_2019['HR'] + pa_logs_2019['LD'] + pa_logs_2019['PO'] + pa_logs_2019['3B'] + pa_logs_2019['1B']
pa_logs_2019['AB'] = pa_logs_2019['FB'] + pa_logs_2019['K'] + pa_logs_2019['GB'] + pa_logs_2019['2B'] + pa_logs_2019['Out'] + pa_logs_2019['HR'] + pa_logs_2019['LD'] + pa_logs_2019['PO'] + pa_logs_2019['3B'] + pa_logs_2019['1B']

pa_logs_2019['BA'] = (pa_logs_2019['1B'] + pa_logs_2019['2B'] + pa_logs_2019['3B'] + pa_logs_2019['HR'])/pa_logs_2019['AB']
pa_logs_2019['SLG'] = (pa_logs_2019['1B'] + 2*pa_logs_2019['2B'] + 3*pa_logs_2019['3B'] + 4*pa_logs_2019['HR'])/pa_logs_2019['AB']
pa_logs_2019['OBP'] = (pa_logs_2019['1B'] + pa_logs_2019['2B'] + pa_logs_2019['3B'] + pa_logs_2019['HR'] + pa_logs_2019['BB'] + pa_logs_2019['H'])/pa_logs_2019['PA']
pa_logs_2019['BABIP'] = (pa_logs_2019['1B'] + pa_logs_2019['2B'] + pa_logs_2019['3B'])/(pa_logs_2019['AB'] - pa_logs_2019['HR'] - pa_logs_2019['K'])
pa_logs_2019['ISO'] = pa_logs_2019['SLG'] - pa_logs_2019['BA']
pa_logs_2019['OPS'] = pa_logs_2019['SLG'] + pa_logs_2019['OBP']

pa_logs_2019['K_rate'] = pa_logs_2019['K']/pa_logs_2019['PA']
pa_logs_2019['BB_rate'] = pa_logs_2019['BB']/pa_logs_2019['PA']
pa_logs_2019['HBP_rate'] = pa_logs_2019['H']/pa_logs_2019['PA']
pa_logs_2019['HR_rate'] = pa_logs_2019['HR']/pa_logs_2019['PA']

In [8]:
#Remove 13 Rows Which Contain Games Played During 2018 and 2017
pa_logs_2019 = pa_logs_2019[pa_logs_2019["GameDate"].str.contains("2017|2018") == False]

In [9]:
# Noticed An Issue Where 9,009 Rows Out of 403,868 Didn't Have Time of Game Start Time. Instead of Trying To Add 
# Hundreds of Game Start Times, I Decided To Remove All Times. This Leaves The Possibility That, Upon Performing A 
# Cumulative Sum On Statistics For Each Player, Days With Two Games Could Leave Plate Appearances Improperly Ordered.

#Split Up Rows Into Dataframes Based On Whether They Have Time Of Day
pa_logs_2019_TBA = pa_logs_2019[pa_logs_2019['GameDate'].str.contains("TBA")]
pa_logs_2019_M = pa_logs_2019[pa_logs_2019['GameDate'].str.contains("M")]

#Create New Column That Only Has The Date Of The Game
pa_logs_2019_TBA['Date'] = pa_logs_2019_TBA['GameDate'].str[:-4]
pa_logs_2019_M['Date'] = pa_logs_2019_M['GameDate'].str[:-9]

#Combine The Dataframes By Rows
pa_logs_2019 = pa_logs_2019_TBA.append(pa_logs_2019_M)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pa_logs_2019_TBA['Date'] = pa_logs_2019_TBA['GameDate'].str[:-4]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pa_logs_2019_M['Date'] = pa_logs_2019_M['GameDate'].str[:-9]


In [24]:
#Sort The New Dataframe By The 'Date' Column To Ensure PA Are In Order
pa_logs_2019 = pa_logs_2019.sort_values(by ='Date')

In [46]:
#Now That Stat Columns Have Been Created, Create Cumulative Sums For Each Statistic

#Calculate Cumulative Columns For Each Batter
pa_logs_2019['roll_PA'] = pa_logs_2019.groupby(['batter_id'])['PA'].cumsum(axis=0)
pa_logs_2019['roll_AB'] = pa_logs_2019.groupby(['batter_id'])['AB'].cumsum(axis=0)

pa_logs_2019['roll_BA'] = (pa_logs_2019.groupby(['batter_id'])['1B'].cumsum(axis=0) + pa_logs_2019.groupby(['batter_id'])['2B'].cumsum(axis=0) + pa_logs_2019.groupby(['batter_id'])['3B'].cumsum(axis=0) + pa_logs_2019.groupby(['batter_id'])['HR'].cumsum(axis=0))/pa_logs_2019.groupby(['batter_id'])['AB'].cumsum(axis=0)
pa_logs_2019['roll_SLG'] = (pa_logs_2019.groupby(['batter_id'])['1B'].cumsum(axis=0) + 2*pa_logs_2019.groupby(['batter_id'])['2B'].cumsum(axis=0) + 3*pa_logs_2019.groupby(['batter_id'])['3B'].cumsum(axis=0) + 4*pa_logs_2019.groupby(['batter_id'])['HR'].cumsum(axis=0))/(pa_logs_2019.groupby(['batter_id'])['AB'].cumsum(axis=0))
pa_logs_2019['roll_OBP'] = (pa_logs_2019.groupby(['batter_id'])['1B'].cumsum(axis=0) + pa_logs_2019.groupby(['batter_id'])['2B'].cumsum(axis=0) + pa_logs_2019.groupby(['batter_id'])['3B'].cumsum(axis=0) + pa_logs_2019.groupby(['batter_id'])['HR'].cumsum(axis=0) + pa_logs_2019.groupby(['batter_id'])['BB'].cumsum(axis=0) + pa_logs_2019.groupby(['batter_id'])['H'].cumsum(axis=0))/(pa_logs_2019.groupby(['batter_id'])['PA'].cumsum(axis=0))
pa_logs_2019['roll_BABIP'] = (pa_logs_2019.groupby(['batter_id'])['1B'].cumsum(axis=0) + pa_logs_2019.groupby(['batter_id'])['2B'].cumsum(axis=0) + pa_logs_2019.groupby(['batter_id'])['3B'].cumsum(axis=0))/(pa_logs_2019.groupby(['batter_id'])['AB'].cumsum(axis=0) - pa_logs_2019.groupby(['batter_id'])['HR'].cumsum(axis=0) - pa_logs_2019.groupby(['batter_id'])['K'].cumsum(axis=0))
pa_logs_2019['roll_ISO'] = pa_logs_2019['roll_SLG'] - pa_logs_2019['BA']
pa_logs_2019['roll_OPS'] = pa_logs_2019['roll_SLG'] + pa_logs_2019['OBP']

pa_logs_2019['roll_K_rate'] = pa_logs_2019.groupby(['batter_id'])['K'].cumsum(axis=0)/pa_logs_2019.groupby(['batter_id'])['PA'].cumsum(axis=0)
pa_logs_2019['roll_BB_rate'] = pa_logs_2019.groupby(['batter_id'])['BB'].cumsum(axis=0)/pa_logs_2019.groupby(['batter_id'])['PA'].cumsum(axis=0)
pa_logs_2019['roll_HBP_rate'] = pa_logs_2019.groupby(['batter_id'])['H'].cumsum(axis=0)/pa_logs_2019.groupby(['batter_id'])['PA'].cumsum(axis=0)
pa_logs_2019['roll_HR_rate'] = pa_logs_2019.groupby(['batter_id'])['HR'].cumsum(axis=0)/pa_logs_2019.groupby(['batter_id'])['PA'].cumsum(axis=0)

#Fill Null Cells With '0' (i.e. Slugging Percentage Can't Be Calculated Becuase Hitter Lacks An At-Bat)
pa_logs_2019.fillna(0)

Unnamed: 0,GameId,GameDate,result,batter_id,batter_name,BattingTeam,pitcher_id,pitcher_name,PitchingTeam,FB,K,BB,GB,2B,Out,H,HR,LD,PO,RE,1B,3B,PA,AB,BA,SLG,OBP,BABIP,ISO,OPS,K_rate,BB_rate,HBP_rate,HR_rate,Date,roll_PA,roll_AB,roll_BA,roll_SLG,roll_OBP,roll_BABIP,roll_ISO,roll_OPS,roll_K_rate,roll_BB_rate,roll_HBP_rate,roll_HR_rate
8003,4705626,02/15/2019 TBA,hit,1976413.0,Judd Ward,Auburn,1889201.0,Seth Shuman,Ga. Southern,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,02/15/2019,1,0,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000
335791,4705080,02/15/2019 12:00 AM,struck,1981391.0,Will Harless,Radford,1981423.0,Zach Barnes,East Carolina,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,02/15/2019,1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000
335792,4705080,02/15/2019 12:00 AM,grounded,2138456.0,David Bryant,Radford,1981423.0,Zach Barnes,East Carolina,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,02/15/2019,1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
335793,4705080,02/15/2019 12:00 AM,grounded,1647346.0,Kyle Butler,Radford,1981423.0,Zach Barnes,East Carolina,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,02/15/2019,1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
335794,4705080,02/15/2019 12:00 AM,hit,1867740.0,Carlin Christian,Radford,1981423.0,Zach Barnes,East Carolina,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,02/15/2019,1,0,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295613,4803056,06/17/2019 07:00 PM,flied,1982309.0,Blake Nelson,Michigan,1753371.0,Chase Haney,Florida St.,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,06/17/2019,181,153,0.307190,0.418301,0.381215,0.389831,0.418301,0.418301,0.187845,0.082873,0.038674,0.005525
295611,4803056,06/17/2019 07:00 PM,struck,2123798.0,Jordan Brewer,Michigan,1973978.0,CJ Van Eyk,Florida St.,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,06/17/2019,192,166,0.361446,0.608434,0.390625,0.408000,0.608434,0.608434,0.166667,0.067708,0.010417,0.046875
295610,4803056,06/17/2019 07:00 PM,singled,1982308.0,Jesse Franklin,Michigan,1973978.0,CJ Van Eyk,Florida St.,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1.0,1.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,06/17/2019,221,173,0.260116,0.456647,0.393665,0.298387,-0.543353,1.456647,0.185520,0.171946,0.018100,0.036199
295608,4803056,06/17/2019 07:00 PM,struck,1760434.0,Ako Thomas,Michigan,1973978.0,CJ Van Eyk,Florida St.,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,06/17/2019,189,158,0.227848,0.335443,0.322751,0.285714,0.335443,0.335443,0.195767,0.111111,0.021164,0.010582


In [47]:
#Export Dataframe To .csv To Inspect 
pa_logs_2019.to_csv('C:/Users/Peter Majors/OneDrive/Documents/Python Scripts/test.csv')

PermissionError: [Errno 13] Permission denied: 'C:/Users/Peter Majors/OneDrive/Documents/Python Scripts/test.csv'

In [49]:
#Perfrom Data Integrity Check
pa_logs_2019.describe()

#Some Players Have Over 500 PA, Which Is Too Many. Also, Means Of Rolling Statisitcs Differ Slightly From Overall Statistics.

Unnamed: 0,GameId,batter_id,pitcher_id,FB,K,BB,GB,2B,Out,H,HR,LD,PO,RE,1B,3B,PA,AB,BA,SLG,OBP,BABIP,ISO,OPS,K_rate,BB_rate,HBP_rate,HR_rate,roll_PA,roll_AB,roll_BA,roll_SLG,roll_OBP,roll_BABIP,roll_ISO,roll_OPS,roll_K_rate,roll_BB_rate,roll_HBP_rate,roll_HR_rate
count,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,336755.0,336755.0,403868.0,249885.0,336755.0,336755.0,403868.0,403868.0,403868.0,403868.0,403868.0,403868.0,402904.0,402904.0,403868.0,400064.0,336755.0,402904.0,403868.0,403868.0,403868.0,403868.0
mean,4759354.0,1949626.0,1951060.0,0.143252,0.197272,0.098104,0.170122,0.040791,0.043267,0.026038,0.017823,0.025355,0.034873,0.042034,0.155803,0.005267,1.0,0.833824,0.263465,0.38914,0.343825,0.32625,0.125676,0.652605,0.197272,0.098104,0.026038,0.017823,69.852521,58.041969,0.264539,0.389337,0.346742,0.331724,0.125136,0.732195,0.202621,0.100945,0.026528,0.017166
std,27234.95,138826.9,137593.9,0.35033,0.39794,0.297455,0.375741,0.197805,0.203457,0.159249,0.132307,0.157201,0.183458,0.200666,0.362669,0.07238,0.0,0.372239,0.440513,0.79369,0.474984,0.468841,0.500762,1.182044,0.39794,0.297455,0.159249,0.132307,51.31635,42.852743,0.113435,0.207315,0.117405,0.136934,0.445716,0.54899,0.117928,0.076463,0.041562,0.031223
min,4704861.0,1526216.0,1526211.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.984848,0.0,0.0,0.0,0.0,0.0
25%,4737630.0,1866191.0,1867504.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,22.0,0.210526,0.28125,0.292683,0.266667,-0.193365,0.318182,0.133858,0.057971,0.0,0.0
50%,4762577.0,1977772.0,1977737.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61.0,50.0,0.266129,0.375,0.348101,0.333333,0.295455,0.46875,0.189189,0.09375,0.016667,0.0
75%,4784828.0,2114016.0,2113885.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,105.0,87.0,0.319444,0.483333,0.401408,0.394737,0.421053,1.324561,0.25,0.131944,0.037037,0.025974
max,4803619.0,2150001.0,2154512.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,3.0,5.0,1.0,1.0,1.0,1.0,522.0,440.0,1.0,4.0,1.0,1.0,3.0,5.0,1.0,1.0,1.0,1.0


In [39]:
#Look At Daniel Jung, Who Has "522" PA
pd.set_option("display.max_columns", None)
pa_logs_2019.loc[pa_logs_2019['batter_name'] == 'Daniel Jung'].sort_values('roll_PA', axis=0, ascending=False)head(10)

Unnamed: 0,GameId,GameDate,result,batter_id,batter_name,BattingTeam,pitcher_id,pitcher_name,PitchingTeam,FB,K,BB,GB,2B,Out,H,HR,LD,PO,RE,1B,3B,PA,AB,BA,SLG,OBP,BABIP,ISO,OPS,K_rate,BB_rate,HBP_rate,HR_rate,Date,roll_PA,roll_AB,roll_BA,roll_SLG,roll_OBP,roll_BABIP,roll_ISO,roll_OPS,roll_K_rate,roll_BB_rate,roll_HBP_rate,roll_HR_rate
114848,4779880,02/21/2019 12:00 AM,doubled,1888225.0,Daniel Jung,Notre Dame,1982338.0,Scott Granzotto,Eastern Mich.,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1.0,2.0,1.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,02/21/2019,1,1,1.0,2.0,1.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0
114874,4779880,02/21/2019 12:00 AM,hit,1888225.0,Daniel Jung,Notre Dame,2127117.0,Tyler Koons,Eastern Mich.,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,,,1.0,,,,0.0,0.0,1.0,0.0,02/21/2019,2,1,1.0,2.0,1.0,1.0,,3.0,0.0,0.0,0.5,0.0
114873,4779880,02/21/2019 12:00 AM,singled,1888225.0,Daniel Jung,Notre Dame,2127117.0,Tyler Koons,Eastern Mich.,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1.0,1.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,02/21/2019,3,2,1.0,1.5,1.0,1.0,0.5,2.5,0.0,0.0,0.333333,0.0
114870,4779880,02/21/2019 12:00 AM,grounded,1888225.0,Daniel Jung,Notre Dame,1982338.0,Scott Granzotto,Eastern Mich.,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,02/21/2019,4,3,0.666667,1.0,0.75,0.666667,1.0,1.0,0.0,0.0,0.25,0.0
114869,4779880,02/21/2019 12:00 AM,walked,1888225.0,Daniel Jung,Notre Dame,1982338.0,Scott Granzotto,Eastern Mich.,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,,,1.0,,,,0.0,1.0,0.0,0.0,02/21/2019,5,3,0.666667,1.0,0.8,0.666667,,2.0,0.0,0.2,0.2,0.0
114867,4779880,02/21/2019 12:00 AM,grounded,1888225.0,Daniel Jung,Notre Dame,1982338.0,Scott Granzotto,Eastern Mich.,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,02/21/2019,6,4,0.5,0.75,0.666667,0.5,0.75,0.75,0.0,0.166667,0.166667,0.0
114865,4779880,02/21/2019 12:00 AM,grounded,1888225.0,Daniel Jung,Notre Dame,1982338.0,Scott Granzotto,Eastern Mich.,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,02/21/2019,7,5,0.4,0.6,0.571429,0.4,0.6,0.6,0.0,0.142857,0.142857,0.0
114876,4779880,02/21/2019 12:00 AM,tripled,1888225.0,Daniel Jung,Notre Dame,2127117.0,Tyler Koons,Eastern Mich.,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1.0,3.0,1.0,1.0,2.0,4.0,0.0,0.0,0.0,0.0,02/21/2019,8,6,0.5,1.0,0.625,0.5,0.0,2.0,0.0,0.125,0.125,0.0
114878,4779880,02/21/2019 12:00 AM,flied,1888225.0,Daniel Jung,Notre Dame,2127117.0,Tyler Koons,Eastern Mich.,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,02/21/2019,9,7,0.428571,0.857143,0.555556,0.428571,0.857143,0.857143,0.0,0.111111,0.111111,0.0
114888,4779880,02/21/2019 12:00 AM,singled,1888225.0,Daniel Jung,Notre Dame,2127128.0,Remington Monce,Eastern Mich.,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1.0,1.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,02/21/2019,10,8,0.5,0.875,0.6,0.5,-0.125,1.875,0.0,0.1,0.1,0.0


In [43]:
#Look At Tommy Jordan, Who "Has" 308 PA
pd.set_option("display.max_columns", None)
pa_logs_2019.loc[pa_logs_2019['batter_name'] == 'Tommy Jordan'].sort_values('roll_PA', axis=0, ascending=False).head(10)

Unnamed: 0,GameId,GameDate,result,batter_id,batter_name,BattingTeam,pitcher_id,pitcher_name,PitchingTeam,FB,K,BB,GB,2B,Out,H,HR,LD,PO,RE,1B,3B,PA,AB,BA,SLG,OBP,BABIP,ISO,OPS,K_rate,BB_rate,HBP_rate,HR_rate,Date,roll_PA,roll_AB,roll_BA,roll_SLG,roll_OBP,roll_BABIP,roll_ISO,roll_OPS,roll_K_rate,roll_BB_rate,roll_HBP_rate,roll_HR_rate
233241,4792522,05/17/2019 12:00 AM,struck,1889447.0,Tommy Jordan,Delaware St.,1981625.0,Jeremiah McCollum,Florida A&M,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,05/17/2019,308,247,0.255061,0.323887,0.331169,0.324468,0.323887,0.323887,0.185065,0.100649,0.025974,0.006494
43934,4792522,05/17/2019 12:00 AM,singled,1889447.0,Tommy Jordan,Florida A&M,1871290.0,Jamie Grant,Delaware St.,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1.0,1.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,05/17/2019,307,246,0.256098,0.325203,0.332248,0.324468,-0.674797,1.325203,0.18241,0.100977,0.026059,0.006515
43925,4792522,05/17/2019 12:00 AM,doubled,1889447.0,Tommy Jordan,Florida A&M,1998666.0,Josh Barr,Delaware St.,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1.0,2.0,1.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,05/17/2019,306,245,0.253061,0.322449,0.330065,0.320856,-0.677551,1.322449,0.183007,0.101307,0.026144,0.006536
43943,4792522,05/17/2019 12:00 AM,grounded,1889447.0,Tommy Jordan,Florida A&M,1981625.0,Jeremiah McCollum,Delaware St.,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,05/17/2019,305,244,0.25,0.315574,0.327869,0.317204,0.315574,0.315574,0.183607,0.101639,0.02623,0.006557
43907,4792522,05/17/2019 12:00 AM,struck,1889447.0,Tommy Jordan,Florida A&M,1998666.0,Josh Barr,Delaware St.,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,05/17/2019,304,243,0.251029,0.316872,0.328947,0.318919,0.316872,0.316872,0.184211,0.101974,0.026316,0.006579
43916,4792522,05/17/2019 12:00 AM,walked,1889447.0,Tommy Jordan,Florida A&M,1998666.0,Josh Barr,Delaware St.,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,,,1.0,,,,0.0,1.0,0.0,0.0,05/17/2019,303,242,0.252066,0.318182,0.330033,0.318919,,1.318182,0.181518,0.10231,0.026403,0.006601
43952,4792522,05/17/2019 12:00 AM,struck,1889447.0,Tommy Jordan,Florida A&M,1981625.0,Jeremiah McCollum,Delaware St.,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,05/17/2019,302,242,0.252066,0.318182,0.327815,0.318919,0.318182,0.318182,0.182119,0.099338,0.02649,0.006623
233214,4792522,05/17/2019 12:00 AM,doubled,1889447.0,Tommy Jordan,Delaware St.,1998666.0,Josh Barr,Florida A&M,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1.0,2.0,1.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,05/17/2019,301,241,0.253112,0.319502,0.328904,0.318919,-0.680498,1.319502,0.179402,0.099668,0.026578,0.006645
233223,4792522,05/17/2019 12:00 AM,singled,1889447.0,Tommy Jordan,Delaware St.,1871290.0,Jamie Grant,Florida A&M,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1.0,1.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,05/17/2019,300,240,0.25,0.3125,0.326667,0.315217,-0.6875,1.3125,0.18,0.1,0.026667,0.006667
233232,4792522,05/17/2019 12:00 AM,grounded,1889447.0,Tommy Jordan,Delaware St.,1981625.0,Jeremiah McCollum,Florida A&M,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,05/17/2019,299,239,0.246862,0.309623,0.324415,0.311475,0.309623,0.309623,0.180602,0.100334,0.026756,0.006689


In [48]:
#Look At Tommy Jordan, Who "Has" 389 PA
pd.set_option("display.max_columns", None)
pa_logs_2019.loc[pa_logs_2019['batter_name'] == 'Spencer Myers'].sort_values('roll_PA', axis=0, ascending=False).head(10)

Unnamed: 0,GameId,GameDate,result,batter_id,batter_name,BattingTeam,pitcher_id,pitcher_name,PitchingTeam,FB,K,BB,GB,2B,Out,H,HR,LD,PO,RE,1B,3B,PA,AB,BA,SLG,OBP,BABIP,ISO,OPS,K_rate,BB_rate,HBP_rate,HR_rate,Date,roll_PA,roll_AB,roll_BA,roll_SLG,roll_OBP,roll_BABIP,roll_ISO,roll_OPS,roll_K_rate,roll_BB_rate,roll_HBP_rate,roll_HR_rate
319157,4793058,05/21/2019 12:00 AM,grounded,1991579.0,Spencer Myers,Notre Dame,1977227.0,Thomas Girard,Duke,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,05/21/2019,389,320,0.246875,0.33125,0.336761,0.320513,0.33125,0.33125,0.210797,0.118252,0.015424,0.010283
319161,4793058,05/21/2019 12:00 AM,walked,1991579.0,Spencer Myers,Notre Dame,1977227.0,Thomas Girard,Duke,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,,,1.0,,,,0.0,1.0,0.0,0.0,05/21/2019,388,319,0.247649,0.332288,0.337629,0.321888,,1.332288,0.21134,0.118557,0.015464,0.010309
319162,4793058,05/21/2019 12:00 AM,struck,1991579.0,Spencer Myers,Notre Dame,1977227.0,Thomas Girard,Duke,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,05/21/2019,387,319,0.247649,0.332288,0.335917,0.321888,0.332288,0.332288,0.211886,0.116279,0.015504,0.010336
319166,4793058,05/21/2019 12:00 AM,struck,1991579.0,Spencer Myers,Notre Dame,1977227.0,Thomas Girard,Duke,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,05/21/2019,386,318,0.248428,0.333333,0.336788,0.321888,0.333333,0.333333,0.209845,0.11658,0.015544,0.010363
319144,4793058,05/21/2019 12:00 AM,singled,1991579.0,Spencer Myers,Notre Dame,1977229.0,Matt Dockman,Duke,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1.0,1.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,05/21/2019,385,317,0.249211,0.334385,0.337662,0.321888,-0.665615,1.334385,0.207792,0.116883,0.015584,0.01039
319153,4793058,05/21/2019 12:00 AM,homered,1991579.0,Spencer Myers,Notre Dame,1977229.0,Matt Dockman,Duke,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1.0,4.0,1.0,,3.0,5.0,0.0,0.0,0.0,1.0,05/21/2019,384,316,0.246835,0.332278,0.335938,0.318966,-0.667722,1.332278,0.208333,0.117188,0.015625,0.010417
319148,4793058,05/21/2019 12:00 AM,grounded,1991579.0,Spencer Myers,Notre Dame,1977229.0,Matt Dockman,Duke,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,05/21/2019,383,315,0.244444,0.320635,0.334204,0.318966,0.320635,0.320635,0.208877,0.117493,0.015666,0.007833
319139,4793058,05/21/2019 12:00 AM,reached,1991579.0,Spencer Myers,Notre Dame,1977231.0,Bryce Jarvis,Duke,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,,,0.0,,,,0.0,0.0,0.0,0.0,05/21/2019,382,314,0.245223,0.321656,0.335079,0.320346,,0.321656,0.209424,0.117801,0.015707,0.007853
319135,4793058,05/21/2019 12:00 AM,struck,1991579.0,Spencer Myers,Notre Dame,1977231.0,Bryce Jarvis,Duke,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,05/21/2019,381,314,0.245223,0.321656,0.335958,0.320346,0.321656,0.321656,0.209974,0.11811,0.015748,0.007874
319130,4793058,05/21/2019 12:00 AM,struck,1991579.0,Spencer Myers,Notre Dame,1977231.0,Bryce Jarvis,Duke,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,05/21/2019,380,313,0.246006,0.322684,0.336842,0.320346,0.322684,0.322684,0.207895,0.118421,0.015789,0.007895


In [57]:
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pa_logs_2019.loc[pa_logs_2019['batter_id'] == 1886590].sort_values('roll_PA', axis=0, ascending=False)

Unnamed: 0,GameId,GameDate,result,batter_id,batter_name,BattingTeam,pitcher_id,pitcher_name,PitchingTeam,FB,K,BB,GB,2B,Out,H,HR,LD,PO,RE,1B,3B,PA,AB,BA,SLG,OBP,BABIP,ISO,OPS,K_rate,BB_rate,HBP_rate,HR_rate,Date,roll_PA,roll_AB,roll_BA,roll_SLG,roll_OBP,roll_BABIP,roll_ISO,roll_OPS,roll_K_rate,roll_BB_rate,roll_HBP_rate,roll_HR_rate
463,4797738,05/31/2019 04:00 PM,singled,1886590.0,Josh Jung,Texas Tech,1769743.0,Jacob Carte,Army West Point,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1.0,1.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,05/31/2019,170,126,0.31746,0.603175,0.435294,0.366667,-0.396825,1.603175,0.170588,0.164706,0.035294,0.041176
436,4797738,05/31/2019 04:00 PM,flied,1886590.0,Josh Jung,Texas Tech,1769743.0,Jacob Carte,Army West Point,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,05/31/2019,169,125,0.312,0.6,0.431953,0.359551,0.6,0.6,0.171598,0.16568,0.035503,0.04142
445,4797738,05/31/2019 04:00 PM,walked,1886590.0,Josh Jung,Texas Tech,1879826.0,Joe Santoro,Army West Point,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,,,1.0,,,,0.0,1.0,0.0,0.0,05/31/2019,168,124,0.314516,0.604839,0.434524,0.363636,,1.604839,0.172619,0.166667,0.035714,0.041667
454,4797738,05/31/2019 04:00 PM,walked,1886590.0,Josh Jung,Texas Tech,2139057.0,Anthony Loricco,Army West Point,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,,,1.0,,,,0.0,1.0,0.0,0.0,05/31/2019,167,124,0.314516,0.604839,0.431138,0.363636,,1.604839,0.173653,0.161677,0.035928,0.041916
427,4797738,05/31/2019 04:00 PM,singled,1886590.0,Josh Jung,Texas Tech,1769743.0,Jacob Carte,Army West Point,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1.0,1.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,05/31/2019,166,124,0.314516,0.604839,0.427711,0.363636,-0.395161,1.604839,0.174699,0.156627,0.036145,0.042169
161993,4795271,05/25/2019 01:00 AM,struck,1886590.0,Josh Jung,Texas Tech,2138449.0,Ryan Bergert,West Virginia,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,05/25/2019,165,123,0.308943,0.601626,0.424242,0.356322,0.601626,0.601626,0.175758,0.157576,0.036364,0.042424
162002,4795271,05/25/2019 01:00 AM,flied,1886590.0,Josh Jung,Texas Tech,2138490.0,Zach Ottinger,West Virginia,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,05/25/2019,164,122,0.311475,0.606557,0.426829,0.356322,0.606557,0.606557,0.170732,0.158537,0.036585,0.042683
161984,4795271,05/25/2019 01:00 AM,flied,1886590.0,Josh Jung,Texas Tech,2138449.0,Ryan Bergert,West Virginia,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,05/25/2019,163,121,0.31405,0.61157,0.429448,0.360465,0.61157,0.61157,0.171779,0.159509,0.03681,0.042945
162023,4795289,05/25/2019 01:00 AM,struck,1886590.0,Josh Jung,Texas Tech,1888740.0,Kade Strowd,West Virginia,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,05/25/2019,162,120,0.316667,0.616667,0.432099,0.364706,0.616667,0.616667,0.17284,0.160494,0.037037,0.04321
162014,4795289,05/25/2019 01:00 AM,struck,1886590.0,Josh Jung,Texas Tech,1888740.0,Kade Strowd,West Virginia,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,0.0,0.0,05/25/2019,161,119,0.319328,0.621849,0.434783,0.364706,0.621849,0.621849,0.167702,0.161491,0.037267,0.043478


In [61]:
#PA For ALl Hitters Seem To Be Innacurate
pa_logs_2019['batter_name'].value_counts()

Daniel Jung                522
Will Johnson               394
Spencer Myers              389
Austin Martin              378
Adrian Del Castillo        324
Ryan McCarthy              313
Tommy Jordan               308
Brock Anderson             305
Mike Amditis               292
Nick Grande                291
Noah Hill                  288
Josh Smith                 286
Keil Krumwiede             269
Bennett Hostetler          267
AJ Wright                  257
John Rave                  255
Tony Jenkins               253
Chris Hamilton             250
Grae Kessinger             249
Matt Smith                 247
Anthony Martinez           246
Nick Howie                 246
Jack Lombardi              242
Ryland Kerr                240
Kyle Johnson               239
Jordan Libman              239
Trevor McCutchin           238
Ben Palensky               238
Tyler Keenan               237
Derek Parola               236
Connor O'Brien             235
Ryan Ward                  235
James Ci