In [31]:
# Import needed dependencies
import requests
import re
import pandas as pd
import numpy as np
import scipy.stats as stats
from datetime import date
from bs4 import BeautifulSoup, Comment

pd.set_option('display.max_columns', None)


In [19]:
today = date.today()

# dd/mm/YY
current_year = today.strftime("%Y")

last_year = int(current_year) - 1

In [20]:
last_five_years = []
for i in range(1,6):
    last_five_years.append(int(current_year) - i)

In [21]:
# Create a list to help create a dataframe from batter statistics data
batter_stats = []

# Create a loop to create a dataframe from Baseball Reference tables
for year in last_five_years:
    
    # input URL and use BeautifulSoup to parse through the page
    url = f'https://www.baseball-reference.com/leagues/majors/{year}-standard-batting.shtml'
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

    # Grab the table element that has batter statistics
    table = BeautifulSoup(soup.select_one('#all_players_standard_batting').find_next(text=lambda x: isinstance(x, Comment)), 'html.parser')


    # Grab data from table and put it into the list created above
    for tr in table.select('tr:has(td)'):
        tds = [td.get_text(strip=True) for td in tr.select('td')]
        tds.append(year)
        batter_stats.append(tds)

In [22]:
# Create dataframe for batter statistics
batter_stats_df = pd.DataFrame(batter_stats)

# Create an empty list to store dataframe header information
header_list = []

# Grab the table header information to use as column headers in our dataframe
for tr in table.select('tr:has(th)'):
    ths = [th.get_text(strip=True) for th in tr.select('th')]
    header_list.append(ths)

# For loop returns a list of lists, and we only need the first list 
df_headers = header_list[0]

# Remove the first item from our headers list, it is the index header that we do not need
df_headers.remove('Rk')
df_headers.append("Year")

# Set column headers equal to our list
batter_stats_df.columns = df_headers
batter_stats_df

Unnamed: 0,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,...,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Year
0,CJ Abrams*,22,WSN,NL,151,614,563,83,138,28,...,.712,95,232,7,13,3,3,2,*6/H,2023
1,José Abreu,36,HOU,AL,141,594,540,62,128,23,...,.680,87,207,16,6,0,6,1,*3/D,2023
2,Wilyer Abreu*,24,BOS,AL,28,85,76,10,24,6,...,.862,132,36,0,0,0,0,0,87/H9D,2023
3,Ronald Acuna Jr.,25,ATL,NL,159,735,643,149,217,35,...,1.012,168,383,15,9,0,3,3,*9/D,2023
4,Willy Adames,27,MIL,NL,149,638,553,73,120,29,...,.717,95,225,12,6,0,6,1,*6/D,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5865,Ryan Zimmerman,34,WSN,NL,52,190,171,20,44,9,...,.736,89,71,4,0,0,2,0,3/HD,2019
5866,Jordan Zimmermann,33,DET,AL,1,2,2,0,0,0,...,.000,-100,0,0,0,0,0,0,1,2019
5867,Ben Zobrist#,38,CHC,NL,47,176,150,24,39,5,...,.671,79,47,6,1,0,2,0,49/7HD16,2019
5868,Mike Zunino,28,TBR,AL,90,289,266,30,44,10,...,.544,45,83,4,3,0,0,0,2/H,2019


In [23]:
# Create a list to help create a dataframe from batter statistics data
fielding_stats = []

# Create a loop to create a dataframe from Baseball Reference tables
for year in last_five_years:
    
    # input URL and use BeautifulSoup to parse through the page
    url = f'https://www.baseball-reference.com/leagues/majors/{year}-standard-fielding.shtml'
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

    # Grab the table element that has batter statistics
    table = soup.select_one('#all_players_players_standard_fielding_fielding')#.find_next(text=lambda x: isinstance(x, Comment)), 'html.parser')


    # Grab data from table and put it into the list created above
    for tr in table.select('tr:has(td)'):
        tds = [td.get_text(strip=True) for td in tr.select('td')]
        tds.append(year)
        fielding_stats.append(tds)

# Create dataframe for batter statistics
fielding_stats_df = pd.DataFrame(fielding_stats)

# Create an empty list to store dataframe header information
fielding_header_list = []

# Grab the table header information to use as column headers in our dataframe
for tr in table.select('tr:has(th)'):
    ths = [th.get_text(strip=True) for th in tr.select('th')]
    fielding_header_list.append(ths)

# For loop returns a list of lists, and we only need the first list 
fielding_df_headers = fielding_header_list[0]

# Remove the first item from our headers list, it is the index header that we do not need
fielding_df_headers.remove('Rk')
fielding_df_headers.append("Year")

# Set column headers equal to our list
fielding_stats_df.columns = fielding_df_headers

final_fielding_stats_df = fielding_stats_df[['Name','PO','A','E','Year','Tm']]

position_players_df = pd.merge(final_fielding_stats_df, batter_stats_df, on=['Name','Year','Tm'])
position_players_df


Unnamed: 0,Name,PO,A,E,Year,Tm,Age,Lg,G,PA,...,SLG,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary
0,José Abreu,1017,67,5,2023,HOU,36,AL,141,594,...,.383,.680,87,207,16,6,0,6,1,*3/D
1,Ronald Acuna Jr.,281,10,5,2023,ATL,25,NL,159,735,...,.596,1.012,168,383,15,9,0,3,3,*9/D
2,Willy Adames,159,351,14,2023,MIL,27,NL,149,638,...,.407,.717,95,225,12,6,0,6,1,*6/D
3,Jordyn Adams,27,0,2,2023,LAA,23,AL,17,40,...,.128,.253,-31,5,0,0,0,1,0,/98H7
4,Riley Adams,293,8,2,2023,WSN,27,NL,44,158,...,.476,.807,120,68,5,2,1,1,0,2/HD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2963,T.J. Zeuch,0,3,0,2019,TOR,23,AL,1,1,...,.000,.000,-100,0,0,0,0,0,0,/1
2964,Kyle Zimmer,0,1,0,2019,KCR,27,AL,1,0,...,,,,0,0,0,0,0,0,1
2965,Ryan Zimmerman,307,20,3,2019,WSN,34,NL,52,190,...,.415,.736,89,71,4,0,0,2,0,3/HD
2966,Jordan Zimmermann,7,10,1,2019,DET,33,AL,1,2,...,.000,.000,-100,0,0,0,0,0,0,1


In [24]:
position_players_df.columns

Index(['Name', 'PO', 'A', 'E', 'Year', 'Tm', 'Age', 'Lg', 'G', 'PA', 'AB', 'R',
       'H', '2B', '3B', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'BA', 'OBP',
       'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP', 'SH', 'SF', 'IBB',
       'Pos Summary'],
      dtype='object')

In [25]:
# Change types of columns to numeric for columns with number values
position_players_df[['Age','G','R','H','2B','3B','HR','RBI','SB','TB','BB','SO','PO','A','E','PA','OPS','OPS+']] = position_players_df[['Age','G', 'R','H','2B','3B','HR','RBI','SB','TB','BB','SO','PO','A','E','PA','OPS','OPS+']].apply(pd.to_numeric)

# Drop any players with 0 plate appearances to remove null values and change PA type to integer
position_players_df.dropna(subset=['PA'], axis = 0 , inplace= True)

# Remove any players with fewer than 100 plate appearances
filtered_position_players_df = position_players_df[position_players_df['PA'] >= 100]

# Select the columns we want for our batter analysis
final_position_players_df = filtered_position_players_df[['Year','Name','Tm','Age','G', 'R','H','2B','3B','HR','RBI','SB','TB','BB','SO','PO','A','E','PA','OPS','OPS+','Pos\xa0Summary']]

# Sort by index to prepare to drop duplicates
final_position_players_df = final_position_players_df.sort_index()

# Drop duplicate entries of Player Name and Year
final_position_players_df['1B'] = ''

for index, row in final_position_players_df.iterrows():
    final_position_players_df['1B'] = (final_position_players_df['H'] - (final_position_players_df['2B'] + final_position_players_df['3B'] + final_position_players_df['HR']))

final_position_players_df.columns


Index(['Year', 'Name', 'Tm', 'Age', 'G', 'R', 'H', '2B', '3B', 'HR', 'RBI',
       'SB', 'TB', 'BB', 'SO', 'PO', 'A', 'E', 'PA', 'OPS', 'OPS+',
       'Pos Summary', '1B'],
      dtype='object')

In [26]:
final_position_players_df['FPTS'] = ''
final_position_players_df['AVG_FPTS'] = ''

for index, row in final_position_players_df.iterrows():
    final_position_players_df['FPTS'] = (final_position_players_df['1B'] + (2*final_position_players_df['2B']) + (3*final_position_players_df['3B']) + (4*final_position_players_df['HR']) + final_position_players_df['TB'] + final_position_players_df['BB'] + final_position_players_df['R'] + (2*final_position_players_df['RBI']) + (2*final_position_players_df['SB']) + final_position_players_df['PO'] + final_position_players_df['A'] - (2*final_position_players_df['SO']) - final_position_players_df['E'])
    final_position_players_df['AVG_FPTS'] = (final_position_players_df['FPTS']/final_position_players_df['G'])

final_position_players_df
    

Unnamed: 0,Year,Name,Tm,Age,G,R,H,2B,3B,HR,...,PO,A,E,PA,OPS,OPS+,Pos Summary,1B,FPTS,AVG_FPTS
0,2023,José Abreu,HOU,36,141,62,128,23,1,18,...,1017,67,5,594,0.680,87.0,*3/D,86,1517,10.758865
1,2023,Ronald Acuna Jr.,ATL,25,159,149,217,35,4,41,...,281,10,5,735,1.012,168.0,*9/D,137,1471,9.251572
2,2023,Willy Adames,MIL,27,149,73,120,29,2,24,...,159,351,14,638,0.717,95.0,*6/D,65,930,6.241611
4,2023,Riley Adams,WSN,27,44,8,39,13,2,4,...,293,8,2,158,0.807,120.0,2/HD,20,406,9.227273
7,2023,Jesús Aguilar,OAK,33,36,8,23,2,0,5,...,112,10,2,115,0.665,89.0,3/DH,16,172,4.777778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2925,2019,Christian Walker,ARI,28,152,86,137,26,1,29,...,1042,139,11,603,0.825,111.0,*3H/D,81,1679,11.046053
2935,2019,Tyler White,TOT,28,83,18,50,14,0,3,...,344,30,1,279,0.612,63.0,3DH/1,33,463,5.578313
2940,2019,Mac Williamson,TOT,28,40,13,20,1,0,4,...,69,4,2,144,0.508,38.0,7/H9D,15,114,2.850000
2965,2019,Ryan Zimmerman,WSN,34,52,20,44,9,0,6,...,307,20,3,190,0.736,89.0,3/HD,29,479,9.211538


In [27]:
# Sort data by name alphabetically, then by year in descending order
final_position_players_df = final_position_players_df.sort_values(['Year','Name'], ascending=[True, True])

# Eliminate Baseball Reference's name badges for accolades
final_position_players_df['Name'] = final_position_players_df['Name'].str.extract('([^\*|#]*)')

cleaned_player_list = []
for player in final_position_players_df['Name']:
    player = player.replace("\xa0", " ")
    cleaned_player_list.append(player)

final_position_players_df['Name'] = cleaned_player_list 
final_position_players_df = final_position_players_df.sort_values(['FPTS'], ascending=False)

final_position_players_df

Unnamed: 0,Year,Name,Tm,Age,G,R,H,2B,3B,HR,...,PO,A,E,PA,OPS,OPS+,Pos Summary,1B,FPTS,AVG_FPTS
1226,2021,Vladimir Guerrero Jr.,TOR,22,161,123,188,29,1,48,...,1026,46,8,698,1.002,167.0,*3D/5,110,2009,12.478261
1203,2021,Paul Goldschmidt,STL,33,158,102,177,36,2,31,...,1144,106,2,679,0.879,141.0,*3/DH,108,1987,12.575949
2425,2019,Paul Goldschmidt,STL,31,161,97,155,25,1,34,...,1256,111,5,682,0.821,115.0,*3/H,95,1973,12.254658
606,2022,Paul Goldschmidt,STL,34,151,106,178,41,0,35,...,1071,91,1,651,0.981,177.0,*3D/H,102,1956,12.953642
2157,2019,Pete Alonso,NYM,24,161,103,155,30,2,53,...,1078,112,12,693,0.941,147.0,*3/HD,70,1925,11.956522
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2323,2019,Yonathan Daza,COL,25,44,7,20,1,1,0,...,49,4,2,105,0.494,23.0,8H/79,18,77,1.750000
1308,2021,JaCoby Jones,DET,29,36,9,17,2,0,2,...,74,0,0,105,0.460,28.0,8/H7D,13,76,2.111111
1790,2020,Jo Adell,LAA,21,38,9,20,4,0,3,...,72,2,3,132,0.478,30.0,9/8H,13,57,1.500000
1090,2021,Khris Davis,TOT,33,42,11,21,5,1,3,...,4,0,0,114,0.635,75.0,DH/7,12,57,1.357143


In [28]:
# separate position column into a list for editing
new_pos_list = []
pos_list = final_position_players_df['Pos\xa0Summary'].tolist()

# loop through list and pull only the last item, which represents player position listed from most recent season
for i in pos_list:
#     j = i[-1]
    
    if re.search('/', i):
        j = re.sub("([^\/]+$)","",i)
        new_pos_list.append(j)
    else:
        new_pos_list.append(i)

cleaned_list = []
for pos in new_pos_list:
    placeholder = re.findall("[a-zA-Z0-9]+", pos)
    placeholder_2 = ''.join(placeholder)
    placeholder_3 = [d for d in placeholder_2]
    cleaned_list.append(placeholder_3)

cleaned_pos_list = []
for n_list in cleaned_list:
    
    placeholder_list = []
    for pos in n_list:
        if pos == '1':
            placeholder_list.append('P')
        elif pos == '2':
            placeholder_list.append('C')
        elif pos == '3':
            placeholder_list.append('1B')
        elif pos == '4':
            placeholder_list.append('2B')
        elif pos == '5':
            placeholder_list.append('3B')
        elif pos == '6':
            placeholder_list.append('SS')
        elif pos == ('7'):
            placeholder_list.append('OF')
        elif pos == ('8'):
            placeholder_list.append('OF')
        elif pos == ('9'):
            placeholder_list.append('OF')
        elif pos == ('D'):
            placeholder_list.append('DH')
        
    cleaned_pos_list.append(placeholder_list)        

temp_pos_list = []
for item in cleaned_pos_list:
    new_string = []
    for pos in item:
        string = str(pos)
        new_string = f'{new_string},{string}'
    temp_pos_list.append(new_string)

    
final_pos_list = []
for i in temp_pos_list:
    i = i.replace('[],', '')
    final_pos_list.append(i)
    
# replace old position column with new position column
final_position_players_df.drop('Pos\xa0Summary', axis = 1, inplace = True)
final_position_players_df['Pos'] = final_pos_list

final_position_players_df.head(25)

Unnamed: 0,Year,Name,Tm,Age,G,R,H,2B,3B,HR,...,PO,A,E,PA,OPS,OPS+,1B,FPTS,AVG_FPTS,Pos
1226,2021,Vladimir Guerrero Jr.,TOR,22,161,123,188,29,1,48,...,1026,46,8,698,1.002,167.0,110,2009,12.478261,"1B,DH"
1203,2021,Paul Goldschmidt,STL,33,158,102,177,36,2,31,...,1144,106,2,679,0.879,141.0,108,1987,12.575949,1B
2425,2019,Paul Goldschmidt,STL,31,161,97,155,25,1,34,...,1256,111,5,682,0.821,115.0,95,1973,12.254658,1B
606,2022,Paul Goldschmidt,STL,34,151,106,178,41,0,35,...,1071,91,1,651,0.981,177.0,102,1956,12.953642,"1B,DH"
2157,2019,Pete Alonso,NYM,24,161,103,155,30,2,53,...,1078,112,12,693,0.941,147.0,70,1925,11.956522,1B
2503,2019,Rhys Hoskins,PHI,26,160,86,129,33,5,29,...,1193,122,9,705,0.819,111.0,62,1854,11.5875,1B
422,2023,Christian Walker,ARI,32,157,86,150,36,2,33,...,1075,77,2,661,0.83,123.0,79,1850,11.783439,1B
892,2022,Christian Walker,ARI,31,160,84,141,25,2,36,...,1109,78,5,667,0.804,125.0,78,1821,11.38125,"1B,DH"
1229,2021,Yuli Gurriel,HOU,37,143,83,169,31,0,15,...,1057,86,6,605,0.846,131.0,123,1797,12.566434,1B
458,2022,Pete Alonso,NYM,27,160,95,162,27,0,40,...,913,92,8,685,0.869,146.0,95,1793,11.20625,"1B,DH"


In [36]:
# final_position_players_df = final_position_players_df.sort_values(['Year','Name'], ascending=[True, True])
# final_position_players_df
pos_filtered_df

Unnamed: 0,Year,Name,Tm,Age,G,R,H,2B,3B,HR,RBI,SB,TB,BB,SO,PO,A,E,PA,OPS,OPS+,1B,FPTS,AVG_FPTS,Pos
2730,2019,AJ Pollock,LAD,31,86,49,82,15,1,15,47,5,144,23,74,116,3,2,342,0.795,107.0,51,433,5.034884,"OF,OF"
2530,2019,Aaron Judge,NYY,27,102,75,103,18,1,27,55,3,204,64,141,177,7,0,447,0.921,143.0,57,565,5.539216,"OF,DH"
2356,2019,Adam Duvall,ATL,30,41,17,32,4,1,10,19,0,68,7,39,44,2,2,130,0.882,117.0,17,164,4.000000,OF
2363,2019,Adam Engel,CHW,27,89,26,55,10,2,6,26,3,87,14,78,159,2,3,248,0.687,83.0,37,274,3.078652,OF
2527,2019,Adam Jones,ARI,33,137,66,126,25,1,16,67,2,201,31,101,209,2,6,528,0.728,87.0,84,640,4.671533,OF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290,2023,Tyler O'Neill,STL,28,72,27,55,14,0,9,21,5,96,28,67,108,5,2,266,0.715,94.0,32,276,3.833333,"OF,OF"
392,2023,Tyrone Taylor,MIL,29,81,36,54,17,1,10,35,9,103,8,55,130,4,0,243,0.713,91.0,26,362,4.469136,"OF,OF"
332,2023,Víctor Robles,WSN,26,36,15,32,5,1,0,8,8,39,11,18,81,1,0,126,0.750,110.0,26,182,5.055556,OF
260,2023,Whit Merrifield,TOR,34,145,66,149,27,0,11,67,26,209,36,101,235,144,5,592,0.700,94.0,111,878,6.055172,"2B,OF"


In [46]:
# Create a new dataframe for stats percentile calculations
percentile_df = pd.DataFrame(columns = ['Year', 'Name', 'Age','Pos', 'Tm'])

# Carry over columnns from final_batter_stats_df that shouldn't be comparatively ranked 
percentile_df['Year'] = final_position_players_df['Year']
percentile_df['Name'] = final_position_players_df['Name']
percentile_df['Age'] = final_position_players_df['Age']
percentile_df['Tm'] = final_position_players_df['Tm']
percentile_df['Pos'] = final_position_players_df['Pos']

# Calculate the percentile rank for each player in each season, seperately, then add all the seasons in one dataframe
pos_list = ['C','1B','2B','3B','SS','OF']

for pos in pos_list:
    pos_filtered_df = final_position_players_df[final_position_players_df['Pos'].str.contains(pos)]

    for year in last_five_years:
        year_df = pos_filtered_df.loc[pos_filtered_df['Year'] == year]
        year_df[f'{pos}_FPTS_Percentile'] = year_df['FPTS'].rank(pct=True)
        year_df[f'{pos}_AVG_FPTS_Percentile'] = year_df['AVG_FPTS'].rank(pct=True)
        year_df.sort_values('Name', ascending=True)

        # Each of the seasons are added back to the percentile dataframe
        percentile_df = percentile_df.append(year_df, ignore_index=True)
        
    percentile_df['New_Pos'] = pos

        
percentile_df = percentile_df[percentile_df['G'].notna()]

percentile_df = percentile_df.sort_values(['Year','Name'], ascending=[True, True])
percentile_df = percentile_df.drop(['Tm','G','R','H','2B','3B','HR','RBI','SB','TB','BB','SO','PO','A','E','PA','OPS','OPS+','1B'],axis=1)

percentile_df = percentile_df.fillna(0)

percentile_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df[f'{pos}_FPTS_Percentile'] = year_df['FPTS'].rank(pct=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df[f'{pos}_AVG_FPTS_Percentile'] = year_df['AVG_FPTS'].rank(pct=True)


Unnamed: 0,Year,Name,Age,Pos,FPTS,AVG_FPTS,C_FPTS_Percentile,C_AVG_FPTS_Percentile,New_Pos,1B_FPTS_Percentile,1B_AVG_FPTS_Percentile,2B_FPTS_Percentile,2B_AVG_FPTS_Percentile,3B_FPTS_Percentile,3B_AVG_FPTS_Percentile,SS_FPTS_Percentile,SS_AVG_FPTS_Percentile,OF_FPTS_Percentile,OF_AVG_FPTS_Percentile
2691,2019,AJ Pollock,31,"OF,OF",433.0,5.034884,0.0,0.0,OF,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.424242,0.595960
2692,2019,Aaron Judge,27,"OF,DH",565.0,5.539216,0.0,0.0,OF,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.575758,0.727273
2693,2019,Adam Duvall,30,OF,164.0,4.000000,0.0,0.0,OF,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.070707,0.343434
2694,2019,Adam Engel,27,OF,274.0,3.078652,0.0,0.0,OF,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.242424,0.181818
2695,2019,Adam Jones,33,OF,640.0,4.671533,0.0,0.0,OF,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.00000,0.000000,0.696970,0.505051
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1690,2023,Zach Remillard,29,2B,224.0,4.148148,0.0,0.0,OF,0.0,0.0,0.157895,0.210526,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000
1691,2023,Zack Gelof,23,2B,541.0,7.840580,0.0,0.0,OF,0.0,0.0,0.614035,0.894737,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000
1692,2023,Zack Short,28,"2B,SS,3B",394.0,3.581818,0.0,0.0,OF,0.0,0.0,0.421053,0.105263,0.00000,0.000000,0.00000,0.000000,0.000000,0.000000
1927,2023,Zack Short,28,"2B,SS,3B",394.0,3.581818,0.0,0.0,OF,0.0,0.0,0.000000,0.000000,0.41791,0.134328,0.00000,0.000000,0.000000,0.000000


In [18]:
# Clean up the percentile dataframe, drop NaNs and remove unnecessary columns
percentile_df = percentile_df.dropna()

# Add a rank column that adds the percentiles from each category
percentile_df['Rank'] = (percentile_df['FPTS_Percentile'] + percentile_df['AVG_FPTS_Percentile'])

percentile_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  percentile_df['Rank'] = (percentile_df['FPTS_Percentile'] + percentile_df['AVG_FPTS_Percentile'])


Unnamed: 0,Year,Name,Age,Pos Summary,Tm,FPTS_Percentile,AVG_FPTS_Percentile,G,R,H,...,PO,A,E,PA,OPS,OPS+,1B,FPTS,AVG_FPTS,Rank
2311,2019,AJ Pollock,31,87/HD,LAD,0.322709,0.354582,86.0,49.0,82.0,...,116.0,3.0,2.0,342.0,0.795,107.0,51.0,433.0,5.034884,0.677291
2280,2019,Aaron Judge,27,9D/H,NYY,0.446215,0.414343,102.0,75.0,103.0,...,177.0,7.0,0.0,447.0,0.921,143.0,57.0,565.0,5.539216,0.860558
2381,2019,Adam Duvall,30,7H/9,ATL,0.045817,0.211155,41.0,17.0,32.0,...,44.0,2.0,2.0,130.0,0.882,117.0,17.0,164.0,4.000000,0.256972
2353,2019,Adam Engel,27,8/H,CHW,0.155378,0.095618,89.0,26.0,55.0,...,159.0,2.0,3.0,248.0,0.687,83.0,37.0,274.0,3.078652,0.250996
2262,2019,Adam Jones,33,*9H/8,ARI,0.517928,0.298805,137.0,66.0,126.0,...,209.0,2.0,6.0,528.0,0.728,87.0,84.0,640.0,4.671533,0.816733
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1264,2023,Yuli Gurriel,39,3H/D,MIA,0.739464,0.758621,108.0,32.0,73.0,...,569.0,32.0,2.0,329.0,0.663,79.0,50.0,845.0,7.824074,1.498084
1340,2023,Zach Neto,22,6/H,LAA,0.448276,0.578544,84.0,38.0,65.0,...,114.0,205.0,7.0,329.0,0.685,86.0,39.0,512.0,6.095238,1.02682
1419,2023,Zach Remillard,29,4/H5976D,CHW,0.145594,0.203065,54.0,16.0,37.0,...,72.0,90.0,4.0,160.0,0.615,69.0,29.0,224.0,4.148148,0.348659
1330,2023,Zack Gelof,23,4,OAK,0.488506,0.762452,69.0,40.0,72.0,...,137.0,141.0,3.0,300.0,0.840,137.0,37.0,541.0,7.840580,1.250958


In [19]:
# Create a list of each unique player we have in our dataframe
player_list = percentile_df.Name.unique().tolist()

# Create a new dataframe for combined, averaged percentiles over the past 5 seasons
new_df = pd.DataFrame(columns = ['Name', 'Rank', 'Trend', 'Pos', 'Years', 'FPTS', 'AVG_FPTS', 'FPTS_Percentile', 'AVG_FPTS_Percentile'])

# Create a list for each percentile stat category for upcoming loop
player_trends = []
average_FPTS = []
average_AVG_FPTS = []
average_FPTS_Percentile = []
average_AVG_FPTS_Percentile = []
average_Rank = []
year_count = []
pos = []

# Loop through each player, check if they played in the past two seasons. If not, remove them
for player in player_list:
    filter_df = percentile_df.loc[percentile_df['Name'] == player]
    filter_df = filter_df.sort_values(['Year'], ascending=[False])
    year_list = filter_df.Year.tolist()
    if (year_list[0] != last_year) and (year_list[0] != (last_year - 1)):
        player_list.remove(player)

# Update new dataframe with updated unique player list
new_df['Name'] = player_list        

# Loop through each player, locate their percentile stats for each season, average them out
for player in player_list:
    player_df = percentile_df.loc[percentile_df['Name'] == player]
    
    # We want to find the slope of the line of best fit for each player's overall ranking each season
    x = np.array(player_df['Year'], dtype = float)
    y = np.array(player_df['Rank'], dtype = float)
    slope, intercept = np.polyfit(x, y, 1)
    player_trends.append(slope)
    
    # Find average of each player's percentiles from previous 5 seasons
    average_FPTS.append(sum(player_df['FPTS']) / len(player_df['FPTS']))
    average_AVG_FPTS.append(sum(player_df['AVG_FPTS']) / len(player_df['AVG_FPTS']))
    average_FPTS_Percentile.append(sum(player_df['FPTS_Percentile']) / len(player_df['FPTS_Percentile']))
    average_AVG_FPTS_Percentile.append(sum(player_df['AVG_FPTS_Percentile']) / len(player_df['AVG_FPTS_Percentile']))
    average_Rank.append(sum(player_df['Rank']) / len(player_df['Rank']))
    year_count.append(len(x))
    
    # Keep player positions for reference purposes during the draft
    pos.append(player_df['Pos\xa0Summary'].unique())

# Update new dataframe with the list data from each stat
new_df['Pos'] = pos
new_df['Trend'] = player_trends
new_df['FPTS'] = average_FPTS
new_df['AVG_FPTS'] = average_AVG_FPTS
new_df['FPTS_Percentile'] = average_FPTS_Percentile
new_df['AVG_FPTS_Percentile'] = average_AVG_FPTS_Percentile
new_df['Rank'] = average_Rank

# Keep track of how many seasons are being considered, so we know how reliable the data is
new_df['Years'] = year_count



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_

In [20]:
# Create a weighted rank column by adding trend data to the rank data and account for number of seasons played
# Basically, if you played all 5 seasons, your trend stat is added directly
# If you played fewer than all 5 seasons, your trend stat is reduced depending on how few seasons you played
# new_df['Weighted Rank'] = (new_df['Rank'] + ((new_df['Trend'] * (new_df['Years'] - 1) / 4)))

# # shift column 'Weighted Rank' to first position
# first_column = new_df.pop('Weighted Rank')
  
# # insert column using insert(position,column_name,first_column) function
# new_df.insert(1, 'Weighted Rank', first_column)

new_df = new_df.sort_values('FPTS', ascending = False)

In [21]:
# # separate position column into a list for editing
# new_pos_list = []
# pos_list = new_df['Pos'].tolist()

# # loop through list and pull only the last item, which represents player position listed from most recent season
# for i in pos_list:
#     j = i[-1]
    
#     if re.search('/', j):
#         k = re.sub("([^\/]+$)","",j)
#         new_pos_list.append(k)
#     else:
#         new_pos_list.append(j)

# cleaned_list = []
# for pos in new_pos_list:
#     placeholder = re.findall("[a-zA-Z0-9]+", pos)
#     placeholder_2 = ''.join(placeholder)
#     placeholder_3 = [d for d in placeholder_2]
#     cleaned_list.append(placeholder_3)

# cleaned_pos_list = []
# for n_list in cleaned_list:
    
#     placeholder_list = []
#     for pos in n_list:
#         if pos == '1':
#             placeholder_list.append('P')
#         elif pos == '2':
#             placeholder_list.append('C')
#         elif pos == '3':
#             placeholder_list.append('1B')
#         elif pos == '4':
#             placeholder_list.append('2B')
#         elif pos == '5':
#             placeholder_list.append('3B')
#         elif pos == '6':
#             placeholder_list.append('SS')
#         elif pos == ('7'):
#             placeholder_list.append('OF')
#         elif pos == ('8'):
#             placeholder_list.append('OF')
#         elif pos == ('9'):
#             placeholder_list.append('OF')
#         elif pos == ('D'):
#             placeholder_list.append('DH')
        
#     cleaned_pos_list.append(placeholder_list)        

# temp_pos_list = []
# for item in cleaned_pos_list:
#     new_string = []
#     for pos in item:
#         string = str(pos)
#         new_string = f'{new_string},{string}'
#     temp_pos_list.append(new_string)

    
# final_pos_list = []
# for i in temp_pos_list:
#     i = i.replace('[],', '')
#     final_pos_list.append(i)
    
# # replace old position column with new position column
# new_df.drop('Pos', axis = 1, inplace = True)
# new_df['Pos'] = final_pos_list

# new_df.head(25)

Unnamed: 0,Name,Rank,Trend,Years,FPTS,AVG_FPTS,FPTS_Percentile,AVG_FPTS_Percentile,Pos
138,Paul Goldschmidt,1.964299,0.006415,5,1651.2,11.9135,0.993248,0.971051,"1B,DH"
139,Pete Alonso,1.903071,0.032115,5,1540.2,10.900115,0.972508,0.930564,1B
90,José Abreu,1.926786,-0.00902,5,1473.8,11.317376,0.980736,0.946049,"1B,DH"
72,J.T. Realmuto,1.94215,0.016769,5,1459.0,11.991166,0.971011,0.97114,C
35,Christian Walker,1.864457,0.038828,5,1390.0,10.539662,0.95098,0.913476,1B
295,Spencer Torkelson,1.845339,0.22503,2,1389.5,10.163608,0.921741,0.923598,1B
26,C.J. Cron,1.831295,-0.065972,4,1364.5,11.041019,0.89037,0.940925,1B
142,Rhys Hoskins,1.922377,-0.001514,4,1336.25,11.739209,0.955699,0.966678,1B
167,Vladimir Guerrero Jr.,1.717357,0.201218,5,1314.8,9.624007,0.893751,0.823606,"1B,DH"
180,Yuli Gurriel,1.855091,-0.091258,5,1310.6,10.862739,0.934267,0.920824,1B


In [16]:
############################################################################################
############################################################################################
############################################################################################

#    However, for example, if your league is set to have a Games Started limit of 12 and you have 10 pitchers at the 
#    completion of Saturday's games and start 4 pitchers on Sunday, you will receive stats for all 14 pitchers. 

#    (Note: This can happen on any day during the week. If managers have 10 pitchers by end of Wednesday and 
#    starts 4 on Thursday, they will receive points for the 4 pitchers on Thursday but for Friday, Saturday and 
#    Sunday, they will not receive any starting pitchers points.)

############################################################################################
############################################################################################
############################################################################################


In [22]:
# Create a list to help create a dataframe from batter statistics data
pitcher_stats = []

for year in last_five_years:

    # input URL and use BeautifulSoup to parse through the page
    pitching_url = f'https://www.baseball-reference.com/leagues/majors/{year}-standard-pitching.shtml'
    pitching_soup = BeautifulSoup(requests.get(pitching_url).content, 'html.parser')

    # Grab the table element that has batter statistics
    pitching_table = BeautifulSoup(pitching_soup.select_one('#all_players_standard_pitching').find_next(text=lambda x: isinstance(x, Comment)), 'html.parser')

    # Grab data from table and put it into the list created above
    for tr in pitching_table.select('tr:has(td)'):
        tds = [td.get_text(strip=True) for td in tr.select('td')]
        tds.append(year)
        pitcher_stats.append(tds)
        

In [23]:
# Create dataframe for batter statistics
raw_pitcher_stats_df = pd.DataFrame(pitcher_stats)

# Create an empty list to store dataframe header information
pitcher_header_list = []

# Grab the table header information to use as column headers in our dataframe
for tr in pitching_table.select('tr:has(th)'):
    ths = [th.get_text(strip=True) for th in tr.select('th')]
    pitcher_header_list.append(ths)

# For loop returns a list of lists, and we only need the first list 
pitcher_df_headers = pitcher_header_list[0]

# Remove the first item from our headers list, it is the index header that we do not need
pitcher_df_headers.remove('Rk')
pitcher_df_headers.append("Year")

# Set column headers equal to our list
raw_pitcher_stats_df.columns = pitcher_df_headers



In [24]:
raw_pitcher_stats_df

Unnamed: 0,Name,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,...,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Year
0,Fernando Abad*,37,COL,NL,1,0,1.000,4.26,6,0,...,32,124,8.15,2.211,15.6,2.8,4.3,2.8,0.67,2023
1,Andrew Abbott*,24,CIN,NL,8,6,.571,3.87,21,21,...,459,118,4.20,1.317,8.2,1.3,3.6,9.9,2.73,2023
2,Cory Abbott,27,WSN,NL,1,2,.333,6.64,22,0,...,183,65,5.95,1.703,11.0,2.1,4.3,9.2,2.11,2023
3,Albert Abreu,27,NYY,AL,2,2,.500,4.73,45,0,...,268,92,5.26,1.475,7.9,1.4,5.3,9.3,1.74,2023
4,Bryan Abreu,26,HOU,AL,3,2,.600,1.75,72,0,...,287,241,2.98,1.042,5.5,0.8,3.9,12.5,3.23,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5149,T.J. Zeuch,23,TOR,AL,1,2,.333,4.76,5,3,...,99,96,4.05,1.456,8.7,0.8,4.4,7.9,1.82,2019
5150,Kyle Zimmer,27,KCR,AL,0,1,.000,10.80,15,0,...,102,45,5.78,2.564,13.7,1.0,9.3,8.8,0.95,2019
5151,Jordan Zimmermann,33,DET,AL,1,13,.071,6.91,23,23,...,504,69,4.79,1.518,11.7,1.5,2.0,6.6,3.28,2019
5152,Ben Zobrist,38,CHC,NL,0,0,,0.00,1,0,...,5,,7.21,2.000,0.0,0.0,18.0,9.0,0.50,2019


In [25]:
# Create a list to help create a dataframe from batter statistics data
reliever_stats = []

# Create a loop to create a dataframe from Baseball Reference tables
for year in last_five_years:
    
    # input URL and use BeautifulSoup to parse through the page
    url = f'https://www.baseball-reference.com/leagues/majors/{year}-reliever-pitching.shtml'
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

    # Grab the table element that has batter statistics
    table = BeautifulSoup(soup.select_one('#all_players_reliever_pitching').find_next(text=lambda x: isinstance(x, Comment)), 'html.parser')


    # Grab data from table and put it into the list created above
    for tr in table.select('tr:has(td)'):
        tds = [td.get_text(strip=True) for td in tr.select('td')]
        tds.append(year)
        reliever_stats.append(tds)

# Create dataframe for batter statistics
reliever_stats_df = pd.DataFrame(reliever_stats)

# Create an empty list to store dataframe header information
header_list = []

# Grab the table header information to use as column headers in our dataframe
for tr in table.select('tr:has(th)'):
    ths = [th.get_text(strip=True) for th in tr.select('th')]
    header_list.append(ths)

# For loop returns a list of lists, and we only need the first list 
df_headers = header_list[0]

# Remove the first item from our headers list, it is the index header that we do not need
df_headers.remove('Rk')
df_headers.append("Year")

# Set column headers equal to our list
reliever_stats_df.columns = df_headers


final_reliever_stats_df = reliever_stats_df[['Name','Hold','Year','Tm']]

final_pitcher_stats_df = pd.merge(final_reliever_stats_df, raw_pitcher_stats_df, how = 'outer', on=['Name','Year','Tm'])

final_pitcher_stats_df



Unnamed: 0,Name,Hold,Year,Tm,Age,Lg,W,L,W-L%,ERA,...,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W
0,Fernando Abad*,0,2023,COL,37,NL,1,0,1.000,4.26,...,0,32,124,8.15,2.211,15.6,2.8,4.3,2.8,0.67
1,Cory Abbott,0,2023,WSN,27,NL,1,2,.333,6.64,...,4,183,65,5.95,1.703,11.0,2.1,4.3,9.2,2.11
2,Albert Abreu,3,2023,NYY,27,AL,2,2,.500,4.73,...,5,268,92,5.26,1.475,7.9,1.4,5.3,9.3,1.74
3,Bryan Abreu,24,2023,HOU,26,AL,3,2,.600,1.75,...,1,287,241,2.98,1.042,5.5,0.8,3.9,12.5,3.23
4,Domingo Acevedo,2,2023,OAK,29,AL,0,0,,10.61,...,1,47,40,5.51,1.929,15.4,1.9,1.9,6.8,3.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5205,Alex Wood*,,2019,CIN,28,NL,1,3,.250,5.80,...,0,153,81,6.38,1.402,10.3,2.8,2.3,7.6,3.33
5206,Brandon Woodruff,,2019,MIL,26,NL,11,3,.786,3.62,...,1,493,123,3.01,1.142,8.1,0.9,2.2,10.6,4.77
5207,Jordan Yamamoto,,2019,MIA,23,NL,4,5,.444,4.46,...,5,325,96,4.51,1.144,6.2,1.3,4.1,9.4,2.28
5208,Jordan Zimmermann,,2019,DET,33,AL,1,13,.071,6.91,...,3,504,69,4.79,1.518,11.7,1.5,2.0,6.6,3.28


In [26]:
final_pitcher_stats_df.columns

Index(['Name', 'Hold', 'Year', 'Tm', 'Age', 'Lg', 'W', 'L', 'W-L%', 'ERA', 'G',
       'GS', 'GF', 'CG', 'SHO', 'SV', 'IP', 'H', 'R', 'ER', 'HR', 'BB', 'IBB',
       'SO', 'HBP', 'BK', 'WP', 'BF', 'ERA+', 'FIP', 'WHIP', 'H9', 'HR9',
       'BB9', 'SO9', 'SO/W'],
      dtype='object')

In [27]:
# Change types of columns to numeric for columns with number values
final_pitcher_stats_df[['Age','G','GS','IP','ER','W','L','SV','SO','H','BB','CG','Hold']] = final_pitcher_stats_df[['Age','G','GS','IP','ER','W','L','SV','SO','H','BB','CG','Hold']].apply(pd.to_numeric)

# Drop any players with NaN innings pitched, ERA, and WHIP to remove null values 
final_pitcher_stats_df["Hold"].fillna(0, inplace = True)
final_pitcher_stats_df.dropna(subset=['IP'], axis = 0 , inplace= True)
final_pitcher_stats_df.dropna(subset=['ERA'], axis = 0 , inplace= True)
final_pitcher_stats_df.dropna(subset=['WHIP'], axis = 0 , inplace= True)
final_pitcher_stats_df.replace([np.inf, -np.inf], np.nan, inplace=True)

# Remove any pitchers with fewer than 30 innings pitched
final_pitcher_stats_df = final_pitcher_stats_df[final_pitcher_stats_df['IP'] >= 30]

# Select the columns we want for our pitcher analysis
final_pitcher_stats_df = final_pitcher_stats_df[['Year','Name','Age','G','GS','IP','ER','W','L','SV','SO','H','BB','CG','Hold']]

# Eliminate Baseball Reference's name badges for accolades
final_pitcher_stats_df['Name'] = final_pitcher_stats_df['Name'].str.extract('([^\*|#]*)')

pitcher_list = final_pitcher_stats_df.Name.tolist()

cleaned_pitcher_list = []
for pitcher in pitcher_list:
    pitcher = pitcher.replace("\xa0", " ")
    cleaned_pitcher_list.append(pitcher) 
    
final_pitcher_stats_df['Name'] = cleaned_pitcher_list 




In [28]:

final_pitcher_stats_df
test_df = final_pitcher_stats_df.loc[final_pitcher_stats_df['Name'] == 'Blake Snell']
test_df


Unnamed: 0,Year,Name,Age,G,GS,IP,ER,W,L,SV,SO,H,BB,CG,Hold
1639,2022,Blake Snell,29.0,24,24,128.0,48,8,10,0,171,103,51,0,0.0
4493,2023,Blake Snell,30.0,32,32,180.0,45,14,9,0,234,115,99,0,0.0
4877,2021,Blake Snell,28.0,27,27,128.2,60,7,6,0,170,101,69,0,0.0
5028,2020,Blake Snell,27.0,11,11,50.0,18,4,2,0,63,42,18,0,0.0
5187,2019,Blake Snell,26.0,23,23,107.0,51,6,8,0,147,96,40,0,0.0


In [29]:
# Sort data by name alphabetically, then by year in descending order
final_pitcher_stats_df = final_pitcher_stats_df.sort_values(['Year','Name'], ascending=[True, True])



final_pitcher_stats_df['FPTS'] = ''
final_pitcher_stats_df['AVG_FPTS'] = ''
final_pitcher_stats_df['Pos'] = ''
pos_list = []
final_pos_list = []

for index, row in final_pitcher_stats_df.iterrows():
    final_pitcher_stats_df['FPTS'] = ((3*final_pitcher_stats_df['IP']) - final_pitcher_stats_df['H'] - (2*final_pitcher_stats_df['ER']) - final_pitcher_stats_df['BB'] + (2*final_pitcher_stats_df['W']) - (2*final_pitcher_stats_df['L']) + (5*final_pitcher_stats_df['SV']) + (2*final_pitcher_stats_df['SO']) + (3*final_pitcher_stats_df['CG']) + (2*final_pitcher_stats_df['Hold']))
    final_pitcher_stats_df['AVG_FPTS'] = (final_pitcher_stats_df['FPTS']/final_pitcher_stats_df['G'])
#     final_pitcher_stats_df['Pos'] = (final_pitcher_stats_df['GS']/final_pitcher_stats_df['G'])
    
# #    print(final_pitcher_stats_df['GS'] / final_pitcher_stats_df['G'])
    pitcher_ratio = (final_pitcher_stats_df['GS'] / final_pitcher_stats_df['G'])[index]
    pos_list.append(pitcher_ratio)
# #    final_pitcher_stats_df['Pos'] = pitcher_ratio

for i in range(len(pos_list)):
    if pos_list[i] > (2/3):
        final_pos_list.append('SP')
    elif pos_list[i] < (1/3):
        final_pos_list.append('RP')
    else:
        final_pos_list.append('SP,RP')
        
final_pitcher_stats_df['Pos'] = final_pos_list
        
final_pitcher_stats_df = final_pitcher_stats_df.sort_values(['FPTS'], ascending=False)
final_pitcher_stats_df.head(50)


Unnamed: 0,Year,Name,Age,G,GS,IP,ER,W,L,SV,SO,H,BB,CG,Hold,FPTS,AVG_FPTS,Pos
5082,2019,Gerrit Cole,28.0,33,33,212.1,59,20,5,0,326,142,48,0,0.0,1010.3,30.615152,SP
5198,2019,Justin Verlander,36.0,34,34,223.0,64,21,6,0,300,137,42,2,0.0,998.0,29.352941,SP
5087,2019,Jacob deGrom,31.0,32,32,204.0,55,11,8,0,255,154,44,0,0.0,820.0,25.625,SP
4893,2021,Zack Wheeler,31.0,32,32,213.1,66,14,10,0,247,169,46,3,0.0,803.3,25.103125,SP
3552,2019,Shane Bieber,24.0,34,33,214.1,78,15,8,0,259,186,40,3,0.0,801.3,23.567647,SP
4496,2023,Spencer Strider,24.0,32,32,186.2,80,20,5,0,281,146,58,0,0.0,786.6,24.58125,SP
4532,2022,Sandy Alcántara,26.0,32,32,228.2,58,14,9,0,207,174,50,6,0.0,786.6,24.58125,SP
5189,2019,Stephen Strasburg,30.0,33,33,209.0,77,18,6,0,251,161,56,0,0.0,782.0,23.69697,SP
4871,2021,Max Scherzer,36.0,30,30,179.1,49,15,4,0,236,119,36,1,0.0,781.3,26.043333,SP
4548,2022,Corbin Burnes,27.0,33,33,202.0,66,12,8,0,243,144,51,0,0.0,773.0,23.424242,SP


In [30]:
# Create a new dataframe for stats percentile calculations
pitcher_percentile_df = pd.DataFrame(columns = ['Year','Name','Age','G','GS','IP','ER','W','L','SV','SO','H','BB','CG','Hold','Pos'])

# Carry over columnns from final_pitcher_stats_df that shouldn't be comparatively ranked 
pitcher_percentile_df['Year'] = final_pitcher_stats_df['Year']
pitcher_percentile_df['Name'] = final_pitcher_stats_df['Name']
pitcher_percentile_df['Age'] = final_pitcher_stats_df['Age']

# Calculate the percentile rank for each player in each season, seperately, then add all the seasons in one dataframe
for year in last_five_years:
    year_df = final_pitcher_stats_df.loc[final_pitcher_stats_df['Year'] == year]
    year_df['FPTS_Percentile'] = year_df['FPTS'].rank(pct=True)
    year_df['AVG_FPTS_Percentile'] = year_df['AVG_FPTS'].rank(pct=True)
    year_df.sort_values('Name', ascending=True)    
    
    # Each of the seasons are added back to the percentile dataframe
    pitcher_percentile_df = pitcher_percentile_df.append(year_df, ignore_index=True)

pitcher_percentile_df = pitcher_percentile_df.sort_values(['Year','Name'], ascending=[True, True])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['FPTS_Percentile'] = year_df['FPTS'].rank(pct=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['AVG_FPTS_Percentile'] = year_df['AVG_FPTS'].rank(pct=True)


In [33]:
# Clean up the percentile dataframe, drop NaNs and remove unnecessary columns
pitcher_percentile_df = pitcher_percentile_df.dropna()

# Add a rank column that adds the percentiles from each category
pitcher_percentile_df['Rank'] = (pitcher_percentile_df['FPTS_Percentile'] + pitcher_percentile_df['AVG_FPTS_Percentile'])

pitcher_percentile_df


Unnamed: 0,Year,Name,Age,G,GS,IP,ER,W,L,SV,...,H,BB,CG,Hold,Pos,FPTS,AVG_FPTS,FPTS_Percentile,AVG_FPTS_Percentile,Rank
4355,2019,Aaron Brooks,29.0,29,18,110.0,69,6,8,0,...,118,34,0,0.0,"SP,RP",200.0,6.896552,0.508588,0.601145,1.109733
4522,2019,Aaron Brooks,29.0,15,6,50.1,28,2,3,0,...,49,14,0,0.0,"SP,RP",115.3,7.686667,0.190840,0.633588,0.824427
4567,2019,Aaron Brooks,29.0,14,12,59.2,41,4,5,0,...,69,20,0,0.0,SP,82.6,5.900000,0.104962,0.549618,0.654580
4258,2019,Aaron Bummer,25.0,58,0,67.2,16,0,0,1,...,43,24,0,27.0,RP,281.6,4.855172,0.694656,0.463740,1.158397
4411,2019,Aaron Civale,24.0,10,10,57.2,15,3,4,0,...,44,16,0,0.0,SP,171.6,17.160000,0.402672,0.906489,1.309160
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2517,2023,Zack Greinke,39.0,30,27,142.1,80,2,15,0,...,158,23,0,0.0,SP,253.3,8.443333,0.628159,0.651625,1.279783
2561,2023,Zack Littell,27.0,28,14,90.0,41,3,6,0,...,94,12,0,0.0,"SP,RP",224.0,8.000000,0.549639,0.638989,1.188628
2562,2023,Zack Littell,27.0,26,14,87.0,38,3,6,0,...,91,9,0,0.0,"SP,RP",223.0,8.576923,0.546931,0.657040,1.203971
2624,2023,Zack Thompson,25.0,25,9,66.1,33,5,7,0,...,69,25,0,1.0,"SP,RP",180.3,7.212000,0.435018,0.590253,1.025271


In [27]:


# test_df = raw_pitcher_stats_df.loc[raw_pitcher_stats_df['Name'] == 'Blake\xa0Snell']
# test_df

In [34]:
# Create a list of each unique player we have in our dataframe
pitcher_list = pitcher_percentile_df.Name.unique().tolist()

# Create a new dataframe for combined, averaged percentiles over the past 5 seasons
new_pitcher_df = pd.DataFrame(columns = ['Name', 'Rank', 'Trend', 'Years', 'FPTS', 'AVG_FPTS', 'FPTS_Percentile', 'AVG_FPTS_Percentile','Pos','G','GS'])

# Create a list for each percentile stat category for upcoming loop
pitcher_trends = []
average_FPTS = []
average_AVG_FPTS = []
average_FPTS_Percentile = []
average_AVG_FPTS_Percentile = []
pitcher_average_Rank = []
pitcher_year_count = []
pos = []
games = []
games_started = []

cleaned_pitcher_list = []
for pitcher in pitcher_list:
    pitcher = pitcher.replace("\xa0", " ")
    cleaned_pitcher_list.append(pitcher) 

# new_pitcher_df
    
# Loop through each player, check if they played in the past two seasons. If not, remove them
for pitcher in cleaned_pitcher_list:
    filter_df = pitcher_percentile_df.loc[pitcher_percentile_df['Name'] == pitcher]
    filter_df = filter_df.sort_values(['Year'], ascending=[False])
    year_list = filter_df.Year.tolist()
    if (year_list[0] != last_year) and (year_list[0] != (last_year - 1)):
        pitcher_list.remove(pitcher)

# Update new dataframe with updated unique player list
new_pitcher_df['Name'] = pitcher_list        

# Loop through each player, locate their percentile stats for each season, average them out
for pitcher in pitcher_list:
    pitcher_df = pitcher_percentile_df.loc[pitcher_percentile_df['Name'] == pitcher]
    
    # We want to find the slope of the line of best fit for each player's overall ranking each season
    x = np.array(pitcher_df['Year'], dtype = float)
    y = np.array(pitcher_df['Rank'], dtype = float)
    slope, intercept = np.polyfit(x, y, 1)
    pitcher_trends.append(slope)
    
    # Find average of each player's percentiles from previous 5 seasons
    average_FPTS.append(sum(pitcher_df['FPTS']) / len(pitcher_df['FPTS']))
    average_AVG_FPTS.append(sum(pitcher_df['AVG_FPTS']) / len(pitcher_df['AVG_FPTS']))
    average_FPTS_Percentile.append(sum(pitcher_df['FPTS_Percentile']) / len(pitcher_df['FPTS_Percentile']))
    average_AVG_FPTS_Percentile.append(sum(pitcher_df['AVG_FPTS_Percentile']) / len(pitcher_df['AVG_FPTS_Percentile']))
    pitcher_average_Rank.append(sum(pitcher_df['Rank']) / len(pitcher_df['Rank']))
    pitcher_year_count.append(len(x))
    pos.append(pitcher_df['Pos'].iloc[-1])
    games.append(pitcher_df['G'].iloc[-1])
    games_started.append(pitcher_df['GS'].iloc[-1])

# Update new dataframe with the list data from each stat
new_pitcher_df['Trend'] = pitcher_trends
new_pitcher_df['Pos'] = pos
new_pitcher_df['G'] = games
new_pitcher_df['GS'] = games_started
new_pitcher_df['FPTS'] = average_FPTS
new_pitcher_df['AVG_FPTS'] = average_AVG_FPTS
new_pitcher_df['FPTS_Percentile'] = average_FPTS_Percentile
new_pitcher_df['AVG_FPTS_Percentile'] = average_AVG_FPTS_Percentile
new_pitcher_df['Rank'] = pitcher_average_Rank

# Keep track of how many seasons are being considered, so we know how reliable the data is
new_pitcher_df['Years'] = pitcher_year_count



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [35]:
# Create a weighted rank column by adding trend data to the rank data and account for number of seasons played
# Basically, if you played all 5 seasons, your trend stat is added directly
# If you played fewer than all 5 seasons, your trend stat is reduced depending on how few seasons you played
# new_pitcher_df['Weighted Rank'] = (new_pitcher_df['Rank'] + ((new_pitcher_df['Trend'] * (new_pitcher_df['Years'] - 1) / 4)))

# # shift column 'Weighted Rank' to first position
# first_pitcher_column = new_pitcher_df.pop('Weighted Rank')
  
# # insert column using insert(position,column_name,first_column) function
# new_pitcher_df.insert(1, 'Weighted Rank', first_pitcher_column)

new_pitcher_df = new_pitcher_df.sort_values('FPTS', ascending = False)
new_pitcher_df.head(25)


Unnamed: 0,Name,Rank,Trend,Years,FPTS,AVG_FPTS,FPTS_Percentile,AVG_FPTS_Percentile,Pos,G,GS
81,Gerrit Cole,1.974887,0.00401,5,716.24,25.381091,0.989682,0.985205,SP,33,33
502,Spencer Strider,1.949479,0.097432,2,695.6,22.042238,0.98187,0.967609,SP,32,32
585,Kodai Senga,1.954874,0.000483,1,599.3,20.665517,0.976534,0.978339,SP,29,29
2,Aaron Nola,1.914393,0.003927,5,568.56,20.47125,0.967907,0.946486,SP,32,32
365,Shohei Ohtani,1.937188,0.017889,3,562.433333,22.657557,0.952475,0.984713,SP,23,23
243,Zack Wheeler,1.861066,0.048409,5,550.72,20.417187,0.932936,0.928129,SP,32,32
128,Justin Verlander,1.777411,-0.090201,5,535.32,21.78772,0.821877,0.955534,SP,11,11
200,Shane Bieber,1.882688,-0.066086,5,516.24,23.537608,0.91858,0.964108,SP,21,21
35,Charlie Morton,1.681653,0.079569,5,512.9,17.755587,0.834116,0.847537,SP,30,30
235,Yu Darvish,1.890662,-0.042978,5,509.76,20.64436,0.945639,0.945023,SP,24,24


In [53]:
draft_df = pd.concat([new_df, new_pitcher_df], ignore_index=True, sort=False)

# draft_df.drop(columns = ['Rank','FPTS_Percentile','AVG_FPTS_Percentile'])


# draft_df['FPTS_Percentile'] = draft_df['FPTS'].rank(pct=True)
# draft_df['AVG_FPTS_Percentile'] = draft_df['AVG_FPTS'].rank(pct=True)
    
# draft_df['Rank'] = (draft_df['FPTS_Percentile'] + (draft_df['AVG_FPTS_Percentile'] / 2))

draft_df['Weighted_Rank'] = ''
draft_df['Weighted_Rank'] = abs(draft_df['FPTS'] * (1 + (draft_df['Trend'] * (draft_df['Years'] - 1) / 4)))
# shift column 'Weighted Rank' to first position
# weighted_rank_column = draft_df.pop('Weighted Rank')
  
# # insert column using insert(position,column_name,first_column) function
# draft_df.insert(1, 'Weighted Rank', weighted_rank_column)


# draft_df['Weighted_Rank'] = ''
# draft_df['Weighted_Rank'] = abs(draft_df['FPTS'] * (1 + draft_df['Trend']))

weighted_rank_column = draft_df.pop('Weighted_Rank')
  
# insert column using insert(position,column_name,first_column) function
draft_df.insert(1, 'Weighted_Rank', weighted_rank_column)


draft_df = draft_df.sort_values(by='Weighted_Rank', ascending=False)
draft_df["G"].fillna("N/A", inplace = True)
draft_df["GS"].fillna("N/A", inplace = True)

draft_df.head(50)

Unnamed: 0,Name,Weighted_Rank,Rank,Trend,Years,FPTS,AVG_FPTS,FPTS_Percentile,AVG_FPTS_Percentile,Pos,G,GS
0,Paul Goldschmidt,1661.791744,1.964299,0.006415,5,1651.2,11.9135,0.993248,0.971051,"1B,DH",,
1,Pete Alonso,1589.662825,1.903071,0.032115,5,1540.2,10.900115,0.972508,0.930564,1B,,
8,Vladimir Guerrero Jr.,1579.361694,1.717357,0.201218,5,1314.8,9.624007,0.893751,0.823606,"1B,DH",,
3,J.T. Realmuto,1483.465859,1.94215,0.016769,5,1459.0,11.991166,0.971011,0.97114,C,,
5,Spencer Torkelson,1467.669789,1.845339,0.22503,2,1389.5,10.163608,0.921741,0.923598,1B,,
2,José Abreu,1460.506024,1.926786,-0.00902,5,1473.8,11.317376,0.980736,0.946049,"1B,DH",,
27,Ty France,1458.832937,1.310292,0.450132,5,1006.0,7.838502,0.668516,0.641776,1B,,
4,Christian Walker,1443.970352,1.864457,0.038828,5,1390.0,10.539662,0.95098,0.913476,1B,,
7,Rhys Hoskins,1334.732511,1.922377,-0.001514,4,1336.25,11.739209,0.955699,0.966678,1B,,
6,C.J. Cron,1296.985519,1.831295,-0.065972,4,1364.5,11.041019,0.89037,0.940925,1B,,


In [33]:
######################################################################################################################
######################################################################################################################
######################################################################################################################
#### DRAFT DAY FUNCTIONS
   
# DROP A PLAYER 
def drafted(player):
    global draft_df
    global final_pitcher_stats_df
    global final_position_players_df
    draft_df = draft_df[draft_df.Name != player]
    final_pitcher_stats_df = final_pitcher_stats_df[final_pitcher_stats_df.Name != player]
    final_position_players_df = final_position_players_df[final_position_players_df.Name != player]
    return draft_df.head(25)
    
# FILTER PLAYERS BY POSITION
def position_filter(POS):
    filtered_draft_df = draft_df[draft_df['Pos'].str.contains(POS)]
    return filtered_draft_df.head(25)

# PULL PITCHING STAT CATEGORY LEADERS
def pitching_stat_leaders(CAT):
    global final_pitcher_stats_df
    pitching_filtered_draft_df = final_pitcher_stats_df.sort_values([CAT], ascending=[False])
    return pitching_filtered_draft_df.head(25)

# PULL BATTING STAT CATEGORY LEADERS
def batting_stat_leaders(CAT):
    global final_position_players_df
    batting_filtered_draft_df = final_position_players_df.sort_values([CAT], ascending=[False])
    return batting_filtered_draft_df.head(25)

def drop_all_position(POS):
    global draft_df
    draft_df = draft_df[draft_df.Pos != POS]
    return draft_df.head(25)

In [43]:
# drop_all_position('')
# drafted('')


In [42]:
# Filter the DataFrame by a string value in the "City" column
filtered_df = draft_df[draft_df["Pos"].str.contains("1B")]
filtered_df.head(60)

Unnamed: 0,Name,Weighted_Rank,Rank,Trend,Years,FPTS,AVG_FPTS,FPTS_Percentile,AVG_FPTS_Percentile,Pos,G,GS
5,Spencer Torkelson,1702.179155,1.845339,0.22503,2,1389.5,10.163608,0.921741,0.923598,1B,,
0,Paul Goldschmidt,1661.791744,1.964299,0.006415,5,1651.2,11.9135,0.993248,0.971051,"1B,DH",,
1,Pete Alonso,1589.662825,1.903071,0.032115,5,1540.2,10.900115,0.972508,0.930564,1B,,
8,Vladimir Guerrero Jr.,1579.361694,1.717357,0.201218,5,1314.8,9.624007,0.893751,0.823606,"1B,DH",,
108,Spencer Steer,1500.977724,1.088126,1.180069,2,688.5,6.611264,0.477885,0.610241,"1B,3B,OF,2B",,
39,Andrew Vaughn,1482.766775,1.267219,0.569065,3,945.0,6.684855,0.70918,0.558039,1B,,
2,José Abreu,1460.506024,1.926786,-0.00902,5,1473.8,11.317376,0.980736,0.946049,"1B,DH",,
27,Ty France,1458.832937,1.310292,0.450132,5,1006.0,7.838502,0.668516,0.641776,1B,,
4,Christian Walker,1443.970352,1.864457,0.038828,5,1390.0,10.539662,0.95098,0.913476,1B,,
7,Rhys Hoskins,1334.226681,1.922377,-0.001514,4,1336.25,11.739209,0.955699,0.966678,1B,,


In [49]:
copy_test_df = filtered_df

copy_test_df['Test_FPTS_Percentile'] = copy_test_df['FPTS'].rank(pct=True)
copy_test_df['Test_AVG_FPTS_Percentile'] = copy_test_df['AVG_FPTS'].rank(pct=True)

copy_test_df['Test_Rank'] = (copy_test_df['Test_FPTS_Percentile'] + copy_test_df['Test_AVG_FPTS_Percentile'])


copy_test_df.head(60)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  copy_test_df['Test_FPTS_Percentile'] = copy_test_df['FPTS'].rank(pct=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  copy_test_df['Test_AVG_FPTS_Percentile'] = copy_test_df['AVG_FPTS'].rank(pct=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  copy_test_df['Test_Rank'] = (copy_test_df['Test

Unnamed: 0,Name,Weighted_Rank,Rank,Trend,Years,FPTS,AVG_FPTS,FPTS_Percentile,AVG_FPTS_Percentile,Pos,G,GS,Test_FPTS_Percentile,Test_AVG_FPTS_Percentile,Test_Rank
5,Spencer Torkelson,1702.179155,1.845339,0.22503,2,1389.5,10.163608,0.921741,0.923598,1B,,,0.9375,0.875,1.8125
0,Paul Goldschmidt,1661.791744,1.964299,0.006415,5,1651.2,11.9135,0.993248,0.971051,"1B,DH",,,1.0,1.0,2.0
1,Pete Alonso,1589.662825,1.903071,0.032115,5,1540.2,10.900115,0.972508,0.930564,1B,,,0.984375,0.9375,1.921875
8,Vladimir Guerrero Jr.,1579.361694,1.717357,0.201218,5,1314.8,9.624007,0.893751,0.823606,"1B,DH",,,0.890625,0.84375,1.734375
108,Spencer Steer,1500.977724,1.088126,1.180069,2,688.5,6.611264,0.477885,0.610241,"1B,3B,OF,2B",,,0.609375,0.53125,1.140625
39,Andrew Vaughn,1482.766775,1.267219,0.569065,3,945.0,6.684855,0.70918,0.558039,1B,,,0.765625,0.5625,1.328125
2,José Abreu,1460.506024,1.926786,-0.00902,5,1473.8,11.317376,0.980736,0.946049,"1B,DH",,,0.96875,0.96875,1.9375
27,Ty France,1458.832937,1.310292,0.450132,5,1006.0,7.838502,0.668516,0.641776,1B,,,0.820312,0.734375,1.554688
4,Christian Walker,1443.970352,1.864457,0.038828,5,1390.0,10.539662,0.95098,0.913476,1B,,,0.953125,0.890625,1.84375
7,Rhys Hoskins,1334.226681,1.922377,-0.001514,4,1336.25,11.739209,0.955699,0.966678,1B,,,0.90625,0.984375,1.890625


In [50]:
copy_test_df.sort_values('Test_Rank', ascending = False).head(60)

Unnamed: 0,Name,Weighted_Rank,Rank,Trend,Years,FPTS,AVG_FPTS,FPTS_Percentile,AVG_FPTS_Percentile,Pos,G,GS,Test_FPTS_Percentile,Test_AVG_FPTS_Percentile,Test_Rank
0,Paul Goldschmidt,1661.791744,1.964299,0.006415,5,1651.2,11.9135,0.993248,0.971051,"1B,DH",,,1.0,1.0,2.0
2,José Abreu,1460.506024,1.926786,-0.00902,5,1473.8,11.317376,0.980736,0.946049,"1B,DH",,,0.96875,0.96875,1.9375
1,Pete Alonso,1589.662825,1.903071,0.032115,5,1540.2,10.900115,0.972508,0.930564,1B,,,0.984375,0.9375,1.921875
7,Rhys Hoskins,1334.226681,1.922377,-0.001514,4,1336.25,11.739209,0.955699,0.966678,1B,,,0.90625,0.984375,1.890625
6,C.J. Cron,1274.480692,1.831295,-0.065972,4,1364.5,11.041019,0.89037,0.940925,1B,,,0.921875,0.953125,1.875
4,Christian Walker,1443.970352,1.864457,0.038828,5,1390.0,10.539662,0.95098,0.913476,1B,,,0.953125,0.890625,1.84375
5,Spencer Torkelson,1702.179155,1.845339,0.22503,2,1389.5,10.163608,0.921741,0.923598,1B,,,0.9375,0.875,1.8125
9,Yuli Gurriel,1190.997366,1.855091,-0.091258,5,1310.6,10.862739,0.934267,0.920824,1B,,,0.875,0.921875,1.796875
18,Salvador Pérez,1034.954524,1.824555,-0.081061,4,1126.25,10.654744,0.924159,0.900396,"C,DH,1B",,,0.859375,0.90625,1.765625
8,Vladimir Guerrero Jr.,1579.361694,1.717357,0.201218,5,1314.8,9.624007,0.893751,0.823606,"1B,DH",,,0.890625,0.84375,1.734375


In [54]:
fielding_stats_df.loc[420:450]

Unnamed: 0,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,PO,A,E,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary,Year
420,Kyle Freeland,30,COL,NL,29,29,0,155.2,17,5,12,0,2,1.0,,,-3,-4,0.0,0.98,0.59,P,2023
421,Freddie Freeman,33,LAD,NL,161,161,139,1378.1,1260,1131,128,1,126,0.999,3.0,3.0,-9,-8,0.0,8.22,7.82,1B,2023
422,Tyler Freeman,24,CLE,AL,53,39,31,359.0,132,45,83,4,12,0.97,-4.0,-14.0,-6,-20,0.0,3.21,2.33,3B-2B-SS-OF-1B,2023
423,Sal Frelick,23,MIL,NL,55,49,33,446.1,99,95,4,0,1,1.0,5.0,14.0,6,16,0.0,2.0,1.8,OF,2023
424,Luis Frías,25,ARI,NL,29,0,0,31.0,4,1,3,0,0,1.0,,,0,0,0.0,1.16,0.14,P,2023
425,Max Fried,29,ATL,NL,14,14,1,77.2,15,6,7,2,0,0.867,,,-2,-5,0.0,1.51,0.93,P,2023
426,TJ Friedl,27,CIN,NL,137,122,97,1090.1,315,305,9,1,2,0.997,6.0,6.0,1,1,2.0,2.59,2.29,OF,2023
427,David Fry,27,CLE,AL,55,25,13,262.2,190,178,9,3,10,0.984,-2.0,-9.0,-4,-18,-1.0,6.41,3.17,C-1B-OF-3B-P,2023
428,Shintaro Fujinami,29,TOT,AL,64,7,0,79.0,7,6,1,0,0,1.0,,,-1,-15,2.0,0.8,0.11,P,2023
429,Carson Fulmer,29,LAA,AL,3,1,0,10.0,3,2,1,0,0,1.0,,,0,0,0.0,2.7,1.0,P,2023
