In [1]:
# Import needed dependencies
import requests
import re
import pandas as pd
import numpy as np
import scipy.stats as stats
from datetime import date
from bs4 import BeautifulSoup, Comment

In [8]:
today = date.today()

# dd/mm/YY
current_year = today.strftime("%Y")

last_year = int(current_year) - 1

In [9]:
last_five_years = []
for i in range(1,6):
    last_five_years.append(int(current_year) - i)

In [10]:
# Create a list to help create a dataframe from batter statistics data
batter_stats = []

# Create a loop to create a dataframe from Baseball Reference tables
for year in last_five_years:
    
    # input URL and use BeautifulSoup to parse through the page
    url = f'https://www.baseball-reference.com/leagues/majors/{year}-standard-batting.shtml'
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

    # Grab the table element that has batter statistics
    table = BeautifulSoup(soup.select_one('#all_players_standard_batting').find_next(text=lambda x: isinstance(x, Comment)), 'html.parser')


    # Grab data from table and put it into the list created above
    for tr in table.select('tr:has(td)'):
        tds = [td.get_text(strip=True) for td in tr.select('td')]
        tds.append(year)
        batter_stats.append(tds)

In [11]:
# Create dataframe for batter statistics
batter_stats_df = pd.DataFrame(batter_stats)

# Create an empty list to store dataframe header information
header_list = []

# Grab the table header information to use as column headers in our dataframe
for tr in table.select('tr:has(th)'):
    ths = [th.get_text(strip=True) for th in tr.select('th')]
    header_list.append(ths)

# For loop returns a list of lists, and we only need the first list 
df_headers = header_list[0]

# Remove the first item from our headers list, it is the index header that we do not need
df_headers.remove('Rk')
df_headers.append("Year")

# Set column headers equal to our list
batter_stats_df.columns = df_headers
batter_stats_df

Unnamed: 0,Name,Age,Tm,Lg,G,PA,AB,R,H,2B,...,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos Summary,Year
0,CJ Abrams*,21,TOT,NL,90,302,284,33,70,12,...,.604,76,92,5,9,2,2,0,64/H9D,2022
1,CJ Abrams*,21,SDP,NL,46,139,125,16,29,5,...,.605,77,40,4,6,2,2,0,64/H9D,2022
2,CJ Abrams*,21,WSN,NL,44,163,159,17,41,7,...,.603,75,52,1,3,0,0,0,6/H,2022
3,Albert Abreu,26,TOT,AL,1,0,0,0,0,0,...,,,0,0,0,0,0,0,1,2022
4,Albert Abreu,26,KCR,AL,1,0,0,0,0,0,...,,,0,0,0,0,0,0,/1,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6505,Ryan Zimmerman,33,WSN,NL,85,323,288,33,76,21,...,.824,114,140,10,3,0,2,1,3H,2018
6506,Jordan Zimmermann,32,DET,AL,2,2,2,0,0,0,...,.000,-100,0,0,0,0,0,0,1,2018
6507,Ben Zobrist#,37,CHC,NL,139,520,455,67,139,28,...,.817,117,200,8,2,1,7,1,497H/3D,2018
6508,Mike Zunino,27,SEA,AL,113,405,373,37,75,18,...,.669,85,153,7,6,0,2,0,*2/HD,2018


In [38]:
# Create a list to help create a dataframe from batter statistics data
fielding_stats = []

# Create a loop to create a dataframe from Baseball Reference tables
#for year in last_five_years:
    
# input URL and use BeautifulSoup to parse through the page
url = f'https://www.baseball-reference.com/leagues/majors/2022-standard-fielding.shtml'
soup = BeautifulSoup(requests.get(url).content, 'html.parser')

# Grab the table element that has batter statistics
table = soup.select_one('#all_players_players_standard_fielding_fielding')#.find_next(text=lambda x: isinstance(x, Comment)), 'html.parser')


# Grab data from table and put it into the list created above
for tr in table.select('tr:has(td)'):
    tds = [td.get_text(strip=True) for td in tr.select('td')]
    tds.append(year)
    fielding_stats.append(tds)

# Create dataframe for batter statistics
fielding_stats_df = pd.DataFrame(fielding_stats)

# Create an empty list to store dataframe header information
fielding_header_list = []

# Grab the table header information to use as column headers in our dataframe
for tr in table.select('tr:has(th)'):
    ths = [th.get_text(strip=True) for th in tr.select('th')]
    fielding_header_list.append(ths)

# For loop returns a list of lists, and we only need the first list 
fielding_df_headers = fielding_header_list[0]

# Remove the first item from our headers list, it is the index header that we do not need
# fielding_df_headers.remove('Rk')
#fielding_df_headers.append("Year")

# Set column headers equal to our list
fielding_stats_df.columns = fielding_df_headers
fielding_stats_df

    

Unnamed: 0,Rk,Name,Age,Tm,Lg,G,GS,CG,Inn,Ch,...,DP,Fld%,Rtot,Rtot/yr,Rdrs,Rdrs/yr,Rgood,RF/9,RF/G,Pos Summary
0,Cory Abbott,26,WSN,NL,16,9,0,48.0,3,2,...,1.000,,,0,0,0,0.56,0.19,P,2018
1,CJ Abrams,21,TOT,NL,87,78,72,696.2,327,106,...,.954,-5,-8,-5,-8,-2,4.03,3.43,SS-2B-OF,2018
2,Albert Abreu,26,TOT,AL,33,0,0,38.2,9,2,...,.778,,,0,0,0,1.63,0.21,P,2018
3,Bryan Abreu,25,HOU,AL,55,0,0,60.1,8,2,...,1.000,,,0,0,0,1.19,0.15,P,2018
4,José Abreu,35,CHW,AL,128,128,125,1136.1,1016,954,...,.989,-3,-3,1,1,-2,7.96,7.85,1B,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1483,Angel Zerpa,22,KCR,AL,3,2,0,11.0,2,0,...,.500,,,-1,-18,0,0.82,0.33,P,2018
1484,T.J. Zeuch,26,CIN,NL,3,3,0,10.2,5,3,...,1.000,,,0,0,0,4.22,1.67,P,2018
1485,Bradley Zimmer,29,TOT,ZZ,99,28,24,369.2,118,116,...,.983,8,25,6,19,0,2.82,1.17,OF,2018
1486,Bruce Zimmermann,27,BAL,AL,15,13,0,73.2,6,1,...,1.000,,,-1,-3,0,0.73,0.40,P,2018


In [36]:
soup


<!DOCTYPE html>

<html class="no-js" data-root="/home/br/build" data-version="klecko-" lang="en">
<head>
<meta charset="utf-8"/>
<meta content="ie=edge" http-equiv="x-ua-compatible"/>
<meta content="width=device-width, initial-scale=1.0, maximum-scale=2.0" name="viewport">
<link href="https://cdn.ssref.net/req/202303161" rel="dns-prefetch"/>
<!-- Quantcast Choice. Consent Manager Tag v2.0 (for TCF 2.0) -->
<script async="true" type="text/javascript">
    (function() {
	var host = window.location.hostname;
	var element = document.createElement('script');
	var firstScript = document.getElementsByTagName('script')[0];
	var url = 'https://cmp.quantcast.com'
	    .concat('/choice/', 'XwNYEpNeFfhfr', '/', host, 
		    '/choice.js?tag_version=V2');
	var uspTries = 0;
	var uspTriesLimit = 3;
	element.async = true;
	element.type = 'text/javascript';
	element.src = url;
	
	firstScript.parentNode.insertBefore(element, firstScript);
	
	function makeStub() {
	    var TCF_LOCATOR_NAME = '__tcfapiLo

In [27]:
soup


<!DOCTYPE html>

<html class="no-js" data-root="/home/br/build" data-version="klecko-" lang="en">
<head>
<meta charset="utf-8"/>
<meta content="ie=edge" http-equiv="x-ua-compatible"/>
<meta content="width=device-width, initial-scale=1.0, maximum-scale=2.0" name="viewport">
<link href="https://cdn.ssref.net/req/202303161" rel="dns-prefetch"/>
<!-- Quantcast Choice. Consent Manager Tag v2.0 (for TCF 2.0) -->
<script async="true" type="text/javascript">
    (function() {
	var host = window.location.hostname;
	var element = document.createElement('script');
	var firstScript = document.getElementsByTagName('script')[0];
	var url = 'https://cmp.quantcast.com'
	    .concat('/choice/', 'XwNYEpNeFfhfr', '/', host, 
		    '/choice.js?tag_version=V2');
	var uspTries = 0;
	var uspTriesLimit = 3;
	element.async = true;
	element.type = 'text/javascript';
	element.src = url;
	
	firstScript.parentNode.insertBefore(element, firstScript);
	
	function makeStub() {
	    var TCF_LOCATOR_NAME = '__tcfapiLo

In [6]:
# Change types of columns to numeric for columns with number values
batter_stats_df[['Age', 'R','HR','RBI','SB','BA','PA','OPS','OPS+']] = batter_stats_df[['Age', 'R','HR','RBI','SB','BA','PA','OPS','OPS+']].apply(pd.to_numeric)

# Drop any players with 0 plate appearances to remove null values and change PA type to integer
batter_stats_df.dropna(subset=['PA'], axis = 0 , inplace= True)

# Remove any players with fewer than 100 plate appearances
filtered_batter_stats_df = batter_stats_df[batter_stats_df['PA'] >= 100]

# Select the columns we want for our batter analysis
final_batter_stats_df = filtered_batter_stats_df[['Year','Name','Tm','Age','R','HR','RBI','SB','BA','PA','OPS','OPS+','Pos\xa0Summary']]

In [7]:
# Sort by index to prepare to drop duplicates
final_batter_stats_df = final_batter_stats_df.sort_index()

# Drop duplicate entries of Player Name and Year
# This is to eliminate partial season data for players who played for 2+ teams in one season
final_batter_stats_df = final_batter_stats_df.drop_duplicates(subset=['Year', 'Name'])
final_batter_stats_df

Unnamed: 0,Year,Name,Tm,Age,R,HR,RBI,SB,BA,PA,OPS,OPS+,Pos Summary
0,2022,CJ Abrams*,TOT,21.0,33,2,21,7,0.246,302,0.604,76.0,64/H9D
5,2022,José Abreu,CHW,35.0,85,15,75,0,0.304,679,0.824,133.0,*3D
6,2022,Ronald Acuna Jr.,ATL,24.0,71,15,50,29,0.266,533,0.764,114.0,9D/H8
7,2022,Willy Adames,MIL,26.0,83,31,98,8,0.238,617,0.756,112.0,*6/DH
8,2022,Riley Adams,WSN,26.0,14,5,10,0,0.176,155,0.555,60.0,2/HD3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6504,2018,Bradley Zimmer*,CLE,25.0,14,2,9,4,0.226,114,0.611,65.0,8/H
6505,2018,Ryan Zimmerman,WSN,33.0,33,13,51,1,0.264,323,0.824,114.0,3H
6507,2018,Ben Zobrist#,CHC,37.0,67,9,58,3,0.305,520,0.817,117.0,497H/3D
6508,2018,Mike Zunino,SEA,27.0,37,20,44,0,0.201,405,0.669,85.0,*2/HD


In [None]:
final_batter_stats_df['FPTS'] = ''
final_batter_stats_df['AVG_FPTS'] = ''

for index, row in final_batter_stats_df.iterrows():
    final_batter_stats_df['FPTS'] = (final_batter_stats_df['PTS'] + final_batter_stats_df['FG'] - final_batter_stats_df['FGA'] + final_batter_stats_df['FT'] - final_batter_stats_df['FTA'] + final_batter_stats_df['TRB'] + final_batter_stats_df['AST'] + (1.5 * final_batter_stats_df['STL']) + (1.5 * final_batter_stats_df['BLK']) - final_batter_stats_df['TOV'])
    final_batter_stats_df['AVG_FPTS'] = (final_batter_stats_df['FPTS']/final_batter_stats_df['G'])


In [8]:
# Sort data by name alphabetically, then by year in descending order
final_batter_stats_df = final_batter_stats_df.sort_values(['Year','Name'], ascending=[True, True])

# Eliminate Baseball Reference's name badges for accolades
final_batter_stats_df['Name'] = final_batter_stats_df['Name'].str.extract('([^\*|#]*)')

cleaned_player_list = []
for player in final_batter_stats_df['Name']:
    player = player.replace("\xa0", " ")
    cleaned_player_list.append(player)

final_batter_stats_df['Name'] = cleaned_player_list        

In [9]:
# Create a new dataframe for stats percentile calculations
percentile_df = pd.DataFrame(columns = ['Year', 'Name', 'Tm', 'Age', 'BA', 'R', 'HR', 'RBI', 'SB', 'PA'])

# Carry over columnns from final_batter_stats_df that shouldn't be comparatively ranked 
percentile_df['Year'] = final_batter_stats_df['Year']
percentile_df['Name'] = final_batter_stats_df['Name']
percentile_df['Age'] = final_batter_stats_df['Age']
percentile_df['Tm'] = final_batter_stats_df['Tm']

# Calculate the percentile rank for each player in each season, seperately, then add all the seasons in one dataframe
for year in last_five_years:
    year_df = final_batter_stats_df.loc[final_batter_stats_df['Year'] == year]
    year_df['BA_Percentile'] = year_df['BA'].rank(pct=True)
    year_df['R_Percentile'] = year_df['R'].rank(pct=True)
    year_df['HR_Percentile'] = year_df['HR'].rank(pct=True)
    year_df['RBI_Percentile'] = year_df['RBI'].rank(pct=True)
    year_df['SB_Percentile'] = year_df['SB'].rank(pct=True)
    year_df.sort_values('Name', ascending=True)

    # Each of the seasons are added back to the percentile dataframe
    percentile_df = percentile_df.append(year_df, ignore_index=True)

percentile_df = percentile_df.sort_values(['Year','Name'], ascending=[True, True])
percentile_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['BA_Percentile'] = year_df['BA'].rank(pct=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['R_Percentile'] = year_df['R'].rank(pct=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['HR_Percentile'] = year_df['HR'].rank(pct=True)
A value is trying to be set on a copy o

Unnamed: 0,Year,Name,Tm,Age,BA,R,HR,RBI,SB,PA,OPS,OPS+,Pos Summary,BA_Percentile,R_Percentile,HR_Percentile,RBI_Percentile,SB_Percentile
0,2018,A.J. Ellis,SDP,37.0,,,,,,,,,,,,,,
3843,2018,A.J. Ellis,SDP,37.0,0.272,19,1,15,0,183,0.722,104.0,2H/D7,0.775056,0.198218,0.055679,0.149220,0.109131
1,2018,AJ Pollock,ARI,30.0,,,,,,,,,,,,,,
3844,2018,AJ Pollock,ARI,30.0,0.257,61,21,65,13,460,0.800,108.0,*8/H,0.615813,0.698218,0.817372,0.780624,0.888641
2,2018,Aaron Altherr,PHI,27.0,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2612,2022,Yuli Gurriel,HOU,38.0,0.242,53,8,53,8,584,0.647,84.0,*3/DH,0.547872,0.679787,0.492553,0.717021,0.788298
2144,2022,Zach McKinstry,TOT,27.0,,,,,,,,,,,,,,
2614,2022,Zach McKinstry,TOT,27.0,0.199,21,5,14,7,185,0.635,78.0,546/HD97,0.122340,0.241489,0.308511,0.131915,0.756383
2145,2022,Zack Collins,TOT,27.0,,,,,,,,,,,,,,


In [10]:
# Clean up the percentile dataframe, drop NaNs and remove unnecessary columns
percentile_df = percentile_df.dropna()

# Add a rank column that adds the percentiles from each category
percentile_df['Rank'] = (percentile_df['BA_Percentile'] + percentile_df['R_Percentile'] + percentile_df['HR_Percentile'] + percentile_df['RBI_Percentile'] + percentile_df['SB_Percentile'])
percentile_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  percentile_df['Rank'] = (percentile_df['BA_Percentile'] + percentile_df['R_Percentile'] + percentile_df['HR_Percentile'] + percentile_df['RBI_Percentile'] + percentile_df['SB_Percentile'])


Unnamed: 0,Year,Name,Tm,Age,BA,R,HR,RBI,SB,PA,OPS,OPS+,Pos Summary,BA_Percentile,R_Percentile,HR_Percentile,RBI_Percentile,SB_Percentile,Rank
3843,2018,A.J. Ellis,SDP,37.0,0.272,19,1,15,0,183,0.722,104.0,2H/D7,0.775056,0.198218,0.055679,0.149220,0.109131,1.287305
3844,2018,AJ Pollock,ARI,30.0,0.257,61,21,65,13,460,0.800,108.0,*8/H,0.615813,0.698218,0.817372,0.780624,0.888641,3.800668
3845,2018,Aaron Altherr,PHI,27.0,0.181,28,8,38,3,285,0.628,69.0,9H8/7,0.052339,0.308463,0.417595,0.471047,0.544543,1.793987
3846,2018,Aaron Hicks,NYY,28.0,0.248,90,27,79,11,581,0.833,127.0,*8/HD,0.493318,0.940980,0.924276,0.886414,0.849666,4.094655
3847,2018,Aaron Judge,NYY,26.0,0.278,77,27,67,6,498,0.919,150.0,9D/H8,0.827394,0.849666,0.924276,0.795100,0.716036,4.112472
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2611,2022,Yoán Moncada,CHW,27.0,0.212,41,12,51,2,433,0.626,76.0,5/H,0.228723,0.538298,0.642553,0.680851,0.440426,2.530851
2613,2022,Yu Chang,TOT,26.0,0.208,19,4,15,0,190,0.605,75.0,46/35HD1,0.195745,0.203191,0.232979,0.152128,0.114894,0.898936
2612,2022,Yuli Gurriel,HOU,38.0,0.242,53,8,53,8,584,0.647,84.0,*3/DH,0.547872,0.679787,0.492553,0.717021,0.788298,3.225532
2614,2022,Zach McKinstry,TOT,27.0,0.199,21,5,14,7,185,0.635,78.0,546/HD97,0.122340,0.241489,0.308511,0.131915,0.756383,1.560638


In [11]:
# Create a list of each unique player we have in our dataframe
player_list = percentile_df.Name.unique().tolist()

# Create a new dataframe for combined, averaged percentiles over the past 5 seasons
new_df = pd.DataFrame(columns = ['Name', 'Rank', 'Trend', 'Pos', 'Years', 'BA_Percentile', 'R_Percentile', 'HR_Percentile', 'RBI_Percentile', 'SB_Percentile'])

# Create a list for each percentile stat category for upcoming loop
player_trends = []
average_BA = []
average_R = []
average_HR = []
average_RBI = []
average_SB = []
average_Rank = []
year_count = []
pos = []

# Loop through each player, check if they played in the past two seasons. If not, remove them
for player in player_list:
    filter_df = percentile_df.loc[percentile_df['Name'] == player]
    filter_df = filter_df.sort_values(['Year'], ascending=[False])
    year_list = filter_df.Year.tolist()
    if (year_list[0] != last_year) and (year_list[0] != (last_year - 1)):
        player_list.remove(player)

# Update new dataframe with updated unique player list
new_df['Name'] = player_list        

# Loop through each player, locate their percentile stats for each season, average them out
for player in player_list:
    player_df = percentile_df.loc[percentile_df['Name'] == player]
    
    # We want to find the slope of the line of best fit for each player's overall ranking each season
    x = np.array(player_df['Year'], dtype = float)
    y = np.array(player_df['Rank'], dtype = float)
    slope, intercept = np.polyfit(x, y, 1)
    player_trends.append(slope)
    
    # Find average of each player's percentiles from previous 5 seasons
    average_BA.append(sum(player_df['BA_Percentile']) / len(player_df['BA_Percentile']))
    average_R.append(sum(player_df['R_Percentile']) / len(player_df['R_Percentile']))
    average_HR.append(sum(player_df['HR_Percentile']) / len(player_df['HR_Percentile']))
    average_RBI.append(sum(player_df['RBI_Percentile']) / len(player_df['RBI_Percentile']))
    average_SB.append(sum(player_df['SB_Percentile']) / len(player_df['SB_Percentile']))
    average_Rank.append(sum(player_df['Rank']) / len(player_df['Rank']))
    year_count.append(len(x))
    
    # Keep player positions for reference purposes during the draft
    pos.append(player_df['Pos\xa0Summary'].unique())

# Update new dataframe with the list data from each stat
new_df['Pos'] = pos
new_df['Trend'] = player_trends
new_df['BA_Percentile'] = average_BA
new_df['R_Percentile'] = average_R
new_df['HR_Percentile'] = average_HR
new_df['RBI_Percentile'] = average_RBI
new_df['SB_Percentile'] = average_SB
new_df['Rank'] = average_Rank

# Keep track of how many seasons are being considered, so we know how reliable the data is
new_df['Years'] = year_count



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_

  exec(code_obj, self.user_global_ns, self.user_ns)


In [12]:
# Create a weighted rank column by adding trend data to the rank data and account for number of seasons played
# Basically, if you played all 5 seasons, your trend stat is added directly
# If you played fewer than all 5 seasons, your trend stat is reduced depending on how few seasons you played
new_df['Weighted Rank'] = (new_df['Rank'] + ((new_df['Trend'] * (new_df['Years'] - 1) / 4)))

# shift column 'Weighted Rank' to first position
first_column = new_df.pop('Weighted Rank')
  
# insert column using insert(position,column_name,first_column) function
new_df.insert(1, 'Weighted Rank', first_column)

new_df = new_df.sort_values('Weighted Rank', ascending = False)

In [13]:
# separate position column into a list for editing
new_pos_list = []
pos_list = new_df['Pos'].tolist()

# loop through list and pull only the last item, which represents player position listed from most recent season
for i in pos_list:
    j = i[-1]
    
    if re.search('/', j):
        k = re.sub("([^\/]+$)","",j)
        new_pos_list.append(k)
    else:
        new_pos_list.append(j)

cleaned_list = []
for pos in new_pos_list:
    placeholder = re.findall("[a-zA-Z0-9]+", pos)
    placeholder_2 = ''.join(placeholder)
    placeholder_3 = [d for d in placeholder_2]
    cleaned_list.append(placeholder_3)

cleaned_pos_list = []
for n_list in cleaned_list:
    
    placeholder_list = []
    for pos in n_list:
        if pos == '1':
            placeholder_list.append('P')
        elif pos == '2':
            placeholder_list.append('C')
        elif pos == '3':
            placeholder_list.append('1B')
        elif pos == '4':
            placeholder_list.append('2B')
        elif pos == '5':
            placeholder_list.append('3B')
        elif pos == '6':
            placeholder_list.append('SS')
        elif pos == ('7'):
            placeholder_list.append('OF')
        elif pos == ('8'):
            placeholder_list.append('OF')
        elif pos == ('9'):
            placeholder_list.append('OF')
        elif pos == ('D'):
            placeholder_list.append('DH')
        
    cleaned_pos_list.append(placeholder_list)        

temp_pos_list = []
for item in cleaned_pos_list:
    new_string = []
    for pos in item:
        string = str(pos)
        new_string = f'{new_string},{string}'
    temp_pos_list.append(new_string)

    
final_pos_list = []
for i in temp_pos_list:
    i = i.replace('[],', '')
    final_pos_list.append(i)
    
# replace old position column with new position column
new_df.drop('Pos', axis = 1, inplace = True)
new_df['Pos'] = final_pos_list

new_df.head(25)

Unnamed: 0,Name,Weighted Rank,Rank,Trend,Years,BA_Percentile,R_Percentile,HR_Percentile,RBI_Percentile,SB_Percentile,Pos
293,Trea Turner,4.70022,4.559453,0.140767,5,0.92256,0.968756,0.824153,0.853256,0.990728,"SS,2B"
343,Fernando Tatis Jr.,4.659651,4.461685,0.395932,3,0.864693,0.878693,0.916546,0.838189,0.963564,"SS,OF"
176,José Ramírez,4.638936,4.553868,0.085068,5,0.754271,0.928109,0.93827,0.958808,0.974411,"3B,DH"
585,Julio Rodríguez,4.635106,4.635106,0.001146,1,0.906383,0.920213,0.941489,0.887234,0.979787,OF
109,Freddie Freeman,4.624784,4.598302,0.026482,5,0.962727,0.98496,0.909528,0.961659,0.779428,1B
206,Manny Machado,4.570684,4.501576,0.069108,5,0.835513,0.925997,0.941398,0.961832,0.836835,"3B,DH"
425,Kyle Tucker,4.481808,4.396563,0.17049,3,0.770516,0.851543,0.870985,0.958803,0.944716,OF
237,Mookie Betts,4.462754,4.553842,-0.091088,5,0.86036,0.984929,0.92032,0.865404,0.92283,OF
595,Michael Harris II,4.451064,4.451064,0.001101,1,0.952128,0.882979,0.842553,0.818085,0.955319,OF
508,Randy Arozarena,4.401615,4.38089,0.082901,2,0.800543,0.902329,0.822574,0.875869,0.979574,"OF,DH,OF"


In [14]:
# Create a list to help create a dataframe from batter statistics data
pitcher_stats = []

for year in last_five_years:

    # input URL and use BeautifulSoup to parse through the page
    pitching_url = f'https://www.baseball-reference.com/leagues/majors/{year}-standard-pitching.shtml'
    pitching_soup = BeautifulSoup(requests.get(pitching_url).content, 'html.parser')

    # Grab the table element that has batter statistics
    pitching_table = BeautifulSoup(pitching_soup.select_one('#all_players_standard_pitching').find_next(text=lambda x: isinstance(x, Comment)), 'html.parser')

    # Grab data from table and put it into the list created above
    for tr in pitching_table.select('tr:has(td)'):
        tds = [td.get_text(strip=True) for td in tr.select('td')]
        tds.append(year)
        pitcher_stats.append(tds)
        

In [15]:
# Create dataframe for batter statistics
pitcher_stats_df = pd.DataFrame(pitcher_stats)

# Create an empty list to store dataframe header information
pitcher_header_list = []

# Grab the table header information to use as column headers in our dataframe
for tr in pitching_table.select('tr:has(th)'):
    ths = [th.get_text(strip=True) for th in tr.select('th')]
    pitcher_header_list.append(ths)

# For loop returns a list of lists, and we only need the first list 
pitcher_df_headers = pitcher_header_list[0]

# Remove the first item from our headers list, it is the index header that we do not need
pitcher_df_headers.remove('Rk')
pitcher_df_headers.append("Year")

# Set column headers equal to our list
pitcher_stats_df.columns = pitcher_df_headers

In [16]:
# Change types of columns to numeric for columns with number values
pitcher_stats_df[['Age', 'W','ERA','SO','SV','WHIP','ERA+','SO9','IP']] = pitcher_stats_df[['Age', 'W','ERA','SO','SV','WHIP','ERA+','SO9','IP']].apply(pd.to_numeric)

# Drop any players with NaN innings pitched, ERA, and WHIP to remove null values 
pitcher_stats_df.dropna(subset=['IP'], axis = 0 , inplace= True)
pitcher_stats_df.dropna(subset=['ERA'], axis = 0 , inplace= True)
pitcher_stats_df.dropna(subset=['WHIP'], axis = 0 , inplace= True)
pitcher_stats_df.replace([np.inf, -np.inf], np.nan, inplace=True)

# Remove any pitchers with fewer than 30 innings pitched
pitcher_stats_df = pitcher_stats_df[pitcher_stats_df['IP'] >= 30]

# Make ERA and WHIP negative so high values become "low" when sorted with all other columns
pitcher_stats_df['ERA'] = pitcher_stats_df['ERA'] * -1
pitcher_stats_df['WHIP'] = pitcher_stats_df['WHIP'] * -1

# Select the columns we want for our pitcher analysis
final_pitcher_stats_df = pitcher_stats_df[['Year','Name','Age', 'W','ERA','SO','SV','WHIP','ERA+','SO9','IP']]

# Eliminate Baseball Reference's name badges for accolades
final_pitcher_stats_df['Name'] = final_pitcher_stats_df['Name'].str.extract('([^\*|#]*)')

pitcher_list = final_pitcher_stats_df.Name.tolist()

cleaned_pitcher_list = []
for pitcher in pitcher_list:
    pitcher = pitcher.replace("\xa0", " ")
    cleaned_pitcher_list.append(pitcher) 
    
final_pitcher_stats_df['Name'] = cleaned_pitcher_list        


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_pitcher_stats_df['Name'] = final_pitcher_stats_df['Name'].str.extract('([^\*|#]*)')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_pitcher_stats_df['Name'] = cleaned_pitcher_list


In [17]:
# Sort by index to prepare to drop duplicates
final_pitcher_stats_df = final_pitcher_stats_df.sort_index()

# Drop duplicate entries of Player Name and Year
# This is to eliminate partial season data for players who played for 2+ teams in one season
final_pitcher_stats_df = final_pitcher_stats_df.drop_duplicates(subset=['Year', 'Name'])

In [18]:
# Sort data by name alphabetically, then by year in descending order
final_pitcher_stats_df = final_pitcher_stats_df.sort_values(['Year','Name'], ascending=[True, True])

In [19]:
# Create a new dataframe for stats percentile calculations
pitcher_percentile_df = pd.DataFrame(columns = ['Year', 'Name', 'Age', 'W', 'ERA', 'SO', 'SV', 'WHIP', 'SO9', 'IP'])

# Carry over columnns from final_pitcher_stats_df that shouldn't be comparatively ranked 
pitcher_percentile_df['Year'] = final_pitcher_stats_df['Year']
pitcher_percentile_df['Name'] = final_pitcher_stats_df['Name']
pitcher_percentile_df['Age'] = final_pitcher_stats_df['Age']

# Calculate the percentile rank for each player in each season, seperately, then add all the seasons in one dataframe
for year in last_five_years:
    year_df = final_pitcher_stats_df.loc[final_pitcher_stats_df['Year'] == year]
    year_df['W_Percentile'] = year_df['W'].rank(pct=True)
    year_df['ERA_Percentile'] = year_df['ERA'].rank(pct=True)
    year_df['SO_Percentile'] = year_df['SO'].rank(pct=True)
    year_df['SV_Percentile'] = year_df['SV'].rank(pct=True)
    year_df['WHIP_Percentile'] = year_df['WHIP'].rank(pct=True)
    year_df.sort_values('Name', ascending=True)

    # Each of the seasons are added back to the percentile dataframe
    pitcher_percentile_df = pitcher_percentile_df.append(year_df, ignore_index=True)

pitcher_percentile_df = pitcher_percentile_df.sort_values(['Year','Name'], ascending=[True, True])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['W_Percentile'] = year_df['W'].rank(pct=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['ERA_Percentile'] = year_df['ERA'].rank(pct=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  year_df['SO_Percentile'] = year_df['SO'].rank(pct=True)
A value is trying to be set on a copy

In [20]:
# Clean up the percentile dataframe, drop NaNs and remove unnecessary columns
pitcher_percentile_df = pitcher_percentile_df.dropna()

# Add a rank column that adds the percentiles from each category
pitcher_percentile_df['Rank'] = (pitcher_percentile_df['W_Percentile'] + pitcher_percentile_df['ERA_Percentile'] + pitcher_percentile_df['SO_Percentile'] + pitcher_percentile_df['SV_Percentile'] + pitcher_percentile_df['WHIP_Percentile'])


In [21]:
# Create a list of each unique player we have in our dataframe
pitcher_list = pitcher_percentile_df.Name.unique().tolist()

# Create a new dataframe for combined, averaged percentiles over the past 5 seasons
new_pitcher_df = pd.DataFrame(columns = ['Name', 'Rank', 'Trend', 'Years', 'W_Percentile', 'ERA_Percentile', 'SO_Percentile', 'SV_Percentile', 'WHIP_Percentile'])

# Create a list for each percentile stat category for upcoming loop
pitcher_trends = []
average_W = []
average_ERA = []
average_SO = []
average_SV = []
average_WHIP = []
pitcher_average_Rank = []
pitcher_year_count = []

cleaned_pitcher_list = []
for pitcher in pitcher_list:
    pitcher = pitcher.replace("\xa0", " ")
    cleaned_pitcher_list.append(pitcher) 

new_pitcher_df
    
# Loop through each player, check if they played in the past two seasons. If not, remove them
for pitcher in cleaned_pitcher_list:
    filter_df = pitcher_percentile_df.loc[pitcher_percentile_df['Name'] == pitcher]
    filter_df = filter_df.sort_values(['Year'], ascending=[False])
    year_list = filter_df.Year.tolist()
    if (year_list[0] != last_year) and (year_list[0] != (last_year - 1)):
        pitcher_list.remove(pitcher)

# Update new dataframe with updated unique player list
new_pitcher_df['Name'] = pitcher_list        

# Loop through each player, locate their percentile stats for each season, average them out
for pitcher in pitcher_list:
    pitcher_df = pitcher_percentile_df.loc[pitcher_percentile_df['Name'] == pitcher]
    
    # We want to find the slope of the line of best fit for each player's overall ranking each season
    x = np.array(pitcher_df['Year'], dtype = float)
    y = np.array(pitcher_df['Rank'], dtype = float)
    slope, intercept = np.polyfit(x, y, 1)
    pitcher_trends.append(slope)
    
    # Find average of each player's percentiles from previous 5 seasons
    average_W.append(sum(pitcher_df['W_Percentile']) / len(pitcher_df['W_Percentile']))
    average_ERA.append(sum(pitcher_df['ERA_Percentile']) / len(pitcher_df['ERA_Percentile']))
    average_SO.append(sum(pitcher_df['SO_Percentile']) / len(pitcher_df['SO_Percentile']))
    average_SV.append(sum(pitcher_df['SV_Percentile']) / len(pitcher_df['SV_Percentile']))
    average_WHIP.append(sum(pitcher_df['WHIP_Percentile']) / len(pitcher_df['WHIP_Percentile']))
    pitcher_average_Rank.append(sum(pitcher_df['Rank']) / len(pitcher_df['Rank']))
    pitcher_year_count.append(len(x))

# Update new dataframe with the list data from each stat
new_pitcher_df['Trend'] = pitcher_trends
new_pitcher_df['W_Percentile'] = average_W
new_pitcher_df['ERA_Percentile'] = average_ERA
new_pitcher_df['SO_Percentile'] = average_SO
new_pitcher_df['SV_Percentile'] = average_SV
new_pitcher_df['WHIP_Percentile'] = average_WHIP
new_pitcher_df['Rank'] = pitcher_average_Rank

# Keep track of how many seasons are being considered, so we know how reliable the data is
new_pitcher_df['Years'] = pitcher_year_count



  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_

In [22]:
# Create a weighted rank column by adding trend data to the rank data and account for number of seasons played
# Basically, if you played all 5 seasons, your trend stat is added directly
# If you played fewer than all 5 seasons, your trend stat is reduced depending on how few seasons you played
new_pitcher_df['Weighted Rank'] = (new_pitcher_df['Rank'] + ((new_pitcher_df['Trend'] * (new_pitcher_df['Years'] - 1) / 4)))

# shift column 'Weighted Rank' to first position
first_pitcher_column = new_pitcher_df.pop('Weighted Rank')
  
# insert column using insert(position,column_name,first_column) function
new_pitcher_df.insert(1, 'Weighted Rank', first_pitcher_column)

new_pitcher_df = new_pitcher_df.sort_values('Weighted Rank', ascending = False)
new_pitcher_df.head(25)


Unnamed: 0,Name,Weighted Rank,Rank,Trend,Years,W_Percentile,ERA_Percentile,SO_Percentile,SV_Percentile,WHIP_Percentile
516,Alexis Díaz,4.308917,4.308917,0.001066,1,0.791932,0.972399,0.676221,0.929936,0.938429
142,Justin Verlander,4.230547,4.22841,0.004273,3,0.990764,0.938395,0.985115,0.326198,0.987939
400,Devin Williams,4.200687,4.117935,0.331008,2,0.779559,0.943907,0.71247,0.901062,0.780937
546,Evan Phillips,4.185775,4.185775,0.001035,1,0.791932,0.997877,0.627389,0.77707,0.991507
298,Liam Hendriks,4.170686,4.216773,-0.092174,3,0.630785,0.905899,0.762817,0.98661,0.930662
435,Jordan Romano,4.089993,4.135401,-0.18163,2,0.708035,0.956547,0.621209,0.980428,0.869181
547,Félix Bautista,4.076433,4.076433,0.001008,1,0.529724,0.930998,0.705945,0.952229,0.957537
408,Emmanuel Clase,3.994186,3.988372,0.023256,2,0.440028,0.990464,0.595935,0.986258,0.975687
358,Alek Manoah,3.967463,3.861677,0.423143,2,0.928096,0.832282,0.90054,0.317778,0.882981
617,Spencer Strider,3.936306,3.936306,0.000973,1,0.907643,0.845011,0.978769,0.308917,0.895966


In [23]:
draft_df = pd.concat([new_df, new_pitcher_df], ignore_index=True, sort=False)
draft_df = draft_df.sort_values(by='Weighted Rank', ascending=False)
draft_df["Pos"].fillna("P", inplace = True)
draft_df.head(25)

Unnamed: 0,Name,Weighted Rank,Rank,Trend,Years,BA_Percentile,R_Percentile,HR_Percentile,RBI_Percentile,SB_Percentile,Pos,W_Percentile,ERA_Percentile,SO_Percentile,SV_Percentile,WHIP_Percentile
0,Trea Turner,4.70022,4.559453,0.140767,5,0.92256,0.968756,0.824153,0.853256,0.990728,"SS,2B",,,,,
1,Fernando Tatis Jr.,4.659651,4.461685,0.395932,3,0.864693,0.878693,0.916546,0.838189,0.963564,"SS,OF",,,,,
2,José Ramírez,4.638936,4.553868,0.085068,5,0.754271,0.928109,0.93827,0.958808,0.974411,"3B,DH",,,,,
3,Julio Rodríguez,4.635106,4.635106,0.001146,1,0.906383,0.920213,0.941489,0.887234,0.979787,OF,,,,,
4,Freddie Freeman,4.624784,4.598302,0.026482,5,0.962727,0.98496,0.909528,0.961659,0.779428,1B,,,,,
5,Manny Machado,4.570684,4.501576,0.069108,5,0.835513,0.925997,0.941398,0.961832,0.836835,"3B,DH",,,,,
6,Kyle Tucker,4.481808,4.396563,0.17049,3,0.770516,0.851543,0.870985,0.958803,0.944716,OF,,,,,
7,Mookie Betts,4.462754,4.553842,-0.091088,5,0.86036,0.984929,0.92032,0.865404,0.92283,OF,,,,,
8,Michael Harris II,4.451064,4.451064,0.001101,1,0.952128,0.882979,0.842553,0.818085,0.955319,OF,,,,,
9,Randy Arozarena,4.401615,4.38089,0.082901,2,0.800543,0.902329,0.822574,0.875869,0.979574,"OF,DH,OF",,,,,


In [24]:
######################################################################################################################
######################################################################################################################
######################################################################################################################
#### DRAFT DAY FUNCTIONS
   
# DROP A PLAYER 
def drafted(player):
    global draft_df
    global final_pitcher_stats_df
    global final_batter_stats_df
    draft_df = draft_df[draft_df.Name != player]
    final_pitcher_stats_df = final_pitcher_stats_df[final_pitcher_stats_df.Name != player]
    final_batter_stats_df = final_batter_stats_df[final_batter_stats_df.Name != player]
    return draft_df.head(25)
    
# FILTER PLAYERS BY POSITION
def position_filter(Pos):
    filtered_draft_df = draft_df[draft_df['Pos'].str.contains(Pos)]
    return filtered_draft_df.head(25)

# PULL PITCHING STAT CATEGORY LEADERS
def pitching_stat_leaders(CAT):
    global final_pitcher_stats_df
    pitching_filtered_draft_df = draft_df.sort_values([CAT], ascending=[False])
    return pitching_filtered_draft_df.head(25)

# PULL BATTING STAT CATEGORY LEADERS
def batting_stat_leaders(CAT):
    global final_batter_stats_df
    batting_filtered_draft_df = draft_df.sort_values([CAT], ascending=[False])
    return batting_filtered_draft_df.head(25)

In [252]:
drafted('Shohei Ohtani')
drafted('Trea Turner')
drafted('José Ramírez')
drafted('Aaron Judge')
drafted('Julio Rodríguez')
drafted('Mookie Betts')
drafted('Vladimir Guerrero Jr.')
drafted('Kyle Tucker')
drafted('Freddie Freeman')
drafted('Manny Machado')
drafted('Bobby Witt')
drafted('Juan Soto')
drafted('Fernando Tatis Jr.')
drafted('Mike Trout')
drafted('Gerrit Cole')
drafted('Corbin Burnes')
drafted('Yasiel Puig')
drafted('Ronald Acuna Jr.')
drafted('Bryce Harper')
drafted('Jose Altuve')
drafted('Sandy Alcantara')
drafted('Bo Bichette')
drafted('Rafael Devers')
drafted('Spencer Strider')
drafted('Paul Goldschmidt')
drafted('Austin Riley')
drafted('Jacob deGrom')
drafted('Randy Arozarena')
drafted('Aaron Nola')
drafted('Pete Alonso')
drafted('Marcus Semien')
drafted('Max Scherzer')
drafted('Brandon Woodruff')
drafted('Michael Harris II')
drafted('J.T. Realmuto')
drafted('Justin Verlander')
drafted('Francisco Lindor')
drafted('Matt Olson')
drafted('Zack Wheeler')
drafted('Corbin Carroll')
drafted('Shane Bieber')
drafted('Shane McClanahan')
drafted('Kevin Gausman')
drafted('Cedric Mullins')
drafted('Julio Urias')
drafted('Dylan Cease')
drafted('Whit Merrifield')
drafted('Luis Castillo')
drafted('Kyle Schwarber')
drafted('Ozzie Albies')
drafted('Corey Seager')
drafted('Alek Manoah')
drafted('Max Fried')
drafted('Nolan Arenado')
drafted('Clayton Kershaw')
drafted('Julio Urías')
drafted('Trevor Bauer')
drafted('Zac Gallen')
drafted('Luis Robert')
drafted('Will Smith')
drafted('Jazz Chisholm Jr.')
drafted('Emmanuel Clase')
drafted('Oneil Cruz')
drafted('Cristian Javier')
drafted('Wander Franco')
drafted('Devin Williams')
drafted('Dansby Swanson')
drafted('Framber Valdez')
drafted('Salvador Perez')
drafted('Xander Bogaerts')
drafted('Teoscar Hernández')
drafted('Yu Darvish')
drafted('Josh Hader')
drafted('Gunnar Henderson')
drafted('Jordan Romano')
drafted('Daulton Varsho')
drafted('Adolis García')
drafted('Starling Marte')
drafted('Trevor Story')
drafted('Triston McKenzie')
drafted('Ryan Pressly')
drafted('Adley Rutschman')
drafted('Logan Webb')
drafted("Tyler O'Neill")
drafted('Robbie Ray')
drafted('Vinnie Pasquantino')
drafted('George Springer')
drafted('George Kirby')
drafted('Tim Anderson')
drafted('Jose Abreu')
drafted('Alexis Díaz')
drafted('Bryan Reynolds')
drafted('Yordan Alvarez')
drafted('Sean Murphy')
drafted('Kris Bryant')
drafted('Steven Kwan')
drafted('Nestor Cortes')
drafted('Jeremy Peña')
drafted('Eloy Jimenez')
drafted('Raisel Iglesias')
drafted('Nathaniel Lowe')
drafted('Edwin Díaz')
drafted('Lucas Giolito')
drafted('Andres Gimenez')
drafted('José Abreu')
drafted('Hunter Greene')
drafted('Logan Gilbert')
drafted('Taylor Ward')
drafted('Tommy Edman')
drafted('Luis Severino')
drafted('Joe Musgrove')
drafted('Nick Castellano')
drafted('Ryan Mountcastle')
drafted('Lance Lynn')
drafted('Anthony Santander')
drafted('Max Muncy')
drafted('Willy Adames')
drafted('Tyler Glasnow')
drafted('David Robertson')
drafted('Christian Yelich')
drafted('Blake Snell')
drafted('Félix Bautista')
drafted('Camilo Doval')
drafted('Nick Lodolo')
drafted('Kenley Jansen')
drafted('C.J. Cron')
drafted('Andrew Vaughn')
drafted('Nick Castellanos')
drafted('Kyle Wright')
drafted('Carlos Correa')
drafted('Byron Buxton')
drafted('Luis Arraez')
drafted('Justin Turner')
drafted('Jose Altuve')
drafted('Hyun Jin Ryu')
drafted('Clay Holmes')
drafted('Willson Contreras')
drafted('Nico Hoerner')
drafted('Jake McCarthy')
drafted('Luis Garcia')
drafted('Joe Ryan')
drafted('Matt Chapman')
drafted('Freddy Peralta')
drafted('Gleyber Torres')
drafted('William Contreras')
drafted('MJ Melendez')
drafted('Christian Walker')
drafted('Joey Meneses')
drafted('Mitch Haniger')
drafted('Pablo Lopez')
drafted('Ranger Suarez')
drafted('Dustin May')
drafted('Chris Bassitt')
drafted('Hunter Renfroe')
drafted('Jon Lester')
drafted('Ranger Suárez')
drafted('Drew Rasmussen')
drafted('Vaughn Grissom')
drafted('Chris Sale')
drafted('Ketel Marte')
drafted('Jesus Luzardo')
drafted('Daniel Bard')
drafted('Charlie Morton')
drafted('Tony Gonsolin')
drafted('Rhys Hoskins')
drafted('Jose Miranda')
drafted('Kodai Senga')
drafted('Thairo Estrada')
drafted('Josh Rojas')
drafted('Jeffrey Springs')
drafted('Jordan Montgomery')
drafted('José Berríos')
drafted('Sonny Gray')
drafted('Masataka Yoshida')
drafted('Brady Singer')
drafted('Anthony Rizzo')
drafted('Giancarlo Stanton')
drafted('Patrick Sandoval')
drafted('Alex Verdugo')
drafted('David Bednar')
drafted('Andrew Heaney')
drafted('Javier Báez')
drafted('Jordan Walker')
drafted('Ty France')
drafted('Ian Happ')
drafted('Jake Cronenworth')
drafted('Riley Greene')
drafted('Scott Barlow')
drafted('Carlos Rodón')
drafted('Bubba Thompson')
drafted('Reid Detmers')
drafted('Hunter Brown')
drafted('Brandon Nimmo')
drafted('Adam Wainwright')
drafted('CJ Abrams')
drafted('Jack Flaherty')
drafted('Eugenio Suarez')
drafted('Brandon Lowe')
drafted('Josh Bell ')
drafted('Tyler Anderson')
drafted('Merrill Kelly')
drafted('Paul Sewald')
drafted('Roansy Contreras')
drafted('Michael Conforto')
drafted('Noah Syndergaard')
drafted('Jon Gray')
drafted('Jonathan India')
drafted('Andres Munoz')
drafted('Liam Hendriks')
drafted('Amed Rosario')
drafted("Ke'Bryan Hayes")
drafted('Lars Nootbaar')
drafted('Tyler Mahle ')
drafted('Evan Phillips')
drafted('John Schreiber')
drafted('Grayson Rodriguez')
drafted('Tyler Stephenson')
drafted('Taijuan Walker')
drafted('Pete Fairbanks')
drafted('Clarke Schmidt')
drafted('Hayden Wesneski')
drafted('Cody Bellinger')
drafted('Anthony Rendon')
drafted('Clarke Schmidt')
drafted('Hayden Wesneski')
# drafted('')
# drafted('')
# drafted('')
# drafted('')
# drafted('')
# drafted('')
# drafted('')
# drafted('')
# drafted('')
# drafted('')
# drafted('')
# drafted('')
# drafted('')

#position_filter('P')
#pitching_stat_leaders('W_Percentile')

Unnamed: 0,Name,Weighted Rank,Rank,Trend,Years,BA_Percentile,R_Percentile,HR_Percentile,RBI_Percentile,SB_Percentile,Pos,W_Percentile,ERA_Percentile,SO_Percentile,SV_Percentile,WHIP_Percentile
649,Chad Green,3.89066,3.762018,0.257285,3,,,,,,P,0.751083,0.751612,0.723093,0.687296,0.848933
43,Charlie Blackmon,3.813799,4.00241,-0.188611,5,0.865496,0.869815,0.749793,0.889942,0.627363,"DH,OF",,,,,
660,Giovanny Gallegos,3.797256,3.820853,-0.047193,3,,,,,,P,0.493898,0.841773,0.664244,0.87193,0.949007
48,DJ LeMahieu,3.751346,3.915253,-0.163907,5,0.862032,0.923846,0.697979,0.752738,0.678657,"3B,2B,1B",,,,,
669,Jhoan Duran,3.739915,3.739915,0.000925,1,,,,,,P,0.230361,0.970276,0.710191,0.908705,0.920382
52,Andrew Benintendi,3.733063,3.857025,-0.165283,4,0.838817,0.792125,0.574793,0.802781,0.848509,OF,,,,,
670,Blake Treinen,3.731013,3.73293,-0.003835,3,,,,,,P,0.755954,0.761945,0.594621,0.945127,0.675283
672,Craig Kimbrel,3.721691,3.812775,-0.182168,3,,,,,,P,0.619083,0.766319,0.679354,0.980217,0.767802
53,Luis Robert Jr.,3.714523,3.712746,0.003554,3,0.760863,0.688028,0.710858,0.701909,0.851088,OF,,,,,
673,Alex Reyes,3.698732,3.698732,0.000915,1,,,,,,P,0.902748,0.731501,0.717759,0.980973,0.365751
