<a href="https://colab.research.google.com/github/kbro4/Reliever-Value/blob/main/Individual_Position_Players.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import time
import warnings

In [None]:
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
# Season -> shared Baseball-Reference table link, one entry per season loaded.
# NOTE(review): there is no 2020 key — presumably left out on purpose; confirm.
urls = {
    2013: 'http://bbref.com/pi/shareit/GiIyl',
    2014: 'http://bbref.com/pi/shareit/7sYZj',
    2015: 'http://bbref.com/pi/shareit/JV9SN',
    2016: 'http://bbref.com/pi/shareit/ar9zf',
    2017: 'http://bbref.com/pi/shareit/Ceb7I',
    2018: 'http://bbref.com/pi/shareit/8dMvy',
    2019: 'http://bbref.com/pi/shareit/HNsOi',
    2021: 'http://bbref.com/pi/shareit/o7qj0',
    2022: 'http://bbref.com/pi/shareit/1sAou',
    2023: 'http://bbref.com/pi/shareit/JX0ge',
}

In [None]:
def get_top_players(df):
  """Clean a season table and order it by WAR, best first.

  Args:
    df: Season table containing at least the 'Rk' and 'WAR ▼' columns.

  Returns:
    A new DataFrame with rows whose 'Rk' cell is the literal 'Rk' removed,
    sorted by 'WAR ▼' in descending numeric order, and without rows in which
    every column other than 'Salary' is missing. Cell values are returned
    exactly as they came in; only the row order is decided numerically.
  """
  # Rows whose 'Rk' cell repeats the column label are header rows, not players.
  df = df[df['Rk'] != 'Rk']
  # Compare WAR as numbers. When the column holds text, a plain sort is
  # lexicographic and would place '9.9' above '10.5'. Cells that cannot be
  # parsed become NaN for the comparison and therefore sort last.
  df = df.sort_values(
      by='WAR ▼',
      ascending=False,
      key=lambda war: pd.to_numeric(war, errors='coerce'),
  )
  df = df.dropna(subset=df.columns.difference(['Salary']), how='all')
  return df

In [None]:
def update_database(new_df, total_df, year):
  """Merge one season's WAR values into the player-by-season table.

  Args:
    new_df: Season table with 'Name' and 'WAR ▼' columns. When a name occurs
      more than once, the first occurrence supplies the value.
    total_df: Accumulated table with a 'Name' column and one column per season
      merged so far. It is not modified.
    year: Column label under which this season's values are stored.

  Returns:
    A new DataFrame. Players already in total_df get their `year` cell set;
    players seen for the first time are added as new rows, in the order they
    first appear in new_df, with all other season columns left missing.
  """
  # One WAR value per player (first occurrence wins); rows without a name
  # cannot be matched to anyone and are ignored.
  war_by_name = (
      new_df.dropna(subset=['Name'])
      .drop_duplicates(subset='Name', keep='first')
      .set_index('Name')['WAR ▼']
  )

  result = total_df.copy()
  known = result['Name'].isin(war_by_name.index)
  looked_up = result['Name'].map(war_by_name)
  if year in result.columns:
    # Keep the existing value for players who are absent from this season.
    result[year] = looked_up.where(known, result[year])
  else:
    result[year] = looked_up

  is_new = ~war_by_name.index.isin(result['Name'])
  additions = pd.DataFrame({
      'Name': war_by_name.index[is_new].to_numpy(),
      year: war_by_name.to_numpy()[is_new],
  })

  if additions.empty:
    return result
  if result.empty:
    # Nothing to concatenate with; just give the new rows the full column set.
    return additions.reindex(columns=list(result.columns))
  # Built in one concat instead of appending row by row (DataFrame.append no
  # longer exists in pandas 2.x and copied the whole frame on every call).
  return pd.concat([result, additions], ignore_index=True)

In [None]:
def get_top_thirty(df):
  """Return the thirty rows with the highest WAR from a season table.

  Args:
    df: Season table containing at least the 'Rk' and 'WAR ▼' columns.

  Returns:
    A new DataFrame of at most 30 rows: rows whose 'Rk' cell is the literal
    'Rk' are removed, the rest are sorted by 'WAR ▼' in descending numeric
    order, rows in which every column other than 'Salary' is missing are
    dropped, and the first 30 are kept. Cell values are returned unchanged.
  """
  # Rows whose 'Rk' cell repeats the column label are header rows, not players.
  df = df[df['Rk'] != 'Rk']
  # Rank by numeric WAR. Sorting the raw text would put '10.5' below '2.0'
  # and push the best seasons out of the top thirty.
  df = df.sort_values(
      by='WAR ▼',
      ascending=False,
      key=lambda war: pd.to_numeric(war, errors='coerce'),
  )
  df = df.dropna(subset=df.columns.difference(['Salary']), how='all')
  return df.head(30)

In [None]:
def elite_consistency(df, total_df, year, threshold=4.0):
    """Count how many listed players reach `threshold` WAR one and two seasons later.

    Args:
        df: Table with a 'Name' column listing the players to follow. A name
            that appears more than once is counted once per appearance.
        total_df: Player-by-season table with a 'Name' column and numeric
            season columns labelled by year. If a name occurs several times,
            its first row is used.
        year: Reference season; the columns `year + 1` and `year + 2` are read.
        threshold: Minimum WAR that counts as an elite season (default 4.0).

    Returns:
        A tuple `(one_later, two_later)` of ints. A count is 0 when total_df
        has no column for that season, so 0 can mean "no data" as well as
        "nobody qualified". Players missing from total_df, and missing values,
        never count.
    """
    # One row per player, addressable by name.
    history = (
        total_df.dropna(subset=['Name'])
        .drop_duplicates(subset='Name', keep='first')
        .set_index('Name')
    )
    # Line the history up with the requested players; unknown players become
    # all-missing rows, which fail the >= comparison below.
    cohort = history.reindex(df['Name'].dropna())

    def _count_elite(season):
        # Number of followed players at or above the threshold in `season`.
        if season not in cohort.columns:
            return 0
        return int((cohort[season] >= threshold).sum())

    return _count_elite(year + 1), _count_elite(year + 2)


In [None]:
# Build the player-by-season WAR table, one download per entry in `urls`.
total_df = pd.DataFrame(columns=['Name'])

for key, url in urls.items():
  df = pd.read_html(url)[0]  # only the first table on the page is used

  new_df = get_top_players(df)
  total_df = update_database(new_df, total_df, key)
  time.sleep(2)  # pause between requests


In [None]:
# Display the accumulated table: a 'Name' column plus one WAR column per season.
total_df

Unnamed: 0,Name,2013,2014,2015,2016,2017,2018,2019,2021,2022,2023
0,Mike Trout,8.9,7.7,9.6,10.5,6.9,9.9,7.9,1.8,6.2,2.9
1,Andrew McCutchen,7.8,6.4,5.0,-0.3,3.0,2.5,1.8,1.4,1.1,1.5
2,Carlos Gómez,7.6,4.7,2.2,0.0,1.4,0.7,-0.3,,,
3,Miguel Cabrera,7.5,5.1,4.9,5.1,-0.9,0.2,0.0,-0.6,-1.1,-0.4
4,Josh Donaldson,7.2,6.9,7.1,7.2,4.6,0.9,5.4,3.2,2.2,0.1
...,...,...,...,...,...,...,...,...,...,...,...
3773,Jordyn Adams,,,,,,,,,,-0.7
3774,Colton Cowser*,,,,,,,,,,-0.8
3775,Tyler Soderstrom*,,,,,,,,,,-1.1
3776,Henry Davis,,,,,,,,,,-1.4


In [None]:
# Convert every season column to float; 'Name' is the only column left as is.
season_columns = [label for label in total_df.columns if label != 'Name']
for column in season_columns:
  total_df[column] = total_df[column].astype(float)

In [None]:
# Performance independence by year: mean absolute WAR change between each
# season column and the column immediately to its left (first column skipped).
# NOTE(review): neighbouring columns are not necessarily consecutive seasons —
# confirm how a skipped season should be read in this table.
season_war = total_df.iloc[:, 1:]
war_change = season_war.diff(axis="columns")
avg_change = war_change.abs().mean()
avg_change

2013         NaN
2014    0.740834
2015    0.745996
2016    0.671553
2017    0.742418
2018    0.678396
2019    0.677505
2021    0.773982
2022    1.039219
2023    1.165996
dtype: float64

In [None]:
# Mean absolute WAR change across a two-season span, printed per starting season.
# A starting season is reported only when the table also has a column for
# season + 2, so gaps and the last seasons are skipped without naming any
# specific year and the cell keeps working when seasons are added to the table.
for column in total_df:
  if column == 'Name' or column + 2 not in total_df.columns:
    continue
  change_two = total_df[column + 2] - total_df[column]
  avg_change_two = change_two.abs().mean()
  print(column, avg_change_two)

2013 0.8431906614785992
2014 0.8562005277044855
2015 0.809329829172142
2016 0.8459974587039389
2017 0.8504504504504504
2019 0.7739819004524887
2021 1.2896907216494846


In [None]:
# For each season, take its top thirty players and print how many of them
# reach the elite threshold one and two seasons later. A count of 0 is also
# what elite_consistency reports when total_df has no column for that season.
for key, url in urls.items():
  df = pd.read_html(url)[0]  # only the first table on the page is used
  top_thirty = get_top_thirty(df)

  print(key, elite_consistency(top_thirty, total_df, key))
  time.sleep(2)  # pause between requests

2013 (12, 14)
2014 (13, 13)
2015 (18, 13)
2016 (17, 9)
2017 (13, 10)
2018 (14, 0)
2019 (0, 12)
2021 (14, 12)
2022 (15, 0)
2023 (0, 0)
