<a href="https://colab.research.google.com/github/kbro4/Reliever-Value/blob/main/Getting_Bullpen_Spending_2013_2015.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import time

In [2]:
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

In [3]:
def convert_amount(value):
    """Convert a formatted money string such as "$1,250,000" to an int.

    Every non-digit character (currency symbol, thousands separators,
    whitespace) is discarded. Anything after the first decimal point is
    dropped, so "$500,000.00" yields 500000 instead of having its cents
    digits glued onto the dollar digits.

    Args:
        value: A string holding an amount, or a value that is already an
            int or float.

    Returns:
        The whole-unit amount as an int.

    Raises:
        ValueError: If value contains no digits at all (for example "" or
            "N/A"), or is a float NaN.
    """
    # Already numeric: nothing to strip, just truncate to an int.
    if isinstance(value, (int, float)):
        return int(value)

    whole_part, _, fraction = str(value).partition('.')
    digits = ''.join(c for c in whole_part if c.isdigit())
    if digits:
        return int(digits)
    if any(c.isdigit() for c in fraction):
        # Amount such as ".50": there are no whole units.
        return 0
    raise ValueError(f"no digits found in amount: {value!r}")

In [5]:
# Team codes walked by the scraping loop, in the order their results are
# printed.
mlb_teams = [
    'ARI', 'ATL', 'BAL', 'BOS', 'CHN', 'CHA',
    'CIN', 'CLE', 'COL', 'DET', 'HOU', 'KCA',
    'ANA', 'LAN', 'MIA', 'MIL', 'MIN', 'NYN',
    'NYA', 'OAK', 'PHI', 'PIT', 'SDN', 'SFN',
    'SEA', 'SLN', 'TBA', 'TEX', 'TOR', 'WAS',
]

In [6]:
# Collects the names of pitchers who started more than half of their games.
# The scraping loop passes these names on so they can be EXCLUDED from the
# relief-pitcher salary total.
def get_names(df):
  """Return the names of players whose GS / G ratio is above 0.5.

  Args:
    df: A sequence of DataFrames (the scraping loop passes the result of
      pd.read_html). Only df[1] is used; it must have 'G', 'GS' and 'Name'
      columns. Presumably this is the team pitching table - verify against
      the scraped page.

  Returns:
    A pandas Series of names with every literal '*' removed.
  """
  pitching = df[1]
  pitching = pitching.dropna(subset=['G'])
  # Drop rows whose G cell is the text 'G' or '162' (presumably repeated
  # header rows and the team-total row - TODO confirm).
  pitching = pitching[~pitching['G'].isin(['G', '162'])]

  # Coerce instead of astype(float): a stray non-numeric cell becomes NaN and
  # simply fails the ratio test below rather than aborting the whole scrape.
  games = pd.to_numeric(pitching['G'], errors='coerce')
  starts = pd.to_numeric(pitching['GS'], errors='coerce')
  starter_names = pitching.loc[starts / games > 0.5, 'Name']

  # regex=False: '*' must be removed as a literal character, independent of
  # the pandas version's default for `regex`.
  return starter_names.str.replace('*', '', regex=False)

In [7]:
def swap_names(full_names):
    """Rewrite each "First ... Last" name as "Last First".

    Only the first and last whitespace-separated tokens are kept, so
    "Jorge De La Rosa" becomes "Rosa Jorge".
    NOTE(review): confirm that this matches how the salary table's
    'Player Sort' column spells multi-word surnames and suffixes.

    Args:
        full_names: An iterable of name strings.

    Returns:
        A list with one entry per input name, in the same order. Names with
        fewer than two tokens are passed through unchanged.
    """
    new_names = []
    for name in full_names:
        parts = name.split()
        if len(parts) >= 2:
            new_names.append(f"{parts[-1]} {parts[0]}")
        else:
            # Keep the name as-is and carry on. Returning here would drop
            # every name that comes after the first single-token one.
            new_names.append(name)
    return new_names

In [8]:
# Sums the salaries in the scraped salary table, leaving out every player
# whose name appears in `names`.
def get_relief_spending(df, names):
  """Return the total salary of the listed players that are not in `names`.

  Args:
    df: A sequence of DataFrames (the scraping loop passes the result of
      pd.read_html). Only df[1] is used; it must have 'Current Status',
      'Player Sort' and 'Salary' columns.
    names: A pandas Series of "First Last" names to exclude (the scraping
      loop passes the output of get_names).

  Returns:
    The sum of the remaining 'Salary' values after convert_amount has turned
    each one into an int.
  """
  salaries = df[1]
  # Rows with a missing value in any remaining column are discarded.
  salaries = salaries.drop('Current Status', axis = 1).dropna()

  # Strip accents so the names compare equal to plain-ASCII spellings, then
  # reorder them to "Last First" for the 'Player Sort' comparison.
  names = names.str.normalize('NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8')
  names = swap_names(names)

  kept = salaries[~salaries['Player Sort'].isin(names)]
  # Convert and sum in one expression; assigning the converted column back
  # onto the filtered slice would be a chained assignment on a copy.
  return kept['Salary'].apply(convert_amount).sum()

In [9]:
# Scrapes data for each team's spending, seasons 2013 through 2015.
# For every team/year it prints the salary total computed by
# get_relief_spending from two scraped pages: the Baseball Prospectus
# compensation page (pos=RP) and the Baseball-Reference team page.

# Teams whose code in the Baseball-Reference URL differs from the code in
# mlb_teams; every other team uses its mlb_teams code unchanged.
bref_team_codes = {'KCA': 'KC', 'ANA': 'LAA', 'WAS': 'WSN'}

for team in mlb_teams:
  print(team)
  bref_team = bref_team_codes.get(team, team)
  for year in range(2013, 2016):
    url = f'https://legacy.baseballprospectus.com/compensation/index.php?team={team}&pos=RP&cyear={year}'

    df = pd.read_html(url)

    url2 = f'https://www.baseball-reference.com/teams/{bref_team}/{year}.shtml'
    df2 = pd.read_html(url2)
    # Pause after EVERY request, including the three remapped teams
    # (presumably to stay under the site's rate limit).
    time.sleep(5)

    # Gets total relief pitcher spending by year
    names = get_names(df2)
    print(get_relief_spending(df, names))

ARI
23309500
18458500
19797000
ATL
7738750
12807500
13810000
BAL
15422500
13785820
17523000
BOS
21997000
20905125
30700000
CHN
23724500
19394500
29550000
CHA
13825000
12812000
21633700
CIN
17050833
26981272
17470500
CLE
16739200
11009600
12915100
COL
24760500
28479000
26172000
DET
12844500
15754500
22257500
HOU
5652300
8615100
19455700
KCA
11922175
13876500
24800000
ANA
11406250
16788500
17618500
LAN
18395333
39827433
19767500
MIA
5745400
9286900
15089000
MIL
13871000
12901000
18617000
MIN
8845000
12272500
12702500
NYN
15770250
13871475
19117450
NYA
20550875
12014000
19412400
OAK
15167500
20338400
19117500
PHI
21742000
29242467
17116500
PIT
5526500
9410000
15808000
SDN
13551200
17136600
22493800
SFN
21294500
23505000
24145000
SEA
5549700
11171100
12283800
SLN
8767000
13540000
12758000
TBA
8657500
17711750
17950000
TEX
16626800
17105006
12368900
TOR
22407500
22441600
6077000
WAS
25080500
29875000
17023100
