In [46]:
# imports
import pandas as pd

In [170]:
# files
# yahoo 
yahoo = pd.read_csv('../Collection_data/Yahoo/Yahoo_DF_player_export.csv')
# projections
pitcher_proj = pd.read_csv('../Projections/pitcher_projections_2021.csv')
batter_proj = pd.read_csv('../Projections/batter_projections_2021.csv')

In [171]:
yahoo.head(1)

Unnamed: 0,ID,First Name,Last Name,Position,Team,Opponent,Game,Time,Salary,FPPG,Probable Pitcher,Injury Status,Starting
0,mlb.p.7977,Andrew,McCutchen,OF,PHI,CHC,PHI@CHC,8:05PM EDT,19,8.3,,,


In [172]:
pitcher_proj.head(1)

Unnamed: 0,Name,Team,Pos,W,L,GMS,GS,SV,IP,H,...,ER,HR,BB,SO,PTS,ERA,AVG,WHIP,FPPG,Projected_FPPG
0,Kevin Gausman,SF,SP,8,3,17,17,0,108.2,60,...,21,8,27,124,127.66,1.74,0.157,0.8,7.509412,64.928085


## Steps to merge projections with yahoo player list
1. create name column 
2. drop unneeded columns
### starting is a very interesting column for position players if populated
3. reorder columns
4. create filter for injury column, as healthy players have ' '
5. split dataframe batters and pitchers 

In [173]:
# yahoo naming function
def yahoo_name(yahoo):
    # create empty list for names
    player_list = []
    # for each row concat names and append to list 
    for row in yahoo.index:
        player_list.append(yahoo['First Name'][row] + ' ' + yahoo['Last Name'][row])
        # make new column for name
    yahoo['Name'] = player_list
    return yahoo

In [174]:
# step 1
yahoo = yahoo_name(yahoo)

In [175]:
# step 2
yahoo.drop(columns=['First Name', 'Last Name', 'Game', 'Time', 'Starting'], inplace=True)

In [176]:
# step 3 
yahoo = yahoo[['ID', 'Name', 'Position', 'Team', 'Opponent', 'Salary', 'FPPG', 'Probable Pitcher', 'Injury Status']].copy()

In [177]:
# step 4
yahoo = yahoo.loc[yahoo['Injury Status']==' '].copy()

In [178]:
# step 5
batters = yahoo.loc[yahoo['Position']!='P']
pitchers = yahoo.loc[yahoo['Position']=='P']

## Pticher steps
1. filter by only starting pitchers
2. read in projections and merge 
3. drop any nulls
4. reorder columns
5. rename columns

In [179]:
# step 1
pitchers = pitchers.loc[pitchers['Probable Pitcher']=='Yes']

In [180]:
# step 2
pitcher_projections = pitchers.merge(pitcher_proj, how='left', on='Name')

In [181]:
# step 3
pitcher_projections.dropna(inplace=True)

In [182]:
# step 4
pitcher_projections = pitcher_projections[['ID', 'Position', 'Name', 'Salary', 'Team_x', 'Opponent', 'Projected_FPPG']].copy()

In [183]:
# step 5
pitcher_projections.rename(columns={'Team_x':'Team'}, inplace=True)

In [184]:
pitcher_projections

Unnamed: 0,ID,Position,Name,Salary,Team,Opponent,Projected_FPPG
0,mlb.p.10141,P,Zach Eflin,35,PHI,CHC,37.149562
1,mlb.p.10858,P,Adbert Alzolay,34,CHC,PHI,48.21669
2,mlb.p.10455,P,Tyler Mahle,44,CIN,MIL,67.253915
3,mlb.p.10098,P,Adrian Houser,29,MIL,CIN,67.502884
4,mlb.p.11227,P,JT Brubaker,36,PIT,NYM,44.353374
5,mlb.p.9321,P,Taijuan Walker,48,NYM,PIT,60.481388
6,mlb.p.11854,P,Alek Manoah,41,TOR,BAL,31.999013
7,mlb.p.10431,P,Keegan Akin,25,BAL,TOR,33.070699
8,mlb.p.8193,P,Max Scherzer,52,WAS,SD,54.845125
9,mlb.p.9095,P,Yu Darvish,51,SD,WAS,58.798487


## Batter Steps
1. merge
2. drop nulls
3. reorder
4. rename

In [185]:
batter_projections = batters.merge(batter_proj, how='left', on='Name')

In [186]:
batter_projections.dropna(inplace=True)

In [187]:
batter_projections = batter_projections[['ID', 'Position', 'Name', 'Salary', 'Team_x', 'Opponent','Projected_FPPG']].copy()

In [188]:
batter_projections.rename(columns={'Team_x':'Team'}, inplace=True)

In [189]:
batter_projections

Unnamed: 0,ID,Position,Name,Salary,Team,Opponent,Projected_FPPG
0,mlb.p.7977,OF,Andrew McCutchen,19,PHI,CHC,9.045121
1,mlb.p.8875,OF,Bryce Harper,21,PHI,CHC,9.766982
2,mlb.p.9247,2B,Jean Segura,17,PHI,CHC,7.983235
3,mlb.p.9282,SS,Didi Gregorius,15,PHI,CHC,6.719352
4,mlb.p.9446,2B,Brad Miller,7,PHI,CHC,6.492820
...,...,...,...,...,...,...,...
164,mlb.p.10532,1B,Alex Kirilloff,12,MIN,DET,7.537239
166,mlb.p.11221,3B,Luis Arraez,11,MIN,DET,6.930252
167,mlb.p.11345,OF,Gilberto Celestino,7,MIN,DET,4.807756
168,mlb.p.11347,OF,Trevor Larnach,16,MIN,DET,7.569768


In [190]:
# following are steps for creating a line up
# set cap for yahoo
salary_cap = 200

# sort pitcher by fppg projections
pitcher_projections.sort_values(by='Projected_FPPG', ascending=False, inplace=True, ignore_index=True)

# create a player list to 
lineup = []
# add top two pitchers based on projections
lineup.append(pitcher_projections.values[0])
lineup.append(pitcher_projections.values[1])

# need to update remaining salary
salary_cap -= (pitcher_projections['Salary'][0]+pitcher_projections['Salary'][1])


# with updated salary fill remaining roster based on position and highest fppg
# create position list for remaining roster spots
position_list = ['C', '1B', '2B', '3B', 'SS', 'OF', 'OF', 'OF']
# sort batters by FPPG
batter_projections.sort_values(by='Projected_FPPG', ascending=False, inplace=True, ignore_index=True)

# create count based on remaining positions
sal_count = 8

# create average salary variable for remaining players
avg_sal = salary_cap/sal_count

# create for loop for each position in list to take highest fppg
for pos in position_list:
    # setting counter to increase if player is already in list
    # this is inside the for loop beacuse it needs to be per position
    counter = 0
    # if salary greater than average move to next player
    for salary in batter_projections.loc[batter_projections['Position'] == pos]['Salary']:
        # test if salary is greater than average if it is increase counter
        if batter_projections.loc[batter_projections['Position'] == pos]['Salary'].values[counter] > avg_sal or batter_projections.loc[batter_projections['Position'] == pos]['Salary'].values[counter] > salary_cap:
            counter += 1
        else:
            # if less than average add player to list
            lineup.append(batter_projections.loc[batter_projections['Position'] == pos].values[counter])
            # drop player so no duplicates are added
            batter_projections.drop(batter_projections.loc[batter_projections['Position'] == pos].index.values[counter], inplace=True)
            # decrease sal_count
            sal_count -= 1
            # decrease salary cap
            salary_cap -= batter_projections.loc[batter_projections['Position'] == pos]['Salary'].values[counter]
            # create new average salary
            avg_sal = salary_cap/sal_count
            break
# create dataframe of lineup
df_lineup = pd.DataFrame(lineup, columns=['Id', 'Position', 'Name', 'Salary', 'Team', 'Opponent','Projected_FPPG'])

  avg_sal = salary_cap/sal_count


In [191]:
df_lineup

Unnamed: 0,Id,Position,Name,Salary,Team,Opponent,Projected_FPPG
0,mlb.p.11763,P,Tarik Skubal,39,DET,MIN,73.795161
1,mlb.p.10098,P,Adrian Houser,29,MIL,CIN,67.502884
2,mlb.p.8990,C,Robinson Chirinos,7,CHC,PHI,25.990382
3,mlb.p.10157,1B,Trey Mancini,9,BAL,TOR,9.009749
4,mlb.p.11692,2B,Jake Cronenworth,15,SD,WAS,8.757537
5,mlb.p.11093,3B,Patrick Wisdom,8,CHC,PHI,9.267305
6,mlb.p.8344,SS,Alcides Escobar,10,WAS,SD,8.52433
7,mlb.p.9561,OF,Jesse Winker,15,CIN,MIL,10.245097
8,mlb.p.11134,OF,Aristides Aquino,9,CIN,MIL,8.893638
9,mlb.p.10522,OF,Trent Grisham,14,SD,WAS,8.862743


In [192]:
df_lineup['Salary'].sum()

155