The purpose of this notebook is to create a function that takes the imported player list:
- clean to only needed information
- seperate by pither and batter
- merge each on player name to get projections from model
- merge back pitcher and batter data 
- output df for now

Next steps:
- build line up based on salary

In [3]:
# imports
import pandas as pd
import numpy as np

In [8]:
    # function
def build_lineup(df):
        # clean up fd to match column list above
        df.drop(columns=['First Name', 'Last Name', 'FPPG', 'Played', 'Injury Details', 'Tier', 'Batting Order', 'Roster Position'], inplace=True)

        # filling nulls for probable pitcher
        df['Probable Pitcher'].fillna('No', inplace=True)

        # fill nulls for injury indicator
        df['Injury Indicator'].fillna('Healthy', inplace=True)

        # renaming nickname column
        df.rename(columns={'Nickname': 'Name'}, inplace=True)

        # fitler to only healthy players
        df = df.loc[df['Injury Indicator'] == 'Healthy']

        # split using .loc by position and make new dataframe for pitchers
        pitchers = df.loc[df['Position']=='P']

        # split using .loc by position and make new dataframe for batters
        batters = df.loc[df['Position']!='P']

        # pitcher dataframe steps
        # save pitchers df to only starting pitchers
        pitchers = pitchers.loc[pitchers['Probable Pitcher']=='Yes']

        # read in pitcher projections
        pitcher_proj = pd.read_csv('../../Projections/pitcher_projections_2021.csv')

        # merge attempt
        pitcher_projections = pitchers.merge(pitcher_proj, how='left', on='Name')

        # drop nulls if any
        pitcher_projections.dropna(inplace=True)

        # overwrite df with only the columns needed
        pitcher_projections = pitcher_projections[['Id', 'Position', 'Name', 'Salary', 'Team_x', 'Opponent', 'Projected_FPPG']]

        # rename team column
        pitcher_projections.rename(columns={'Team_x' : 'Team'}, inplace=True)

        # batter dataframe steps
        # read in projections file
        batter_21 = pd.read_csv('../../Projections/batter_projections_2021.csv')

        # merge projections with batter df, creating new df
        batters_projections = batters.merge(batter_21, how='left', on='Name')

        # dropping batters with no projections
        batters_projections.dropna(inplace=True)

        # drop unneeded columns for merge with pitcher, overwrite current df
        batters_projections = batters_projections[['Id', 'Position', 'Name', 'Salary', 'Team_x', 'Opponent','Projected_FPPG']]
        
        # rename team column
        batters_projections.rename(columns={'Team_x' : 'Team'}, inplace=True)

        # following are steps for creating a line up
        # set cap for fanduel
        salary_cap = 35_000

        # sort pitcher by fppg projections
        pitcher_projections.sort_values(by='Projected_FPPG', ascending=False, inplace=True, ignore_index=True)

        # create a player list to 
        lineup = []
        lineup.append(pitcher_projections.values[0])

        # need to update remaining salary
        salary_cap -= pitcher_projections['Salary'][0]

        # with updated salary fill remaining roster based on position and highest fppg
        # create position list for remaining roster spots
        position_list = ['C', '1B', '2B', '3B', 'SS', 'OF', 'OF', 'OF']
        # clean position
        pos_list = [pos[:2] for pos in batters_projections['Position']]
        batters_projections['Position'] = pos_list
        # sort batters by FPPG
        batters_projections.sort_values(by='Projected_FPPG', ascending=False, inplace=True, ignore_index=True)

        # create count based on remaining positions
        sal_count = 8

        # create average salary variable for remaining players
        avg_sal = salary_cap/sal_count

        # create for loop for each position in list to take highest fppg
        for pos in position_list:
            # setting counter to increase if player is already in list
            # this is inside the for loop beacuse it needs to be per position
            counter = 0
            # if salary greater than average move to next player
            for salary in batters_projections.loc[batters_projections['Position'] == pos]['Salary']:
                # test if salary is greater than average if it is increase counter
                if batters_projections.loc[batters_projections['Position'] == pos]['Salary'].values[counter] > avg_sal or batters_projections.loc[batters_projections['Position'] == pos]['Salary'].values[counter] > salary_cap:
                    counter += 1
                else:
                    # if less than average add player to list
                    lineup.append(batters_projections.loc[batters_projections['Position'] == pos].values[counter])
                    # drop player so no duplicates are added
                    batters_projections.drop(batters_projections.loc[batters_projections['Position'] == pos].index.values[counter], inplace=True)
                    # decrease sal_count
                    sal_count -= 1
                    # decrease salary cap
                    salary_cap -= batters_projections.loc[batters_projections['Position'] == pos]['Salary'].values[counter]
                    # create new average salary
                    avg_sal = salary_cap/sal_count
                    break
        # create dataframe of lineup
        df_lineup = pd.DataFrame(lineup, columns=['Id', 'Position', 'Name', 'Salary', 'Team', 'Opponent','Projected_FPPG'])

        # return dataframe
        return df_lineup

---

In [65]:
# test function
fd = pd.read_csv('../../../../Downloads/FanDuel-MLB-2021 ET-05 ET-14 ET-58874-players-list.csv')

In [32]:
# overwrite imported data with dataframe from function
fd = build_lineup(fd)

  avg_sal = salary_cap/sal_count


In [33]:
fd['Salary'].sum()

31600

In [58]:
# create function to create template for upload
def make_template(df):
    template = df[['Position', 'Id']]
    template = template.set_index('Position').T
    template.rename(columns={'C':'C/1B', '1B':'UTIL'}, inplace=True)
    return template

In [60]:
# test with dataframe above
temp = make_template(fd2)

In [61]:
#display template
temp

Position,P,C/1B,UTIL,2B,3B,SS,OF,OF.1,OF.2
Id,58417-68654,58417-79174,58417-52175,58417-68587,58417-5104,58417-37982,58417-5763,58417-60643,58417-39086


In [66]:
def build_lineup_stack(df):
        # clean up fd to match column list above
        df.drop(columns=['First Name', 'Last Name', 'FPPG', 'Played', 'Injury Details', 'Tier', 'Batting Order', 'Roster Position'], inplace=True)

        # filling nulls for probable pitcher
        df['Probable Pitcher'].fillna('No', inplace=True)

        # fill nulls for injury indicator
        df['Injury Indicator'].fillna('Healthy', inplace=True)

        # renaming nickname column
        df.rename(columns={'Nickname': 'Name'}, inplace=True)

        # fitler to only healthy players
        df = df.loc[df['Injury Indicator'] == 'Healthy']

        # split using .loc by position and make new dataframe for pitchers
        pitchers = df.loc[df['Position']=='P']

        # split using .loc by position and make new dataframe for batters
        batters = df.loc[df['Position']!='P']

        # pitcher dataframe steps
        # save pitchers df to only starting pitchers
        pitchers = pitchers.loc[pitchers['Probable Pitcher']=='Yes']

        # read in pitcher projections
        pitcher_proj = pd.read_csv('../../Projections/pitcher_projections_2021.csv')

        # merge attempt
        pitcher_projections = pitchers.merge(pitcher_proj, how='left', on='Name')

        # drop nulls if any
        pitcher_projections.dropna(inplace=True)

        # overwrite df with only the columns needed
        pitcher_projections = pitcher_projections[['Id', 'Position', 'Name', 'Salary', 'Team_x', 'Opponent', 'AVG', 'Projected_FPPG']]

        # rename team column
        pitcher_projections.rename(columns={'Team_x' : 'Team'}, inplace=True)

        # batter dataframe steps
        # read in projections file
        batter_21 = pd.read_csv('../../Projections/batter_projections_2021.csv')

        # merge projections with batter df, creating new df
        batters_projections = batters.merge(batter_21, how='left', on='Name')

        # dropping batters with no projections
        batters_projections.dropna(inplace=True)

        # drop unneeded columns for merge with pitcher, overwrite current df
        batters_projections = batters_projections[['Id', 'Position', 'Name', 'Salary', 'Team_x', 'Opponent','Projected_FPPG']]
        
        # rename team column
        batters_projections.rename(columns={'Team_x' : 'Team'}, inplace=True)

        # following are steps for creating a line up
        # set cap for fanduel
        salary_cap = 35_000
        
        # sort by avg
        pitcher_projections.sort_values(by='AVG', ascending=False, inplace=True, ignore_index=True)
        # create list of teams to filter batters
        team_list = []
        for x in range(0,4):
            team_list.append(pitcher_projections['Opponent'][x])
        # drop avg
        pitcher_projections.drop(columns='AVG', inplace=True)
        
        # create team filter 
        team_filter = (batters_projections['Team'] == team_list[0]) | (batters_projections['Team'] == team_list[1]) |(batters_projections['Team'] == team_list[2]) |(batters_projections['Team'] == team_list[3])
        # new batter dataframe with team filter
        batters_projections = batters_projections[team_filter]
        # reset index
        batters_projections.reset_index(drop=True, inplace=True)

        # sort pitcher by fppg projections
        pitcher_projections.sort_values(by='Projected_FPPG', ascending=False, inplace=True, ignore_index=True)

        # create a player list to 
        lineup = []
        lineup.append(pitcher_projections.values[0])

        # need to update remaining salary
        salary_cap -= pitcher_projections['Salary'][0]

        # with updated salary fill remaining roster based on position and highest fppg
        # create position list for remaining roster spots
        position_list = ['C', '1B', '2B', '3B', 'SS', 'OF', 'OF', 'OF']
        # clean position
        pos_list = [pos[:2] for pos in batters_projections['Position']]
        batters_projections['Position'] = pos_list
        # sort batters by FPPG
        batters_projections.sort_values(by='Projected_FPPG', ascending=False, inplace=True, ignore_index=True)

        # create count based on remaining positions
        sal_count = 8

        # create average salary variable for remaining players
        avg_sal = salary_cap/sal_count

        # create for loop for each position in list to take highest fppg
        for pos in position_list:
            # setting counter to increase if player is already in list
            # this is inside the for loop beacuse it needs to be per position
            counter = 0
            # if salary greater than average move to next player
            for salary in batters_projections.loc[batters_projections['Position'] == pos]['Salary']:
                # test if salary is greater than average if it is increase counter
                if batters_projections.loc[batters_projections['Position'] == pos]['Salary'].values[counter] > avg_sal or batters_projections.loc[batters_projections['Position'] == pos]['Salary'].values[counter] > salary_cap:
                    print('y')
                    counter += 1
                else:
                    # if less than average add player to list
                    lineup.append(batters_projections.loc[batters_projections['Position'] == pos].values[counter])
                    # drop player so no duplicates are added
                    batters_projections.drop(batters_projections.loc[batters_projections['Position'] == pos].index.values[counter], inplace=True)
                    # decrease sal_count
                    sal_count -= 1
                    # decrease salary cap
                    salary_cap -= batters_projections.loc[batters_projections['Position'] == pos]['Salary'].values[counter]
                    # create new average salary
                    avg_sal = salary_cap/sal_count
                    break
        # create dataframe of lineup
        df_lineup = pd.DataFrame(lineup, columns=['Id', 'Position', 'Name', 'Salary', 'Team', 'Opponent','Projected_FPPG'])

        # return dataframe
        return df_lineup, pd.DataFrame(team_list)

In [67]:
fd4, team = build_lineup_stack(fd)
team

  avg_sal = salary_cap/sal_count


Unnamed: 0,0
0,CLE
1,NYY
2,WSH
3,OAK


In [68]:
fd4

Unnamed: 0,Id,Position,Name,Salary,Team,Opponent,Projected_FPPG
0,58874-79951,P,Nick Pivetta,8300,BOS,LAA,46.814891
1,58874-79918,C,Kyle Higashioka,2700,NYY,BAL,9.400274
2,58874-52175,1B,Matt Olson,3200,OAK,MIN,8.560223
3,58874-13771,2B,Josh Harrison,3100,WSH,ARI,7.543716
4,58874-68544,3B,Matt Chapman,2900,OAK,MIN,7.072461
5,58874-65881,SS,Trea Turner,4000,WSH,ARI,8.335203
6,58874-84133,OF,Franmil Reyes,3200,CLE,SEA,9.208146
7,58874-12302,OF,Giancarlo Stanton,3600,NYY,BAL,9.088541
8,58874-60643,OF,Aaron Judge,3300,NYY,BAL,8.133931
