The purpose of this notebook is to create a function that takes the imported player list:
- clean to only needed information
- seperate by pither and batter
- merge each on player name to get projections from model
- merge back pitcher and batter data 
- output df for now

Next steps:
- build line up based on salary

In [1]:
# imports
import pandas as pd
import numpy as np

In [2]:
# will need fanduel import
fd = pd.read_csv('../../../Downloads/FanDuel-MLB-2021 ET-05 ET-11 ET-58668-players-list.csv')

In [37]:
    # function
def build_lineup(df):
        # clean up fd to match column list above
        df.drop(columns=['First Name', 'Last Name', 'FPPG', 'Played', 'Injury Details', 'Tier', 'Batting Order', 'Roster Position'], inplace=True)

        # filling nulls for probable pitcher
        df['Probable Pitcher'].fillna('No', inplace=True)

        # fill nulls for injury indicator
        df['Injury Indicator'].fillna('Healthy', inplace=True)

        # renaming nickname column
        df.rename(columns={'Nickname': 'Name'}, inplace=True)

        # fitler to only healthy players
        df = df.loc[df['Injury Indicator'] == 'Healthy']

        # split using .loc by position and make new dataframe for pitchers
        pitchers = df.loc[df['Position']=='P']

        # split using .loc by position and make new dataframe for batters
        batters = df.loc[df['Position']!='P']

        # pitcher dataframe steps
        # save pitchers df to only starting pitchers
        pitchers = pitchers.loc[pitchers['Probable Pitcher']=='Yes']

        # read in pitcher projections
        pitcher_proj = pd.read_csv('../Projections/pitcher_projections_2021.csv')

        # merge attempt
        pitcher_projections = pitchers.merge(pitcher_proj, how='left', on='Name')

        # drop nulls if any
        pitcher_projections.dropna(inplace=True)

        # overwrite df with only the columns needed
        pitcher_projections = pitcher_projections[['Id', 'Position', 'Name', 'Salary', 'Team_x', 'Opponent', 'Projected_FPPG']]

        # rename team column
        pitcher_projections.rename(columns={'Team_x' : 'Team'}, inplace=True)

        # batter dataframe steps
        # read in projections file
        batter_21 = pd.read_csv('../Projections/batter_projections_2021.csv')

        # merge projections with batter df, creating new df
        batters_projections = batters.merge(batter_21, how='left', on='Name')

        # dropping batters with no projections
        batters_projections.dropna(inplace=True)

        # drop unneeded columns for merge with pitcher, overwrite current df
        batters_projections = batters_projections[['Id', 'Position', 'Name', 'Salary', 'Team_x', 'Opponent','Projected_FPPG']]
        
        # rename team column
        batters_projections.rename(columns={'Team_x' : 'Team'}, inplace=True)

        # following are steps for creating a line up
        # set cap for fanduel
        salary_cap = 35_000

        # sort pitcher by fppg projections
        pitcher_projections.sort_values(by='Projected_FPPG', ascending=False, inplace=True, ignore_index=True)

        # create a player list to 
        lineup = []
        lineup.append(pitcher_projections.values[0])

        # need to update remaining salary
        salary_cap -= pitcher_projections['Salary'][0]

        # with updated salary fill remaining roster based on position and highest fppg
        # create position list for remaining roster spots
        position_list = ['C', '1B', '2B', '3B', 'SS', 'OF', 'OF', 'OF']
        # clean position
        pos_list = [pos[:2] for pos in batters_projections['Position']]
        batters_projections['Position'] = pos_list
        # sort batters by FPPG
        batters_projections.sort_values(by='Projected_FPPG', ascending=False, inplace=True, ignore_index=True)

        # create count based on remaining positions
        sal_count = 8

        # create average salary variable for remaining players
        avg_sal = salary_cap/sal_count

        # create for loop for each position in list to take highest fppg
        for pos in position_list:
            # setting counter to increase if player is already in list
            # this is inside the for loop beacuse it needs to be per position
            counter = 0
            # if salary greater than average move to next player
            for salary in batters_projections.loc[batters_projections['Position'] == pos]['Salary']:
                # test if salary is greater than average if it is increase counter
                if batters_projections.loc[batters_projections['Position'] == pos]['Salary'].values[counter] > avg_sal:
                    counter += 1
                else:
                    # if less than average add player to list
                    lineup.append(batters_projections.loc[batters_projections['Position'] == pos].values[counter])
                    # drop player so no duplicates are added
                    batters_projections.drop(batters_projections.loc[batters_projections['Position'] == pos].index.values[counter], inplace=True)
                    # decrease sal_count
                    sal_count -= 1
                    # decrease salary cap
                    salary_cap -= batters_projections.loc[batters_projections['Position'] == pos]['Salary'].values[counter]
                    # create new average salary
                    avg_sal = salary_cap/sal_count
                    break
        # create dataframe of lineup
        df_lineup = pd.DataFrame(lineup, columns=['Id', 'Position', 'Name', 'Salary', 'Team', 'Opponent','Projected_FPPG'])

        # return dataframe
        return df_lineup

---

In [38]:
# test function
fd2 = pd.read_csv('../../../Downloads/FanDuel-MLB-2021 ET-05 ET-11 ET-58668-players-list.csv')

In [39]:
# overwrite imported data with dataframe from function
fd2 = build_lineup(fd2)

  avg_sal = salary_cap/sal_count


In [40]:
fd2

Unnamed: 0,Id,Position,Name,Salary,Team,Opponent,Projected_FPPG
0,58668-52159,P,Lance McCullers Jr.,8500,HOU,LAA,40.138767
1,58668-102513,C,Mario Feliciano,2000,MIL,STL,10.944532
2,58668-79087,1B,Michael Chavis,2100,BOS,OAK,8.677415
3,58668-60641,2B,Nick Gordon,2200,MIN,CWS,9.480607
4,58668-5104,3B,Pablo Sandoval,2000,ATL,TOR,9.73941
5,58668-80462,SS,Paul DeJong,3000,STL,MIL,7.140419
6,58668-79249,OF,Harrison Bader,2800,STL,MIL,8.581402
7,58668-82536,OF,Cedric Mullins,3100,BAL,NYM,8.551967
8,58668-12577,OF,Jarrod Dyson,2000,KC,DET,8.373596


In [58]:
# create function to create template for upload
def make_template(df):
    template = df[['Position', 'Id']]
    template = template.set_index('Position').T
    template.rename(columns={'C':'C/1B', '1B':'UTIL'}, inplace=True)
    return template

In [60]:
# test with dataframe above
temp = make_template(fd2)

In [61]:
#display template
temp

Position,P,C/1B,UTIL,2B,3B,SS,OF,OF.1,OF.2
Id,58417-68654,58417-79174,58417-52175,58417-68587,58417-5104,58417-37982,58417-5763,58417-60643,58417-39086
