In [1]:
import pandas as pd
from pulp import *
from scrape_data.data import get_hitter_data, get_pitcher_data # get_data

In [2]:
# load the initial hitter and pitcher tables via the scrape_data.data helpers
hitter_data = get_hitter_data()
# NOTE: the spelling `picher_data` (sic) is kept because full_dataframe in a
# later cell refers to this exact name
picher_data = get_pitcher_data()

In [3]:
# position labels; binary_encoding adds one 0/1 column per label, and the
# roster constraints later read those columns by the same names
positions = ['1B', '2B', '3B', 'SS', 'C', 'SP', 'OF']

def full_dataframe(df1 = None, df2 = None):
    """Stack two player tables into a single frame.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame, optional
        Frames to stack, rows of df1 first. When omitted they fall back to
        the notebook-level `hitter_data` and `picher_data`. The fallback is
        looked up at call time, so re-running the data-loading cell is
        picked up without re-defining this function.

    Returns
    -------
    pandas.DataFrame
        All rows of both inputs with a fresh 0..n-1 index. Columns are the
        sorted union of the input columns; values missing on one side are NaN.
    """
    if df1 is None:
        df1 = hitter_data
    if df2 is None:
        df2 = picher_data
    # DataFrame.append was removed in pandas 2.0; pd.concat with the same
    # keyword arguments produces the same result on old and new versions
    return pd.concat([df1, df2], sort = True, ignore_index = True)

# maybe slightly over-doing it, but it keeps the later cells cleaner
def binary_encoding(pos = positions):
    """Return the combined player frame with one 0/1 column per position label.

    Parameters
    ----------
    pos : iterable of str
        Labels to encode; each becomes a column of the same name.

    Returns
    -------
    pandas.DataFrame
        The frame from full_dataframe() plus a constant `Selected` column and
        one indicator column per label.
    """
    encoded = full_dataframe()
    # placeholder column; nothing else in the visible notebook reads it
    encoded['Selected'] = 1

    def eligibility_test(label):
        # build the per-row check for one label
        # NOTE(review): `in` on a string is a substring test, so a label also
        # matches any longer Position text containing it - confirm the format
        return lambda held: int(label in held)

    for label in pos:
        encoded[label] = encoded['Position'].apply(eligibility_test(label))
    return encoded

# combined hitter + pitcher frame with the 0/1 position columns added
final_df = binary_encoding()

In [4]:
# names of the chosen players end up here (filled in by solve)
players_list = []

# row count and name column, pulled out once for the cells below
df_len = len(final_df['Proj'])
player_names = final_df['Player']

# the optimisation problem: maximise total projected points
prob = LpProblem('dfs_mlb', LpMaximize)

# one binary pick / don't-pick decision variable per row, named after the player
# NOTE(review): variable names come straight from `Player`; confirm the
# combined frame has no repeated names
selected = [
    LpVariable(player_names[row], cat='Binary')
    for row in range(df_len)
]

def constraints(df, prob = prob):
    """Add the roster constraints and the objective to the problem.

    Parameters
    ----------
    df : pandas.DataFrame
        Player frame, read by row label 0..df_len-1. It must provide the
        columns 'SP', 'OF', 'C', '1B', '2B', '3B', 'SS', 'Salary' and 'Proj'.
    prob : LpProblem
        Problem to extend; defaults to the notebook-level problem.

    Returns
    -------
    LpProblem
        The same problem object, with constraints and objective added.

    Notes
    -----
    Every call adds the constraints again and re-sets the objective, so this
    is meant to be called once per problem object.
    NOTE(review): a row flagged for several position columns counts toward
    each of those slot constraints at once - confirm that is intended.
    """
    roster_size = 10
    salary_cap = 50000
    # required number of picks per position column (sums to roster_size)
    slots = {'SP': 2, 'OF': 3, 'C': 1, '1B': 1, '2B': 1, '3B': 1, 'SS': 1}
    rows = range(df_len)

    # `lpSum` is used as star-imported; the earlier `pulp.lpSum` spelling only
    # resolved because `from pulp import *` leaks the `pulp.pulp` submodule
    prob += (lpSum(selected[i] for i in rows) == roster_size)
    for column, count in slots.items():
        prob += (lpSum(df[column][i] * selected[i] for i in rows) == count)
    prob += (lpSum(df['Salary'][i] * selected[i] for i in rows) <= salary_cap)

    # objective function: total projected points of the selected players
    prob += lpSum(df.loc[i, 'Proj'] * selected[i] for i in rows)
    return prob

def solve():
    """Build and solve the lineup problem and return the chosen player names.

    Returns
    -------
    list of str
        The notebook-level `players_list`, refilled with the names of the
        variables the solver set to 1, in the order prob.variables() yields.

    Raises
    ------
    RuntimeError
        If the solver does not report an optimal solution.
    """
    # start from an empty list so re-running the cell does not append a
    # second copy of the lineup to the shared list
    players_list.clear()

    # solve the problem and get the status
    status = constraints(final_df).solve()

    # only trust the variable values when the solver found an optimum
    if status != LpStatusOptimal:
        raise RuntimeError(
            'lineup optimisation did not reach an optimal solution: %s'
            % LpStatus.get(status, status)
        )

    # solver values are floats, so test against a threshold rather than == 1
    players_list.extend(
        v.name
        for v in prob.variables()
        if v.varValue is not None and v.varValue > 0.5
    )

    return players_list

In [5]:
# run the optimiser; the returned list of chosen names is the cell output
solve()

['Aaron_Judge',
 'Cavan_Biggio',
 'Charlie_Blackmon',
 'Christian_Walker',
 'James_Paxton',
 'Joey_Lucchesi',
 'Josh_Rojas',
 'Max_Stassi',
 'Shohei_Ohtani',
 'Trevor_Story']