# Daily Fantasy Baseball Integer Program

In [1]:
import numpy as np
import pandas as pd
from pulp import *

Using license file /Users/petermiller/gurobi.lic
Academic license - for non-commercial use only
No parameters matching '_test' found


### Data Cleaning Functions

In [2]:
def read_players(path_to_pitchers, path_to_hitters):
    """Load the pitcher and hitter tables and stack them into one frame.

    Both CSVs (PATH_TO_PITCHERS, PATH_TO_HITTERS; Daily Fantasy Nerd
    exports) are reduced to the columns 'Player Name', 'Pos', 'Salary',
    'Team', 'Opp' and 'Proj FP'; 'Player Name' is renamed to 'Name' and
    'Pos' to 'Position'. Pitcher rows come first in the result, and every
    '@' is stripped from the 'Opp' column.

    The two tables are concatenated without renumbering, so the returned
    index repeats labels from both files.
    """
    wanted_columns = ['Player Name', 'Pos', 'Salary', 'Team', 'Opp', 'Proj FP']
    new_names = {'Player Name': 'Name', 'Pos': 'Position'}

    tables = []
    for source in (path_to_pitchers, path_to_hitters):
        table = pd.read_csv(source)
        # reindex keeps only the wanted columns (absent ones become NaN).
        table = table.reindex(columns=wanted_columns)
        tables.append(table.rename(columns=new_names))

    combined = pd.concat(tables)
    combined['Opp'] = combined['Opp'].str.replace('@', '')
    return combined

In [3]:
def remove_players(data, names):
    """Return a copy of DATA without the players listed in NAMES.

    Parameters
    ----------
    data : pandas.DataFrame
        Player table with a 'Name' column. It is not modified.
    names : iterable of str
        Player names to drop; an empty iterable removes nothing.

    Returns
    -------
    pandas.DataFrame
        The remaining rows, renumbered 0..n-1.
    """
    excluded = list(names)
    kept = data[~data['Name'].isin(excluded)]
    # drop=True discards the old index outright. The previous
    # reset_index() + drop('index') pair raised KeyError whenever the
    # incoming index carried a name.
    return kept.reset_index(drop=True)

In [4]:
def remove_teams(data, teams):
    """Return a copy of DATA without the players whose team is in TEAMS.

    Parameters
    ----------
    data : pandas.DataFrame
        Player table with a 'Team' column. It is not modified.
    teams : iterable of str
        Team codes to drop; an empty iterable removes nothing.

    Returns
    -------
    pandas.DataFrame
        The remaining rows, renumbered 0..n-1.
    """
    excluded = list(teams)
    kept = data[~data['Team'].isin(excluded)]
    # drop=True discards the old index outright. The previous
    # reset_index() + drop('index') pair raised KeyError whenever the
    # incoming index carried a name.
    return kept.reset_index(drop=True)

In [5]:
def encode_position(data):
    """One-hot encode player positions and append them to a copy of DATA.

    The position text is taken from the 'Position' column when present;
    otherwise from the second column (the layout this function originally
    assumed). Seven 0/1 integer columns are added: 'P', '1B', '2B', '3B',
    'C', 'SS' and 'OF'. A flag is 1 when its token occurs anywhere in the
    position text, so a multi-position entry such as '1B/OF' sets several
    flags. 'P' is set by either 'SP' or 'RP'. Missing positions produce
    all-zero flags instead of raising.

    Parameters
    ----------
    data : pandas.DataFrame
        Player table. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Deep copy of DATA with the seven flag columns added.
    """
    data = data.copy(deep=True)

    if 'Position' in data.columns:
        raw_positions = data['Position']
    else:
        # Backward-compatible fallback: the position used to be read
        # positionally from column 1.
        raw_positions = data.iloc[:, 1]
    positions = raw_positions.fillna('').astype(str)

    # Output column -> substrings that mark eligibility for it.
    token_map = (
        ('P', ('SP', 'RP')),
        ('1B', ('1B',)),
        ('2B', ('2B',)),
        ('3B', ('3B',)),
        ('C', ('C',)),
        ('SS', ('SS',)),
        ('OF', ('OF',)),
    )
    for column, tokens in token_map:
        eligible = np.zeros(data.shape[0], dtype=bool)
        for token in tokens:
            # regex=False keeps the plain-substring semantics of str.find.
            eligible |= positions.str.contains(token, regex=False).to_numpy(dtype=bool)
        data[column] = eligible.astype(int)
    return data

In [6]:
def encode_team(data):
    """One-hot encode the 'Team' and 'Opp' columns of DATA.

    For every team code that appears in either column, the result has an
    integer 0/1 column 'Team_<code>' (the player belongs to that team) and
    'Opp_<code>' (the player faces that team). Columns are emitted for the
    union of codes, so 'Opp_<code>' exists for each team in 'Team' even
    when no remaining row faces it; such columns are all zeros.

    Parameters
    ----------
    data : pandas.DataFrame
        Player table with 'Team' and 'Opp' columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        DATA followed by the 'Team_*' columns, then the 'Opp_*' columns,
        each group in sorted code order.
    """
    data = data.copy(deep=True)
    codes = sorted(set(data['Team'].dropna()) | set(data['Opp'].dropna()))

    pieces = [data]
    for source in ('Team', 'Opp'):
        wanted = ['{}_{}'.format(source, code) for code in codes]
        # dtype=int pins numeric 0/1 flags; the pandas default changed
        # from uint8 to bool in pandas 2.0.
        dummies = pd.get_dummies(data[source], prefix=source, dtype=int)
        pieces.append(dummies.reindex(columns=wanted, fill_value=0))
    return pd.concat(pieces, axis=1)

### Integer Program Functions

In [7]:
def prepare_arguments(data, old_lineups):
    """Build the positional arguments that create_lineup takes after DATA.

    Players are identified by their row position in DATA rendered as a
    string ('0', '1', ...). Values are read positionally, so DATA does not
    need a 0..n-1 index.

    Parameters
    ----------
    data : pandas.DataFrame
        Player table with 'Proj FP', 'Salary' and the position flag
        columns 'P', '1B', '2B', '3B', 'C', 'SS', 'OF'.
    old_lineups : dict
        Passed through unchanged as the last element of the result.

    Returns
    -------
    list
        [player, point, cost, P, B1, B2, B3, C, SS, OF, old_lineups]:
        `player` is the list of keys; each of the next nine entries is a
        dict mapping key -> that row's value in the matching column.
    """
    player = [str(i) for i in range(data.shape[0])]

    def by_player(column):
        """Map each player key to its row's value in COLUMN."""
        return dict(zip(player, data[column].tolist()))

    source_columns = ['Proj FP', 'Salary', 'P', '1B', '2B', '3B', 'C', 'SS', 'OF']
    return [player] + [by_player(column) for column in source_columns] + [old_lineups]

In [8]:
def create_lineup(data, player, point, cost, P, B1, B2, B3, C, SS, OF, old_lineups):
    """Solve one lineup integer program and return the chosen player names.

    Parameters
    ----------
    data : pandas.DataFrame
        Player table with 'Name', 'Team', 'Salary', 'Proj FP' and the
        one-hot 'Team_<code>' / 'Opp_<code>' columns. Rows are matched to
        player keys by position. An 'integer_programming' column (1.0 for
        selected rows, else 0.0) is written into this frame as a side effect.
    player : list of str
        Player keys; row k of DATA corresponds to key str(k).
    point, cost, P, B1, B2, B3, C, SS, OF : dict
        Key -> projected points, salary, and 0/1 position flags.
    old_lineups : dict
        Key -> 1 if the player was in any earlier lineup, else 0. It is
        updated in place with the players chosen here and also returned.

    Returns
    -------
    list
        [names, old_lineups], where `names` is the 'Name' Series of the
        selected rows.

    Notes
    -----
    * The "Overlap" constraint allows at most 6 players from the union of
      all earlier lineups, because OLD_LINEUPS only accumulates flags.
    * NOTE(review): the position targets (2 + 1 + 1 + 1 + 1 + 1 + 3) sum
      to the roster size of 10, so a player flagged for two positions
      appears to be selectable only alongside a player with no flags --
      confirm whether multi-position eligibility is intended to work.
    * If the solver does not report an optimal solution, a warning is
      printed and whatever values it left behind are still returned.
    """
    num_rows = data.shape[0]
    row_keys = [str(k) for k in range(num_rows)]

    def column_flags(column):
        """Return COLUMN as a positional list of ints (zeros if absent)."""
        if column in data.columns:
            return data[column].astype(int).tolist()
        return [0] * num_rows

    # Problem instantiation: one binary decision variable per player key.
    prob = LpProblem("DFS_Baseball", LpMaximize)
    player_vars = LpVariable.dicts("Players", player, 0, 1, LpBinary)

    # Objective: total projected fantasy points.
    prob += lpSum([point[i] * player_vars[i] for i in player]), "Points"

    # Position/Cost/Overlap constraints
    prob += lpSum([player_vars[i] for i in player]) == 10, "10_Players"
    prob += lpSum([cost[i] * player_vars[i] for i in player]) <= 50000, "Total_Cost"
    prob += lpSum([P[i] * player_vars[i] for i in player]) == 2, "2Pitchers"
    prob += lpSum([B1[i] * player_vars[i] for i in player]) == 1, "1B"
    prob += lpSum([B2[i] * player_vars[i] for i in player]) == 1, "2B"
    prob += lpSum([B3[i] * player_vars[i] for i in player]) == 1, "3B"
    prob += lpSum([C[i] * player_vars[i] for i in player]) == 1, "C"
    prob += lpSum([SS[i] * player_vars[i] for i in player]) == 1, "SS"
    prob += lpSum([OF[i] * player_vars[i] for i in player]) == 3, "OF"
    prob += lpSum([old_lineups.get(key) * player_vars.get(key)
                   for key in row_keys]) <= 6, "Overlap"

    teams = list(data['Team'].unique())

    # Must pick 5 players from same team: u_t may be 1 only if team t
    # supplies at least 5 selected players, and at least one u_t must be 1.
    used_5_in_team = [LpVariable("u{}".format(t + 1), cat="Binary")
                      for t in range(len(teams))]
    for stack_var, team in zip(used_5_in_team, teams):
        on_team = column_flags('Team_' + team)
        prob += (5 * stack_var <=
                 lpSum(on_team[k] * player_vars.get(row_keys[k])
                       for k in range(num_rows)))
    prob += lpSum(used_5_in_team) >= 1

    # No pitcher vs batter constraint: a selected pitcher from a team
    # contributes 8, which leaves no room for any hitter facing that team.
    for team in teams:
        on_team = column_flags('Team_' + team)
        # A missing 'Opp_<team>' column means no row faces this team.
        facing_team = column_flags('Opp_' + team)
        pitchers_on_team = lpSum(
            P.get(row_keys[k]) * player_vars.get(row_keys[k]) * on_team[k]
            for k in range(num_rows))
        hitters_facing_team = lpSum(
            (1 - P.get(row_keys[k])) * player_vars.get(row_keys[k]) * facing_team[k]
            for k in range(num_rows))
        prob += (8 * pitchers_on_team + hitters_facing_team <= 8)

    prob.solve()
    status_name = LpStatus.get(prob.status, prob.status)
    if status_name != "Optimal":
        print("Warning: solver finished with status '{}'; "
              "the lineup below may be invalid.".format(status_name))

    # Read the decision variables directly and round them: a solver may
    # report a selected binary as e.g. 0.9999999, which an exact == 1.0
    # comparison would miss. Unset values (None) count as not selected.
    chosen = []
    for key in row_keys:
        value = player_vars[key].varValue
        chosen.append(1.0 if value is not None and round(value) == 1 else 0.0)
    data['integer_programming'] = chosen

    # Remember the selected players for the overlap constraint of later calls.
    for k, flag in enumerate(chosen):
        if flag == 1.0:
            old_lineups[row_keys[k]] = 1

    lineup = data[data['integer_programming'] == 1.0]
    total_points = lineup['Proj FP'].sum()
    total_cost = lineup['Salary'].sum()
    total_players = lineup.shape[0]
    print("Total points: {}\nCost: ${}\nNumber of players: {}"
          .format(total_points, total_cost, total_players))
    return [lineup['Name'], old_lineups]

In [9]:
# Every player name printed by create_multiple_lineups is collected here.
glob_players = set()
def create_multiple_lineups(data, num_lineups):
    """Generate and print NUM_LINEUPS lineups built from DATA.

    Lineups are solved one after another; the usage flags returned by each
    create_lineup call are fed into the next one. Each printed name is
    also added to the module-level set `glob_players`.
    """
    old_lineups = dict.fromkeys((str(row) for row in range(data.shape[0])), 0)
    for lineup_number in range(1, num_lineups + 1):
        # prepare_arguments returns exactly the arguments that follow
        # `data` in create_lineup's signature, in order.
        arguments = prepare_arguments(data, old_lineups)
        optimal_lineup, old_lineups = create_lineup(data, *arguments)
        print('Lineup ' + str(lineup_number) + ': ')
        print('\n')
        for name in optimal_lineup:
            glob_players.add(name)
            print(name + " ")
        print('\n')

In [10]:
def optimize_multiple_lineups(path_to_pitchers, path_to_hitters, rem_players, rem_teams, num_lineups):
    """Run the whole pipeline: load, filter, encode, then print lineups.

    The two CSVs are read and combined, the teams in REM_TEAMS and then
    the players in REM_PLAYERS are dropped, position and team columns are
    one-hot encoded, and NUM_LINEUPS lineups are generated and printed.
    Nothing is returned.
    """
    players_table = (
        read_players(path_to_pitchers, path_to_hitters)
        .pipe(remove_teams, rem_teams)
        .pipe(remove_players, rem_players)
        .pipe(encode_position)
        .pipe(encode_team)
    )
    create_multiple_lineups(players_table, num_lineups)

### Usage: edit the file paths, the team/player removal lists, and the number of lineups below, then run the cell

In [11]:
# Input CSVs for the slate to optimise.
path_to_pitchers = 'DFN_Pitchers_08082020.csv'
path_to_hitters = 'DFN_Hitters_08082020.csv'
# Team codes and player names to exclude before optimising (none here).
rem_teams = []
rem_names = []
optimize_multiple_lineups(
    path_to_pitchers=path_to_pitchers,
    path_to_hitters=path_to_hitters,
    rem_players=rem_names,
    rem_teams=rem_teams,
    num_lineups=20,
)

Total points: 120.8
Cost: $50000
Number of players: 10
Lineup 1: 


Gerrit Cole 
Clayton Kershaw 
Christian Yelich 
Nick Castellanos 
Joc Pederson 
Niko Goodrum 
Austin Romine 
C.J. Cron 
Jonathan Schoop 
Jeimer Candelario 


Total points: 119.7
Cost: $49700
Number of players: 10
Lineup 2: 


Gerrit Cole 
Tyler Glasnow 
Mike Trout 
Mitch Garver 
Nick Castellanos 
Niko Goodrum 
C.J. Cron 
Jonathan Schoop 
Travis Demeritte 
Jeimer Candelario 


Total points: 117.8
Cost: $49200
Number of players: 10
Lineup 3: 


Gerrit Cole 
Chris Paddack 
Mike Trout 
Nelson Cruz 
Cody Bellinger 
Niko Goodrum 
Austin Romine 
Jonathan Schoop 
Miguel Cabrera 
Jeimer Candelario 


Total points: 117.0
Cost: $49900
Number of players: 10
Lineup 4: 


Gerrit Cole 
Tyler Glasnow 
Mike Trout 
Nick Castellanos 
Eugenio Suarez 
Freddy Galvis 
Phillip Ervin 
C.J. Cron 
Jonathan Schoop 
Curt Casali 


Total points: 115.80000000000001
Cost: $49900
Number of players: 10
Lineup 5: 


Gerrit Cole 
Tyler Glasnow 
Jose Rami

In [12]:
# Number of distinct player names collected in glob_players so far.
num_distinct_players = len(glob_players)
print("Number of players in lineups: {}".format(num_distinct_players))

Number of players in lineups: 89
