DraftKings NFL Constraint Satisfaction
===

This is the companion code to a [blog post](https://zwlevonian.medium.com/integer-linear-programming-with-pulp-optimizing-a-draftkings-nfl-lineup-5e7524dd42d3) I wrote on Medium.

In [1]:
import pandas as pd

In [2]:
import pulp

### Load in the weekly data

In [3]:
# Read the weekly DraftKings salary export into the working frame,
# then echo the number of rows as a quick sanity check.
df = pd.read_csv('DKSalaries.csv')
df.shape[0]

433

In [4]:
# Preview a few random rows; the fixed seed keeps the preview identical
# across "Restart Kernel -> Run All" so the saved output stays reproducible.
df.sample(n=5, random_state=0)

Unnamed: 0,Position,Name + ID,Name,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame
150,QB,David Blough (15677096),David Blough,15677096,QB,4000,IND@DET 11/01/2020 01:00PM ET,DET,0.0
384,TE,Adam Trautman (15677765),Adam Trautman,15677765,TE/FLEX,2500,NO@CHI 11/01/2020 04:25PM ET,NO,1.28
31,QB,Matthew Stafford (15677049),Matthew Stafford,15677049,QB,6400,IND@DET 11/01/2020 01:00PM ET,DET,18.48
395,TE,Geoff Swaim (15677751),Geoff Swaim,15677751,TE/FLEX,2500,TEN@CIN 11/01/2020 01:00PM ET,TEN,1.0
296,WR,Tajae Sharpe (15677575),Tajae Sharpe,15677575,WR/FLEX,3000,MIN@GB 11/01/2020 01:00PM ET,MIN,0.0


In [5]:
# Postponed games cannot be part of a lineup, so keep only the rows whose
# 'Game Info' is anything other than the literal 'Postponed'.
df = df.loc[df['Game Info'].ne('Postponed')]
df.shape[0]

433

In [6]:
# Players removed by hand before optimising (matched on the 'Name' column).
exclude_list = [
    'Emmanuel Sanders',
    'Allen Lazard',
    'Tim Patrick',
    'Austin Ekeler',
    'Jamison Crowder',
    'Aaron Jones',
]
df = df.loc[~df['Name'].isin(exclude_list)]
df.shape[0]

427

In [7]:
# Pre-filtering here has the same effect as an extra constraint that only
# players at or above a minimum salary may be picked. DST rows are exempt
# from the floor because that slot is treated as a special category.
df = df.loc[df['Salary'].ge(4000) | df['Roster Position'].eq('DST')]
df.shape[0]

244

### Create the constraint problem

Goal: maximize the total AvgPointsPerGame of the selected lineup, subject to:

 - TotalPlayers = 9
 - TotalSalary <= 50000
 - TotalPosition_WR = 3
 - TotalPosition_RB = 2
 - TotalPosition_TE = 1
 - TotalPosition_QB = 1
 - TotalPosition_FLEX = 1
 - TotalPosition_DST = 1
 - Each player in only one position (relevant only for FLEX)
 

In [8]:
# The lineup problem is a maximisation; constraints and the objective are
# attached to this object in the cells that follow.
prob = pulp.LpProblem(name='DK_NFL_weekly', sense=pulp.LpMaximize)

In [9]:
# One binary decision variable per remaining player, named after the player's
# ID and kept in the same order as the rows of df so that player_vars[i]
# corresponds to df.iloc[i].
player_vars = [
    pulp.LpVariable(f'player_{player_id}', cat='Binary')
    for player_id in df['ID']
]

In [10]:
# Roster size: exactly 9 of the binary player variables must be switched on.
prob += pulp.lpSum(player_vars) == 9

In [11]:
# position constraints
# Each roster slot is enforced by counting the selected players whose
# 'Roster Position' makes them eligible for that slot.
def get_position_sum(player_vars, df, position):
    """Build the linear expression counting selected players eligible for `position`.

    Args:
        player_vars: decision variables aligned by order with the rows of `df`
            (``player_vars[i]`` belongs to ``df.iloc[i]``).
        df: player table with a 'Roster Position' column holding
            slash-separated tags such as ``'RB/FLEX'`` or ``'QB'``.
        position: the tag to count, e.g. ``'QB'``, ``'RB'``, ``'DST'``.

    Returns:
        The ``pulp.lpSum`` of the variables whose row carries `position` as
        one of its tags; it evaluates to the number of such players selected.
    """
    # Fetch the column once instead of re-indexing the frame for every row.
    roster_tags = df['Roster Position']
    # Match whole tags rather than substrings so a position name can never
    # accidentally match inside a longer, unrelated tag.
    return pulp.lpSum(
        player_var
        for player_var, tags in zip(player_vars, roster_tags)
        if position in tags.split('/')
    )
    
# Single-slot positions: exactly one quarterback and exactly one defence.
for exact_position in ('QB', 'DST'):
    prob += get_position_sum(player_vars, df, exact_position) == 1

# RB, WR and TE are the three FLEX-eligible roles, so their counts are lower
# bounds only: the FLEX slot is filled by one extra player from any of them.
for flex_position, minimum_count in (('RB', 2), ('WR', 3), ('TE', 1)):
    prob += get_position_sum(player_vars, df, flex_position) >= minimum_count

In [12]:
# Salary cap: the salaries of the selected players may not add up to more than 50000.
prob += pulp.lpSum(
    salary * player_var
    for salary, player_var in zip(df['Salary'], player_vars)
) <= 50000

In [13]:
# Objective: the summed AvgPointsPerGame of the selected players. The problem
# was created with LpMaximize, so this is the quantity being maximised.
prob += pulp.lpSum(
    [
        avg_points * player_var
        for avg_points, player_var in zip(df['AvgPointsPerGame'], player_vars)
    ]
)

In [14]:
# Run the solver, then translate the numeric status stored on the problem
# into its human-readable label and show it.
prob.solve()
solve_status_label = pulp.LpStatus[prob.status]
print(solve_status_label)

Optimal


In [15]:
# For each player variable the solver switched on, print that player's row
# and accumulate the lineup's salary and projected points.
total_salary_used = 0
# NOTE: despite its name, this accumulates the lineup TOTAL of
# AvgPointsPerGame; divide by the number of selected players to get a mean.
mean_AvgPointsPerGame = 0
for i, player_var in enumerate(player_vars):
    solved_value = player_var.value()
    # Solvers report floats, so a selected binary variable may come back as
    # e.g. 0.9999999 rather than exactly 1; threshold at 0.5 instead of
    # testing equality. A value of None means the variable was never solved.
    if solved_value is not None and solved_value > 0.5:
        row = df.iloc[i]
        print(row['Roster Position'], row['Name'], row['TeamAbbrev'], row['Salary'], row['AvgPointsPerGame'])
        total_salary_used += row['Salary']
        mean_AvgPointsPerGame += row['AvgPointsPerGame']
total_salary_used, mean_AvgPointsPerGame

RB/FLEX Alvin Kamara NO 8200 28.9
QB Russell Wilson SEA 7800 32.55
WR/FLEX Tyler Lockett SEA 7100 24.53
RB/FLEX Melvin Gordon III DEN 5600 16.72
WR/FLEX Chase Claypool PIT 5200 15.4
WR/FLEX Corey Davis TEN 5100 15.02
TE/FLEX Robert Tonyan GB 4300 11.83
TE/FLEX Jonnu Smith TEN 4100 12.38
DST Dolphins  MIA 2400 8.0


(49800, 165.33)