DraftKings NFL Constraint Satisfaction
===

See also [this blog post](https://levelup.gitconnected.com/dfs-lineup-optimizer-with-python-296e822a5309).

In [107]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import os
import sys
from datetime import datetime

In [3]:
import pulp

### Load in the weekly data

In [94]:
# read the weekly DraftKings salary export; the path is relative to the working directory
df = pd.read_csv('DKSalaries.csv')
# number of player rows loaded
df.shape[0]

470

In [95]:
# preview a few rows; the fixed seed makes the preview identical on every Restart & Run All
df.sample(n=5, random_state=0)

Unnamed: 0,Position,Name + ID,Name,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame
96,QB,Jarrett Stidham (15560246),Jarrett Stidham,15560246,QB,4900,Postponed,NE,4.7
371,WR,Mohamed Sanu (15560712),Mohamed Sanu,15560712,WR/FLEX,3000,MIA@SF 10/11/2020 04:05PM ET,SF,0.63
74,WR,Michael Gallup (15560558),Michael Gallup,15560558,WR/FLEX,5400,NYG@DAL 10/11/2020 04:25PM ET,DAL,12.38
40,RB,Jonathan Taylor (15560306),Jonathan Taylor,15560306,RB/FLEX,6200,IND@CLE 10/11/2020 04:25PM ET,IND,14.75
149,WR,N'Keal Harry (15560618),N'Keal Harry,15560618,WR/FLEX,4300,Postponed,NE,9.95


In [96]:
# players whose game is postponed can't be included in a lineup, so drop them
game_is_scheduled = df['Game Info'].ne('Postponed')
df = df[game_is_scheduled]
df.shape[0]

391

In [97]:
# pre-filtering on salary acts like an extra constraint: only players costing at least 4000 are eligible
# DST rows are exempt from the salary floor, since that's kind of a special category
meets_salary_floor = df['Salary'] >= 4000
is_dst = df['Roster Position'] == 'DST'
df = df[meets_salary_floor | is_dst]
df.shape[0]

132

### Create the constraint problem

Goal: maximize the lineup's total AvgPointsPerGame, subject to these constraints:

 - TotalPlayers = 10
 - TotalSalary <= 50000
 - TotalPosition_WR = 3
 - TotalPosition_RB = 3
 - TotalPosition_TE = 1
 - TotalPosition_QB = 1
 - TotalPosition_FLEX = 1
 - TotalPosition_DST = 1
 - Each player in only one position (relevant only for FLEX)
 

In [98]:
# the lineup optimisation is a maximisation problem
prob = pulp.LpProblem(name='DK_NFL_weekly', sense=pulp.LpMaximize)

In [99]:
# one binary decision variable per remaining player, in the same order as the rows of df
player_vars = [
    pulp.LpVariable(f'player_{player_id}', cat='Binary')
    for player_id in df['ID']
]

In [100]:
# roster size: exactly 10 players must be selected in total
# NOTE(review): DraftKings' classic NFL roster may be 9 slots (2 RB) — confirm the intended roster size
prob += pulp.lpSum(player_vars) == 10

In [101]:
# position constraints
# TODO confirm these constraints are complete: an earlier note here said they made
# the problem infeasible, but the solve below reports an 'Optimal' solution
def get_position_sum(player_vars, df, position):
    """Build the LP expression counting selected players eligible for `position`.

    Args:
        player_vars: decision variables, one per row of `df` and in the same order.
        df: player table with a 'Roster Position' column holding '/'-separated
            slot names (e.g. 'RB/FLEX').
        position: slot name to count, e.g. 'QB' or 'RB'.

    Returns:
        The `pulp.lpSum` of the variables whose player lists `position` as one
        of their roster slots.
    """
    # Match whole slot names rather than substrings, so a position can never
    # accidentally match part of a different slot's name.
    eligible_vars = [
        player_var
        for player_var, roster_position in zip(player_vars, df['Roster Position'])
        if position in roster_position.split('/')
    ]
    return pulp.lpSum(eligible_vars)
    
# positions that cannot fill the FLEX slot are pinned to exactly one selection
for fixed_position in ('QB', 'DST'):
    prob += get_position_sum(player_vars, df, fixed_position) == 1

# the three FLEX-eligible roles only get a lower bound, which leaves room for
# the extra FLEX selection to come from any of them
for flex_position, minimum_count in (('RB', 3), ('WR', 3), ('TE', 1)):
    prob += get_position_sum(player_vars, df, flex_position) >= minimum_count

In [102]:
# salary cap: the combined salary of the selected players may not exceed 50000
prob += pulp.lpSum(
    salary * player_var
    for salary, player_var in zip(df['Salary'], player_vars)
) <= 50000

In [103]:
# objective: total AvgPointsPerGame over the selected players
# (an expression added without a comparison becomes the problem's objective)
prob += pulp.lpSum(
    avg_points * player_var
    for avg_points, player_var in zip(df['AvgPointsPerGame'], player_vars)
)

In [104]:
# solve with PuLP's default solver; the cell displays the integer status code that solve() returns
prob.solve()

1

In [105]:
# translate the numeric solver status into its human-readable label
pulp.LpStatus[prob.status]

'Optimal'

In [106]:
# Print the chosen lineup, then show the total salary spent and the mean
# AvgPointsPerGame across the selected players.
total_salary_used = 0
total_points = 0.0
n_selected = 0
for i, player_var in enumerate(player_vars):
    value = player_var.value()
    # Solvers report binary variables as floats (e.g. 0.9999999) and leave
    # unsolved variables as None, so threshold at 0.5 instead of testing `== 1`.
    if value is None or value < 0.5:
        continue
    row = df.iloc[i]
    print(row['Roster Position'], row['Name'], row['TeamAbbrev'], row['Salary'], row['AvgPointsPerGame'])
    total_salary_used += row['Salary']
    total_points += row['AvgPointsPerGame']
    n_selected += 1
# average over the players actually selected rather than a hard-coded roster size
mean_AvgPointsPerGame = total_points / n_selected if n_selected else 0.0
total_salary_used, mean_AvgPointsPerGame

TE/FLEX George Kittle SF 6600 26.2
RB/FLEX Raheem Mostert SF 6100 21.9
WR/FLEX Jamison Crowder NYJ 5800 23.95
QB Ryan Fitzpatrick MIA 5600 21.82
RB/FLEX Antonio Gibson WAS 5000 13.85
RB/FLEX Myles Gaskin MIA 4800 12.12
WR/FLEX Keelan Cole JAX 4700 12.6
WR/FLEX Laviska Shenault Jr. JAX 4500 11.6
WR/FLEX Sammy Watkins KC 4500 11.02
DST Dolphins  MIA 2400 5.25


(50000, 16.031)