# Read in data from datagolf

In [2]:
# Load the DraftKings projections export. Text columns are kept as character
# vectors rather than being converted to factors.
df <- read.csv(
    file = 'Data/draftkings_main_projections_euro.csv',
    stringsAsFactors = FALSE
)
# Preview the first rows.
head(df)

datagolf_name,dk_name,sample_size,dk_id,dk_salary,std_dev,tee_time,early_late_wave,scoring_points,finish_points,manual_bump,total_points,value,max_exposure,projected_ownership,lineup_rule
<chr>,<chr>,<int>,<int>,<int>,<dbl>,<chr>,<int>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<int>,<chr>,<lgl>
"Fleetwood, Tommy",Tommy Fleetwood,150,15307221,11400,35.896,8:05 am,1,90.3,9.5,0,99.8,0.86,100,undefined,
"Coetzee, George",George Coetzee,150,15307222,10900,35.444,8:25 am,1,81.1,6.3,0,87.4,0.07,100,undefined,
"Li, Haotong",Hao-Tong Li,150,15307223,10700,34.703,8:05 am,1,73.0,4.2,0,77.2,-0.36,100,undefined,
"Fox, Ryan",Ryan Fox,150,15307224,10400,34.907,12:25 pm,0,74.7,4.7,0,79.4,-0.18,100,undefined,
"Scrivener, Jason",Jason Scrivener,150,15307225,10000,34.784,2:50 pm,0,74.1,4.6,0,78.7,-0.09,100,undefined,
"Kinhult, Marcus",Marcus Kinhult,150,15307226,9800,35.145,11:05 am,1,72.0,4.2,0,76.1,-0.15,100,undefined,


In [3]:
# Restrict the table to the DraftKings name, id, salary and projected total
# points; every other column is dropped.
df <- df[, c('dk_name', 'dk_id', 'dk_salary', 'total_points')]
head(df)

dk_name,dk_id,dk_salary,total_points
<chr>,<int>,<int>,<dbl>
Tommy Fleetwood,15307221,11400,99.8
George Coetzee,15307222,10900,87.4
Hao-Tong Li,15307223,10700,77.2
Ryan Fox,15307224,10400,79.4
Jason Scrivener,15307225,10000,78.7
Marcus Kinhult,15307226,9800,76.1


In [4]:
# Derived columns.
# name_id: label of the form "<dk_name> (<dk_id>)".
df$name_id <- paste0(df$dk_name, " (", df$dk_id, ")")
# ones: integer column holding 1 for every player.
df$ones <- 1L
head(df)

dk_name,dk_id,dk_salary,total_points,name_id,ones
<chr>,<int>,<int>,<dbl>,<chr>,<int>
Tommy Fleetwood,15307221,11400,99.8,Tommy Fleetwood (15307221),1
George Coetzee,15307222,10900,87.4,George Coetzee (15307222),1
Hao-Tong Li,15307223,10700,77.2,Hao-Tong Li (15307223),1
Ryan Fox,15307224,10400,79.4,Ryan Fox (15307224),1
Jason Scrivener,15307225,10000,78.7,Jason Scrivener (15307225),1
Marcus Kinhult,15307226,9800,76.1,Marcus Kinhult (15307226),1


# Optimize by total_points (the ownership-adjusted points_adj objective is currently commented out)

Ref: https://towardsdatascience.com/integer-programming-in-r-33ee6f48a3c8

In [5]:
library('lpSolve')
library(dplyr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [6]:
# Preview the salary and ones columns as a numeric matrix, transposed so each
# column of the result corresponds to one player.
t(data.matrix(df[, c('dk_salary', 'ones')]))

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
dk_salary,11400,10900,10700,10400,10000,9800,9600,9500,9200,9100,...,6100,6100,6000,6000,6000,6000,6000,6000,6000,6000
ones,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [7]:
# Find the single lineup with the highest projected points as a 0/1 integer
# program: maximise total_points subject to the salary and roster-size rows.

# Set coefficients of the objective function
f.obj <- df$total_points

# Set matrix corresponding to coefficients of constraints by rows
#f*dk_salary <= 50000
#f*ones == 6
f.con <- t(data.matrix(df[c('dk_salary','ones')]))

# Set inequality/equality signs
f.dir <- c("<=",
           "==")

# Set right hand side coefficients
f.rhs <- c(50000,
           6)

# Solve. all.bin = TRUE already declares every decision variable binary, so
# the former int.vec = 1:4 argument added nothing and has been dropped.
lp_result <- lp("max", f.obj, f.con, f.dir, f.rhs, all.bin = TRUE)

# lpSolve reports status 0 on success. For any other status $solution is not a
# valid lineup, so fail loudly instead of silently storing it.
if (lp_result$status != 0) {
    stop("lp() did not find a feasible lineup (status ", lp_result$status, ")",
         call. = FALSE)
}

# Variables final values; rounded so the == 1 comparison below is exact even
# if the solver returns a value that is only numerically close to 0 or 1.
opt_team <- round(lp_result$solution)
df['opt_team'] <- opt_team
filter(df,opt_team==1)

dk_name,dk_id,dk_salary,total_points,name_id,ones,opt_team
<chr>,<int>,<int>,<dbl>,<chr>,<int>,<dbl>
Tommy Fleetwood,15307221,11400,99.8,Tommy Fleetwood (15307221),1,1
Paul Waring,15307231,9000,85.8,Paul Waring (15307231),1,1
Sebastian Heisele,15307245,7800,76.1,Sebastian Heisele (15307245),1,1
Antoine Rozner,15307249,7600,74.7,Antoine Rozner (15307249),1,1
Jack Senior,15307252,7500,73.6,Jack Senior (15307252),1,1
Julien Guerrier,15307295,6700,67.0,Julien Guerrier (15307295),1,1


In [8]:
# Pull out the rows flagged by the optimiser and list the player names.
df_opt <- df %>% filter(opt_team == 1)
print('Team 1:')
print(paste0(df_opt$dk_name))

[1] "Team 1:"
[1] "Tommy Fleetwood"   "Paul Waring"       "Sebastian Heisele"
[4] "Antoine Rozner"    "Jack Senior"       "Julien Guerrier"  


In [9]:
#Find top N teams
# Repeatedly solves the lineup integer program. After each solve the selected
# players are removed from the pool, so successive lineups share no players.
# Results are stacked into df_teams (name_id, dk_name, dk_salary, Exposure),
# where Exposure drops by exposure_gap for each later lineup.
own_pen <- 0.2
#df['points_adj'] = df$total_points - own_pen*df$projected_ownership
num_teams <- 7
df_now <- df
to_remove <- c("Thorbjorn Olesen",
               "Joost Luiten",
               "Paul Waring")
#to_remove <- c()
# Drop all excluded players with one vectorised filter (no-op when empty)
df_now <- filter(df_now, !dk_name %in% to_remove)
#Max sal
max_sal <- 9900
df_now <- subset(df_now,dk_salary<=max_sal)
base_exposure <- 50
exposure_gap <- 5

#Identify lock
lock <- c()
num_left <- 6
sal_left <- 50000
sal_used <- 0
lock_full_names <- c()
if (length(lock) > 0) {
    df_lock <- subset(df_now,dk_name %in% lock)
    # A lock that was filtered out above (or misspelt) cannot be honoured
    missing_lock <- setdiff(lock, df_lock$dk_name)
    if (length(missing_lock) > 0) {
        warning("Locked players not found in the pool: ",
                paste(missing_lock, collapse = ", "), call. = FALSE)
    }
    df_now <- subset(df_now,! dk_name %in% lock)
    sal_used <- sum(df_lock$dk_salary)
    # Count the rows actually locked, not the names requested, so that
    # num_left stays consistent with lock_full_names
    num_lock <- nrow(df_lock)
    sal_left <- 50000 - sal_used
    num_left <- 6-num_lock
    lock_full_names <- df_lock$name_id
}

# The constraint directions and right-hand sides do not change between solves
# Set inequality/equality signs
f.dir <- c("<=",
           "==")

# Set right hand side coefficients
f.rhs <- c(sal_left,
           num_left)

# Collect each lineup here and bind once after the loop
team_list <- list()
for (i in seq_len(num_teams)) {
    # Not enough players left to fill a roster
    if (nrow(df_now) < num_left) {
        message("Player pool exhausted; stopping after ", i - 1, " team(s).")
        break
    }

    # Set coefficients of the objective function
    #f.obj <- df_now$points_adj
    f.obj <- df_now$total_points

    # Set matrix corresponding to coefficients of constraints by rows
    #f*dk_salary <= 50000
    #f*ones == 6
    #f.con <- t(data.matrix(df_now[c('dk_salary','ones','projected_ownership')]))
    f.con <- t(data.matrix(df_now[c('dk_salary','ones')]))

    # Solve; all.bin = TRUE already makes every variable binary, so the
    # former int.vec = 1:4 argument was redundant
    lp_result <- lp("max", f.obj, f.con, f.dir, f.rhs, all.bin = TRUE)
    # lpSolve reports status 0 on success; otherwise $solution is not a lineup
    if (lp_result$status != 0) {
        message("No feasible lineup for team ", i, " (lp status ",
                lp_result$status, "); stopping after ", i - 1, " team(s).")
        break
    }

    # Variables final values, rounded so the ==1 / ==0 filters are exact
    opt_team <- round(lp_result$solution)
    df_now['opt_team'] <- opt_team
    df_opt <- filter(df_now,opt_team==1)
    opt_sal <- sum(df_opt$dk_salary)

    #Print current best team
    print(paste('Team ',i,' (',opt_sal/1000,'):',sep=""))
    print(paste(df_opt$dk_name))

    #Record best team; team 1 gets base_exposure, each later team exposure_gap less
    df_teams_new <- df_opt[c('name_id','dk_name','dk_salary')]
    df_teams_new['Exposure'] <- base_exposure - (i-1)*exposure_gap
    team_list[[length(team_list) + 1]] <- df_teams_new

    #Remove current opt team
    df_now <- filter(df_now,opt_team==0)
}

if (length(team_list) == 0) {
    stop("No feasible lineup could be built from the current player pool",
         call. = FALSE)
}
df_teams <- do.call(rbind, team_list)

[1] "Team 1 (49.3):"
[1] "Jordan L. Smith"   "Benjamin Hebert"   "Lorenzo Gagli"    
[4] "Chris Paisley"     "Sebastian Heisele" "Antoine Rozner"   
[1] "Team 2 (50):"
[1] "Marcus Kinhult" "John Catlin"    "Wade Ormsby"    "Matthew Jordan"
[5] "Wil Besseling"  "Jack Senior"   
[1] "Team 3 (50):"
[1] "Jorge Campillo"   "Guido Migliozzi"  "Ross Fisher"      "Johannes Veerman"
[5] "Steven Brown"     "Calum Hill"      
[1] "Team 4 (45.7):"
[1] "Jamie Donaldson" "Alexander Bjork" "Matthieu Pavon"  "Kalle Samooja"  
[5] "Scott Vincent"   "Jens Fahrbring" 
[1] "Team 5 (45.4):"
[1] "Wilco Nienaber"     "Robert Rock"        "Maximilian Kieffer"
[4] "Joakim Lagergren"   "Cormac Sharvin"     "Richard Bland"     
[1] "Team 6 (44.8):"
[1] "Brandon Stone"       "Pablo Larrazabal"    "Laurie Canter"      
[4] "Sebastian Soderberg" "Matthew Southgate"   "Julien Guerrier"    
[1] "Team 7 (43.4):"
[1] "Justin Walters"  "Marcus Armitage" "Oliver Fisher"   "Adrien Saddier" 
[5] "Wu Ashun"        "Nino Ber

# Just keep top n teams and sort by salary


In [13]:
# Number of lineups whose players stay in the pool.
teams_to_keep <- 2
# Lowest Exposure value that is still kept.
low_exposure <- base_exposure - exposure_gap * (teams_to_keep - 1)
df_teams <- df_teams %>% filter(Exposure >= low_exposure)
# Highest salary first.
df_teams <- df_teams[order(-df_teams[['dk_salary']]), ]
df_teams

Unnamed: 0_level_0,name_id,dk_name,dk_salary,Exposure
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>
7,Marcus Kinhult (15307226),Marcus Kinhult,9800,45
1,Paul Waring (15307231),Paul Waring,9000,50
8,Jordan L. Smith (15307232),Jordan L. Smith,8900,45
2,Benjamin Hebert (15307235),Benjamin Hebert,8600,50
3,Lorenzo Gagli (15307236),Lorenzo Gagli,8500,50
9,Wade Ormsby (15307239),Wade Ormsby,8200,45
10,Johannes Veerman (15307241),Johannes Veerman,8000,45
4,Chris Paisley (15307244),Chris Paisley,7900,50
5,Sebastian Heisele (15307245),Sebastian Heisele,7800,50
6,Antoine Rozner (15307249),Antoine Rozner,7600,50


In [14]:
# Extra players to put in the pool by hand.
more_names <- c("Sebastian Soderberg",
                "Matthew Jordan",
                "Scott Vincent",
                "Tommy Fleetwood")
#more_names <- c()
if (length(more_names) > 0) {
    # Skip anyone who is already in df_teams
    names_to_add <- setdiff(more_names, df_teams$dk_name)
    df_more <- df[df$dk_name %in% names_to_add,
                  c('name_id', 'dk_name', 'dk_salary')]
    # Hand-picked players sit 5 below the lowest kept exposure
    df_more['Exposure'] <- low_exposure - 5
    head(df_more)
}

Unnamed: 0_level_0,name_id,dk_name,dk_salary,Exposure
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>
1,Tommy Fleetwood (15307221),Tommy Fleetwood,11400,40
20,Matthew Jordan (15307240),Matthew Jordan,8100,40
33,Sebastian Soderberg (15307254),Sebastian Soderberg,7500,40
47,Scott Vincent (15307268),Scott Vincent,7200,40


In [15]:
# Append the hand-picked players (if any) and restore the salary ordering.
if (length(more_names) > 0) {
    df_teams <- rbind(df_teams, df_more)
    df_teams <- df_teams[order(-df_teams[['dk_salary']]), ]
}

# Rescale Exposure so the column sums to 1 (used as sampling weights)
df_teams$Exposure <- prop.table(df_teams$Exposure)
df_teams

Unnamed: 0_level_0,name_id,dk_name,dk_salary,Exposure
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>
13,Tommy Fleetwood (15307221),Tommy Fleetwood,11400,0.05479452
7,Marcus Kinhult (15307226),Marcus Kinhult,9800,0.06164384
1,Paul Waring (15307231),Paul Waring,9000,0.06849315
8,Jordan L. Smith (15307232),Jordan L. Smith,8900,0.06164384
2,Benjamin Hebert (15307235),Benjamin Hebert,8600,0.06849315
3,Lorenzo Gagli (15307236),Lorenzo Gagli,8500,0.06849315
9,Wade Ormsby (15307239),Wade Ormsby,8200,0.06164384
20,Matthew Jordan (15307240),Matthew Jordan,8100,0.05479452
10,Johannes Veerman (15307241),Johannes Veerman,8000,0.06164384
4,Chris Paisley (15307244),Chris Paisley,7900,0.06849315


# Generate lineups

In [16]:
# One-row character data frame with columns G1..G6, every cell holding the
# placeholder string 'test'.
col_names <- paste0('G', 1:6)
df_lineups <- do.call(
    data.frame,
    c(as.list(setNames(rep('test', length(col_names)), col_names)),
      stringsAsFactors = FALSE)
)
df_lineups

G1,G2,G3,G4,G5,G6
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
test,test,test,test,test,test


In [17]:
# Draw one trial lineup: six distinct name_id values, weighted by Exposure.
sample(
    x = df_teams$name_id,
    size = 6,
    replace = FALSE,
    prob = df_teams$Exposure
)

In [20]:
# Randomly draw lineups from df_teams (weighted by Exposure) and keep those
# whose total salary falls inside [min_sal, max_sal], until num_teams lineups
# have been accepted. Accepted lineups are appended to df_lineups below the
# initial placeholder row.
num_teams <- 52
n <- 0
min_sal <- 49500 - sal_used
max_sal <- 50000 - sal_used
max_own <- 70
# Upper bound on random draws so an unreachable salary window cannot hang
max_attempts <- 1e6

#Reset df_teams
df_teams['Actual'] <- 0

#Reset df_lineups
col_names <- c('G1','G2','G3','G4','G5','G6')
df_lineups <- data.frame('test','test','test','test','test','test',stringsAsFactors=FALSE)
names(df_lineups) <- col_names

# Without a projected_ownership column, sum() of the missing column is 0 and
# the max_own cap can never reject a lineup; say so instead of failing silently.
has_ownership <- 'projected_ownership' %in% names(df_teams)
if (!has_ownership) {
    message("df_teams has no projected_ownership column; max_own is not enforced.")
}

# Accepted lineups are stored here and bound to df_lineups once at the end
accepted <- vector("list", num_teams)
attempts <- 0
while (n < num_teams && attempts < max_attempts) {
    attempts <- attempts + 1
    #Pick random team
    curr_team <- sample(df_teams$name_id,size=num_left,replace=FALSE,prob = df_teams$Exposure)
    df_now <- subset(df_teams,name_id %in% curr_team)
    #Get team in order of descending salary
    df_now <- df_now[order(-df_now$dk_salary),]
    curr_team <- df_now$name_id
    curr_sal <- sum(df_now$dk_salary)
    curr_own <- if (has_ownership) sum(df_now$projected_ownership) else 0
    # Scalar conditions, so use the short-circuit && operator
    if (curr_sal >= min_sal && curr_sal <= max_sal && curr_own <= max_own) {
        n <- n+1
        # Locked players go first, followed by the sampled players
        accepted[[n]] <- c(lock_full_names,curr_team)
        if (n%%10 == 0) {
            print(n)
        }
        #Update df_teams
    }
}

if (n < num_teams) {
    warning("Only ", n, " of ", num_teams, " lineups found after ", attempts,
            " attempts; check the salary window and player pool.",
            call. = FALSE)
}

if (n > 0) {
    # One row per accepted lineup, using the same columns as the placeholder
    new_rows <- as.data.frame(do.call(rbind, accepted[seq_len(n)]),
                              stringsAsFactors = FALSE)
    names(new_rows) <- col_names
    df_lineups <- rbind(df_lineups, new_rows)
}
head(df_lineups)

[1] 10
[1] 20
[1] 30
[1] 40
[1] 50


G1,G2,G3,G4,G5,G6
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
test,test,test,test,test,test
Marcus Kinhult (15307226),Benjamin Hebert (15307235),Wade Ormsby (15307239),Johannes Veerman (15307241),Wil Besseling (15307250),Jack Senior (15307252)
Marcus Kinhult (15307226),Paul Waring (15307231),Matthew Jordan (15307240),Johannes Veerman (15307241),Antoine Rozner (15307249),Jack Senior (15307252)
Paul Waring (15307231),Jordan L. Smith (15307232),Benjamin Hebert (15307235),Johannes Veerman (15307241),Sebastian Heisele (15307245),Sebastian Soderberg (15307254)
Marcus Kinhult (15307226),Lorenzo Gagli (15307236),Wade Ormsby (15307239),Johannes Veerman (15307241),Antoine Rozner (15307249),Jack Senior (15307252)
Marcus Kinhult (15307226),Paul Waring (15307231),Lorenzo Gagli (15307236),Wil Besseling (15307250),Jack Senior (15307252),Sebastian Soderberg (15307254)


In [21]:
# Save df_lineups to disk. write.csv's defaults apply, so the row names are
# written as the first column.
write.csv(x = df_lineups, file = 'Lineups/DKEntries_euro.csv')