# Read in data from datagolf

In [1]:
df <- read.csv('Data/draftkings_main_projections.csv',stringsAsFactors=FALSE)
head(df)

datagolf_name,dk_name,sample_size,dk_id,dk_salary,std_dev,tee_time,early_late_wave,scoring_points,finish_points,manual_bump,total_points,value,max_exposure,projected_ownership,lineup_rule
<chr>,<chr>,<int>,<int>,<int>,<dbl>,<chr>,<int>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<int>,<dbl>,<lgl>
"Zalatoris, Will",Will Zalatoris,150,15458783,10900,33.244,,0,80.4,8.0,0,88.4,1.0,100,4.166667,
"Conners, Corey",Corey Conners,150,15458784,10600,33.013,,0,74.5,6.1,0,80.6,0.65,100,4.166667,
"Hughes, Mackenzie",Mackenzie Hughes,150,15458785,10300,32.797,,0,71.7,5.2,0,76.9,0.51,100,4.166667,
"Burns, Sam",Sam Burns,150,15458786,10100,33.126,,0,75.4,6.3,0,81.7,0.8,100,4.166667,
"Long, Adam",Adam Long,150,15458787,9900,32.296,,0,67.7,4.3,0,72.0,0.33,100,4.166667,
"Grillo, Emiliano",Emiliano Grillo,150,15458788,9800,32.488,,0,68.3,4.6,0,72.9,0.4,100,4.166667,


In [2]:
#Only keep what we care about
df <- df[c('dk_name','dk_id','dk_salary','total_points',
           'projected_ownership','early_late_wave')]
head(df)

dk_name,dk_id,dk_salary,total_points,projected_ownership,early_late_wave
<chr>,<int>,<int>,<dbl>,<dbl>,<int>
Will Zalatoris,15458783,10900,88.4,4.166667,0
Corey Conners,15458784,10600,80.6,4.166667,0
Mackenzie Hughes,15458785,10300,76.9,4.166667,0
Sam Burns,15458786,10100,81.7,4.166667,0
Adam Long,15458787,9900,72.0,4.166667,0
Emiliano Grillo,15458788,9800,72.9,4.166667,0


In [3]:
#Add addl columns
df['name_id'] = paste(df$dk_name," (",df$dk_id,")",sep="")
own_pen = 0.4
df['points_adj'] = df$total_points - own_pen*df$projected_ownership
#Add column of ones
df['ones'] <- 1
df$ones <- as.integer(df$ones)
head(df)

dk_name,dk_id,dk_salary,total_points,projected_ownership,early_late_wave,name_id,points_adj,ones
<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<chr>,<dbl>,<int>
Will Zalatoris,15458783,10900,88.4,4.166667,0,Will Zalatoris (15458783),86.73333,1
Corey Conners,15458784,10600,80.6,4.166667,0,Corey Conners (15458784),78.93333,1
Mackenzie Hughes,15458785,10300,76.9,4.166667,0,Mackenzie Hughes (15458785),75.23333,1
Sam Burns,15458786,10100,81.7,4.166667,0,Sam Burns (15458786),80.03333,1
Adam Long,15458787,9900,72.0,4.166667,0,Adam Long (15458787),70.33333,1
Emiliano Grillo,15458788,9800,72.9,4.166667,0,Emiliano Grillo (15458788),71.23333,1


# Optimize by points_adj

Ref: https://towardsdatascience.com/integer-programming-in-r-33ee6f48a3c8

In [4]:
library('lpSolve')
library(dplyr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [5]:
t(data.matrix(df[c('dk_salary','ones')]))

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
dk_salary,10900,10600,10300,10100,9900,9800,9700,9600,9500,9400,...,6000,6000,6000,6000,6000,6000,6000,6000,6000,6000
ones,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [6]:
# Set coefficients of the objective function
f.obj <- df$points_adj

# Set matrix corresponding to coefficients of constraints by rows
#f*dk_salary <= 50000
#f*ones == 6
f.con <- t(data.matrix(df[c('dk_salary','ones')]))

# Set unequality/equality signs
f.dir <- c("<=",
           "==")

# Set right hand side coefficients
f.rhs <- c(50000,
           6)

# # Variables final values
opt_team <- lp("max", f.obj, f.con, f.dir, f.rhs, int.vec = 1:4, all.bin = TRUE)$solution
df['opt_team'] <- opt_team
filter(df,opt_team==1)

dk_name,dk_id,dk_salary,total_points,projected_ownership,early_late_wave,name_id,points_adj,ones,opt_team
<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<chr>,<dbl>,<int>,<dbl>
Will Zalatoris,15458783,10900,88.4,4.166667,0,Will Zalatoris (15458783),86.73333,1,1
Luke List,15458799,8700,77.6,4.166667,0,Luke List (15458799),75.93333,1,1
Adam Schenk,15458801,8500,74.7,4.166667,0,Adam Schenk (15458801),73.03333,1,1
Jhonattan Vegas,15458811,7700,74.6,4.166667,0,Jhonattan Vegas (15458811),72.93333,1,1
Keith Mitchell,15458814,7600,72.9,4.166667,0,Keith Mitchell (15458814),71.23333,1,1
Grayson Murray,15458877,6400,69.4,4.166667,0,Grayson Murray (15458877),67.73333,1,1


In [7]:
#Grab just the team
df_opt <- filter(df,opt_team==1)
print('Team 1:')
print(paste(df_opt$dk_name))

[1] "Team 1:"
[1] "Will Zalatoris"  "Luke List"       "Adam Schenk"     "Jhonattan Vegas"
[5] "Keith Mitchell"  "Grayson Murray" 


In [11]:
#Find top N teams
own_pen = 1.2
df['points_adj'] = df$total_points - own_pen*df$projected_ownership
num_teams = 11
df_now <- df
to_remove <- c('Aaron Wise',
               'Lucas Glover',
              'Grayson Murray',
              "Jhonattan Vegas",
              "Graham DeLaet")
#to_remove <- c()
for (n in to_remove) {
    df_now <- filter(df_now,dk_name!=n) 
}
#Max sal
max_sal <- 99900
min_sal <- 5000
df_now <- subset(df_now,dk_salary<=max_sal)
df_now <- subset(df_now,dk_salary>=min_sal)
base_exposure <- 50
exposure_gap <- 4

#wave split
#df_now <- subset(df_now,early_late_wave==1)

#Identify lock
lock <- c()
lock <- c()
num_left <- 6
sal_left <- 50000
sal_used <- 0
lock_full_names <- c()
if (length(lock) > 0) {
    df_lock <- subset(df_now,dk_name %in% lock)
    df_now <- subset(df_now,! dk_name %in% lock)
    sal_used <- sum(df_lock$dk_salary) 
    num_lock <- length(lock)
    sal_left <- 50000 - sal_used
    num_left <- 6-num_lock
    print(sal_left)
    lock_full_names <- df_lock$name_id
} 

for (i in seq(1:num_teams)) {
    # Set coefficients of the objective function
    f.obj <- df_now$points_adj
    #f.obj <- df_now$total_points

    # Set matrix corresponding to coefficients of constraints by rows
    #f*dk_salary <= 50000
    #f*ones == 6
    #f.con <- t(data.matrix(df_now[c('dk_salary','ones','projected_ownership')]))
    f.con <- t(data.matrix(df_now[c('dk_salary','ones',
                                   'projected_ownership')]))
    
    # Set unequality/equality signs
    f.dir <- c("<=",
               "==",
              "<=")
    
    # Set right hand side coefficients
    f.rhs <- c(sal_left,
               num_left,
              665)

    # # Variables final values
    #opt_val <- lp("max", f.obj, f.con, f.dir, f.rhs, int.vec = 1:4, all.bin = TRUE)$objective
    opt_team <- lp("max", f.obj, f.con, f.dir, f.rhs, int.vec = 1:4, all.bin = TRUE)$solution
    df_now['opt_team'] <- opt_team
    df_opt <- filter(df_now,opt_team==1)
    
    #Print current best team
    print(paste('Team ',i,':',sep=""))
    print(paste(df_opt$dk_name))
    
    #Add best team to df_teams
    if (i==1) {
        df_teams <- df_opt[c('name_id','dk_name','dk_salary','projected_ownership')]
        df_teams['Exposure'] <- base_exposure
    } else {
        df_teams_new <- df_opt[c('name_id','dk_name','dk_salary','projected_ownership')]
        df_teams_new['Exposure'] <- base_exposure - (i-1)*exposure_gap
        df_teams <- rbind(df_teams,df_teams_new)
    }
    
    #Remove current opt team
    df_now <- filter(df_now,opt_team==0)
}


[1] "Team 1:"
[1] "Will Zalatoris"  "Luke List"       "Patrick Rodgers" "Will Gordon"    
[5] "Keith Mitchell"  "Beau Hossler"   
[1] "Team 2:"
[1] "Corey Conners" "Sam Burns"     "Adam Schenk"   "Kurt Kitayama"
[5] "Brandon Hagy"  "Martin Laird" 
[1] "Team 3:"
[1] "Mackenzie Hughes"     "Thomas Detry"         "Matthias Schwab"     
[4] "Matt Jones"           "Joseph Bramlett"      "Kiradech Aphibarnrat"
[1] "Team 4:"
[1] "Charles Howell III" "Sepp Straka"        "Kristoffer Ventura"
[4] "Matthew NeSmith"    "Kevin Chappell"     "Ryan Brehm"        
[1] "Team 5:"
[1] "Emiliano Grillo" "Denny McCarthy"  "Branden Grace"   "Xinjun Zhang"   
[5] "Kevin Tway"      "Sean O'Hair"    
[1] "Team 6:"
[1] "Adam Long"       "Pat Perez"       "Charley Hoffman" "Kyle Stanley"   
[5] "Bronson Burgoon" "Vaughn Taylor"  
[1] "Team 7:"
[1] "Henrik Stenson"   "Henrik Norlander" "Brice Garnett"    "James Hahn"      
[5] "Seamus Power"     "Chris Kirk"      
[1] "Team 8:"
[1] "Bo Hoag"        "Doug Ghim"  

# Just keep top n teams and sort by salary


In [20]:
teams_to_keep = 7
low_exposure = base_exposure - (teams_to_keep-1)*exposure_gap
df_teams <- filter(df_teams,Exposure >= low_exposure)
df_teams <- df_teams[order(-df_teams$dk_salary),]
df_teams

Unnamed: 0_level_0,name_id,dk_name,dk_salary,projected_ownership,Exposure
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>,<dbl>
21,Hideki Matsuyama (15341693),Hideki Matsuyama,9000,16.07,30
25,Adam Scott (15341696),Adam Scott,8700,10.33,26
13,Tiger Woods (15341697),Tiger Woods,8600,8.13,38
5,Rickie Fowler (15341700),Rickie Fowler,8300,8.09,46
1,Shane Lowry (15341710),Shane Lowry,7700,2.36,50
9,Kevin Kisner (15341712),Kevin Kisner,7600,4.78,42
2,Billy Horschel (15341716),Billy Horschel,7500,2.94,50
17,Sergio Garcia (15341714),Sergio Garcia,7500,4.56,34
26,Sungjae Im (15341715),Sungjae Im,7500,8.36,26
3,Joaquin Niemann (15341717),Joaquin Niemann,7400,2.26,50


In [21]:
#Override some exposures
# df_teams$Exposure[2] = 110
# df_teams$Exposure[3] = 110
df_teams

Unnamed: 0_level_0,name_id,dk_name,dk_salary,projected_ownership,Exposure
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>,<dbl>
21,Hideki Matsuyama (15341693),Hideki Matsuyama,9000,16.07,30
25,Adam Scott (15341696),Adam Scott,8700,10.33,26
13,Tiger Woods (15341697),Tiger Woods,8600,8.13,38
5,Rickie Fowler (15341700),Rickie Fowler,8300,8.09,46
1,Shane Lowry (15341710),Shane Lowry,7700,2.36,50
9,Kevin Kisner (15341712),Kevin Kisner,7600,4.78,42
2,Billy Horschel (15341716),Billy Horschel,7500,2.94,50
17,Sergio Garcia (15341714),Sergio Garcia,7500,4.56,34
26,Sungjae Im (15341715),Sungjae Im,7500,8.36,26
3,Joaquin Niemann (15341717),Joaquin Niemann,7400,2.26,50


In [23]:
#Add some more names
more_names <- c("J.J. Spaun")
#more_names <- c()
if (length(more_names)>0) {
    names_to_add <- setdiff(more_names,df_teams$dk_name)
    df_more <- subset(df,dk_name %in% names_to_add)[c('name_id','dk_name','dk_salary','projected_ownership')]
    df_more['Exposure'] = low_exposure - 5
    df_more
}


Unnamed: 0_level_0,name_id,dk_name,dk_salary,projected_ownership,Exposure
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>,<dbl>
89,Connor Syme (15341770),Connor Syme,6400,0.09,21
98,Sami Valimaki (15341783),Sami Valimaki,6300,0.09,21


In [55]:
if (length(more_names)>0) {   
    df_teams <- rbind(df_teams,df_more)
    df_teams <- df_teams[order(-df_teams$dk_salary),]
    #replace exposure with probabilities
}
df_teams$Exposure <- df_teams$Exposure / sum(df_teams$Exposure)
df_teams

Unnamed: 0_level_0,name_id,dk_name,dk_salary,projected_ownership,Exposure
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>,<dbl>
17,Hideki Matsuyama (15341693),Hideki Matsuyama,9000,16.07,0.03195489
21,Adam Scott (15341696),Adam Scott,8700,10.33,0.02819549
9,Tiger Woods (15341697),Tiger Woods,8600,8.13,0.03947368
5,Rickie Fowler (15341700),Rickie Fowler,8300,8.09,0.04323308
25,Viktor Hovland (15341703),Viktor Hovland,8000,10.87,0.02443609
1,Shane Lowry (15341710),Shane Lowry,7700,2.36,0.04699248
10,Kevin Kisner (15341712),Kevin Kisner,7600,4.78,0.03947368
2,Billy Horschel (15341716),Billy Horschel,7500,2.94,0.04699248
13,Sergio Garcia (15341714),Sergio Garcia,7500,4.56,0.03571429
22,Sungjae Im (15341715),Sungjae Im,7500,8.36,0.02819549


# Generate lineups

In [56]:
col_names = c('G1','G2','G3','G4','G5','G6')
df_lineups = data.frame('test','test','test','test','test','test',stringsAsFactors=FALSE)
names(df_lineups) <- col_names
df_lineups

G1,G2,G3,G4,G5,G6
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
test,test,test,test,test,test


In [57]:
sample(df_teams$name_id,size=6,replace=FALSE,prob = df_teams$Exposure)

In [112]:
num_teams <- 77
n <- 0
min_sal <- 4900 - sal_used
max_sal <- 50000 - sal_used
max_own <- 200

#Reste df_teams
df_teams['Actual'] <- 0

#Reset df_lineups
col_names = c('G1','G2','G3','G4','G5','G6')
df_lineups = data.frame('test','test','test','test','test','test',stringsAsFactors=FALSE)
names(df_lineups) <- col_names

while(n<num_teams) {
    #Pick random team
    curr_team <- sample(df_teams$name_id,size=num_left,replace=FALSE,prob = df_teams$Exposure)
    df_now <- subset(df_teams,name_id %in% curr_team)
    #Get team in order of descending salary
    df_now <- df_now[order(-df_now$dk_salary),]
    curr_team <- df_now$name_id
    curr_sal <- sum(df_now$dk_salary)
    curr_own <- sum(df_now$projected_ownership)
    if (curr_sal>=min_sal & curr_sal<=max_sal & curr_own<=max_own) {
        curr_team <- c(lock_full_names,curr_team)
        df_lineups <- rbind(df_lineups,curr_team)
        n <- n+1
        if (n%%10 == 0) {
            print(n)
        }
        #Update df_teams
    }
}
head(df_lineups,10)

[1] 10
[1] 20
[1] 30
[1] 40
[1] 50
[1] 60
[1] 70
[1] 80
[1] 90
[1] 100
[1] 110
[1] 120
[1] 130
[1] 140
[1] 150


G1,G2,G3,G4,G5,G6
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
test,test,test,test,test,test
Webb Simpson (15246098),Collin Morikawa (15246099),Xander Schauffele (15246104),Scottie Scheffler (15246108),Ryan Palmer (15246120),Cameron Smith (15246123)
Justin Thomas (15246097),Collin Morikawa (15246099),Bryson DeChambeau (15246100),Kevin Kisner (15246112),Cameron Champ (15246121),Cameron Smith (15246123)
Jon Rahm (15246096),Justin Thomas (15246097),Kevin Kisner (15246112),Joaquin Niemann (15246114),Lanto Griffin (15246118),Ryan Palmer (15246120)
Collin Morikawa (15246099),Rory McIlroy (15246101),Daniel Berger (15246103),Scottie Scheffler (15246108),Joaquin Niemann (15246114),Cameron Champ (15246121)
Jon Rahm (15246096),Webb Simpson (15246098),Sungjae Im (15246110),Kevin Kisner (15246112),Joaquin Niemann (15246114),Sebastian Munoz (15246116)
Dustin Johnson (15246095),Collin Morikawa (15246099),Kevin Kisner (15246112),Viktor Hovland (15246113),Sebastian Munoz (15246116),Cameron Smith (15246123)
Justin Thomas (15246097),Webb Simpson (15246098),Brendon Todd (15246109),Tyrrell Hatton (15246111),Joaquin Niemann (15246114),Lanto Griffin (15246118)
Jon Rahm (15246096),Webb Simpson (15246098),Scottie Scheffler (15246108),Tyrrell Hatton (15246111),Billy Horschel (15246117),Kevin Na (15246119)
Jon Rahm (15246096),Daniel Berger (15246103),Scottie Scheffler (15246108),Sungjae Im (15246110),Tyrrell Hatton (15246111),Cameron Smith (15246123)


In [113]:
write.csv(df_lineups,'Lineups/DKEntries.csv')