# NFL DFS

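This notebook is an overview of NFL daily fantasy sports (DFS) data from the 2015 and 2016 seasons: it loads DraftKings salaries and points from a database, builds the top lineups for each week, and summarizes how those lineups break down by position.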

## Setting up the R Environment

In [1]:
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)
library(reshape2)

# see appendix for more details about this library
library(ffopt)

## Setting up the Database Connection

See the Appendix for additional details.

In [5]:
# Attach RPostgreSQL before opening the connection: dbcon() (see Appendix)
# asks for the 'PostgreSQL' driver. library() stops with an error right here
# if the package is missing, whereas require() would only return FALSE and
# let the failure surface later inside dbcon().
library(RPostgreSQL)
con <- dbcon()

## DFS Data: 2015 + 2016 seasons

In [6]:
# Pull the DFS rows through the open connection using dataset.dfs()
# (defined in the Appendix), then inspect the column types.
dfs <- dataset.dfs(con)
str(dfs)

'data.frame':	10094 obs. of  12 variables:
 $ gsis_id    : chr  "2015091302" "2015091301" "2015091305" "2015091309" ...
 $ season_year: int  2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
 $ week       : int  1 1 1 1 1 1 1 1 1 1 ...
 $ player_name: chr  "Carolina Defense" "Seattle Defense" "New York J Defense" "Denver Defense" ...
 $ player_pos : chr  "DST" "DST" "DST" "DST" ...
 $ team_code  : chr  "CAR" "SEA" "NYJ" "DEN" ...
 $ opp        : chr  "JAC" "STL" "CLE" "BAL" ...
 $ dk_salary  : int  3100 3400 2900 2800 2700 2800 3000 2800 2800 2800 ...
 $ dk_points  : num  21 19 17 16 16 15 15 14 14 13 ...
 $ ou         : num  41 42 40 46 46 40 46 42 42 46 ...
 $ imptot     : num  22 23 22 25 21 18.5 25 19 20 21 ...
 $ opp_imptot : num  19 19 18 21 25 21.5 21 23 22 25 ...


In [None]:
# Smoke-test the optimizer on a single slate (2016, week 1).
# Lineups() is presumably provided by ffopt -- confirm; the rename supplies
# the column names (position, fppg, salary) that the later cells also use.
Lineups(
  dfs %>%
    filter(season_year == 2016, week == 1) %>%
    rename(position = player_pos, fppg = dk_points, salary = dk_salary),
  n = 2,
  max.points = 200
)

In [65]:
# Build one Lineups() result per (season, week) slate:
# 2 seasons x 17 weeks = 34 list elements in total.
opt_seasons <- c(2015, 2016)
opt_weeks <- seq_len(17)

# The column rename does not depend on the slate, so do it once up front
# instead of repeating it on every iteration.
dfs_opt <- dfs %>%
  rename(position = player_pos, fppg = dk_points, salary = dk_salary)

# Preallocate from the loop bounds so the list length cannot drift out of
# sync with the seasons/weeks being iterated.
myls <- vector("list", length = length(opt_seasons) * length(opt_weeks))
i <- 1
for (y in opt_seasons) {
  for (w in opt_weeks) {
    # Restrict each slate to rows with fppg > 10 before optimizing.
    myls[[i]] <- Lineups(
      dfs_opt %>% filter(season_year == y, week == w, fppg > 10),
      n = 100
    )
    i <- i + 1
  }
}

In [66]:
# Stack the per-slate results in myls into a single data frame, drop the
# columns not needed for the summaries below, and order the rows by
# season, week and lineup.
optdf <- do.call(rbind.data.frame, myls) %>%
  select(-c(gsis_id, sim.id, ou, imptot, opp_imptot)) %>%
  arrange(season_year, week, lineup.id)

In [78]:
# Count how many players each lineup uses at each position.
optdf %>%
  group_by(
    season_year,
    week,
    lineup.id,
    position
  ) %>%
  summarise(n = n())

season_year,week,lineup.id,position,n
2015,1,1,QB,1
2015,1,1,RB,2
2015,1,1,WR,3
2015,1,1,TE,2
2015,1,1,DST,1
2015,1,2,QB,1
2015,1,2,RB,2
2015,1,2,WR,3
2015,1,2,TE,2
2015,1,2,DST,1


In [67]:
# Average score by position across the lineups in optdf.
# Step 1: within each lineup, count the players at a position and take
#         their mean fppg (rounded to 2 decimals).
# Step 2: average those per-lineup figures by season and position.
optdf %>%
  group_by(season_year, week, lineup.id, position) %>%
  summarise(
    n = n(),
    pts = round(mean(fppg), digits = 2)
  ) %>%
  group_by(season_year, position) %>%
  summarise(
    pts = mean(pts),
    n = mean(n)
  )

season_year,position,pts,n
2015,QB,34.61582,1.0
2015,RB,31.88566,2.656471
2015,WR,35.31019,3.0
2015,TE,29.50135,1.343529
2015,DST,22.20941,1.0
2016,QB,32.94442,1.0
2016,RB,32.99702,2.715882
2016,WR,32.61875,3.0
2016,TE,27.36874,1.284118
2016,DST,20.66294,1.0


# Appendix

## Database Setup

In [3]:
# Open a PostgreSQL connection using settings read from an R config script.
#
# conf: path to an R script that is expected to define nfldb.database,
#       nfldb.username and nfldb.password (the names read below).
# Returns the connection object produced by dbConnect().
dbcon <- function(conf = "~/.rconfig") {
  # local = TRUE evaluates the config in this function's frame rather than
  # the global environment, so the credentials (including the password) are
  # not left behind in the user's workspace after the call.
  source(conf, local = TRUE)
  drv <- dbDriver("PostgreSQL")
  dbConnect(
    drv,
    dbname = nfldb.database,
    user = nfldb.username,
    password = nfldb.password
  )
}

In [4]:
# Fetch rows of the vw_dfs view from the given season onward.
#
# connxn: an open database connection, passed straight to dbGetQuery().
# seas:   earliest season_year to include (default 2014). Must be a single
#         finite number, or a string that parses as one.
# Returns whatever dbGetQuery() returns for the query.
dataset.dfs <- function(connxn, seas = 2014) {
  # seas is spliced into the SQL text, so insist that it is exactly one
  # number. This rejects vectors (which would build several queries) and
  # arbitrary strings (which would be injected into the statement).
  seas_num <- suppressWarnings(as.numeric(seas))
  if (length(seas_num) != 1L || !is.finite(seas_num)) {
    stop("`seas` must be a single finite number (a season year)",
         call. = FALSE)
  }

  # scientific = FALSE keeps large values from being rendered as e.g. 1e+05.
  q <- paste0(
    "SELECT * FROM vw_dfs WHERE season_year >= ",
    format(seas_num, scientific = FALSE)
  )
  dbGetQuery(connxn, q)
}