# This notebook creates a dataset for min/max winning scores
- min win df score
- max win df score
- median team score
- 75th percentile team score
- mean team score
- number of slate games
- slate type (classic/showdown)
- mean df score for each player position
- mean df score of top 50% for each position for the season prior to game
- median df score for each player position
- median df score of top 50% for each position for the season prior to game

In [5]:
# Load all daily fantasy contest data
from datetime import date
import pandas as pd

# fanduel/draftkings/yahoo
SERVICE = 'fanduel'
SPORT = 'mlb'
# style is CLASSIC or SHOWDOWN 
STYLE = 'CLASSIC'
# GPP or DOUBLE_UP
CONTEST_TYPE = 'GPP'
MIN_DATE = date(2020, 1, 1)
MAX_DATE = date(2021, 1, 1)

def infer_contest_style(title):
    if SERVICE == 'draftkings':
        return 'SHOWDOWN' if 'Showdown' in title else 'CLASSIC'
    if SERVICE == 'fanduel':
        return 'SHOWDOWN' if '@' in title else 'CLASSIC'
    raise NotImplementedError(f"Could not infer contest style for {SERVICE=} {title=}")    
    
def infer_contest_type(title):
    if SERVICE == 'draftkings':
        return 'DOUBLE_UP' if 'Double Up' in title else 'GPP'
    if SERVICE == 'fanduel':
        return 'DOUBLE_UP' if title.startswith('50/50') else 'GPP'
    raise NotImplementedError(f"Could not infer contest type for {SERVICE=} {title=}")
    

contest_df = pd.read_csv(f"{SERVICE}.contest.csv", parse_dates=['date']) \
               .query('sport == @SPORT and @MIN_DATE <= date < @MAX_DATE') \
               [['contest_id', 'date', 'title', 'top_score', 'last_winning_score']]
contest_df.date = contest_df.date.dt.normalize()

# add style and type
contest_df['style'] = contest_df.title.map(infer_contest_style)
contest_df['type'] = contest_df.title.map(infer_contest_type)
queries = []
if STYLE is not None:
    queries.append('style == @STYLE')
if CONTEST_TYPE is not None:
    queries.append('type == @CONTEST_TYPE')
if len(queries) > 0:
    contest_df = contest_df.query(' and '.join(queries))
display(contest_df)

Unnamed: 0,contest_id,date,title,top_score,last_winning_score,style,type
32,https://www.fanduel.com/entry/AOWHWHFXQ,2020-10-21,$1K Wed MLB Pinch Hit ($0.25 to Enter),160.95,79.4,CLASSIC,GPP
36,https://www.fanduel.com/entry/EMMHHXEXB,2020-10-17,$4K Sat MLB Pinch Hit ($0.25 to Enter),103.0,60.7,CLASSIC,GPP
41,https://www.fanduel.com/entry/FGGOMMUCN,2020-09-12,$5K Sat MLB Wiffle Ball (Only $0.05 to Enter!),260.6,125.4,CLASSIC,GPP
43,https://www.fanduel.com/entry/ENNYPADEU,2020-09-04,$10K Fri MLB Pinch Hit (Only $0.25 to Enter),260.1,159.0,CLASSIC,GPP
47,https://www.fanduel.com/entry/CSMMGODCC,2020-09-03,$11K Thu MLB Pinch Hit (Only $0.25 to Enter),235.0,134.7,CLASSIC,GPP
49,https://www.fanduel.com/entry/EQHPFBOGW,2020-09-02,$8K Wed MLB Wiffle Ball,276.6,139.8,CLASSIC,GPP
52,https://www.fanduel.com/entry/CIKKGKDCJ,2020-09-01,$7K Tue MLB Wiffle Ball (150 Entries Max),370.9,180.6,CLASSIC,GPP
55,https://www.fanduel.com/entry/DHNWCUVRM,2020-08-31,$400 Prospect Weekly FDP Loyalty Free Play (4 ...,249.8,174.6,CLASSIC,GPP
56,https://www.fanduel.com/entry/DOMQGXJRA,2020-08-31,$1K Mon MLB Wiffle Ball (150 Entries Max),111.55,42.9,CLASSIC,GPP
57,https://www.fanduel.com/entry/EGRHZULZU,2020-08-31,$15K Mon MLB Pinch Hit (Only $0.25 to Enter),261.7,130.1,CLASSIC,GPP


In [8]:
draft_df = pd.read_csv(f"{SERVICE}.draft.csv", parse_dates=['date']) \
             .query('sport == @SPORT and @MIN_DATE <= date < @MAX_DATE')
# draft_df['title'] = draft_df.contest.map(lambda contest: contest.split('-', 3)[3])
draft_df['service'] = draft_df.contest.map(lambda contest: contest.split('-', 1)[0])
if SERVICE == 'fanduel':
    service_filter = 'fd'
elif SERVICE == 'draftkings':
    service_filter = 'dk'
elif SERVICE == 'yahoo':
    service_filter = 'y'
else:
    raise NotImplementedError()
draft_df = draft_df.query('service == @service_filter')[['position', 'name', 'team_abbr', 'contest_id']]
display(draft_df)

Unnamed: 0,position,name,team_abbr,contest_id
1175,MVP - 2X Points,Randy Arozarena,TAM,https://www.fanduel.com/entry/AOWHWHFXQ
1176,STAR - 1.5X Points,Mookie Betts,LOS,https://www.fanduel.com/entry/AOWHWHFXQ
1177,UTIL,Ji-Man Choi,TAM,https://www.fanduel.com/entry/AOWHWHFXQ
1178,UTIL,Max Muncy,LOS,https://www.fanduel.com/entry/AOWHWHFXQ
1179,UTIL,Willy Adames,TAM,https://www.fanduel.com/entry/AOWHWHFXQ
...,...,...,...,...
4238,SS,David Fletcher,LAA,https://www.fanduel.com/entry/CAJTUCPCP
4239,OF,Eloy Jiménez,CWS,https://www.fanduel.com/entry/CAJTUCPCP
4240,OF,Brian Goodwin,LAA,https://www.fanduel.com/entry/CAJTUCPCP
4241,OF,Trent Grisham,SDP,https://www.fanduel.com/entry/CAJTUCPCP


In [10]:
from fantasy_py import FANTASY_SERVICE_DOMAIN, lineup, util

service_cls = util.CLSRegistry.get_class(FANTASY_SERVICE_DOMAIN, SERVICE)
abbr_remaps = service_cls.get_team_abbr_remapping(SPORT)

def fix_team_abbr(abbr) -> str:
    """ make team abbreviations consistent with DB """
    return abbr_remaps.get(abbr) or abbr

# add team/lineup draft data
team_contest_df = pd.merge(contest_df, draft_df, on='contest_id')

team_contest_df.team_abbr = team_contest_df.team_abbr.map(fix_team_abbr)
display(f"{len(team_contest_df.contest_id.unique())} contests")
display(team_contest_df)

'18 contests'

Unnamed: 0,contest_id,date,title,top_score,last_winning_score,style,type,position,name,team_abbr
0,https://www.fanduel.com/entry/AOWHWHFXQ,2020-10-21,$1K Wed MLB Pinch Hit ($0.25 to Enter),160.95,79.4,CLASSIC,GPP,MVP - 2X Points,Randy Arozarena,TB
1,https://www.fanduel.com/entry/AOWHWHFXQ,2020-10-21,$1K Wed MLB Pinch Hit ($0.25 to Enter),160.95,79.4,CLASSIC,GPP,STAR - 1.5X Points,Mookie Betts,LAD
2,https://www.fanduel.com/entry/AOWHWHFXQ,2020-10-21,$1K Wed MLB Pinch Hit ($0.25 to Enter),160.95,79.4,CLASSIC,GPP,UTIL,Ji-Man Choi,TB
3,https://www.fanduel.com/entry/AOWHWHFXQ,2020-10-21,$1K Wed MLB Pinch Hit ($0.25 to Enter),160.95,79.4,CLASSIC,GPP,UTIL,Max Muncy,LAD
4,https://www.fanduel.com/entry/AOWHWHFXQ,2020-10-21,$1K Wed MLB Pinch Hit ($0.25 to Enter),160.95,79.4,CLASSIC,GPP,UTIL,Willy Adames,TB
...,...,...,...,...,...,...,...,...,...,...
145,https://www.fanduel.com/entry/BTKZUEEOT,2020-08-07,$17K Fri MLB Pinch Hit (Only $0.25 to Enter),275.90,114.8,CLASSIC,GPP,SS,David Fletcher,LAA
146,https://www.fanduel.com/entry/BTKZUEEOT,2020-08-07,$17K Fri MLB Pinch Hit (Only $0.25 to Enter),275.90,114.8,CLASSIC,GPP,OF,Charlie Blackmon,COL
147,https://www.fanduel.com/entry/BTKZUEEOT,2020-08-07,$17K Fri MLB Pinch Hit (Only $0.25 to Enter),275.90,114.8,CLASSIC,GPP,OF,Trent Grisham,SD
148,https://www.fanduel.com/entry/BTKZUEEOT,2020-08-07,$17K Fri MLB Pinch Hit (Only $0.25 to Enter),275.90,114.8,CLASSIC,GPP,OF,Brian Goodwin,LAA


In [16]:
import os

# group contests together and create team sets used in each contest
def common_title(titles):
    return os.path.commonprefix(titles.tolist())

teams_contest_df = pd.DataFrame(
    team_contest_df.groupby(
        ['contest_id', 'date', 'style', 'type']
    ).agg(
        {'team_abbr': set,
         'title': common_title,
         'top_score': lambda score: score.mean(),
         'last_winning_score': lambda score: score.mean()}
    )
).reset_index()
teams_contest_df = teams_contest_df.rename(columns={'team_abbr': 'teams'})
teams_contest_df['draft_team_count'] = teams_contest_df.teams.map(len)

display(f"{len(teams_contest_df)} team sets")
display(teams_contest_df)

'18 team sets'

Unnamed: 0,contest_id,date,style,type,teams,title,top_score,last_winning_score,draft_team_count
0,https://www.fanduel.com/entry/AMHDEMBQX,2020-08-23,CLASSIC,GPP,"{CHW, LAA, BOS, CLE, SF}",$6K Sun MLB Wiffle Ball (150 Entries Max),281.7,123.1,5
1,https://www.fanduel.com/entry/AOWHWHFXQ,2020-10-21,CLASSIC,GPP,"{TB, LAD}",$1K Wed MLB Pinch Hit ($0.25 to Enter),160.95,79.4,2
2,https://www.fanduel.com/entry/BNAGTNUIJ,2020-08-29,CLASSIC,GPP,"{SF, WAS, MIL, SEA}",$10K Sat MLB Pinch Hit (Only $0.25 to Enter),247.3,118.3,4
3,https://www.fanduel.com/entry/BRVIRZZNN,2020-08-18,CLASSIC,GPP,"{NYY, PHI, MIN, CLE, TOR}",$15K Tue MLB Pinch Hit (Only $0.25 to Enter),297.3,187.1,5
4,https://www.fanduel.com/entry/BTKZUEEOT,2020-08-07,CLASSIC,GPP,"{MIN, COL, PIT, LAA, SD, TOR}",$17K Fri MLB Pinch Hit (Only $0.25 to Enter),275.9,114.8,6
5,https://www.fanduel.com/entry/BXEJFRQYM,2020-08-16,CLASSIC,GPP,"{PHI, WAS, COL, MIA, SEA, NYM}",$15K Sun MLB Pinch Hit (Only $0.25 to Enter),252.3,137.2,6
6,https://www.fanduel.com/entry/CIKKGKDCJ,2020-09-01,CLASSIC,GPP,"{DET, KC, PHI, LAD, HOU, TB}",$7K Tue MLB Wiffle Ball (150 Entries Max),370.9,180.6,6
7,https://www.fanduel.com/entry/CPWYTKTIG,2020-08-10,CLASSIC,GPP,"{OAK, BOS, HOU, MIL, SF}",$18K Mon MLB Pinch Hit (Only $0.25 to Enter),316.4,162.0,5
8,https://www.fanduel.com/entry/CSMMGODCC,2020-09-03,CLASSIC,GPP,"{LAA, BOS, TOR, LAD}",$11K Thu MLB Pinch Hit (Only $0.25 to Enter),235.0,134.7,4
9,https://www.fanduel.com/entry/CVTYTKFPT,2020-08-21,CLASSIC,GPP,"{CIN, BOS, PHI, CLE}",$14K Fri MLB Pinch Hit (Only $0.25 to Enter),266.7,148.1,4


In [17]:
# load fantasy data
import sqlite3
import pandas as pd

db_filename = "/home/delano/Google Drive/fantasy/mlb/mlb_2020.db"

conn = sqlite3.connect(db_filename)
sql = f"""
select distinct daily_fantasy_slate.id as slate_id, date, 
    daily_fantasy_slate.name as slate_name, style as contest_style, abbr
from daily_fantasy_slate 
    join daily_fantasy_cost on daily_fantasy_slate.id = daily_fantasy_cost.daily_fantasy_slate_id
    join team on team_id = team.id
where service = '{SERVICE}' and '{MIN_DATE}' <= date <= '{MAX_DATE}' and style = '{STYLE}'
"""
print(sql)
db_df = pd.read_sql_query(sql, conn, parse_dates=['date'])
with pd.option_context('max_rows', 100):
    display(db_df)

# get team sets
slate_df = pd.DataFrame(
    db_df.groupby(
        ['slate_id', 'date', 'slate_name', 'contest_style']
    ).agg(
        {'abbr': set}
    )
).reset_index()
slate_df = slate_df.set_index('date').rename(columns={'abbr': 'teams'})
with pd.option_context('max_rows', 100):
    display(slate_df)


select distinct daily_fantasy_slate.id as slate_id, date, 
    daily_fantasy_slate.name as slate_name, style as contest_style, abbr
from daily_fantasy_slate 
    join daily_fantasy_cost on daily_fantasy_slate.id = daily_fantasy_cost.daily_fantasy_slate_id
    join team on team_id = team.id
where service = 'fanduel' and '2020-01-01' <= date <= '2021-01-01' and style = 'CLASSIC'



Unnamed: 0,slate_id,date,slate_name,contest_style,abbr
0,3847,2020-07-23,Main,CLASSIC,SF
1,3847,2020-07-23,Main,CLASSIC,LAD
2,3847,2020-07-23,Main,CLASSIC,BAL
3,3847,2020-07-23,Main,CLASSIC,BOS
4,3847,2020-07-23,Main,CLASSIC,CHC
...,...,...,...,...,...
3111,7639,2020-10-16,Main,CLASSIC,TB
3112,7654,2020-10-17,Main,CLASSIC,ATL
3113,7654,2020-10-17,Main,CLASSIC,LAD
3114,7654,2020-10-17,Main,CLASSIC,HOU


Unnamed: 0_level_0,slate_id,slate_name,contest_style,teams
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-07-23,3847,Main,CLASSIC,"{BAL, ARI, SEA, CHC, BOS, CLE, LAD, MIL, MIN, ..."
2020-07-24,3850,Main,CLASSIC,"{BAL, ARI, SEA, CHC, BOS, CLE, LAD, MIL, MIN, ..."
2020-07-25,3851,Main,CLASSIC,"{BAL, SEA, CHC, BOS, CLE, LAD, MIL, MIN, PHI, ..."
2020-07-25,3852,All Day,CLASSIC,"{BAL, WAS, ARI, SEA, BOS, CHC, CLE, LAD, MIL, ..."
2020-07-26,3855,Main,CLASSIC,"{BAL, WAS, ARI, SEA, CHC, BOS, CLE, MIL, MIN, ..."
...,...,...,...,...
2020-10-08,7490,After Hours,CLASSIC,"{NYY, TB, SD, LAD}"
2020-10-14,7590,Main,CLASSIC,"{HOU, ATL, TB, LAD}"
2020-10-15,7636,Main,CLASSIC,"{HOU, ATL, TB, LAD}"
2020-10-16,7639,Main,CLASSIC,"{HOU, ATL, TB, LAD}"


In [18]:
import numpy as np
from typing import Optional


def get_slate_id(contest_row) -> Optional[int]:
    """ 
    guesses the db slate id contest_row
    """
    try:
        date_slates = slate_df.loc[[contest_row.date]]
    except KeyError as ke:
        print(f"Key error finding slates for {contest_row.date}")
        return None
    try:
        slates = date_slates.query("@contest_row.teams <= teams")
    except Exception as e:
        print(f"Unhandled exception querying for teams date {contest_row.date}")
        # display(date_slates)
        raise
        
    slates_found = len(slates)
    if slates_found == 0:
        print(f"On {contest_row.date} the {len(date_slates)} db slates don't match contest teams {contest_row.teams}. "
              "DB slate team sets were:")
        with pd.option_context('max_colwidth', None):
            display(date_slates[['slate_name', 'teams']])
        return None
    if slates_found > 1:
        # display(slates)
        slates = slates.head(1)
        print(f"{slates_found} slates matched contest {contest_row.date} '{contest_row.title}'. "
              f"Using '{slates.iloc[0].slate_name}'")
    return slates.iloc[0].slate_id
    
# teams_contest_df['slate_id'] = slate_ids
# display(teams_contest_df)
# display(team_contest_df.date)
# display(slate_df.index)
# key_ = pd.to_datetime(team_contest_df.loc[0].date)
# key_ = pd.to_datetime('2020-08-23')
# key_ = datetime(2020, 7, 23)
# print(f"{key_=}")
# display(slate_df.loc[key_])
slate_ids = teams_contest_df.apply(get_slate_id, axis=1)
teams_contest_df['slate_id'] = slate_ids
teams_contest_df.slate_id = teams_contest_df.slate_id.astype('Int64')
display(teams_contest_df)

2 slates matched contest 2020-08-23 00:00:00 '$6K Sun MLB Wiffle Ball (150 Entries Max)'. Using 'Main'
Key error finding slates for 2020-10-21 00:00:00
2 slates matched contest 2020-08-29 00:00:00 '$10K Sat MLB Pinch Hit (Only $0.25 to Enter)'. Using 'Main'
2 slates matched contest 2020-08-18 00:00:00 '$15K Tue MLB Pinch Hit (Only $0.25 to Enter)'. Using 'Main'
2 slates matched contest 2020-08-07 00:00:00 '$17K Fri MLB Pinch Hit (Only $0.25 to Enter)'. Using 'Main'
2 slates matched contest 2020-08-16 00:00:00 '$15K Sun MLB Pinch Hit (Only $0.25 to Enter)'. Using 'Main'
2 slates matched contest 2020-09-01 00:00:00 '$7K Tue MLB Wiffle Ball (150 Entries Max)'. Using 'Main'
2 slates matched contest 2020-08-10 00:00:00 '$18K Mon MLB Pinch Hit (Only $0.25 to Enter)'. Using 'Main'
2 slates matched contest 2020-09-03 00:00:00 '$11K Thu MLB Pinch Hit (Only $0.25 to Enter)'. Using 'Main'
2 slates matched contest 2020-08-21 00:00:00 '$14K Fri MLB Pinch Hit (Only $0.25 to Enter)'. Using 'Main'
2 s

Unnamed: 0,contest_id,date,style,type,teams,title,top_score,last_winning_score,draft_team_count,slate_id
0,https://www.fanduel.com/entry/AMHDEMBQX,2020-08-23,CLASSIC,GPP,"{CHW, LAA, BOS, CLE, SF}",$6K Sun MLB Wiffle Ball (150 Entries Max),281.7,123.1,5,5036.0
1,https://www.fanduel.com/entry/AOWHWHFXQ,2020-10-21,CLASSIC,GPP,"{TB, LAD}",$1K Wed MLB Pinch Hit ($0.25 to Enter),160.95,79.4,2,
2,https://www.fanduel.com/entry/BNAGTNUIJ,2020-08-29,CLASSIC,GPP,"{SF, WAS, MIL, SEA}",$10K Sat MLB Pinch Hit (Only $0.25 to Enter),247.3,118.3,4,5481.0
3,https://www.fanduel.com/entry/BRVIRZZNN,2020-08-18,CLASSIC,GPP,"{NYY, PHI, MIN, CLE, TOR}",$15K Tue MLB Pinch Hit (Only $0.25 to Enter),297.3,187.1,5,4620.0
4,https://www.fanduel.com/entry/BTKZUEEOT,2020-08-07,CLASSIC,GPP,"{MIN, COL, PIT, LAA, SD, TOR}",$17K Fri MLB Pinch Hit (Only $0.25 to Enter),275.9,114.8,6,3906.0
5,https://www.fanduel.com/entry/BXEJFRQYM,2020-08-16,CLASSIC,GPP,"{PHI, WAS, COL, MIA, SEA, NYM}",$15K Sun MLB Pinch Hit (Only $0.25 to Enter),252.3,137.2,6,4526.0
6,https://www.fanduel.com/entry/CIKKGKDCJ,2020-09-01,CLASSIC,GPP,"{DET, KC, PHI, LAD, HOU, TB}",$7K Tue MLB Wiffle Ball (150 Entries Max),370.9,180.6,6,5725.0
7,https://www.fanduel.com/entry/CPWYTKTIG,2020-08-10,CLASSIC,GPP,"{OAK, BOS, HOU, MIL, SF}",$18K Mon MLB Pinch Hit (Only $0.25 to Enter),316.4,162.0,5,3921.0
8,https://www.fanduel.com/entry/CSMMGODCC,2020-09-03,CLASSIC,GPP,"{LAA, BOS, TOR, LAD}",$11K Thu MLB Pinch Hit (Only $0.25 to Enter),235.0,134.7,4,5824.0
9,https://www.fanduel.com/entry/CVTYTKFPT,2020-08-21,CLASSIC,GPP,"{CIN, BOS, PHI, CLE}",$14K Fri MLB Pinch Hit (Only $0.25 to Enter),266.7,148.1,4,4877.0
