# This notebook creates a dataset for min/max winning scores
- min win df score
- max win df score
- median team score
- 75th percentile team score
- mean team score
- number of slate games
- slate type (classic/showdown)
- mean df score for each player position
- mean df score of top 50% for each position for the season prior to game
- median df score for each player position
- median df score of top 50% for each position for the season prior to game

In [1]:
# Load all daily fantasy contest data
from datetime import date
import re
from typing import Optional

import pandas as pd

from fantasy_py import ContestStyle
from fantasy_py.lineup.strategy import GeneralPrizePool, FiftyFifty

# DB_FILENAME = "/home/delano/Google Drive/fantasy/nfl/nfl_2020.db"
# SPORT = 'nfl'
# MIN_DATE = date(2020, 8, 1)
# MAX_DATE = date(2021, 4, 1)
# SEASON = 2020

DB_FILENAME = "/home/delano/Google Drive/fantasy/mlb/mlb_2020.db"
SPORT = 'mlb'
MIN_DATE = date(2020, 1, 1)
MAX_DATE = date(2021, 1, 1)
SEASON = 2020

# DB_FILENAME = "/home/delano/Google Drive/fantasy/nba/nba_20192020.db"
# SPORT = 'nba'
# MIN_DATE = date(2019, 8, 1)
# MAX_DATE = date(2020, 8, 1)
# SEASON = 20192020

# DB_FILENAME = "/home/delano/Google Drive/fantasy/nba/nhl_20192020.db"
# SPORT = 'nhl'
# MIN_DATE = date(2020, 8, 1)
# MAX_DATE = date(2021, 4, 1)
# SEASON = 20192020

# fanduel/draftkings/yahoo
SERVICE = 'draftkings'
STYLE: Optional[ContestStyle] = ContestStyle.CLASSIC
# GPP or DOUBLE_UP
CONTEST_TYPE = GeneralPrizePool

def infer_contest_style(title) -> ContestStyle:
    if SERVICE == 'draftkings':
        if ('Showdown' in title or
            re.match('.*.{2,3} vs .{2,3}\)', title)):
           return ContestStyle.SHOWDOWN
        return ContestStyle.CLASSIC
    if SERVICE == 'fanduel':
        return ContestStyle.SHOWDOWN if '@' in title else ContestStyle.CLASSIC
    if SERVICE == 'yahoo':
        if (' Cup ' in title or 
            ' to 1st]' in title or 
            ' 50/50' in title or
            'QuickMatch vs ' in title or 
            'H2H vs ' in title or
            '-Team' in title or   # N-team contests are classic
            'Freeroll' in title or
            'Quadruple Up' in title or
            title.endswith('Guaranteed [No Management Fee]')):
           return ContestStyle.CLASSIC
    raise NotImplementedError(f"Could not infer contest style for {SERVICE=} {title=}")    
    
def infer_contest_type(title) -> str:
    if SERVICE == 'draftkings':
        if re.match('.* vs\. [^)]+$', title):
            return 'H2H'
        return FiftyFifty.NAME if 'Double Up' in title else GeneralPrizePool.NAME
    if SERVICE == 'fanduel':
        if 'Head-to-head' in title:
            return 'H2H'
        return FiftyFifty.NAME if title.startswith('50/50') else GeneralPrizePool.NAME
    if SERVICE == 'yahoo':
        if (' QuickMatch vs ' in title or 
            'H2H vs ' in title):
            return 'H2H'
        if ' 50/50' in title:
            return FiftyFifty.NAME
        if (' Cup ' in title or 
            ' to 1st]' in title or 
            'Freeroll' in title or
            'Quadruple Up' in title or
            title.endswith('-Team') or                        # multi-team games are GPP if not caught by 50/50
            title.endswith('Team Winner Takes All') or        # treat winner takes all like a gpp
            title.endswith('Guaranteed [No Management Fee]')):
           return GeneralPrizePool.NAME
    raise NotImplementedError(f"Could not infer contest type for {SERVICE=} {title=}")
    

contest_df = pd.read_csv(SERVICE + ".contest.csv", parse_dates=['date']) \
               .query('sport == @SPORT and @MIN_DATE <= date < @MAX_DATE') \
               [['contest_id', 'date', 'title', 'top_score', 'last_winning_score']]
contest_df.date = contest_df.date.dt.normalize()

# add style and type
contest_df['style'] = contest_df.title.map(infer_contest_style)
contest_df['type'] = contest_df.title.map(infer_contest_type)
queries = []
if STYLE is not None:
    print(f"Filtering for {STYLE=}")
    queries.append('style == @STYLE')
if CONTEST_TYPE is not None:
    print(f"Filtering for {CONTEST_TYPE=}")
    queries.append('type == @CONTEST_TYPE.NAME')
if len(queries) > 0:
    contest_df = contest_df.query(' and '.join(queries))

# with pd.option_context('max_rows', 1000, 'max_colwidth', 100):
display(contest_df.sort_values(['style', 'type']))

Filtering for STYLE=<ContestStyle.CLASSIC: 'classic'>
Filtering for CONTEST_TYPE=<class 'fantasy_py.lineup.strategy.bet_lineup.GeneralPrizePool'>


Unnamed: 0,contest_id,date,title,top_score,last_winning_score,style,type
103,94461340,2020-10-16,MLB $1K Quarter Jukebox [Just $0.25!],168.05,134.8,classic,GPP
145,91766081,2020-09-12,MLB $300 Dime Time [Just $0.10!] (9/10),200.3,149.7,classic,GPP
146,91766081,2020-09-12,MLB $300 Dime Time [Just $0.10!] (8/10),200.3,149.7,classic,GPP
147,91766081,2020-09-12,MLB $300 Dime Time [Just $0.10!] (7/10),200.3,149.7,classic,GPP
148,91766081,2020-09-12,MLB $300 Dime Time [Just $0.10!] (6/10),200.3,149.7,classic,GPP
149,91766081,2020-09-12,MLB $300 Dime Time [Just $0.10!] (5/10),200.3,149.7,classic,GPP
150,91766081,2020-09-12,MLB $300 Dime Time [Just $0.10!] (4/10),200.3,149.7,classic,GPP
151,91766081,2020-09-12,MLB $300 Dime Time [Just $0.10!] (3/10),200.3,149.7,classic,GPP
152,91766081,2020-09-12,MLB $300 Dime Time [Just $0.10!] (2/10),200.3,149.7,classic,GPP
153,91766081,2020-09-12,MLB $300 Dime Time [Just $0.10!] (10/10),200.3,149.7,classic,GPP


In [2]:
draft_df = pd.read_csv(SERVICE + ".draft.csv", parse_dates=['date']) \
             .query('sport == @SPORT and @MIN_DATE <= date < @MAX_DATE')
display(draft_df)
draft_df['service'] = draft_df.contest.map(lambda contest: contest.split('-', 1)[0])
if SERVICE == 'fanduel':
    SERVICE_ABBR = 'fd'
elif SERVICE == 'draftkings':
    SERVICE_ABBR = 'dk'
elif SERVICE == 'yahoo':
    SERVICE_ABBR = 'y'
else:
    raise NotImplementedError()
draft_df = draft_df.query('service == @SERVICE_ABBR and team_abbr.notnull()') \
    [['position', 'name', 'team_abbr', 'contest_id']]
display(draft_df)

Unnamed: 0,position,name,team_abbr,draft_pct,contest,date,sport,contest_id
2272,CPT,T. Gonsolin,LAD,6.7,dk-mlb-20201021-MLB Showdown $2.5K Quarter Juk...,2020-10-21 20:08:00,mlb,94886275
2273,,T. Gonsolin,LAD,19.9,dk-mlb-20201021-MLB Showdown $2.5K Quarter Juk...,2020-10-21 20:08:00,mlb,94886275
2274,CPT,B. Snell,TB,16.3,dk-mlb-20201021-MLB Showdown $2.5K Quarter Juk...,2020-10-21 20:08:00,mlb,94886275
2275,,B. Snell,TB,39.9,dk-mlb-20201021-MLB Showdown $2.5K Quarter Juk...,2020-10-21 20:08:00,mlb,94886275
2276,CPT,J. Turner,LAD,5.4,dk-mlb-20201021-MLB Showdown $2.5K Quarter Juk...,2020-10-21 20:08:00,mlb,94886275
...,...,...,...,...,...,...,...,...
15021,P,D. Duffy,KC,15.6,dk-mlb-20200803-MLB $6K Quarter Jukebox [Just ...,2020-08-03 19:05:00,mlb,89034802
15022,3B,K. Bryant,CHC,4.1,dk-mlb-20200803-MLB $6K Quarter Jukebox [Just ...,2020-08-03 19:05:00,mlb,89034802
15023,C,W. Contreras,CHC,6.5,dk-mlb-20200803-MLB $6K Quarter Jukebox [Just ...,2020-08-03 19:05:00,mlb,89034802
15024,SS,J. Crawford,SEA,8.3,dk-mlb-20200803-MLB $6K Quarter Jukebox [Just ...,2020-08-03 19:05:00,mlb,89034802


Unnamed: 0,position,name,team_abbr,contest_id
2272,CPT,T. Gonsolin,LAD,94886275
2273,,T. Gonsolin,LAD,94886275
2274,CPT,B. Snell,TB,94886275
2275,,B. Snell,TB,94886275
2276,CPT,J. Turner,LAD,94886275
...,...,...,...,...
15021,P,D. Duffy,KC,89034802
15022,3B,K. Bryant,CHC,89034802
15023,C,W. Contreras,CHC,89034802
15024,SS,J. Crawford,SEA,89034802


In [4]:
from fantasy_py import FANTASY_SERVICE_DOMAIN, lineup, util

service_cls = util.CLSRegistry.get_class(FANTASY_SERVICE_DOMAIN, SERVICE)
abbr_remaps = service_cls.get_team_abbr_remapping(SPORT)

def fix_team_abbr(abbr) -> str:
    """ make team abbreviations consistent with DB """
    assert isinstance(abbr, str), "expected all players to have a team abbr!"
    return abbr_remaps.get(abbr) or abbr

# add team/lineup draft data
team_contest_df = pd.merge(contest_df, draft_df, on='contest_id')

team_contest_df.team_abbr = team_contest_df.team_abbr.map(fix_team_abbr)
print(f"{len(team_contest_df.contest_id.unique())} contests")
display(team_contest_df)

20 contests


Unnamed: 0,contest_id,date,title,top_score,last_winning_score,style,type,position,name,team_abbr
0,94461340,2020-10-16,MLB $1K Quarter Jukebox [Just $0.25!],168.05,134.8,classic,GPP,P,F. Valdez,HOU
1,94461340,2020-10-16,MLB $1K Quarter Jukebox [Just $0.25!],168.05,134.8,classic,GPP,P,B. Snell,TB
2,94461340,2020-10-16,MLB $1K Quarter Jukebox [Just $0.25!],168.05,134.8,classic,GPP,C,T. d'Arnaud,ATL
3,94461340,2020-10-16,MLB $1K Quarter Jukebox [Just $0.25!],168.05,134.8,classic,GPP,1B,F. Freeman,ATL
4,94461340,2020-10-16,MLB $1K Quarter Jukebox [Just $0.25!],168.05,134.8,classic,GPP,2B,M. Brosseau,TB
...,...,...,...,...,...,...,...,...,...,...
9050,89034802,2020-08-03,MLB $6K Quarter Jukebox [Just $0.25!] (1/4),196.85,164.1,classic,GPP,P,D. Duffy,KC
9051,89034802,2020-08-03,MLB $6K Quarter Jukebox [Just $0.25!] (1/4),196.85,164.1,classic,GPP,3B,K. Bryant,CHC
9052,89034802,2020-08-03,MLB $6K Quarter Jukebox [Just $0.25!] (1/4),196.85,164.1,classic,GPP,C,W. Contreras,CHC
9053,89034802,2020-08-03,MLB $6K Quarter Jukebox [Just $0.25!] (1/4),196.85,164.1,classic,GPP,SS,J. Crawford,SEA


In [5]:
import os

# group contests together and create team sets used in each contest
def common_title(titles):
    return os.path.commonprefix(titles.tolist())

teams_contest_df = pd.DataFrame(
    team_contest_df.groupby(
        ['contest_id', 'date', 'style', 'type']
    ).agg(
        {'team_abbr': set,
         'title': common_title,
         'top_score': lambda score: score.mean(),
         'last_winning_score': lambda score: score.mean()}
    )
).reset_index()
teams_contest_df = teams_contest_df.rename(columns={'team_abbr': 'teams'})
teams_contest_df['draft_team_count'] = teams_contest_df.teams.map(len)

display(f"{len(teams_contest_df)} team sets")
display(teams_contest_df)

'20 team sets'

Unnamed: 0,contest_id,date,style,type,teams,title,top_score,last_winning_score,draft_team_count
0,89034802,2020-08-03,classic,GPP,"{PIT, PHI, CHC, COL, KC, OAK, ATL, NYM, MIN, C...",MLB $6K Quarter Jukebox [Just $0.25!] (,196.85,164.1,15
1,89499229,2020-08-10,classic,GPP,"{COL, OAK, TEX, ARI, BOS, WAS, MIN, CHW, SF, S...",MLB $6K Quarter Jukebox [Just $0.25!] (,222.85,197.55,15
2,89800790,2020-08-14,classic,GPP,"{DET, KC, LAD, SD, PHI, TEX, CIN, ARI, OAK, NY...",MLB $250 Quarter Jukebox [Just $0.25!] (,207.75,135.75,19
3,89806346,2020-08-15,classic,GPP,"{COL, ARI, OAK, BOS, TEX, WAS, LAD, SF, SEA, L...",MLB $4K Quarter Jukebox [Just $0.25!] (,191.05,148.05,14
4,89857779,2020-08-16,classic,GPP,"{PHI, CHC, KC, ATL, WAS, MIN, CHW, CLE, NYM, S...",MLB $4K Quarter Jukebox [Just $0.25!] (,191.95,108.8,14
5,89967670,2020-08-18,classic,GPP,"{PHI, CHC, TOR, STL, ATL, BOS, WAS, MIN, CHW, ...",MLB $5K Quarter Jukebox [Just $0.25!] (,215.65,175.05,15
6,90230204,2020-08-21,classic,GPP,"{PHI, CHC, KC, STL, OAK, TEX, ATL, BOS, ARI, M...",MLB $150 Quarter Jukebox [Just $0.25!] (,170.75,124.7,17
7,90301250,2020-08-23,classic,GPP,"{PIT, DET, KC, CHC, STL, TOR, BOS, MIN, CLE, C...",MLB $4K Quarter Jukebox [Just $0.25!] (,175.85,145.35,14
8,90485170,2020-08-25,classic,GPP,"{PIT, DET, CHC, COL, STL, OAK, TEX, KC, ARI, C...",MLB $300 Quarter Jukebox [Just $0.25!] (,196.95,164.95,16
9,90487845,2020-08-26,classic,GPP,"{DET, KC, COL, STL, ARI, TEX, CHC, NYM, LAD, S...",MLB $4K Quarter Jukebox [Just $0.25!] (,193.25,167.55,11


In [6]:
# load slate data from db
import sqlite3
import pandas as pd

conn = sqlite3.connect(DB_FILENAME)
sql = f"""
select distinct daily_fantasy_slate.id as slate_id, date, 
    daily_fantasy_slate.name as slate_name, style as contest_style, abbr
from daily_fantasy_slate 
    join daily_fantasy_cost on daily_fantasy_slate.id = daily_fantasy_cost.daily_fantasy_slate_id
    join team on team_id = team.id
where service = '{SERVICE}' and '{MIN_DATE}' <= date <= '{MAX_DATE}' and season = {SEASON}
"""

if STYLE is not None:
    sql += f" and style = '{STYLE.name}'"

print(sql)
db_df = pd.read_sql_query(sql, conn, parse_dates=['date'])
# with pd.option_context('max_rows', 100):
#     display(db_df)
conn.close()

# get team sets
slate_db_df = pd.DataFrame(
    db_df.groupby(
        ['slate_id', 'date', 'slate_name', 'contest_style']
    ).agg(
        {'abbr': set}
    )
).reset_index()
slate_db_df = slate_db_df.set_index('date').rename(columns={'abbr': 'teams'})
slate_db_df['team_count'] = slate_db_df.teams.map(len)
with pd.option_context('max_rows', 100):
    display(slate_db_df)


select distinct daily_fantasy_slate.id as slate_id, date, 
    daily_fantasy_slate.name as slate_name, style as contest_style, abbr
from daily_fantasy_slate 
    join daily_fantasy_cost on daily_fantasy_slate.id = daily_fantasy_cost.daily_fantasy_slate_id
    join team on team_id = team.id
where service = 'draftkings' and '2020-01-01' <= date <= '2021-01-01' and season = 2020
 and style = 'CLASSIC'


Unnamed: 0_level_0,slate_id,slate_name,contest_style,teams,team_count
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-23,3686,Unnamed-CLASSIC-Slate-36859,CLASSIC,"{SF, WAS, NYY, LAD}",4
2020-07-24,3689,Unnamed-CLASSIC-Slate-36862,CLASSIC,"{KC, LAD, SD, PHI, TEX, LAA, BAL, COL, STL, OA...",22
2020-07-24,3693,Unnamed-CLASSIC-Slate-37254,CLASSIC,"{OAK, ARI, LAD, SF, SEA, LAA, SD, HOU}",8
2020-07-25,3696,Unnamed-CLASSIC-Slate-37274,CLASSIC,"{LAD, TB, PHI, TEX, LAA, BAL, COL, TOR, STL, O...",22
2020-07-25,3699,(Afternoon),CLASSIC,"{PHI, COL, OAK, ATL, TEX, NYM, LAD, SF, SEA, L...",12
...,...,...,...,...,...
2020-10-13,7547,Unnamed-CLASSIC-Slate-40698,CLASSIC,"{LAD, TB, HOU, ATL}",4
2020-10-14,7577,Unnamed-CLASSIC-Slate-40779,CLASSIC,"{LAD, TB, HOU, ATL}",4
2020-10-15,7625,Unnamed-CLASSIC-Slate-40850,CLASSIC,"{LAD, TB, HOU, ATL}",4
2020-10-16,7628,Unnamed-CLASSIC-Slate-40863,CLASSIC,"{LAD, TB, HOU, ATL}",4


In [8]:
import numpy as np
from typing import Optional


def get_slate_info(contest_row) -> Optional[pd.Series]:
    """ 
    guesses the db slate id contest_row
    returns - series of (slate_id, number of teams playing in slate)
    """
    try:
        date_slates = slate_db_df.loc[[contest_row.date]].sort_values('team_count')
    except KeyError as ke:
        print(f"Key error finding slates for {contest_row.date}")
        return None
    try:
        slates = date_slates.query("@contest_row.teams <= teams")
    except Exception as e:
        print(f"Unhandled exception querying for teams date {contest_row.date}")
        # display(date_slates)
        raise
        
    slates_found = len(slates)
    if slates_found == 0:
        print(f"On {contest_row.date} the {len(date_slates)} db slates don't match contest teams {contest_row.teams}. "
              "DB slate team sets were:")
        with pd.option_context('max_colwidth', None):
            display(date_slates[['slate_name', 'teams']])
        return None
    if slates_found > 1:
        # display(slates)
        slates = slates.head(1)
        print(f"{slates_found} slates matched contest {contest_row.date} '{contest_row.title}'. "
              f"Using '{slates.iloc[0].slate_name}'")
    return slates.iloc[0][['slate_id', 'team_count']]
    
slate_ids_df = teams_contest_df.apply(get_slate_info, axis=1)
display(slate_ids_df)

2 slates matched contest 2020-09-08 00:00:00 'MLB $1K Quarter Jukebox [Just $0.25!]  (Night) ('. Using 'Unnamed-CLASSIC-Slate-39402'
2 slates matched contest 2020-09-10 00:00:00 'MLB $500 Quarter Jukebox [Just $0.25!] (Night) ('. Using '(Night)'


Unnamed: 0,slate_id,team_count
0,3752,16
1,3803,18
2,4255,24
3,4264,14
4,4480,16
5,4580,22
6,4840,26
7,5002,14
8,5208,20
9,5217,20


In [27]:
# slate game score info
conn = sqlite3.connect(DB_FILENAME)

# for mlb double headers this will cause inaccuracy for players that played in both games
sql = f"""
select distinct daily_fantasy_slate.id as slate_id, game.id as game_id, game.score_home, game.score_away
from daily_fantasy_slate
    join daily_fantasy_cost on daily_fantasy_slate.id = daily_fantasy_cost.daily_fantasy_slate_id
    join game on (game.date = daily_fantasy_slate.date and
                  game.season = daily_fantasy_slate.season and 
                  (daily_fantasy_cost.team_id in (game.away_team_id, game.home_team_id)))
where daily_fantasy_slate.id in ({','.join(map(str, slate_ids_df.slate_id))})
"""

# print(sql)
db_team_score_df = pd.read_sql_query(sql, conn, parse_dates=['date'])
conn.close()
# display(db_team_score_df)
team_score_df = db_team_score_df.melt(id_vars=['slate_id', 'game_id'], value_vars=['score_home', 'score_away']) \
          .groupby(['slate_id']) \
          .agg({'value': 'median'}) \
          .rename(columns={'value': 'median_team_score'})
display(team_score_df)
# def slate_pos_score(row) -> dict:
#     """ 
#     retrieve slate data used in predictions
#     returns - dict mapping key to value for a slate stat
#     """
#     return {'': row.slate_id}

# addl_slate_df = slate_ids_df.apply(slate_data, axis='columns', result_type='expand')
# display(addl_slate_df)

# predict_df = pd.concat(
#     [teams_contest_df[['date', 'style', 'type', 'top_score', 'last_winning_score']],
#      slate_ids_df,
#      addl_slate_df],
#     axis='columns',
# )
    
# display(predict_df)


select distinct daily_fantasy_slate.id as slate_id, game.id as game_id, game.score_home, game.score_away
from daily_fantasy_slate
    join daily_fantasy_cost on daily_fantasy_slate.id = daily_fantasy_cost.daily_fantasy_slate_id
    join game on (game.date = daily_fantasy_slate.date and
                  game.season = daily_fantasy_slate.season and 
                  (daily_fantasy_cost.team_id in (game.away_team_id, game.home_team_id)))
where daily_fantasy_slate.id in (3752,3803,4255,4264,4480,4580,4840,5002,5208,5217,5530,5530,5689,5766,5766,5780,6145,6235,6370,7628)



Unnamed: 0_level_0,median_team_score
slate_id,Unnamed: 1_level_1
3752,4.5
3803,5.5
4255,5.0
4264,6.0
4480,4.5
4580,5.5
4840,4.5
5002,4.0
5208,4.0
5217,5.0


In [28]:
# get position scores
conn = sqlite3.connect(DB_FILENAME)

# for mlb double headers this will cause inaccuracy for players that played in both games
sql = f"""
select daily_fantasy_slate.id as slate_id, positions as position, value as score
from daily_fantasy_slate
    join daily_fantasy_cost on daily_fantasy_slate.id = daily_fantasy_cost.daily_fantasy_slate_id
    join game on (game.date = daily_fantasy_slate.date and
                  game.season = daily_fantasy_slate.season and 
                  (daily_fantasy_cost.team_id in (game.away_team_id, game.home_team_id)))
    join calculation_datum on (calculation_datum.game_id = game.id and 
                               calculation_datum.player_id = daily_fantasy_cost.player_id and
                               calculation_datum.team_id = daily_fantasy_cost.team_id)
    join statistic on calculation_datum.statistic_id = statistic.id
where daily_fantasy_slate.id in ({','.join(map(str, slate_ids_df.slate_id))}) and
    statistic.name = '{SERVICE_ABBR}_score'
"""

# print(sql)
db_df = pd.read_sql_query(sql, conn, parse_dates=['date'])
conn.close()
db_pos_scores_df = db_df.assign(position=db_df.position.str.split('/')) \
             .explode('position') \
             .groupby(['slate_id', 'position']) \
             .agg('median') \
             .rename(columns={'score': 'score'}) \
             .reset_index(level='position') \
             .pivot(columns='position', values='score')
display(pos_scores_df)


select daily_fantasy_slate.id as slate_id, positions as position, value as score
from daily_fantasy_slate
    join daily_fantasy_cost on daily_fantasy_slate.id = daily_fantasy_cost.daily_fantasy_slate_id
    join game on (game.date = daily_fantasy_slate.date and
                  game.season = daily_fantasy_slate.season and 
                  (daily_fantasy_cost.team_id in (game.away_team_id, game.home_team_id)))
    join calculation_datum on (calculation_datum.game_id = game.id and 
                               calculation_datum.player_id = daily_fantasy_cost.player_id and
                               calculation_datum.team_id = daily_fantasy_cost.team_id)
    join statistic on calculation_datum.statistic_id = statistic.id
where daily_fantasy_slate.id in (3752,3803,4255,4264,4480,4580,4840,5002,5208,5217,5530,5530,5689,5766,5766,5780,6145,6235,6370,7628) and
    statistic.name = 'dk_score'



position,1B,2B,3B,C,OF,RP,SP,SS
slate_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3752,3.0,2.0,3.0,6.5,5.0,8.908,11.104,5.0
3803,5.0,3.0,5.0,3.0,5.0,8.975,1.0,5.0
4255,6.0,3.0,5.0,3.0,7.0,8.554,16.5,4.0
4264,8.0,5.0,7.0,3.0,5.0,,12.008,8.0
4480,3.0,5.0,2.0,5.0,3.0,15.3,11.158,4.0
4580,5.0,5.0,5.0,4.0,4.5,6.5215,11.4,5.0
4840,5.0,3.0,4.5,5.0,5.0,8.875,6.704,4.5
5002,3.0,5.0,7.0,2.0,4.5,10.65,5.8,2.0
5208,7.0,3.0,7.0,2.0,3.0,-7.25,11.05,7.0
5217,7.0,3.0,3.0,4.0,5.0,12.05,16.15,5.0


In [None]:
# def slate_pos_score(row) -> dict:
#     """ 
#     retrieve slate data used in predictions
#     returns - dict mapping key to value for a slate stat
#     """
#     return {'': row.slate_id}

# addl_slate_df = slate_ids_df.apply(slate_data, axis='columns', result_type='expand')
# display(addl_slate_df)

# predict_df = pd.concat(
#     [teams_contest_df[['date', 'style', 'type', 'top_score', 'last_winning_score']],
#      slate_ids_df,
#      addl_slate_df],
#     axis='columns',
# )
    
# display(predict_df)