# Daily fantasy predictive model

Predict scores for non-rookies from 1985 onwards

|Stat|Points|
|----|------|
|Points (PT)|1|
|Rebounds (REB)|1.2|
|Assists (AST)|1.5|
|Steals (ST)|3|
|Blocks (BLK)|3|
|Turnovers (TO)|1|

Features:
- previous stats
- volatility of stats
- trend in stats
- time off
- home/away
- player profile
- total of stats + utilization + mins for players on team
- total of stats + utilization + mins for players on team by position
- total of stats for players on opposing team
- total of stats for players on opposing team by position
- plus-minus (only available as of 2001)

stat per game \* teammate inflation \* opponent inflation

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.options.display.max_colwidth = 999
pd.options.display.max_rows = 99
pd.options.display.max_columns = 99

In [3]:
%matplotlib inline

## Import data

### Season dataset

In [5]:
seasons_df = pd.read_parquet('data/season.parquet')

# convert season to integer
seasons_df.season = seasons_df.season.str.split('-').str[0].astype(int)

### Games dataset

In [6]:
team_games_df = pd.read_parquet('data/normalized_games.parquet')

game_cols = ['boxscore_link', 'season_link', 'start', 
             'notes', 'overtime', 'attendance', 
             'date', 'playoffs']

games_df = team_games_df[game_cols].drop_duplicates()

# duplicates due to mismatch in playoffs assignment, resulting from some teams having shortened seasons
duplicated_games = games_df.loc[games_df.boxscore_link.duplicated(), 'boxscore_link']
games_df.loc[games_df.boxscore_link.isin(duplicated_games), 'playoffs'] = True
games_df = games_df.drop_duplicates()

team_games_df = team_games_df[
    ['boxscore_link', 'team', 'team_link', 'pts', 'location']
]

### Players dataset

In [7]:
players_df = pd.read_parquet('data/player.parquet')

def height_string_to_float(series):
    return (
        series
         .str.split("-", expand=True)
         .apply(lambda x: x.astype(float))
         .rename(columns = {0: "feet", 1: "inches"})
         .eval('feet + inches/12')
    )
players_df.height = height_string_to_float(players_df.height)

players_df.birth_date = pd.to_datetime(players_df.birth_date)

for pos in ['F', 'C', 'G']:
    players_df[f"pos_{pos.lower()}"] = players_df.pos.fillna("").str.contains(pos).astype(int)

players_df['n_pos'] = players_df[['pos_f', 'pos_c', 'pos_g']].sum(axis=1)

for pos in ['F', 'C', 'G']:
    players_df[f"pos_{pos.lower()}_scaled"] = players_df[f"pos_{pos.lower()}"] / players_df["n_pos"]

### Basic boxscores dataset

In [13]:
basic_boxscores_df = pd.read_parquet('data/basic_boxscore.parquet')

basic_boxscores_df = basic_boxscores_df.rename(columns={'game_url': 'boxscore_link'})

# there are a few games with duplicated boxscore records -> remove duplicates
basic_boxscores_df = basic_boxscores_df.sort_values(by=['boxscore_link', 'minutes'])
basic_boxscores_df = basic_boxscores_df[
    ~basic_boxscores_df.eval('boxscore_link + player_link + team').duplicated()
].copy()

### Advanced boxscores dataset

In [14]:
advanced_boxscores_df = pd.read_parquet('data/adv_boxscore.parquet')

advanced_boxscores_df = advanced_boxscores_df.rename(columns={'game_url': 'boxscore_link'})

# there are a few games with duplicated boxscore records -> remove duplicates
advanced_boxscores_df = advanced_boxscores_df.sort_values(by=['boxscore_link', 'minutes'])
advanced_boxscores_df = advanced_boxscores_df[
    ~advanced_boxscores_df.eval('boxscore_link + player_link + team').duplicated()
].copy()

## Combine datasets

In [17]:
denormalized_master_data = (
    basic_boxscores_df
        .drop(columns=['boxscore', 'player'])
        .merge(
            advanced_boxscores_df
                .drop(columns=['boxscore', 'mp', 'player', 'reason', 'role', 'team', 'minutes'])
                .eval('adv = 1'),
            on=['boxscore_link', 'player_link'],
            how='left'
        )
        .merge(
            games_df, 
            on='boxscore_link', 
            how='left',
        )
        .merge(
            team_games_df,
            on=['boxscore_link', 'team'],
            suffixes=['', '_team']
        )
        .merge(
            players_df,
            on='player_link',
            how='left'
        )
        .merge(
            seasons_df,
            on='season_link',
            how='left'
        )
        .assign(team_game_num=lambda x: x.groupby(['team', 'season_link', 'playoffs'])['date'].rank(method='dense'))
        .assign(game_num=lambda x: x.groupby(['player_link', 'playoffs'])['date'].rank(method='first'))
        .sort_values(by=['player_link', 'date'])
)

In [18]:
denormalized_master_data.head()

Unnamed: 0,ast,blk,drb,fg,fg3,fg3_pct,fg3a,fg_pct,fga,ft,ft_pct,fta,boxscore_link,mp,orb,pf,player_link,plus_minus,pts,reason,role,stl,team,tov,trb,minutes,ast_pct,blk_pct,def_rtg,drb_pct,efg_pct,fg3a_per_fga_pct,fta_per_fga_pct,off_rtg,orb_pct,stl_pct,tov_pct,trb_pct,ts_pct,usg_pct,adv,season_link,start,notes,overtime,attendance,date,playoffs,team_link,pts_team,location,birth_date,college,height,player,pos,weight,year_max,year_min,pos_f,pos_c,pos_g,n_pos,pos_f_scaled,pos_c_scaled,pos_g_scaled,league,season,team_game_num,game_num
626140,1.0,0.0,1.0,0.0,0.0,,0.0,0.0,1.0,0.0,,0.0,https://www.basketball-reference.com/boxscores/199011020POR.html,5:00,1.0,0.0,https://www.basketball-reference.com/players/a/abdelal01.html,,0.0,,reserve,0.0,Portland Trail Blazers,0.0,2.0,5.0,29.1,0.0,88.0,20.9,0.0,0.0,0.0,73.0,20.0,0.0,0.0,20.4,0.0,7.8,1.0,https://www.basketball-reference.com/leagues/NBA_1991.html,,,,12884,1990-11-02,False,https://www.basketball-reference.com/teams/POR/1991.html,90.0,home,1968-06-24,Duke University,6.833333,Alaa Abdelnaby,F-C,240.0,1995.0,1991.0,1.0,1.0,0.0,2.0,0.5,0.5,0.0,NBA,1990,1.0,1.0
626545,0.0,0.0,0.0,0.0,0.0,,0.0,,0.0,0.0,,0.0,https://www.basketball-reference.com/boxscores/199011060LAL.html,4:00,0.0,3.0,https://www.basketball-reference.com/players/a/abdelal01.html,,0.0,,reserve,0.0,Portland Trail Blazers,1.0,0.0,4.0,0.0,0.0,116.0,0.0,,,,0.0,0.0,0.0,100.0,0.0,,10.6,1.0,https://www.basketball-reference.com/leagues/NBA_1991.html,,,OT,16361,1990-11-06,False,https://www.basketball-reference.com/teams/POR/1991.html,125.0,away,1968-06-24,Duke University,6.833333,Alaa Abdelnaby,F-C,240.0,1995.0,1991.0,1.0,1.0,0.0,2.0,0.5,0.5,0.0,NBA,1990,3.0,2.0
627351,1.0,0.0,3.0,2.0,0.0,,0.0,0.5,4.0,0.0,,0.0,https://www.basketball-reference.com/boxscores/199011110POR.html,8:00,1.0,1.0,https://www.basketball-reference.com/players/a/abdelal01.html,,4.0,,reserve,0.0,Portland Trail Blazers,0.0,4.0,8.0,13.6,0.0,94.0,38.3,0.5,0.0,0.0,117.0,14.0,0.0,0.0,26.7,0.5,18.8,1.0,https://www.basketball-reference.com/leagues/NBA_1991.html,,,,12884,1990-11-11,False,https://www.basketball-reference.com/teams/POR/1991.html,138.0,home,1968-06-24,Duke University,6.833333,Alaa Abdelnaby,F-C,240.0,1995.0,1991.0,1.0,1.0,0.0,2.0,0.5,0.5,0.0,NBA,1990,5.0,3.0
627573,0.0,0.0,1.0,0.0,0.0,,0.0,0.0,2.0,0.0,,0.0,https://www.basketball-reference.com/boxscores/199011130POR.html,3:00,0.0,0.0,https://www.basketball-reference.com/players/a/abdelal01.html,,0.0,,reserve,0.0,Portland Trail Blazers,1.0,1.0,3.0,0.0,0.0,109.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,33.3,18.0,0.0,34.1,1.0,https://www.basketball-reference.com/leagues/NBA_1991.html,,,,12884,1990-11-13,False,https://www.basketball-reference.com/teams/POR/1991.html,155.0,home,1968-06-24,Duke University,6.833333,Alaa Abdelnaby,F-C,240.0,1995.0,1991.0,1.0,1.0,0.0,2.0,0.5,0.5,0.0,NBA,1990,6.0,4.0
627851,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,2.0,1.0,0.5,2.0,https://www.basketball-reference.com/boxscores/199011150POR.html,6:00,1.0,0.0,https://www.basketball-reference.com/players/a/abdelal01.html,,1.0,,reserve,0.0,Portland Trail Blazers,0.0,1.0,6.0,0.0,0.0,123.0,0.0,0.0,0.0,1.0,71.0,21.6,0.0,0.0,10.5,0.174,18.2,1.0,https://www.basketball-reference.com/leagues/NBA_1991.html,,,,12884,1990-11-15,False,https://www.basketball-reference.com/teams/POR/1991.html,141.0,home,1968-06-24,Duke University,6.833333,Alaa Abdelnaby,F-C,240.0,1995.0,1991.0,1.0,1.0,0.0,2.0,0.5,0.5,0.0,NBA,1990,7.0,5.0


## Filter data

In [None]:
# remove playoffs, last 5 games of the season and games before 2000
filtered_master_data = (denormalized_master_data
    .query('season >= 2000')
    .query('')

## Generate features

In [75]:
# add avg statistics for previous 5 games
prev_5g_avg = (denormalized_master_data
     .groupby(['player_link', 'playoffs'])[['pts', 'stl', 'tov', 'trb', 'minutes', 'ast', 'blk']]
     .apply(lambda x: x.shift(1).rolling(window=5, min_periods=5).mean())
     .rename(columns=lambda col: f"{col}_prev_5g_avg")
)

In [79]:
denormalized_master_data = (denormalized_master_data
    .drop(columns=prev_5g_avg.columns, errors='ignore')
    .join(prev_5g_avg)
)

## Split into test and control
- All: non-playoff games, not including first 5 games or last 5 games
- Train: 2001 to 2015
- Test: 2016, 2017

In [80]:
filtered_data = (denormalized_master_data
    .query('playoffs == False')
    .assign(total_team_games=lambda x: x.groupby(['team', 'season_link', 'playoffs'])['team_game_num'].transform(max))
    .query('team_game_num > 5')
    .query('team_game_num <= total_team_games - 5')
)

In [81]:
train = filtered_data.query('season >= 2001 and season <= 2015')
test = filtered_data.query('season >= 2016 and season <= 2017')

In [86]:
train[train.pts_prev_5g_avg.isna() & ~train.minutes.isna()]

Unnamed: 0,ast,blk,drb,fg,fg3,fg3_pct,fg3a,fg_pct,fga,ft,ft_pct,fta,boxscore_link,mp,orb,pf,player_link,plus_minus,pts,reason,role,stl,team,tov,trb,minutes,ast_pct,blk_pct,def_rtg,drb_pct,efg_pct,fg3a_per_fga_pct,fta_per_fga_pct,off_rtg,orb_pct,stl_pct,tov_pct,trb_pct,ts_pct,usg_pct,adv,season_link,start,notes,overtime,attendance,date,playoffs,team_link,pts_team,location,birth_date,college,height,player,pos,weight,year_max,year_min,pos_f,pos_c,pos_g,n_pos,pos_f_scaled,pos_c_scaled,pos_g_scaled,league,season,team_game_num,game_num,pts_prev_5g_avg,stl_prev_5g_avg,tov_prev_5g_avg,trb_prev_5g_avg,minutes_prev_5g_avg,ast_prev_5g_avg,blk_prev_5g_avg,total_team_games
1007973,0.0,0.0,0.0,0.0,0.0,,0.0,0.000,2.0,0.0,,0.0,https://www.basketball-reference.com/boxscores/200602210DET.html,1:18,0.0,0.0,https://www.basketball-reference.com/players/a/ackeral01.html,-2.0,0.0,,reserve,0.0,Detroit Pistons,0.0,0.0,1.300000,0.0,0.0,107.0,0.0,0.000,0.000,0.000,0.0,0.0,0.0,0.0,0.0,0.000,68.2,1.0,https://www.basketball-reference.com/leagues/NBA_2006.html,7:30 pm,,,22076,2006-02-21,False,https://www.basketball-reference.com/teams/DET/2006.html,97.0,home,1983-01-21,Pepperdine University,6.416667,Alex Acker,G,185.0,2009.0,2006.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,NBA,2005,52.0,1.0,,,,,,,,82.0
1008350,0.0,0.0,0.0,0.0,0.0,,0.0,,0.0,0.0,,0.0,https://www.basketball-reference.com/boxscores/200602230DET.html,1:05,0.0,0.0,https://www.basketball-reference.com/players/a/ackeral01.html,-8.0,0.0,,reserve,0.0,Detroit Pistons,1.0,0.0,1.083333,0.0,0.0,110.0,0.0,,,,0.0,0.0,0.0,100.0,0.0,,45.9,1.0,https://www.basketball-reference.com/leagues/NBA_2006.html,8:00 pm,,,22076,2006-02-23,False,https://www.basketball-reference.com/teams/DET/2006.html,88.0,home,1983-01-21,Pepperdine University,6.416667,Alex Acker,G,185.0,2009.0,2006.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,NBA,2005,53.0,2.0,,,,,,,,82.0
1008775,0.0,0.0,0.0,1.0,0.0,,0.0,1.000,1.0,0.0,,0.0,https://www.basketball-reference.com/boxscores/200602260DET.html,2:04,0.0,1.0,https://www.basketball-reference.com/players/a/ackeral01.html,-9.0,2.0,,reserve,0.0,Detroit Pistons,1.0,0.0,2.066667,0.0,0.0,93.0,0.0,1.000,0.000,0.000,64.0,0.0,0.0,50.0,0.0,1.000,46.8,1.0,https://www.basketball-reference.com/leagues/NBA_2006.html,12:00 pm,,,22076,2006-02-26,False,https://www.basketball-reference.com/teams/DET/2006.html,90.0,home,1983-01-21,Pepperdine University,6.416667,Alex Acker,G,185.0,2009.0,2006.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,NBA,2005,55.0,3.0,,,,,,,,82.0
1176865,0.0,1.0,2.0,1.0,0.0,,0.0,1.000,1.0,4.0,1.000,4.0,https://www.basketball-reference.com/boxscores/201211170BOS.html,12:06,2.0,0.0,https://www.basketball-reference.com/players/a/acyqu01.html,5.0,6.0,,reserve,2.0,Toronto Raptors,0.0,4.0,12.100000,0.0,6.5,99.0,24.0,1.000,0.000,4.000,232.0,20.3,9.0,0.0,22.0,1.087,11.1,1.0,https://www.basketball-reference.com/leagues/NBA_2013.html,12:30 pm,,,18624,2012-11-17,False,https://www.basketball-reference.com/teams/TOR/2013.html,89.0,away,1990-10-06,Baylor University,6.583333,Quincy Acy,F,240.0,2018.0,2013.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,NBA,2012,9.0,2.0,,,,,,,,82.0
1178651,0.0,0.0,2.0,0.0,0.0,,0.0,0.000,1.0,1.0,0.500,2.0,https://www.basketball-reference.com/boxscores/201211280MEM.html,4:20,0.0,1.0,https://www.basketball-reference.com/players/a/acyqu01.html,-2.0,1.0,,reserve,0.0,Toronto Raptors,0.0,2.0,4.333333,0.0,0.0,103.0,58.3,0.000,0.000,2.000,60.0,0.0,0.0,0.0,28.0,0.266,20.5,1.0,https://www.basketball-reference.com/leagues/NBA_2013.html,8:00 pm,,,14603,2012-11-28,False,https://www.basketball-reference.com/teams/TOR/2013.html,82.0,away,1990-10-06,Baylor University,6.583333,Quincy Acy,F,240.0,2018.0,2013.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,NBA,2012,16.0,3.0,,,,,,,,82.0
1181128,0.0,0.0,0.0,1.0,0.0,,0.0,1.000,1.0,0.0,,0.0,https://www.basketball-reference.com/boxscores/201212140TOR.html,2:11,0.0,0.0,https://www.basketball-reference.com/players/a/acyqu01.html,1.0,2.0,,reserve,0.0,Toronto Raptors,0.0,0.0,2.183333,0.0,0.0,92.0,0.0,1.000,0.000,0.000,200.0,0.0,0.0,0.0,0.0,1.000,21.2,1.0,https://www.basketball-reference.com/leagues/NBA_2013.html,7:00 pm,,,19132,2012-12-14,False,https://www.basketball-reference.com/teams/TOR/2013.html,95.0,home,1990-10-06,Baylor University,6.583333,Quincy Acy,F,240.0,2018.0,2013.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,NBA,2012,24.0,4.0,,,,,,,,82.0
1182912,0.0,0.0,1.0,0.0,0.0,,0.0,,0.0,0.0,,0.0,https://www.basketball-reference.com/boxscores/201212260SAS.html,2:57,1.0,0.0,https://www.basketball-reference.com/players/a/acyqu01.html,-3.0,0.0,,reserve,0.0,Toronto Raptors,0.0,2.0,2.950000,0.0,0.0,111.0,46.5,,,,216.0,37.0,0.0,,41.2,,0.0,1.0,https://www.basketball-reference.com/leagues/NBA_2013.html,8:30 pm,,,18581,2012-12-26,False,https://www.basketball-reference.com/teams/TOR/2013.html,80.0,away,1990-10-06,Baylor University,6.583333,Quincy Acy,F,240.0,2018.0,2013.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,NBA,2012,29.0,5.0,,,,,,,,82.0
1204024,1.0,0.0,1.0,3.0,2.0,0.500,4.0,0.600,5.0,1.0,0.500,2.0,https://www.basketball-reference.com/boxscores/201311090TOR.html,17:49,1.0,2.0,https://www.basketball-reference.com/players/a/acyqu01.html,-9.0,9.0,,reserve,0.0,Toronto Raptors,0.0,2.0,17.816667,8.4,0.0,111.0,6.9,0.800,0.800,0.400,165.0,7.1,0.0,0.0,7.0,0.765,14.7,1.0,https://www.basketball-reference.com/leagues/NBA_2014.html,7:00 pm,,,17211,2013-11-09,False,https://www.basketball-reference.com/teams/TOR/2014.html,115.0,home,1990-10-06,Baylor University,6.583333,Quincy Acy,F,240.0,2018.0,2013.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,NBA,2013,7.0,32.0,,,,,,,,82.0
1204215,1.0,1.0,0.0,0.0,0.0,0.000,1.0,0.000,1.0,0.0,,0.0,https://www.basketball-reference.com/boxscores/201311110HOU.html,8:47,0.0,3.0,https://www.basketball-reference.com/players/a/acyqu01.html,-3.0,0.0,,reserve,2.0,Toronto Raptors,0.0,0.0,8.783333,17.4,11.4,76.0,0.0,0.000,1.000,0.000,45.0,0.0,11.8,0.0,0.0,0.000,4.8,1.0,https://www.basketball-reference.com/leagues/NBA_2014.html,8:00 pm,,2OT,18134,2013-11-11,False,https://www.basketball-reference.com/teams/TOR/2014.html,104.0,away,1990-10-06,Baylor University,6.583333,Quincy Acy,F,240.0,2018.0,2013.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,NBA,2013,8.0,33.0,,,,,,,,82.0
1204578,0.0,0.0,1.0,0.0,0.0,,0.0,,0.0,4.0,0.667,6.0,https://www.basketball-reference.com/boxscores/201311130MEM.html,5:16,0.0,0.0,https://www.basketball-reference.com/players/a/acyqu01.html,0.0,4.0,,reserve,0.0,Toronto Raptors,0.0,1.0,5.266667,0.0,0.0,102.0,23.4,,,,165.0,0.0,0.0,0.0,11.8,0.758,24.8,1.0,https://www.basketball-reference.com/leagues/NBA_2014.html,8:00 pm,,,15971,2013-11-13,False,https://www.basketball-reference.com/teams/TOR/2014.html,103.0,away,1990-10-06,Baylor University,6.583333,Quincy Acy,F,240.0,2018.0,2013.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,NBA,2013,9.0,34.0,,,,,,,,82.0


## Test model - pts scored

Candidate model (linear): 
- avg pts in prev 5 games

In [82]:
from sklearn.linear_model import LinearRegression

In [83]:
predictors = ['pts_prev_5g_avg']
target = 'pts'
reg = LinearRegression().fit(train[predictors].fillna(0).values, train[target].fillna(0).values)

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').