# Daily Fantasy Model Feature Engineering

Generate the features used by the daily fantasy predictive model, both for historical games (training) and for today's expected lineups (prediction).

In [1]:
# Re-import edited modules (such as the local `transformations` module) automatically,
# so changes to project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from time import sleep
from datetime import date
import os

import pandas as pd
import numpy as np
import pymongo

import transformations as trn

In [3]:
# The feature frames are very wide; raise the column display limit so previews are not truncated.
pd.set_option("display.max_columns", 999)

In [4]:
# The MongoDB password is read from the environment so it never lives in the notebook.
NBA_MONGO_KEY = os.environ.get("NBA_MONGO_KEY")
if not NBA_MONGO_KEY:
    # Fail fast: otherwise the literal string "None" would be interpolated into the URI
    # as the password and the problem would only surface later as an authentication error.
    raise RuntimeError("The NBA_MONGO_KEY environment variable must be set to connect to MongoDB.")
client = pymongo.MongoClient(f"mongodb+srv://nba-fantasy:{NBA_MONGO_KEY}@nba-fantasy-hu5fx.gcp.mongodb.net/test?retryWrites=true&w=majority")

## Download inputs

columns: player, game, date, stats (points, ast, blk, tov, fga, fgm, fta, ftm, 3pm, reb, stl)

### Boxscore stats

In [5]:
# Flatten every gamelog document into one row of box-score stats per (game, team, player).
game_stats_cursor = client["nbafantasy"]["gamelog"].aggregate([
    {
        "$project": {
            "_id": 0,
            "game_id": "$game.id",
            "team_id": "$team.id",
            "player_id": "$player.id",
            "date": "$game.startTime",
            "fg3a": "$stats.fieldGoals.fg3PtAtt",
            "fg3m": "$stats.fieldGoals.fg3PtMade",
            "reb": "$stats.rebounds.reb",
            "ast": "$stats.offense.ast",
            "pts": "$stats.offense.pts",
            "tov": "$stats.defense.tov",
            "stl": "$stats.defense.stl",
            "blk": "$stats.defense.blk",
            "fta": "$stats.freeThrows.ftAtt",
            "ftm": "$stats.freeThrows.ftMade",
            "fga": "$stats.fieldGoals.fgAtt",
            "fgm": "$stats.fieldGoals.fgMade",
            "minSeconds": "$stats.miscellaneous.minSeconds",
        }
    }
])

game_stats = (
    pd.DataFrame(game_stats_cursor)
    # Express tip-off times in US/Eastern.
    .assign(date=lambda df: pd.to_datetime(df["date"]).dt.tz_convert('US/Eastern'))
    .assign(
        # Weighted sum of the non-shooting stats (assists, blocks, rebounds, steals, turnovers).
        non_scoring_pts=lambda df: df["ast"] * 1.5 + df["blk"] * 3 + df["reb"] * 1.2 + df["stl"] * 3 - df["tov"],
        # Two-point field goals are all field goals minus the three-pointers.
        fg2m=lambda df: df["fgm"] - df["fg3m"],
        fg2a=lambda df: df["fga"] - df["fg3a"],
    )
    # Shooting percentages; a player with zero attempts gets NaN (0 / 0).
    .assign(ftp=lambda df: df["ftm"] / df["fta"])
    .assign(fg2p=lambda df: df["fg2m"] / df["fg2a"])
    .assign(fg3p=lambda df: df["fg3m"] / df["fg3a"])
    # Per-minute rates: `minSeconds` is converted from seconds to minutes before dividing.
    .pipe(
        lambda df: df.assign(
            **{
                f"{stat}_per_min": df[stat] / (df["minSeconds"] / 60)
                for stat in ["fta", "ftm", "fg2a", "fg2m", "fg3a", "fg3m", "non_scoring_pts"]
            }
        )
    )
)

In [6]:
# Preview the first box-score rows together with the derived columns.
game_stats.head()

Unnamed: 0,game_id,team_id,player_id,date,fg3a,fg3m,reb,ast,pts,tov,stl,blk,fta,ftm,fga,fgm,minSeconds,non_scoring_pts,fg2m,fg2a,ftp,fg2p,fg3p,fta_per_min,ftm_per_min,fg2a_per_min,fg2m_per_min,fg3a_per_min,fg3m_per_min,non_scoring_pts_per_min
0,47697,92,13742,2018-11-03 19:30:00-04:00,0,0,7,2,2,2,1,2,0,0,2,1,1837,18.4,1,2,,0.5,,0.0,0.0,0.065324,0.032662,0.0,0.0,0.60098
1,47700,99,15289,2018-11-03 21:00:00-04:00,0,0,0,0,2,0,0,0,0,0,1,1,117,0.0,1,1,,1.0,,0.0,0.0,0.512821,0.512821,0.0,0.0,0.0
2,47699,106,9480,2018-11-03 20:30:00-04:00,0,0,12,6,22,1,0,0,0,0,21,11,2381,22.4,11,21,,0.52381,,0.0,0.0,0.529189,0.277194,0.0,0.0,0.564469
3,47700,98,15218,2018-11-03 21:00:00-04:00,1,0,0,0,0,0,0,0,0,0,1,0,313,0.0,0,0,,,0.0,0.0,0.0,0.0,0.0,0.191693,0.0,0.0
4,47701,97,9452,2018-11-03 22:00:00-04:00,2,0,13,2,4,0,2,2,0,0,8,2,1926,30.6,2,6,,0.333333,0.0,0.0,0.0,0.186916,0.062305,0.062305,0.0,0.953271


In [7]:
# Sanity check: list box-score rows that repeat an earlier (game, player, team) key.
# An empty result means the key is unique.
game_stats.loc[lambda x: x.duplicated(subset=["game_id", "player_id", 'team_id'])]

Unnamed: 0,game_id,team_id,player_id,date,fg3a,fg3m,reb,ast,pts,tov,stl,blk,fta,ftm,fga,fgm,minSeconds,non_scoring_pts,fg2m,fg2a,ftp,fg2p,fg3p,fta_per_min,ftm_per_min,fg2a_per_min,fg2m_per_min,fg3a_per_min,fg3m_per_min,non_scoring_pts_per_min


### Games teams

In [8]:
# One row per scheduled game: its id, the two team ids and the played status.
game_teams_projection = {
    "_id": 0,
    "game_id": "$schedule.id",
    "away_team_id": "$schedule.awayTeam.id",
    "home_team_id": "$schedule.homeTeam.id",
    "status": "$schedule.playedStatus",
}
game_teams_cursor = client["nbafantasy"]["game"].aggregate(
    [{"$project": game_teams_projection}]
)
game_teams = pd.DataFrame(game_teams_cursor)

In [9]:
# Preview the first rows of the game/teams lookup.
game_teams.head()

Unnamed: 0,game_id,away_team_id,home_team_id,status
0,31176,88,91,COMPLETED
1,31177,86,89,COMPLETED
2,31178,110,101,COMPLETED
3,31188,94,95,COMPLETED
4,31179,85,82,COMPLETED


In [10]:
# Sanity check: list games whose `game_id` appears more than once (expected to be empty).
game_teams.loc[lambda x: x.duplicated(subset=["game_id"])]

Unnamed: 0,game_id,away_team_id,home_team_id,status


In [11]:
# Sanity check: box-score rows whose game has no match in `game_teams`.
# After the outer merge those rows have a missing `away_team_id`, and `col != col`
# is only true for NaN, so the query keeps exactly the unmatched rows.
(
    game_stats
    .merge(game_teams, on=["game_id"], how="outer")
    .query("away_team_id != away_team_id")
)

Unnamed: 0,game_id,team_id,player_id,date,fg3a,fg3m,reb,ast,pts,tov,stl,blk,fta,ftm,fga,fgm,minSeconds,non_scoring_pts,fg2m,fg2a,ftp,fg2p,fg3p,fta_per_min,ftm_per_min,fg2a_per_min,fg2m_per_min,fg3a_per_min,fg3m_per_min,non_scoring_pts_per_min,away_team_id,home_team_id,status


In [12]:
# Sanity check: games that have no box-score rows (missing `player_id` after the outer
# merge, detected with the NaN != NaN trick) even though their status is not 'UNPLAYED'.
(
    game_stats
    .merge(game_teams, on=["game_id"], how="outer")
    .query("player_id != player_id and status != 'UNPLAYED'")
)

Unnamed: 0,game_id,team_id,player_id,date,fg3a,fg3m,reb,ast,pts,tov,stl,blk,fta,ftm,fga,fgm,minSeconds,non_scoring_pts,fg2m,fg2a,ftp,fg2p,fg3p,fta_per_min,ftm_per_min,fg2a_per_min,fg2m_per_min,fg3a_per_min,fg3m_per_min,non_scoring_pts_per_min,away_team_id,home_team_id,status


### Game lineups

- There are ~200 duplicate players in lineups (if both starter and bench, assume starter is correct)
- There are ~800 games in which the player lineup position is missing (assume bench)

In [13]:
# Explode each lineup document into one row per (game, team, lineup slot) and keep
# only the slots that actually reference a player.
cursor = client["nbafantasy"]["lineup"].aggregate(
    [
        {'$unwind': {'path': '$teamLineups'}},
        {'$unwind': {'path': '$teamLineups.actual.lineupPositions'}},
        {
            '$match': {
                'teamLineups.actual.lineupPositions.player': {'$ne': None}
            }
        },
        {
            '$project': {
                '_id': 0,
                'game_id': '$game.id',
                'team_id': '$teamLineups.team.id',
                'player_id': '$teamLineups.actual.lineupPositions.player.id',
                'player_role': '$teamLineups.actual.lineupPositions.position',
                'position': '$teamLineups.actual.lineupPositions.player.position'
            }
        },
        {
            '$sort': {
                'game_id': 1,
                'team_id': 1,
                'player_role': 1
            }
        }
    ]
)

historical_lineups = (
    pd.DataFrame(cursor)
    # Strip the slot number from the role so only the role name remains.
    # `regex=True` is explicit because pandas 2.0 changed the default to a literal match,
    # under which the "[0-9]" pattern would silently stop removing digits.
    .assign(player_role=lambda x: x["player_role"].str.replace("[0-9]", "", regex=True))
    # There are ~200 cases of duplicate player roles (assume starter is correct):
    # "Starter" sorts after "Bench", so keeping the last row per player prefers it.
    # Missing roles are sorted first so that they can never win over a known role.
    .sort_values(by=["game_id", "team_id", "player_id", "player_role"], na_position="first")
    .loc[lambda x: ~x.duplicated(subset=["game_id", "team_id", "player_id"], keep="last")]
)

In [14]:
# Preview the de-duplicated lineups (one row per game, team and player).
historical_lineups.head()

Unnamed: 0,game_id,team_id,player_id,player_role,position
6,31176,88,9202,Starter,C
7,31176,88,9203,Starter,SG
2,31176,88,9205,Bench,PG
3,31176,88,9206,Bench,SG
4,31176,88,9207,Starter,SF


### Players

In [19]:
def _height_in_feet(raw_height):
    """Convert a height string such as 6'10" into decimal feet.

    Returns None when the value is missing (not a string) or cannot be parsed,
    so one malformed record does not abort the whole load.
    """
    if not isinstance(raw_height, str):
        return None
    feet, _, inches = raw_height.replace('"', '').partition("'")
    try:
        return int(feet) + int(inches or 0) / 12
    except ValueError:
        return None


# Player attributes keyed by the document id, which is renamed to `player_id` below.
cursor = client["nbafantasy"]["player"].aggregate([
    {
        "$project": {
            "_id": 1,
            "firstName": 1,
            "lastName": 1,
            "height": 1,
            "weight": 1,
            "birth_date": {'$toDate': '$birthDate'},
            "draft_year": "$drafted.year",
            "draft_pick": "$drafted.overallPick"
        }
    }
])
players = (
    pd.DataFrame(cursor)
    .rename(columns={"_id": "player_id"})
    # Keep only the calendar date of birth.
    .assign(birth_date=lambda x: x["birth_date"].dt.date)
    # Heights become decimal feet; missing or unparseable heights become NaN.
    .assign(height=lambda x: x["height"].map(_height_in_feet).astype(float))
)

### DFS info

In [20]:
# Daily-fantasy listing per (player, team, game): listed position and salary.
cursor = client["nbafantasy"]["dfs"].aggregate(
    [
        {
            "$project": {
                "_id": 0,
                "player_id": "$player.id",
                "team_id": "$team.id",
                "game_id": "$game.id",
                "position": "$player.position",
                "salary": 1,
            }
        }
    ]
)

# Append one indicator column per listed position (position_<value>) to the raw listing.
dfs_info = pd.DataFrame(cursor).pipe(
    lambda listing: listing.join(pd.get_dummies(listing["position"], prefix="position"))
)

### Upcoming games and players

In [21]:
# Expected lineups for the upcoming games, enriched with the opponent and a home-game flag.
upcoming_lineups = (
    pd.read_csv("data/rotogrinders_expected_lineup.csv")
    .merge(game_teams, how="left", on=["game_id"])
    .pipe(
        lambda lineup: lineup.assign(
            # The opponent is whichever of the two teams in the game is not the player's team.
            opp_team_id=np.where(
                lineup["team_id"] == lineup["home_team_id"],
                lineup["away_team_id"],
                lineup["home_team_id"],
            ),
            # 1 when the player's team is the home team, 0 otherwise.
            home_game=(lineup["team_id"] == lineup["home_team_id"]).astype(int),
        )
    )
    .drop(columns=["away_team_id", "home_team_id", "status"])
)

In [22]:
# Display the expected lineups for the upcoming games.
upcoming_lineups

Unnamed: 0,team,player,starter,player_id,match_quality,team_id,game_id,opp_team_id,home_game
0,MIL,Kyle Korver,0,9089,1.000000,90,53655,82,1
1,MIL,Brook Lopez,1,9118,1.000000,90,53655,82,1
2,MIL,Wesley Matthews,1,9179,1.000000,90,53655,82,1
3,MIL,George Hill,0,9248,1.000000,90,53655,82,1
4,MIL,Giannis Antetokounmpo,1,9325,1.000000,90,53655,82,1
...,...,...,...,...,...,...,...,...,...
118,ORL,Khem Birch,1,13804,1.000000,95,53652,102,0
119,ORL,Amile Jefferson,0,13819,1.000000,95,53652,102,0
120,ORL,Mohamed Bamba,0,15278,1.000000,95,53652,102,0
121,ORL,Gary Clark,0,15310,1.000000,95,53652,102,0


## Feature generation

Features:
- average stats (fantasy stats + minutes)
- average of stats allowed by opponent teams
- average of stats allowed by opponent teams in same position (TODO)
- change in starter position, relative to previous games
- change in team average stats
- changes in team average stats in same positions
- draft position
- years in league
- age
- change in stats
- home/away
- days since last game
- travel between games (TODO)
- time of game (TODO)

### Denormalized inputs

- Merge input datasets
- Add column transformations
- Filter out games in which the player was inactive (no recorded playing time)

In [None]:
def _calendar_day(timestamps):
    """Return the local calendar day of each timestamp as a tz-naive midnight datetime."""
    return timestamps.dt.tz_localize(None).dt.normalize()


# One row per player-game: box-score stats joined with game, lineup and player attributes.
denormalized_stats = (
    game_stats
    .merge(game_teams, how="left", on=["game_id"])
    .merge(historical_lineups, how="left", on=["game_id", "team_id", "player_id"])
    .merge(players, how="left", on=["player_id"])
    # Age in days on the day of the game. Both sides are converted to datetime64 so the
    # subtraction yields a timedelta column that supports `.dt.days`.
    .assign(age=lambda x: (_calendar_day(x["date"]) - pd.to_datetime(x["birth_date"])).dt.days)
    .assign(
        home_game=lambda x: (x["team_id"] == x["home_team_id"]).astype(int)
    )
    .assign(
        opp_team_id=lambda x:
            np.where(
                (x["team_id"] == x["home_team_id"]),
                x["away_team_id"],
                x["home_team_id"]
            )
    )
    .assign(
        # Days since July 1st of the draft year; NaN for players without a draft year.
        # `pd.datetime` was removed in pandas 2.0, so the reference date is built with
        # `pd.to_datetime` from an ISO string instead.
        time_since_draft=lambda x:
            (
                _calendar_day(x["date"])
                - pd.to_datetime(
                    x["draft_year"].map(
                        lambda year: f"{int(year)}-07-01" if pd.notna(year) else None
                    ),
                    format="%Y-%m-%d",
                )
            ).dt.days
    )
    .assign(
        # Players without a lineup record have no role and therefore count as bench.
        starter=lambda x: (x["player_role"] == "Starter").astype(int)
    )
    .drop(columns=["birth_date", "away_team_id", "home_team_id", "draft_year", "player_role"])
    # Drop games in which the player did not record any playing time.
    .loc[lambda x: x["minSeconds"] > 0]

)

### Average stats (fantasy stats + minutes)

In [None]:
# Counting stats handed to the player-average transformer.
stats = ["fta", "ftm", "fg2a", "fg2m", "fg3a", "fg3m", "non_scoring_pts", "minSeconds"]

# Ratios derived from the aggregated stats (presumably evaluated by the transformer after
# aggregation; confirm in `transformations`).
# `minSeconds` is measured in seconds, so it is converted to minutes here: the `*_per_min`
# features are then true per-minute rates, on the same scale as the per-minute columns
# computed on the raw box scores.
post_agg_funcs = {
    "ftp": lambda x: x["ftm"] / x["fta"],
    "fg2p": lambda x: x["fg2m"] / x["fg2a"],
    "fg3p": lambda x: x["fg3m"] / x["fg3a"],
    "fta_per_min": lambda x: x["fta"] / (x["minSeconds"] / 60),
    "ftm_per_min": lambda x: x["ftm"] / (x["minSeconds"] / 60),
    "fg2a_per_min": lambda x: x["fg2a"] / (x["minSeconds"] / 60),
    "fg2m_per_min": lambda x: x["fg2m"] / (x["minSeconds"] / 60),
    "fg3a_per_min": lambda x: x["fg3a"] / (x["minSeconds"] / 60),
    "fg3m_per_min": lambda x: x["fg3m"] / (x["minSeconds"] / 60),
    "non_scoring_pts_per_min": lambda x: x["non_scoring_pts"] / (x["minSeconds"] / 60),
}

# For every window size: `*_hist` holds features for the historical games,
# the unsuffixed frame holds features for the players in the upcoming lineups.
rolling_avg_3g_hist = trn.PlayerAverage(window=3, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
rolling_avg_3g = trn.PlayerAverage(window=3, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

rolling_avg_9g_hist = trn.PlayerAverage(window=9, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
rolling_avg_9g = trn.PlayerAverage(window=9, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

rolling_avg_27g_hist = trn.PlayerAverage(window=27, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
rolling_avg_27g = trn.PlayerAverage(window=27, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

rolling_avg_81g_hist = trn.PlayerAverage(window=81, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
rolling_avg_81g = trn.PlayerAverage(window=81, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

In [None]:
# Preview the 27-game player averages for the historical games.
rolling_avg_27g_hist.head()

In [None]:
# Preview the 27-game player averages for the upcoming lineups.
rolling_avg_27g.head()

### Average of stats allowed by opponent teams

- try average points allowed
- try average points allowed above team average

In [None]:
# Rolling average of the stat totals for each opponent (`opp_team_id`).
stats = ["fta", "ftm", "fg2a", "fg2m", "fg3a", "fg3m", "non_scoring_pts", "minSeconds"]

# Ratios derived from the aggregated stats (presumably evaluated by the transformer after
# aggregation; confirm in `transformations`).
# `minSeconds` is measured in seconds, so it is converted to minutes here: the `*_per_min`
# features are then true per-minute rates, on the same scale as the per-minute columns
# computed on the raw box scores.
post_agg_funcs = {
    "ftp": lambda x: x["ftm"] / x["fta"],
    "fg2p": lambda x: x["fg2m"] / x["fg2a"],
    "fg3p": lambda x: x["fg3m"] / x["fg3a"],
    "fta_per_min": lambda x: x["fta"] / (x["minSeconds"] / 60),
    "ftm_per_min": lambda x: x["ftm"] / (x["minSeconds"] / 60),
    "fg2a_per_min": lambda x: x["fg2a"] / (x["minSeconds"] / 60),
    "fg2m_per_min": lambda x: x["fg2m"] / (x["minSeconds"] / 60),
    "fg3a_per_min": lambda x: x["fg3a"] / (x["minSeconds"] / 60),
    "fg3m_per_min": lambda x: x["fg3m"] / (x["minSeconds"] / 60),
    "non_scoring_pts_per_min": lambda x: x["non_scoring_pts"] / (x["minSeconds"] / 60),
}

# Features for the historical games, one frame per window size.
opp_allowed_3g_hist = trn.OpponentAverageAllowed(window=3, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
opp_allowed_9g_hist = trn.OpponentAverageAllowed(window=9, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
opp_allowed_27g_hist = trn.OpponentAverageAllowed(window=27, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
opp_allowed_81g_hist = trn.OpponentAverageAllowed(window=81, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)

# Features for the players in the upcoming lineups, one frame per window size.
opp_allowed_3g = trn.OpponentAverageAllowed(window=3, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)
opp_allowed_9g = trn.OpponentAverageAllowed(window=9, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)
opp_allowed_27g = trn.OpponentAverageAllowed(window=27, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)
opp_allowed_81g = trn.OpponentAverageAllowed(window=81, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

In [None]:
# Preview the 3-game opponent-allowed averages for the historical games.
opp_allowed_3g_hist.head()

In [None]:
# Preview the 3-game opponent-allowed averages for the upcoming lineups.
opp_allowed_3g.head()

### Average of stats allowed by opponent teams, above team average

In [None]:
# Counting stats handed to the opponent-above-average transformer.
stats = ["fta", "ftm", "fg2a", "fg2m", "fg3a", "fg3m", "non_scoring_pts", "minSeconds"]

# Ratios derived from the aggregated stats (presumably evaluated by the transformer after
# aggregation; confirm in `transformations`).
# `minSeconds` is measured in seconds, so it is converted to minutes here: the `*_per_min`
# features are then true per-minute rates, on the same scale as the per-minute columns
# computed on the raw box scores.
post_agg_funcs = {
    "ftp": lambda x: x["ftm"] / x["fta"],
    "fg2p": lambda x: x["fg2m"] / x["fg2a"],
    "fg3p": lambda x: x["fg3m"] / x["fg3a"],
    "fta_per_min": lambda x: x["fta"] / (x["minSeconds"] / 60),
    "ftm_per_min": lambda x: x["ftm"] / (x["minSeconds"] / 60),
    "fg2a_per_min": lambda x: x["fg2a"] / (x["minSeconds"] / 60),
    "fg2m_per_min": lambda x: x["fg2m"] / (x["minSeconds"] / 60),
    "fg3a_per_min": lambda x: x["fg3a"] / (x["minSeconds"] / 60),
    "fg3m_per_min": lambda x: x["fg3m"] / (x["minSeconds"] / 60),
    "non_scoring_pts_per_min": lambda x: x["non_scoring_pts"] / (x["minSeconds"] / 60),
}

# Features for the historical games, one frame per window size.
opp_allowed_abv_avg_3g_hist = trn.OpponentAboveAverageAllowed(window=3, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
opp_allowed_abv_avg_9g_hist = trn.OpponentAboveAverageAllowed(window=9, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
opp_allowed_abv_avg_27g_hist = trn.OpponentAboveAverageAllowed(window=27, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
opp_allowed_abv_avg_81g_hist = trn.OpponentAboveAverageAllowed(window=81, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)

# Features for the players in the upcoming lineups, one frame per window size.
opp_allowed_abv_avg_3g = trn.OpponentAboveAverageAllowed(window=3, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)
opp_allowed_abv_avg_9g = trn.OpponentAboveAverageAllowed(window=9, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)
opp_allowed_abv_avg_27g = trn.OpponentAboveAverageAllowed(window=27, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)
opp_allowed_abv_avg_81g = trn.OpponentAboveAverageAllowed(window=81, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

In [None]:
# Preview the 3-game opponent-above-average features for the historical games.
opp_allowed_abv_avg_3g_hist.head()

In [None]:
# Preview the 3-game opponent-above-average features for the upcoming lineups.
opp_allowed_abv_avg_3g.head()

### Average of stats allowed by opponent teams in same position

In [None]:
from sklearn.metrics import pairwise_distances

In [None]:
# Average stat line per primary position, with every stat rescaled so that the mean
# across the five positions equals 100. Rows are ordered C, PF, SF, SG, PG.
position_avgs = (
    denormalized_stats
    .loc[lambda games: games["position"].isin(["C", "PG", "PF", "SF", "SG"])]
    .groupby("position")[stats]
    .mean()
    .pipe(lambda means: means / means.mean() * 100)
    .loc[["C", "PF", "SF", "SG", "PG"]]
)

In [None]:
# Pairwise Euclidean distance between the position profiles, labelled by position on both axes.
position_distances = pd.DataFrame(
    data=pairwise_distances(position_avgs, metric="euclidean"),
    index=position_avgs.index,
    columns=position_avgs.index,
)
position_distances

### Change in starter position, relative to previous games

In [None]:
# Starter flag of every historical player-game, keyed by game, team and player.
starter_hist = denormalized_stats.loc[:, ["game_id", "team_id", "player_id", "starter"]]

In [None]:
# Expected starter flag of every player in the upcoming lineups.
starter = upcoming_lineups.loc[:, ["player_id", "starter"]]

In [None]:
# Previous starting rate over windows of 3, 9, 27 and 81 games, for the historical games.
avg_start_rate_3g_hist = trn.PrevStartingRate(window=3).historical_features(denormalized_stats)
avg_start_rate_9g_hist = trn.PrevStartingRate(window=9).historical_features(denormalized_stats)
avg_start_rate_27g_hist = trn.PrevStartingRate(window=27).historical_features(denormalized_stats)
avg_start_rate_81g_hist = trn.PrevStartingRate(window=81).historical_features(denormalized_stats)

# Same windows, computed for the players in the upcoming lineups.
avg_start_rate_3g = trn.PrevStartingRate(window=3).current_features(upcoming_lineups, denormalized_stats)
avg_start_rate_9g = trn.PrevStartingRate(window=9).current_features(upcoming_lineups, denormalized_stats)
avg_start_rate_27g = trn.PrevStartingRate(window=27).current_features(upcoming_lineups, denormalized_stats)
avg_start_rate_81g = trn.PrevStartingRate(window=81).current_features(upcoming_lineups, denormalized_stats)

In [None]:
# Preview the 9-game starting-rate features for the historical games.
avg_start_rate_9g_hist.head()

In [None]:
# Preview the 9-game starting-rate features for the upcoming lineups.
avg_start_rate_9g.head()

### Sum of teammates' historical average stats

In [None]:
# Counting stats handed to the current-teammate transformer.
stats = ["fta", "ftm", "fg2a", "fg2m", "fg3a", "fg3m", "non_scoring_pts", "minSeconds"]

# Ratios derived from the aggregated stats (presumably evaluated by the transformer after
# aggregation; confirm in `transformations`).
# `minSeconds` is measured in seconds, so it is converted to minutes here: the `*_per_min`
# features are then true per-minute rates, on the same scale as the per-minute columns
# computed on the raw box scores.
post_agg_funcs = {
    "ftp": lambda x: x["ftm"] / x["fta"],
    "fg2p": lambda x: x["fg2m"] / x["fg2a"],
    "fg3p": lambda x: x["fg3m"] / x["fg3a"],
    "fta_per_min": lambda x: x["fta"] / (x["minSeconds"] / 60),
    "ftm_per_min": lambda x: x["ftm"] / (x["minSeconds"] / 60),
    "fg2a_per_min": lambda x: x["fg2a"] / (x["minSeconds"] / 60),
    "fg2m_per_min": lambda x: x["fg2m"] / (x["minSeconds"] / 60),
    "fg3a_per_min": lambda x: x["fg3a"] / (x["minSeconds"] / 60),
    "fg3m_per_min": lambda x: x["fg3m"] / (x["minSeconds"] / 60),
    "non_scoring_pts_per_min": lambda x: x["non_scoring_pts"] / (x["minSeconds"] / 60),
}

# For every window size: `*_hist` holds features for the historical games,
# the unsuffixed frame holds features for the players in the upcoming lineups.
current_teammate_avg_3g_hist = trn.CurrentTeammateAvgStats(window=3, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
current_teammate_avg_3g = trn.CurrentTeammateAvgStats(window=3, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

current_teammate_avg_9g_hist = trn.CurrentTeammateAvgStats(window=9, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
current_teammate_avg_9g = trn.CurrentTeammateAvgStats(window=9, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

current_teammate_avg_27g_hist = trn.CurrentTeammateAvgStats(window=27, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
current_teammate_avg_27g = trn.CurrentTeammateAvgStats(window=27, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

current_teammate_avg_81g_hist = trn.CurrentTeammateAvgStats(window=81, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
current_teammate_avg_81g = trn.CurrentTeammateAvgStats(window=81, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

In [None]:
# Preview the 9-game current-teammate features for the historical games.
current_teammate_avg_9g_hist.head()

In [None]:
# Preview the 9-game current-teammate features for the upcoming lineups.
current_teammate_avg_9g.head()

### Average of teammates' in-game stats

In [None]:
# Counting stats handed to the historical-teammate transformer.
stats = ["fta", "ftm", "fg2a", "fg2m", "fg3a", "fg3m", "non_scoring_pts", "minSeconds"]

# Ratios derived from the aggregated stats (presumably evaluated by the transformer after
# aggregation; confirm in `transformations`).
# `minSeconds` is measured in seconds, so it is converted to minutes here: the `*_per_min`
# features are then true per-minute rates, on the same scale as the per-minute columns
# computed on the raw box scores.
post_agg_funcs = {
    "ftp": lambda x: x["ftm"] / x["fta"],
    "fg2p": lambda x: x["fg2m"] / x["fg2a"],
    "fg3p": lambda x: x["fg3m"] / x["fg3a"],
    "fta_per_min": lambda x: x["fta"] / (x["minSeconds"] / 60),
    "ftm_per_min": lambda x: x["ftm"] / (x["minSeconds"] / 60),
    "fg2a_per_min": lambda x: x["fg2a"] / (x["minSeconds"] / 60),
    "fg2m_per_min": lambda x: x["fg2m"] / (x["minSeconds"] / 60),
    "fg3a_per_min": lambda x: x["fg3a"] / (x["minSeconds"] / 60),
    "fg3m_per_min": lambda x: x["fg3m"] / (x["minSeconds"] / 60),
    "non_scoring_pts_per_min": lambda x: x["non_scoring_pts"] / (x["minSeconds"] / 60),
}

# For every window size: `*_hist` holds features for the historical games,
# the unsuffixed frame holds features for the players in the upcoming lineups.
historical_teammate_avg_3g_hist = trn.HistoricalTeammateStats(window=3, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
historical_teammate_avg_3g = trn.HistoricalTeammateStats(window=3, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

historical_teammate_avg_9g_hist = trn.HistoricalTeammateStats(window=9, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
historical_teammate_avg_9g = trn.HistoricalTeammateStats(window=9, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

historical_teammate_avg_27g_hist = trn.HistoricalTeammateStats(window=27, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
historical_teammate_avg_27g = trn.HistoricalTeammateStats(window=27, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

historical_teammate_avg_81g_hist = trn.HistoricalTeammateStats(window=81, stats=stats, post_agg_stats=post_agg_funcs).historical_features(denormalized_stats)
historical_teammate_avg_81g = trn.HistoricalTeammateStats(window=81, stats=stats, post_agg_stats=post_agg_funcs).current_features(upcoming_lineups, denormalized_stats)

In [None]:
# Preview the 9-game historical-teammate features for the historical games.
historical_teammate_avg_9g_hist.head()

In [None]:
# Preview the 9-game historical-teammate features for the upcoming lineups.
historical_teammate_avg_9g.head()

### Days since last game

In [None]:
# Rest before every historical game, per player:
#   last_game    - days since the player's previous game, capped at 5 (NaN for the first game)
#   back_to_back - 1 when the previous game was at most 1.5 days earlier
#   stale        - 1 when the capped gap reaches 5 days
days_since_last_game_hist = (
    denormalized_stats
    .sort_values(["player_id", "date"])
    .assign(
        last_game=lambda games: (
            games.groupby("player_id")["date"]
            .diff()
            .map(lambda gap: gap.days + gap.seconds / 60 / 60 / 24)
            .clip(upper=5)
        )
    )
    .assign(
        back_to_back=lambda games: (games["last_game"] <= 1.5).astype(int),
        stale=lambda games: (games["last_game"] >= 5).astype(int),
    )
    .loc[:, ["game_id", "team_id", "player_id", "last_game", "back_to_back", "stale"]]
)

In [None]:
# Rest features for the players in the upcoming lineups: take the row of each player's
# most recent logged game and reuse its `last_game` value.
# NOTE(review): `last_game` here is the gap *before* the player's latest played game,
# not the gap between that game and the upcoming one, whereas the historical feature
# measures the rest before the game being predicted. Confirm this is intended.
days_since_last_game = (
    denormalized_stats
    .sort_values(["player_id", "date"])
    # Days since the player's previous game, capped at 5.
    .assign(last_game=lambda x: np.minimum(5, x.groupby(["player_id"])["date"].diff(1).map(lambda y: y.days + y.seconds / 60 / 60 / 24)))
    .groupby(["player_id"])
    # Keep only the latest game of every player.
    .apply(lambda x: x.nlargest(1, "date"))
    .reset_index(drop=True)
    # Restrict to players that appear in the upcoming lineups.
    .query("player_id in @upcoming_lineups.player_id")
    .assign(back_to_back=lambda x: x["last_game"].le(1.5).astype(int))
    .assign(stale=lambda x: x["last_game"].ge(5).astype(int))
    [["player_id", "last_game", "back_to_back", "stale"]]
)

In [None]:
# Preview the rest features for the historical games.
days_since_last_game_hist.head()

In [None]:
# Preview the rest features for the upcoming lineups.
days_since_last_game.head()

## Merge features

In [None]:
# Key that identifies one player-game, and the columns the model is asked to predict.
index = ["player_id", "game_id", "team_id"]
target = [
    "fta", "ftm", "fg2a", "fg2m", "fg3a", "fg3m", "non_scoring_pts", "minSeconds",
    "ftp", "fg2p", "fg3p", "fta_per_min", "ftm_per_min", "fg2a_per_min", "fg2m_per_min",
    "fg3a_per_min", "fg3m_per_min", "non_scoring_pts_per_min"
]

# (frame, join keys) pairs, left-merged onto the targets in exactly this order.
historical_feature_frames = [
    (rolling_avg_3g_hist, index),
    (rolling_avg_9g_hist, index),
    (rolling_avg_27g_hist, index),
    (rolling_avg_81g_hist, index),
    (opp_allowed_3g_hist, index),
    (opp_allowed_9g_hist, index),
    (opp_allowed_27g_hist, index),
    (opp_allowed_81g_hist, index),
    (opp_allowed_abv_avg_3g_hist, index),
    (opp_allowed_abv_avg_9g_hist, index),
    (opp_allowed_abv_avg_27g_hist, index),
    (opp_allowed_abv_avg_81g_hist, index),
    (historical_teammate_avg_3g_hist, index),
    (historical_teammate_avg_9g_hist, index),
    (historical_teammate_avg_27g_hist, index),
    (historical_teammate_avg_81g_hist, index),
    (current_teammate_avg_3g_hist, index),
    (current_teammate_avg_9g_hist, index),
    (current_teammate_avg_27g_hist, index),
    (current_teammate_avg_81g_hist, index),
    (starter_hist, index),
    (avg_start_rate_3g_hist, index),
    (avg_start_rate_9g_hist, index),
    (avg_start_rate_27g_hist, index),
    (avg_start_rate_81g_hist, index),
    (dfs_info, index),
    (players.drop(columns=["draft_year", "birth_date"]), ["player_id"]),
    (days_since_last_game_hist, index),
]

# Start from the targets plus the fantasy score, then attach every feature family.
features_historical = (
    denormalized_stats[index + target + ["date", "home_game"]]
    .assign(fanduel_score=trn.fanduel_score)
)
for _frame, _keys in historical_feature_frames:
    features_historical = features_historical.merge(_frame, how="left", on=_keys)

features_historical = (
    features_historical
    # Skip each player's first 10 games with a team.
    .assign(game_rank=lambda merged: merged.groupby(["player_id", "team_id"])["date"].rank())
    .query("game_rank > 10")
    .drop(columns=["game_rank"])
    # Keep rows whose 9-game average of `minSeconds` is at least 600.
    .query("minSeconds_9g_avg >= 600")
)

In [None]:
# Key that identifies one player-game.
index = ["player_id", "game_id", "team_id"]

# (frame, join keys) pairs, left-merged onto the upcoming lineups in exactly this order.
# Most current-feature frames are joined on the player alone; the current-teammate
# and DFS frames are joined on the full player-game key.
current_feature_frames = [
    (rolling_avg_3g, ["player_id"]),
    (rolling_avg_9g, ["player_id"]),
    (rolling_avg_27g, ["player_id"]),
    (rolling_avg_81g, ["player_id"]),
    (opp_allowed_3g, ["player_id"]),
    (opp_allowed_9g, ["player_id"]),
    (opp_allowed_27g, ["player_id"]),
    (opp_allowed_81g, ["player_id"]),
    (opp_allowed_abv_avg_3g, ["player_id"]),
    (opp_allowed_abv_avg_9g, ["player_id"]),
    (opp_allowed_abv_avg_27g, ["player_id"]),
    (opp_allowed_abv_avg_81g, ["player_id"]),
    (historical_teammate_avg_3g, ["player_id"]),
    (historical_teammate_avg_9g, ["player_id"]),
    (historical_teammate_avg_27g, ["player_id"]),
    (historical_teammate_avg_81g, ["player_id"]),
    (current_teammate_avg_3g, index),
    (current_teammate_avg_9g, index),
    (current_teammate_avg_27g, index),
    (current_teammate_avg_81g, index),
    (starter, ["player_id"]),
    (avg_start_rate_3g, ["player_id"]),
    (avg_start_rate_9g, ["player_id"]),
    (avg_start_rate_27g, ["player_id"]),
    (avg_start_rate_81g, ["player_id"]),
    (dfs_info, index),
    (players.drop(columns=["draft_year", "birth_date"]), ["player_id"]),
    (days_since_last_game, ["player_id"]),
]

features = upcoming_lineups[index + ["home_game"]]
for _frame, _keys in current_feature_frames:
    features = features.merge(_frame, how="left", on=_keys)

# Keep players whose 9-game average of `minSeconds` is at least 600.
features = features.query("minSeconds_9g_avg >= 600")

In [None]:
# Preview the merged historical features and targets.
features_historical.head()

In [None]:
# Preview the merged features for the upcoming lineups.
features.head()

## Check for duplicates

In [None]:
# Sanity check: rows that repeat an earlier (player, game, team) key after all the merges.
# Any row listed here means one of the merged frames was not unique on its join key.
features_historical.loc[features_historical[index].duplicated()]

In [None]:
# Sanity check: players that appear more than once in the upcoming-lineup features.
features.loc[features["player_id"].duplicated()]

## Save

In [None]:
# Persist the historical features and targets to disk as a pickle file.
features_historical.to_pickle("data/historical_features_and_targets.pkl")

In [None]:
# Persist the features of the upcoming lineups to disk as a pickle file.
features.to_pickle("data/todays_lineup_features.pkl")