# Daily Fantasy Model

Build a model to predict daily fantasy scores and generate a lineup for upcoming games based on the model.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from time import sleep
from datetime import date

import pandas as pd
import numpy as np
import pulp as plp
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, RidgeCV, Ridge
from sklearn.metrics import r2_score
from xgboost import XGBRegressor
import matplotlib.pyplot as plt 

In [None]:
import transformations as trn
import lineupselector as lns

In [None]:
# Show up to 999 columns when a DataFrame is displayed, so wide feature
# tables are not truncated in cell output.
pd.set_option("display.max_columns", 999)

## Model build

### Load features

In [None]:
# Historical feature/target table used for training and validation.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
historical_features_path = "data/historical_features_and_targets.pkl"
features = pd.read_pickle(historical_features_path)

In [None]:
# Feature table for today's slate; predictions and the lineup selection are
# added to this frame further down.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
upcoming_features_path = "data/todays_lineup_features.pkl"
features_upcoming = pd.read_pickle(upcoming_features_path)

### Training and validation data

In [None]:
# Identifier columns: they locate a row but are never model inputs.
index = [
    "player_id",
    "game_id",
    "team_id",
    "date",
]

# Every column treated as a target. All of them are dropped from the model
# inputs by the preprocessing pipeline; a subset is modelled individually.
target = (
    # un-suffixed stat columns
    ["fta", "ftm", "fg2a", "fg2m", "fg3a", "fg3m", "non_scoring_pts", "minSeconds"]
    # "*p" columns
    + ["ftp", "fg2p", "fg3p"]
    # "*_per_min" columns
    + [
        "fta_per_min",
        "ftm_per_min",
        "fg2a_per_min",
        "fg2m_per_min",
        "fg3a_per_min",
        "fg3m_per_min",
        "non_scoring_pts_per_min",
    ]
    # overall fantasy score
    + ["fanduel_score"]
)

In [None]:
# Fixed seed so the shuffle below (and therefore which rows of the boundary
# game fall on each side of the split) is identical on every Restart & Run All.
SPLIT_SEED = 42

# Rows are shuffled, then ordered by game_id (descending). With shuffle=False
# the first 80% of that ordering becomes `training` and the last 20% `test`.
# NOTE(review): the descending sort puts the highest game_ids in `training`.
# If game_id increases over time, the model is validated on games older than
# the ones it was trained on -- confirm this is intended.
training, test = (
    # Independent copies: prediction columns are assigned onto both frames
    # later, which may otherwise raise SettingWithCopyWarning.
    split.copy()
    for split in train_test_split(
        features
        .sample(frac=1, random_state=SPLIT_SEED)
        .sort_values(by=["game_id"], ascending=False),
        test_size=0.2,
        shuffle=False,
    )
)

In [None]:
# Columns that must not reach the model: identifiers, every target column,
# and the descriptive player fields.
non_feature_columns = index + target + ["position", "firstName", "lastName"]

# Steps run in the order listed. The transformers come from the project-local
# `transformations` module (imported as `trn`).
preprocessing_steps = [
    ("drop_columns", trn.DropColumns(columns=non_feature_columns)),
    ("missingflag", trn.PandasMissingIndicator()),
    ("imputer", trn.PandasImputer()),
    ("scaler", trn.PandasStandardScalar()),
    ("reduction", trn.PandasVarianceThreshold(threshold=0)),
]
preprocessor = Pipeline(steps=preprocessing_steps)

In [None]:
# Fit the preprocessing on the training split only, then apply the fitted
# pipeline unchanged to the held-out split and to today's slate.
training_inputs = preprocessor.fit_transform(training)
test_inputs, prediction_inputs = (
    preprocessor.transform(frame) for frame in (test, features_upcoming)
)

### Build models

Build models for the total FanDuel score, as well as for the per-minute stats and the number of seconds played, and compare the results.

In [None]:
# Candidate regularisation strengths: 10**-3, 10**-2, ..., 10**6.
ridge_alphas = [10**exponent for exponent in range(-3, 7)]

# Targets that get their own model: seconds played, the per-minute
# components of the composite score, and the FanDuel score itself.
modelled_stats = [
    "minSeconds",
    "ftm_per_min",
    "fg2m_per_min",
    "fg3m_per_min",
    "non_scoring_pts_per_min",
    "fanduel_score",
]

# One cross-validated ridge regression per target, all on the same inputs.
models = {
    stat: RidgeCV(alphas=ridge_alphas).fit(training_inputs, training[stat])
    for stat in modelled_stats
}

In [None]:
# Attach a "<stat>_prediction" column for every fitted model to the training
# split, the test split and today's slate.
for stat, model in models.items():
    prediction_column = f"{stat}_prediction"
    for frame, frame_inputs in (
        (training, training_inputs),
        (test, test_inputs),
        (features_upcoming, prediction_inputs),
    ):
        frame[prediction_column] = model.predict(frame_inputs)

In [None]:
def fanduel_score_composite(x, suffix=""):
    """Rebuild a FanDuel score from per-minute rates and seconds played.

    Args:
        x: Mapping-like object (for example a DataFrame or dict) indexable by
            the keys "ftm_per_min", "fg2m_per_min", "fg3m_per_min",
            "non_scoring_pts_per_min" and "minSeconds", each with ``suffix``
            appended.
        suffix: String appended to every key, e.g. "_prediction" to read the
            model outputs instead of the observed values.

    Returns:
        The per-minute points rate (made free throws count 1, two-pointers 2,
        three-pointers 3, plus the non-scoring rate) multiplied by the
        minutes played (``minSeconds / 60``).
    """
    points_per_minute = (
        x["ftm_per_min" + suffix]
        + 2 * x["fg2m_per_min" + suffix]
        + 3 * x["fg3m_per_min" + suffix]
        + x["non_scoring_pts_per_min" + suffix]
    )
    minutes_played = x["minSeconds" + suffix] / 60
    return points_per_minute * minutes_played

# Combine the per-stat predictions into a composite FanDuel score prediction
# on each of the three frames.
for scored_frame in (training, test, features_upcoming):
    scored_frame["fanduel_score_composite_prediction"] = fanduel_score_composite(
        scored_frame, suffix="_prediction"
    )

In [None]:
# Reuse the fanduel_score model already fitted in `models` rather than
# fitting an identical RidgeCV (same alphas, inputs and target) a second time.
# `model` is the estimator inspected in the feature-importance plot below.
model = models["fanduel_score"]

In [None]:
# R^2 of the model that predicts fanduel_score directly.
direct_training_r2 = r2_score(training["fanduel_score"], training["fanduel_score_prediction"])
direct_testing_r2 = r2_score(test["fanduel_score"], test["fanduel_score_prediction"])
print("Model score on training: {}".format(direct_training_r2))
print("Model score on testing: {}".format(direct_testing_r2))

In [None]:
# R^2 of the composite prediction (per-minute rates x minutes played),
# measured against the same observed fanduel_score.
composite_training_r2 = r2_score(
    training["fanduel_score"], training["fanduel_score_composite_prediction"]
)
composite_testing_r2 = r2_score(
    test["fanduel_score"], test["fanduel_score_composite_prediction"]
)
print("Model score on training: {}".format(composite_training_r2))
print("Model score on testing: {}".format(composite_testing_r2))

### Feature importance

In [None]:
# Very wide figure: the chart draws one bar per input column.
plt.figure(figsize=(50, 5))

# Signed coefficient next to its magnitude, labelled by input column name.
coefficient_table = pd.DataFrame(
    {"coef": model.coef_, "abs_coef": abs(model.coef_)},
    index=training_inputs.columns,
)

# Order by magnitude (largest first) but plot the signed value.
coefficient_table.sort_values(by=["abs_coef"], ascending=False)["coef"].plot.bar()

## Examine model calibration

In [None]:
# Split the training rows into 25 equal-frequency bins of the predicted score
# and record the signed error (prediction minus actual) for each row.
training_binned = training.assign(
    prediction_bin=lambda frame: pd.qcut(frame["fanduel_score_prediction"], 25),
    residual=lambda frame: frame["fanduel_score_prediction"] - frame["fanduel_score"],
)

# Mean residual per bin; a well-calibrated model stays close to zero.
training_bin_residuals = training_binned.groupby("prediction_bin")["residual"].mean()
training_bin_residuals.plot.bar(title="Training residuals by prediction bins")

In [None]:
# Same calibration check on the held-out rows: 25 equal-frequency bins of the
# predicted score, with the signed error (prediction minus actual) per row.
test_binned = test.assign(
    prediction_bin=lambda frame: pd.qcut(frame["fanduel_score_prediction"], 25),
    residual=lambda frame: frame["fanduel_score_prediction"] - frame["fanduel_score"],
)

# Mean residual per bin; a well-calibrated model stays close to zero.
test_bin_residuals = test_binned.groupby("prediction_bin")["residual"].mean()
test_bin_residuals.plot.bar(title="Test residuals by prediction bins")

### Manually override NBA player positions

Some players are listed at different positions on MySportsFeeds than on FanDuel. Note that FanDuel player positions may change from day to day.

In [None]:
# player_id -> position to use in place of the one in the feature table.
# A mapping holds each player at most once; the original chain listed player
# 9170 twice with the same value.
position_overrides = {
    9151: "C",
    9476: "PF",
    15208: "SG",
    9170: "SG",
    10090: "SF",
    9107: "SF",
    9369: "SF",
    9244: "C",
    17286: "SF",
    9475: "SG",
    13786: "SG",
    9507: "SF",
    13791: "PG",
    15211: "PF",
}

# Apply the overrides one player at a time, in the order listed above.
for override_player_id, override_position in position_overrides.items():
    features_upcoming = features_upcoming.pipe(
        trn.update_position, override_player_id, override_position
    )

# NaN never equals itself, so this keeps only rows whose position is set.
features_upcoming = features_upcoming.query("position == position")

Manually add players to a blocklist to prevent their inclusion in the optimal lineup, e.g. if there is breaking news of an injury, a minutes restriction, etc.

In [None]:
# player_ids that must not appear in today's lineup.
blocklisted = [
    9523,   # Bradley Beal
    9157,   # Kyrie Irving
    9285,   # D'Angelo Russell
    9354,   # Anthony Davis
    9250,   # Paul George
    17286,  # Kevin Porter
    10120,  # Henry Ellenson
    9158,   # LeBron James
    9286,   # Larry Nance
]

# 1 for blocklisted players, 0 for everyone else.
is_blocklisted = features_upcoming["player_id"].isin(blocklisted)
features_upcoming["blocklisted"] = is_blocklisted.astype(int)

## Fantasy lineup optimization

In [None]:
# Choose today's lineup, using the directly predicted FanDuel score as the
# optimisation target; the result is stored in a `selection` column.
lineup_optimizer = lns.FanDuelOptimizer(target="fanduel_score_prediction")
features_upcoming["selection"] = lineup_optimizer.add_lineup_selection(features_upcoming)

In [None]:
# Sanity check: show the rows flagged as blocklisted for today's slate.
blocklisted_rows = features_upcoming.query("blocklisted == 1")
blocklisted_rows

In [None]:
# Total predicted score and total salary of the selected lineup.
selected_lineup = features_upcoming.query("selection == 1")
selected_lineup["fanduel_score_prediction"].sum(), selected_lineup["salary"].sum()

In [None]:
# The selected lineup, ordered by position, with the columns needed to enter it.
lineup_columns = [
    "player_id",
    "game_id",
    "team_id",
    "firstName",
    "lastName",
    "position",
    "salary",
    "fanduel_score_prediction",
]
features_upcoming.query("selection == 1").sort_values(by=["position"])[lineup_columns]

In [None]:
# Ten highest predicted scores on today's slate, selected or not.
top_prediction_columns = [
    "player_id",
    "game_id",
    "team_id",
    "firstName",
    "lastName",
    "fanduel_score_prediction",
]
ranked_by_prediction = features_upcoming.sort_values(
    by=["fanduel_score_prediction"], ascending=False
)
ranked_by_prediction.head(10)[top_prediction_columns]

## Historical performance validation

In [None]:
def _with_lineup_selection(day_frame):
    """Return one game day's rows with a `selection` column from a fresh optimizer."""
    daily_optimizer = lns.FanDuelOptimizer(target="fanduel_score_prediction")
    # `assign` calls the bound method with the frame being assigned to.
    return day_frame.assign(selection=daily_optimizer.add_lineup_selection)


# Held-out rows with a known salary ("salary == salary" drops NaN salaries),
# none of them blocklisted, tagged with their calendar day (YYYYMMDD).
test_with_salary = (
    test
    .query("salary == salary")
    .assign(
        blocklisted=0,
        game_day=lambda frame: frame["date"].dt.strftime("%Y%m%d"),
    )
)

# Run the lineup optimizer independently on each game day.
selections = (
    test_with_salary
    .groupby(["game_day"])
    .apply(_with_lineup_selection)
    .reset_index(drop=True)
)

In [None]:
# Per game day: total predicted and actual score of the selected lineup, plus
# a 0/1 flag per threshold (260, 270, ..., 400) marking whether the actual
# total beat it. `describe()` then summarises across days, so the mean of each
# `above_<limit>` column is the share of days that beat that threshold.
(
    selections
    .query("selection == 1")
    .groupby(["game_day"])[["fanduel_score_prediction", "fanduel_score"]]
    .sum()
    .assign(**{
        # `limit=limit` binds the current threshold when the lambda is created.
        # Without it every lambda reads `limit` only when `assign` calls it,
        # after the comprehension has finished, so all columns would compare
        # against the final value (400).
        f"above_{limit}": lambda totals, limit=limit: (totals["fanduel_score"] > limit).astype(int)
        for limit in range(260, 401, 10)
    })
    .describe()
)

In [None]:
# Per game day: total predicted and actual score of the selected lineup.
daily_lineup_totals = (
    selections
    .query("selection == 1")
    .groupby(["game_day"])[["fanduel_score_prediction", "fanduel_score"]]
    .sum()
)

# Signed error (predicted minus actual), cut into equal-frequency bins.
# NOTE(review): the column is named "diff_deciles" but qcut is asked for 20
# bins, not 10 -- confirm which was intended.
daily_lineup_errors = daily_lineup_totals.assign(
    diff=lambda totals: totals["fanduel_score_prediction"] - totals["fanduel_score"],
    diff_deciles=lambda totals: pd.qcut(totals["diff"], 20),
)

# Number of game days per error bin, ordered from lowest bin to highest.
daily_lineup_errors["diff_deciles"].value_counts().sort_index()