# Daily Fantasy Model

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from time import sleep
from datetime import date

import pandas as pd
import numpy as np
import pulp as plp
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, RidgeCV, Ridge
from sklearn.metrics import r2_score
from xgboost import XGBRegressor
import matplotlib.pyplot as plt 

In [None]:
import transformations as trn
import lineupselector as lns

In [None]:
# Let pandas display up to 999 columns so wide feature frames are not elided.
pd.set_option("display.max_columns", 999)

## Model build

### Load features

In [None]:
# Historical feature/target table; it is split into training and test sets below.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files
# produced by a trusted pipeline.
features = pd.read_pickle("historical_features_and_targets.pkl")

In [None]:
# Feature table for the slate to be predicted (presumably today's games, going by
# the file name); it is scored by the fitted model and fed to the lineup optimizer.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files
# produced by a trusted pipeline.
features_upcoming = pd.read_pickle("todays_lineup_features.pkl")

### Training and validation data

In [None]:
# Identifier columns: they label a row but are dropped before modelling.
index = [
    "player_id",
    "game_id",
    "team_id",
    "date",
]

# Outcome columns: also dropped from the model inputs. Only "fanduel_score" is
# used as the regression target further down.
target = [
    "fg3m",
    "reb",
    "ast",
    "pts",
    "tov",
    "stl",
    "blk",
    "ftm",
    "fgm",
    "fanduel_score",
]

In [None]:
# Chronological hold-out: order rows oldest -> newest and, because shuffle=False
# makes train_test_split cut the frame in order, keep the first 80% for training
# and the most recent 20% for testing. The model is therefore evaluated (and the
# lineups back-tested) on games played after the ones it was fitted on.
#
# The previous version sorted newest-first, which trained on the latest games and
# tested on the oldest ones, and it shuffled the rows with an unseeded
# sample(frac=1) before an unstable sort, so the split changed from run to run.
# Sorting on two keys is stable, which makes the split reproducible.
training, test = train_test_split(
    features.sort_values(by=["date", "game_id"]),
    test_size=0.2,
    shuffle=False,
)

In [None]:
# training = (
#     training
#     .assign(avg_fd_score=lambda x: x.groupby(["player_id"])["fanduel_score"].transform("mean"))
#     .query("avg_fd_score > 15")
#     .drop(columns=["avg_fd_score"])
# )

In [None]:
# Columns that must not reach the model: row identifiers, every outcome column
# and the free-text player descriptors.
non_feature_columns = index + target + ["position", "firstName", "lastName"]

# Steps run in the order listed. The transformers come from the project's
# `transformations` module; judging by their names they drop columns, flag and
# impute missing values, standardise, and remove zero-variance columns
# (NOTE(review): confirm against transformations.py).
preprocessing_steps = [
    ("drop_columns", trn.DropColumns(columns=non_feature_columns)),
    ("missingflag", trn.PandasMissingIndicator()),
    ("imputer", trn.PandasImputer()),
    ("scaler", trn.PandasStandardScalar()),
    ("reduction", trn.PandasVarianceThreshold(threshold=0)),
]
preprocessor = Pipeline(steps=preprocessing_steps)

In [None]:
# Fit the preprocessing pipeline on the training rows only ...
training_inputs = preprocessor.fit_transform(training)

# ... then reuse the already-fitted pipeline, unchanged, for the held-out rows
# and for the upcoming slate (evaluated in that order).
test_inputs, prediction_inputs = (
    preprocessor.transform(frame) for frame in (test, features_upcoming)
)

### Build models

In [None]:
# Candidate regularisation strengths: powers of ten from 1e-3 up to 1e6.
ridge_alphas = [10**exponent for exponent in range(-3, 7)]

# RidgeCV picks the best alpha by cross-validation while fitting.
model = RidgeCV(alphas=ridge_alphas)
model.fit(training_inputs, training["fanduel_score"])

# `score` is the estimator's default regression score on each data set.
training_score = model.score(training_inputs, training["fanduel_score"])
testing_score = model.score(test_inputs, test["fanduel_score"])

print("Model score on training: {} with alpha: {}".format(training_score, model.alpha_))
print("Model score on testing: {}".format(testing_score))

In [None]:
# Bar chart of the fitted coefficients, ordered by decreasing absolute value.
# The very wide figure leaves room for one bar per input column.
plt.figure(figsize=(50, 5))

coefficient_table = pd.DataFrame(
    {"coef": model.coef_, "abs_coef": abs(model.coef_)},
    index=training_inputs.columns,
)
coefficient_table.sort_values(by="abs_coef", ascending=False)["coef"].plot.bar()

### Add fanduel predictions

In [None]:
# Attach the model's predicted FanDuel score to each frame, pairing every frame
# with the preprocessed inputs that were derived from it.
for scored_frame, model_inputs in (
    (training, training_inputs),
    (test, test_inputs),
    (features_upcoming, prediction_inputs),
):
    scored_frame["fanduel_score_prediction"] = model.predict(model_inputs)

## Model calibration

In [None]:
# Calibration check on the training rows: cut the predictions into 25
# equal-frequency bins and plot the mean residual (prediction minus actual) per
# bin. Bars far from zero show where the model over- or under-predicts.
(
    training
    .assign(
        residual=training["fanduel_score_prediction"] - training["fanduel_score"],
        prediction_bin=pd.qcut(training["fanduel_score_prediction"], q=25),
    )
    .groupby("prediction_bin")
    ["residual"]
    .mean()
    .plot.bar(title="Training residuals by prediction bins")
)

In [None]:
# Calibration check on the held-out rows: cut the predictions into 25
# equal-frequency bins and plot the mean residual (prediction minus actual) per
# bin. Bars far from zero show where the model over- or under-predicts.
(
    test
    .assign(
        residual=test["fanduel_score_prediction"] - test["fanduel_score"],
        prediction_bin=pd.qcut(test["fanduel_score_prediction"], q=25),
    )
    .groupby("prediction_bin")
    ["residual"]
    .mean()
    .plot.bar(title="Test residuals by prediction bins")
)

## Fantasy lineup optimization

In [None]:
# features_upcoming.loc[lambda x: x["player_id"] == 9244, "position"] = "C"
# features_upcoming.loc[lambda x: x["player_id"] == 9177, "position"] = "C"


In [None]:
# Manual position corrections for the upcoming slate: player_id -> position.
# To correct another player, add one entry here instead of copying six lines.
position_overrides = {
    17195: "PF",
    9420: "PF",
    15222: "C",
    9100: "C",
}

# Indicator columns kept in sync with "position": exactly one is set to 1 per
# overridden player and the others to 0.
position_flag_columns = [
    "position_PG",
    "position_SG",
    "position_SF",
    "position_PF",
    "position_C",
]

# NOTE(review): when the notebook is run top to bottom, prediction_inputs and
# fanduel_score_prediction have already been computed by this point, so these
# overrides do not change the predictions unless those cells are re-run.
for player_id, position in position_overrides.items():
    is_player = features_upcoming["player_id"] == player_id
    features_upcoming.loc[is_player, "position"] = position
    for flag_column in position_flag_columns:
        features_upcoming.loc[is_player, flag_column] = int(
            flag_column == "position_" + position
        )

In [None]:
# player_ids to flag in the "blacklisted" column (presumably excluded by the
# optimizer — confirm in lineupselector). Empty by default; an earlier populated
# version is kept for reference:
#     features_upcoming.query("game_id in (53379,53380)")["player_id"].to_list()
#     + [15212, 15262, 13753, 9091, 13742,9354,9346]
blacklisted = []

# 1 for flagged players, 0 for everyone else.
features_upcoming["blacklisted"] = features_upcoming["player_id"].isin(blacklisted).astype(int)

# Ask the optimizer for a lineup based on the predicted scores and store its
# per-player result in "selection".
lineup_optimizer = lns.FanDuelOptimizer(target="fanduel_score_prediction")
features_upcoming["selection"] = lineup_optimizer.add_lineup_selection(features_upcoming)

In [None]:
# (total predicted score, total salary) of the selected lineup.
tuple(
    features_upcoming.query("selection == 1")[column].sum()
    for column in ("fanduel_score_prediction", "salary")
)

In [None]:
# The selected lineup, ordered by position, with the columns needed to enter it.
(
    features_upcoming
    .loc[lambda x: x["selection"] == 1]
    .sort_values(by=["position"])
    [[
        "player_id",
        "game_id",
        "team_id",
        "firstName",
        "lastName",
        "position",
        "salary",
        "fanduel_score_prediction",
    ]]
)

In [None]:
# Ten highest predicted scores on the upcoming slate, selected or not.
(
    features_upcoming
    [["player_id", "game_id", "team_id", "firstName", "lastName", "fanduel_score_prediction"]]
    .sort_values(by="fanduel_score_prediction", ascending=False)
    .head(10)
)

## Historical performance validation

In [None]:
def _add_daily_selection(day_slate):
    """Return one game day's rows with the optimizer's "selection" column added.

    A fresh optimizer is created for every day. `assign` calls the bound method
    with the day's frame and stores whatever it returns as the new column.
    """
    optimizer = lns.FanDuelOptimizer(target="fanduel_score_prediction")
    return day_slate.assign(selection=optimizer.add_lineup_selection)


# Back-test: build one lineup per game day from the held-out rows.
selections = (
    test
    .query("salary == salary")  # NaN != NaN, so this keeps only rows with a salary
    .assign(
        blacklisted=0,  # nobody is excluded in the back-test
        game_day=lambda x: x["date"].dt.strftime("%Y%m%d"),
    )
    .groupby(["game_day"])
    .apply(_add_daily_selection)
    .reset_index(drop=True)
)

In [None]:
# Predicted and actual score totals of the selected lineup for each game day.
daily_totals = (
    selections
    .loc[lambda x: x["selection"] == 1]
    .groupby(["game_day"])
    [["fanduel_score_prediction", "fanduel_score"]]
    .sum()
)

# One 0/1 column per cut-off (260, 270, ..., 320). The mean of each column in
# the describe() output is the share of days whose actual total beat it.
threshold_flags = {
    "above_{}".format(threshold): (daily_totals["fanduel_score"] > threshold).astype(int)
    for threshold in range(260, 321, 10)
}

daily_totals.assign(**threshold_flags).describe()

In [None]:
# Distribution of the daily lineup error (predicted total minus actual total).
daily_scores = (
    selections
    .loc[lambda x: x["selection"] == 1]
    .groupby(["game_day"])
    [["fanduel_score_prediction", "fanduel_score"]]
    .sum()
)
daily_scores["diff"] = daily_scores["fanduel_score_prediction"] - daily_scores["fanduel_score"]

# NOTE(review): despite the column name, the errors are cut into 20
# equal-frequency bins, not 10.
daily_scores["diff_deciles"] = pd.qcut(daily_scores["diff"], 20)

# Number of game days per error bin, ordered from the lowest to the highest bin.
daily_scores["diff_deciles"].value_counts().sort_index()