In [None]:
from fantasyfootball.data import FantasyData
from fantasyfootball.benchmarking import filter_to_prior_week, process_benchmark_preds
from fantasyfootball.data import FantasyData
from fantasyfootball.features import FantasyFeatures

# fantasy_data = FantasyData(season_year_start=2015, season_year_end=2021)

In [None]:
# Extract the dataset from `fantasy_data` and take the name of its last
# column as our outcome variable (yvar).
# NOTE(review): `fantasy_data` is not assigned in the visible cells (the
# FantasyData(...) call above is commented out) -- confirm it is created
# before this cell runs.
fantasy_df = fantasy_data.data
yvar = fantasy_df.columns[-1]

In [None]:
# peek at the last few observations
fantasy_df.tail()

In [None]:
# --- forecasting parameters ---
# positions to loop over
player_positions = ["QB", "RB", "WR", "TE"]
# seasons and weeks to loop over (weeks 2 through 16)
test_season_years = [2020, 2021]
test_season_weeks = [*range(2, 17)]
# columns common to all prediction sources
keys = ["season_year", "week", "name", "team", "position", "pid"]

# Columns to use as features for each position, written as
# position -> (raw features, position features).
# Note that raw features are not transformed in any way.
_feature_table = {
    "QB": (["draftkings_salary", "avg_windspeed", "avg_temp"], ["passing_cmp"]),
    "RB": (["draftkings_salary", "fanduel_salary"], ["rushing_yds"]),
    "WR": (["draftkings_salary", "has_dnp_tag"], ["receiving_rec"]),
    "TE": (["draftkings_salary", "fanduel_salary"], ["receiving_rec"]),
}
# expand the table into the nested mapping the rest of the notebook reads
input_features = {
    pos: {"raw_features": raw_cols, "position_features": pos_cols}
    for pos, (raw_cols, pos_cols) in _feature_table.items()
}

In [None]:
from itertools import product

# Backtest loop: for every (season, week) pair and every position, build the
# feature set from data prior to that week and split it into historical and
# future rows.
# NOTE: the two trailing `break` statements stop the loops after the first
# (season_year, season_week, position) combination.
last_test_week = max(test_season_weeks)
for season_year, season_week in product(test_season_years, test_season_weeks):
    # skip the final test week (equivalent to `season_week + 1 > last week`)
    if season_week >= last_test_week:
        continue
    for position in player_positions:
        # feature configuration for the current position
        position_inputs = input_features.get(position)
        # select all data prior to the week you want to create predictions for
        backtest_df = fantasy_df.filter_to_prior_week(
            season_year=season_year, week_number=season_week
        )
        # feature builder for this position, with yvar as the target
        fantasy_features = FantasyFeatures(backtest_df, y=yvar, position=position)
        # prep data for prediction
        fantasy_features.log_transform_y()
        fantasy_features.filter_inactive_games(status_column="is_active")
        fantasy_features.create_future_week()
        fantasy_features.filter_n_games_played_by_season(min_games_played=2)
        # lag and moving-average features use the target plus the
        # position-specific columns; a fresh list is built for each call
        position_cols = position_inputs.get("position_features")
        fantasy_features.add_lag_feature(
            n_week_lag=1,
            lag_columns=[yvar, *position_cols],
        )
        fantasy_features.add_moving_avg_feature(
            n_week_window=4,
            window_columns=[yvar, *position_cols],
        )
        # derived feature names (used as column labels below) and the frame
        # holding the transformed features
        derived_features, feature_df = fantasy_features.create_ff_signature()
        # separate "train" (past weeks) from "test" (future week)
        future_flag = feature_df["is_future_week"]
        hist_df = feature_df[future_flag == 0]
        future_df = feature_df[future_flag == 1]
        # raw features for the position followed by the derived ones
        all_features = position_inputs.get("raw_features") + derived_features
        # split historical X and y
        X_hist, y_hist = hist_df[[*all_features, yvar]].get_features_targets(
            yvar, all_features
        )
        # get future X
        break
    break