# NHL Models

## Data Export
```
# dump team data (players are excluded via --no_players)
dumpdata.sc --seasons 20152016 20162017 20172018 20182019 20192020 20202021 20212022 --stats "*" --calc_stats "*" --progress nhl_hist_20072008-20212022.scored.db --no_players --target_calc_stats DKnhl FDnhl Ynhl --hist_recent_games 5 --hist_recent_mode ma --extra_stats "*" -f nhl_train_team.csv
```

In [None]:
import pandas as pd

# --- Run configuration ------------------------------------------------------
# Seed passed to the train/test split and to both AutoML searches.
RANDOM_SEED = 1
# AutoML budget; auto-sklearn receives it as-is and TPOT receives
# TRAINING_TIME / 60 as its minutes budget.
TRAINING_TIME = 1200
# Rows whose `season` equals this value are held out as the validation set.
# NOTE(review): the export command at the top of the notebook lists seasons as
# 8-digit values (e.g. 20212022) — confirm the `season` column really holds 2022.
VALIDATION_SEASON = 2022
# Column the models are trained to predict.
TARGET_FEATURE = "calc:dk_performance_score"

# --- Load the training export -----------------------------------------------
# NOTE(review): this path points at a LoL *player* export, while the notebook is
# titled "NHL Models" and the export command writes nhl_train_team.csv — confirm
# that this is the intended input file.
df = pd.read_csv("/fantasy/lol_train_player.csv")
# Force the position column to strings before it is one-hot encoded later on.
df["pos"] = df["pos"].astype(str)


In [None]:
import sklearn.model_selection

# One-hot encode the position column. get_dummies drops "pos" and adds one
# "pos_<value>" indicator column per distinct position.
cleaned_df = pd.get_dummies(df, columns=["pos"])
# Convert the home flag to an integer column.
cleaned_df["extra:is_home"] = df["extra:is_home"].astype(int)

# Model inputs: the position indicators, every "extra*" column, and every
# recent-form (":recent") or spread (":std") statistic.
# The position test matches the "pos_" prefix because the bare "pos" column no
# longer exists after get_dummies; comparing against "pos" would silently drop
# all position indicators from the feature set.
feature_cols = [
    col
    for col in cleaned_df
    if col.startswith(("pos_", "extra")) or ":recent" in col or ":std" in col
]

# Everything except the validation season is used for fitting and testing.
train_test_df = cleaned_df.query("season != @VALIDATION_SEASON")

X = train_test_df[feature_cols]
y = train_test_df[TARGET_FEATURE]
# Random split with train_test_split's default proportions, seeded for
# reproducibility.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=RANDOM_SEED
)

# The validation season is kept completely out of the split above.
validation_df = cleaned_df.query("season == @VALIDATION_SEASON")
X_val = validation_df[feature_cols]
y_val = validation_df[TARGET_FEATURE]
print(
    f"Training will use {len(feature_cols)} features, {len(X_train)} training cases, {len(X_test)} test cases, {len(X_val)} validation test cases",
)

In [None]:
import autosklearn.regression
import sklearn.model_selection
import sklearn.metrics
from pprint import pprint

# Run the auto-sklearn regression search on the training split.
# NOTE(review): memory_limit=-1 is presumably meant to disable the memory cap —
# confirm against the installed auto-sklearn version.
sk_automl = autosklearn.regression.AutoSklearnRegressor(
    seed=RANDOM_SEED,
    time_left_for_this_task=TRAINING_TIME,
    memory_limit=-1,
)
sk_automl.fit(X_train, y_train)

# Show the leaderboard and the models reported by the fitted search.
print(sk_automl.leaderboard())
pprint(sk_automl.show_models(), indent=4)

# R2 on the random test split taken from the non-validation seasons.
y_hat = sk_automl.predict(X_test)
sk_test_r2 = sklearn.metrics.r2_score(y_true=y_test, y_pred=y_hat)
print("Test R2 score:", sk_test_r2)

# R2 on the held-out validation season.
y_hat_val = sk_automl.predict(X_val)
sk_val_r2 = sklearn.metrics.r2_score(y_true=y_val, y_pred=y_hat_val)
print("Validation R2 score:", sk_val_r2)


In [None]:
from tpot import TPOTRegressor

# Run the TPOT regression search on the same training split. TRAINING_TIME is
# divided by 60 because TPOT takes its budget via max_time_mins.
tpot_automl = TPOTRegressor(
    random_state=RANDOM_SEED, max_time_mins=TRAINING_TIME / 60, verbosity=2
)
tpot_automl.fit(X_train, y_train)

# Show the pipeline TPOT ended up with. `pprint` and `sklearn.metrics` are the
# names imported in the auto-sklearn cell.
pprint(tpot_automl.fitted_pipeline_)

# R2 on the random test split taken from the non-validation seasons.
y_hat = tpot_automl.predict(X_test)
tpot_test_r2 = sklearn.metrics.r2_score(y_true=y_test, y_pred=y_hat)
print("Test R2 score:", tpot_test_r2)

# R2 on the held-out validation season.
y_hat_val = tpot_automl.predict(X_val)
tpot_val_r2 = sklearn.metrics.r2_score(y_true=y_val, y_pred=y_hat_val)
print("Validation R2 score:", tpot_val_r2)