# NHL Models

## Data Export
```bash
# Source database and export settings.
# Every multi-value setting is an array expanded as "${NAME[@]}" so each value
# reaches dumpdata.sc as its own argument regardless of the shell's
# word-splitting rules (zsh, for example, does not split unquoted scalars).
DB_FILE="${FANTASY_HOME}/nhl_hist_20072008-20212022.scored.db"
SEASONS=(20152016 20162017 20172018 20182019 20192020 20202021 20212022)
# Patterns are quoted so the shell passes them through without globbing.
SKATER_STATS=("assist*" "fo*" "*away" "goal" "goal_pp" "goal_sh"
    "goal_t" "goal_w" "hit" "line" "p*" "shot*" "toi_ev" "toi_pp" "toi_sh")
GOALIE_STATS=(goal_ag loss save toi_g win)

# skater data -> nhl_skater.pq
dumpdata.sc --seasons "${SEASONS[@]}" --progress \
    "$DB_FILE" --no_teams \
    --pos LW RW W C D \
    --stats "${SKATER_STATS[@]}" --current_extra is_home \
    --target_stats shot assist goal --target_calc_stats "*" \
    --player_team_stats "*" --opp_team_stats "*" \
    --hist_recent_games 5 --hist_recent_mode ma \
    --format parquet -f nhl_skater.pq

# goalie data -> nhl_goalie.pq
dumpdata.sc --seasons "${SEASONS[@]}" --progress \
    "$DB_FILE" --no_teams \
    --pos G \
    --stats "${GOALIE_STATS[@]}" --current_extra is_home \
    --target_calc_stats "*" --target_stats goal_ag save \
    --player_team_stats "*" --opp_team_stats "*" \
    --hist_recent_games 5 --hist_recent_mode ma \
    --format parquet -f nhl_goalie.pq

# team data -> nhl_team.pq
dumpdata.sc --seasons "${SEASONS[@]}" --progress \
    "$DB_FILE" --no_players \
    --stats "*" --target_stats goal win --current_extra is_home \
    --opp_team_stats "*" \
    --hist_recent_games 5 --hist_recent_mode ma \
    --format parquet -f nhl_team.pq
```

In [None]:
# Notebook-wide settings read by the data-loading / model-training cell.

# Passed as `seed` to both load_data() and model_and_test().
RANDOM_SEED = 1

# Season id handed to load_data() and model_and_test() for validation.
VALIDATION_SEASON = 20212022

# Passed to model_and_test(); the export commands above use
# `--hist_recent_games 5` (presumably these must agree -- confirm).
RECENT_GAMES = 5

# Season ids are the start year followed by the end year, e.g. 20152016.
# This yields 20152016 through 20202021 inclusive.
TRAINING_SEASONS = [
    start_year * 10_000 + (start_year + 1) for start_year in range(2015, 2021)
]

# Passed to load_data() as `col_drop_filters`; None supplies no filters.
COLS_TO_DROP = None

# Passed to model_and_test() as `reuse_existing`.
REUSE_EXISTING = True

# Passed to load_data() as `missing_data_threshold`.
MISSING_DATA_THRESHOLD = 0.07

In [None]:
# Model Creation
import sys

sys.path.append("..")

from fantasy_py import PlayerOrTeam
from train_test import load_data, model_and_test

# Per-dataset settings shared by every model trained from that export file.
# Keys mirror the global names the rest of this cell (and any later cell)
# reads, so selecting a model defines exactly the same variables that
# un-commenting a configuration block used to.
DATASET_CONFIGS = {
    "team": {
        "DATA_FILENAME": "nhl_team.pq",
        "P_OR_T": PlayerOrTeam.TEAM,
        "INCLUDE_POS": None,
        "TARGET_POS": None,
        "TRAINING_TIME": 600,
    },
    "skater": {
        "DATA_FILENAME": "nhl_skater.pq",
        "P_OR_T": PlayerOrTeam.PLAYER,
        "INCLUDE_POS": True,
        "TARGET_POS": ["LW", "RW", "W", "C", "D"],
        "TRAINING_TIME": 1200,
    },
    "goalie": {
        "DATA_FILENAME": "nhl_goalie.pq",
        "P_OR_T": PlayerOrTeam.PLAYER,
        "INCLUDE_POS": False,
        "TARGET_POS": ["G"],
        "TRAINING_TIME": 600,
    },
}

# Model name -> (dataset key in DATASET_CONFIGS, prediction target).
MODEL_TARGETS = {
    "NHL-team-goal": ("team", ("stat", "goal")),
    "NHL-team-win": ("team", ("stat", "win")),
    "NHL-skater-DK": ("skater", ("calc", "dk_score")),
    "NHL-skater-shot": ("skater", ("stat", "shot")),
    "NHL-skater-assist": ("skater", ("stat", "assist")),
    "NHL-skater-goal": ("skater", ("stat", "goal")),
    "NHL-goalie-DK": ("goalie", ("calc", "dk_score")),
    "NHL-goalie-save": ("goalie", ("stat", "save")),
    "NHL-goalie-ag": ("goalie", ("stat", "goal_ag")),
}

# Select the model to train here; it must be a key of MODEL_TARGETS.
MODEL_NAME = "NHL-team-goal"

# Fail early with a readable message instead of a NameError/KeyError further
# down when the selection is mistyped.
if MODEL_NAME not in MODEL_TARGETS:
    raise ValueError(
        f"Unknown MODEL_NAME {MODEL_NAME!r}; "
        f"choose one of: {', '.join(MODEL_TARGETS)}"
    )

dataset_key, TARGET = MODEL_TARGETS[MODEL_NAME]
dataset_config = DATASET_CONFIGS[dataset_key]
DATA_FILENAME = dataset_config["DATA_FILENAME"]
P_OR_T = dataset_config["P_OR_T"]
INCLUDE_POS = dataset_config["INCLUDE_POS"]
TARGET_POS = dataset_config["TARGET_POS"]
TRAINING_TIME = dataset_config["TRAINING_TIME"]

# Load the exported data for the selected target.
raw_df, tt_data, one_hot_stats = load_data(
    DATA_FILENAME,
    TARGET,
    VALIDATION_SEASON,
    include_position=INCLUDE_POS,
    col_drop_filters=COLS_TO_DROP,
    seed=RANDOM_SEED,
    missing_data_threshold=MISSING_DATA_THRESHOLD,
)

# Nothing below consumes one_hot_stats, so stop if load_data returns any.
# An explicit raise is used because `assert` is stripped under `python -O`.
if one_hot_stats is not None:
    raise ValueError(
        "load_data returned one_hot_stats, which this notebook does not use: "
        f"{one_hot_stats!r}"
    )

# AutoML backends to run; `model` keeps the result of the last one.
for automl_type in ["tpot"]:  # , "autosk"]:
    model = model_and_test(
        MODEL_NAME,
        VALIDATION_SEASON,
        tt_data,
        TARGET,
        TRAINING_TIME,
        automl_type,
        P_OR_T,
        RECENT_GAMES,
        TRAINING_SEASONS,
        seed=RANDOM_SEED,
        target_pos=TARGET_POS,
        training_pos=TARGET_POS,
        raw_df=raw_df,
        reuse_existing=REUSE_EXISTING,
    )