# LOL Models

## Data Export
For LOL, modeling targets the performance score because it is the per-game score, which
translates better across different leagues.
```
# player LOL data
dumpdata.sc --seasons 2016 2017 2018 2019 2020 2021 2022 --stats "*" --progress lol_hist_2014-2022.scored.db \
    --no_teams --target_calc_stats "*performance_score" --hist_recent_games 3 \
    --hist_recent_mode ma --extra_stats "*" -f lol_train_player.csv

# team LOL data
dumpdata.sc --seasons 2014 2015 2016 2017 2018 2019 2020 2021 2022 --stats "*" --progress \
    lol_hist_2014-2022.scored.db --no_players --target_calc_stats "*performance_score" \
    --hist_recent_games 3 --hist_recent_mode ma --extra_stats "*" \
    -f lol_train_team.csv    
```

In [None]:
import pandas as pd

# Seed shared by the train/test split and both AutoML searches.
RANDOM_SEED = 1
# AutoML budget in seconds: handed to auto-sklearn as-is and divided by 60
# for TPOT's ``max_time_mins``.
TRAINING_TIME = 1800
# Season excluded from the train/test split and scored separately as the
# validation set.
VALIDATION_SEASON = 2022

# Which export to model: "team" (default) or "player". This replaces the
# previous practice of commenting one configuration block in and out.
MODEL_TYPE = "team"

# Both model types predict the same target column.
TARGET_FEATURE = "calc:dk_performance_score"

if MODEL_TYPE == "player":
    MODEL_PREFIX = "model-lol-player"
    df = pd.read_csv("/fantasy/lol_train_player.csv")
    # One-hot encode position; after this the frame holds ``pos_<value>``
    # columns instead of a single ``pos`` column.
    df["pos"] = df["pos"].astype(str)
    df = pd.get_dummies(df, columns=["pos"])
elif MODEL_TYPE == "team":
    MODEL_PREFIX = "model-lol-team"
    df = pd.read_csv("/fantasy/lol_train_team.csv")
else:
    raise ValueError(
        f"Unknown MODEL_TYPE {MODEL_TYPE!r}; expected 'player' or 'team'"
    )


In [None]:
import sklearn.model_selection

# Work on a copy so the dtype fixes below do not mutate the frame that was
# loaded from CSV (the original aliased ``df`` and changed it in place).
cleaned_df = df.copy()
cleaned_df["extra:is_home"] = cleaned_df["extra:is_home"].astype(int)

# When position has been one-hot encoded with ``pd.get_dummies`` the frame
# holds ``pos_<value>`` columns rather than ``pos``. Cast them to int like
# ``extra:is_home`` so every feature column is numeric.
pos_dummy_cols = [col for col in cleaned_df if col.startswith("pos_")]
if pos_dummy_cols:
    cleaned_df[pos_dummy_cols] = cleaned_df[pos_dummy_cols].astype(int)

# Model inputs: position (raw or one-hot encoded), "extra" columns, and the
# ":recent" / ":std" columns. The original filter only tested ``col == "pos"``,
# which never matches after one-hot encoding and therefore silently dropped
# position from the feature set.
feature_cols = [
    col
    for col in cleaned_df
    if col == "pos"
    or col.startswith(("extra", "pos_"))
    or ":recent" in col
    or ":std" in col
]

# Every season except the validation season feeds the random train/test split.
train_test_df = cleaned_df.query("season != @VALIDATION_SEASON")

X = train_test_df[feature_cols]
y = train_test_df[TARGET_FEATURE]
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=RANDOM_SEED
)

# The validation season is never seen during training or the test split.
validation_df = cleaned_df.query("season == @VALIDATION_SEASON")
X_val = validation_df[feature_cols]
y_val = validation_df[TARGET_FEATURE]
print(
    f"Training will use {len(feature_cols)} features, {len(X_train)} training cases, {len(X_test)} test cases, {len(X_val)} validation test cases",
)


In [None]:
from pprint import pprint
from datetime import datetime

import autosklearn.regression
import sklearn.model_selection
import sklearn.metrics
import joblib

# Search for a regression pipeline with auto-sklearn, using TRAINING_TIME as
# the overall time budget and RANDOM_SEED for reproducibility.
# NOTE(review): memory_limit=-1 is passed through unchanged; confirm against
# the auto-sklearn docs that this is the intended "no limit" setting.
sk_automl = autosklearn.regression.AutoSklearnRegressor(
    time_left_for_this_task=TRAINING_TIME,
    memory_limit=-1,
    seed=RANDOM_SEED,
)
sk_automl.fit(X_train, y_train)

# Report what the search found.
print(sk_automl.leaderboard())
pprint(sk_automl.show_models(), indent=4)

# R2 on the held-out test split.
y_hat = sk_automl.predict(X_test)
test_r2 = sklearn.metrics.r2_score(y_test, y_hat)
print("Test R2 score:", test_r2)

# R2 on the validation season.
y_hat_val = sk_automl.predict(X_val)
validation_r2 = sklearn.metrics.r2_score(y_val, y_hat_val)
print("Validation R2 score:", validation_r2)

# Persist the fitted estimator; the file name carries the model prefix, the
# target, the time budget and a timestamp truncated to whole seconds.
timestamp = datetime.now().isoformat().rsplit(".", 1)[0]
filename = f"{MODEL_PREFIX}-autosk-{TARGET_FEATURE}-{TRAINING_TIME}.{timestamp}.pkl"
print(f"Exporting model to '{filename}'")
joblib.dump(sk_automl, filename)


In [None]:
# This cell imports everything it uses so it can run without first executing
# the auto-sklearn cell (it previously relied on that cell for pprint,
# datetime, joblib and sklearn.metrics and raised NameError otherwise).
from datetime import datetime
from pprint import pprint

import joblib
import sklearn.metrics
from tpot import TPOTRegressor

# Search for a regression pipeline with TPOT. TRAINING_TIME is in seconds,
# so it is converted to minutes for ``max_time_mins``.
tpot_automl = TPOTRegressor(
    random_state=RANDOM_SEED,
    max_time_mins=TRAINING_TIME / 60,
    verbosity=3,
)
tpot_automl.fit(X_train, y_train)
pprint(tpot_automl.fitted_pipeline_)

# R2 on the held-out test split, then on the validation season.
y_hat = tpot_automl.predict(X_test)
print("Test R2 score:", sklearn.metrics.r2_score(y_test, y_hat))
y_hat_val = tpot_automl.predict(X_val)
print("Validation R2 score:", sklearn.metrics.r2_score(y_val, y_hat_val))

# Only the fitted pipeline is exported, not the TPOT search object itself.
filename = f"{MODEL_PREFIX}-tpot-{TARGET_FEATURE}-{TRAINING_TIME}.{datetime.now().isoformat().rsplit('.', 1)[0]}.pkl"
print(f"Exporting model to '{filename}'")
joblib.dump(tpot_automl.fitted_pipeline_, filename)
