# Repair initial models
Do not run this top to bottom: each cell contains a different fix for a
different problem that occurred during the development of the updated
model object.

In [None]:
# Cache of previously inferred data columns.
# The repair loop in this notebook stores entries as: data file path -> column names.
data_cols: dict[str, list[str]] = dict()

# Cache of previously inferred imputation values.
# The repair loop in this notebook stores entries as:
# data file path -> {column name: impute value}.
imputations: dict[str, dict[str, float]] = dict()

# Data file to use for a model, looked up by the first two "-"-separated,
# lower-cased parts of the model name. Pairs that are not listed here fall
# back to "<first>_<second>.csv" in the repair loop.
data_filenames: dict[tuple[str, str], str] = {
    # mlb
    ("mlb", "h"): "mlb_hitter.csv",
    ("mlb", "p"): "mlb_pitcher.csv",
    # nba
    ("nba", "dk"): "nba_player.csv",
    # nfl
    ("nfl", "def"): "nfl_team.csv",
    ("nfl", "wrte"): "nfl_wrte.csv",
    ("nfl", "qb"): "nfl_qb.csv",
    ("nfl", "rb"): "nfl_rb.csv",
}


In [None]:
from glob import glob
import json
import os

from train_test import load_csv, infer_feature_cols, infer_imputes

# The original models did not include the input columns or impute values.
# This cell adds 'input_cols' to 'training_data_def' and 'impute_values' to
# 'trained_parameters' for every model file matched by the glob below.
#
# NOTE(review): the data_cols / imputations caches are keyed by data file path
# only, while infer_feature_cols() is also passed include_position. Presumably
# every model sharing a data file uses the same include_position -- confirm.

# iterate through all .model files
for model_filepath in glob("*model"):
    print(f"Adding cols to '{model_filepath}'")

    # load model json
    with open(model_filepath, "r") as f_:
        model_json = json.load(f_)

    # 'include_pos' is only read when the second element of 'target' is "P"
    include_position = (
        model_json["training_data_def"]["include_pos"]
        if model_json["training_data_def"]["target"][1] == "P"
        else None
    )
    print(f"{model_json['name']=} {include_position}")

    # locate the data used to create the model: the first two "-"-separated
    # parts of the lower-cased model name select the csv file
    sport: str
    extra: str
    sport, extra = model_json["name"].lower().split("-", 3)[:2]
    data_filename = os.path.join(
        os.environ["FANTASY_HOME"],
        data_filenames.get((sport, extra), f"{sport}_{extra}.csv"),
    )

    print(f"Getting cols&imputation for {sport=} {extra=} in '{data_filename}'")

    # Only load the csv on a cache miss, and fill in whichever cache entry is
    # missing. Keeping both inferences under the same guard guarantees that
    # train_df is always bound (and belongs to this file) when it is used.
    if data_filename not in data_cols or data_filename not in imputations:
        train_df = load_csv(
            data_filename,
            include_position=include_position,
        )
        if data_filename not in data_cols:
            data_cols[data_filename] = infer_feature_cols(
                train_df.columns.to_list(), include_position
            )
        if data_filename not in imputations:
            imputations[data_filename] = infer_imputes(
                train_df[data_cols[data_filename]]
            )

    feature_cols = data_cols[data_filename]
    impute_values = imputations[data_filename]

    print(f"For '{data_filename}'")
    print(f"\t{len(feature_cols)} input columns", feature_cols)
    print(f"\t{len(impute_values)} imputation values", impute_values)

    # update model json with the column names and impute values
    model_json["training_data_def"]["input_cols"] = feature_cols
    model_json["trained_parameters"]["impute_values"] = impute_values

    # Writing the result back is intentionally disabled (dry run); uncomment
    # to persist the repaired model files.
    # save updated json
    # with open(model_filepath, "w") as f_:
    #     json.dump(model_json, f_, indent="\t")

    # print("Wrote file\n--------------------------------------\n")


In [None]:
from glob import glob
import json


def _impute_source_candidates(col, feature, recent_games):
    """Yield, in priority order, the impute keys that may supply a feature's value."""
    yield col
    yield feature + ":recent-mean"
    for recent_i in range(1, recent_games + 1):
        yield feature + f":recent-{recent_i}"


def collapse_feature_imputes(input_cols, impute_values, recent_games):
    """Reduce per-column impute values to one impute value per feature.

    Only input columns ending in ":std-mean" are considered; the feature name
    is the column name with that final ":"-suffix removed. For each feature the
    impute value is taken from the first of these keys that has a non-None
    entry in ``impute_values``:

    1. the ":std-mean" column itself
    2. "<feature>:recent-mean"
    3. "<feature>:recent-1" ... "<feature>:recent-<recent_games>"

    Args:
        input_cols: column names to scan.
        impute_values: existing mapping of column name -> impute value.
        recent_games: number of ":recent-N" keys to try for each feature.

    Returns:
        A ``(new_imputes, impute_sources, no_impute)`` tuple where
        ``new_imputes`` maps feature -> impute value, ``impute_sources`` maps
        feature -> the key the value was actually read from, and ``no_impute``
        lists the ":std-mean" columns for which no value was found.
    """
    new_imputes = {}
    impute_sources = {}
    no_impute = []

    for col in input_cols:
        if not col.endswith(":std-mean"):
            continue
        feature = col.rsplit(":", 1)[0]
        for candidate in _impute_source_candidates(col, feature, recent_games):
            if (impute_value := impute_values.get(candidate)) is not None:
                # record the key that really supplied the value (previously a
                # ":recent-N" hit was mislabelled as ":recent-mean")
                impute_sources[feature] = candidate
                new_imputes[feature] = impute_value
                break
        else:
            no_impute.append(col)

    return new_imputes, impute_sources, no_impute


# Replace each model's per-column impute values with per-feature impute values
for model_filepath in glob("*model"):
    # load model json
    with open(model_filepath, "r") as f_:
        model_json = json.load(f_)
    recent_games = model_json["training_data_def"]["recent_games"]

    new_imputes, impute_sources, no_impute = collapse_feature_imputes(
        model_json["training_data_def"]["input_cols"],
        model_json["trained_parameters"]["impute_values"],
        recent_games,
    )

    # report which columns had no value and where each feature's value came from
    print(f"For '{model_filepath}':\n\t{no_impute=}")
    for k_, value in new_imputes.items():
        print(f"\t{k_}\t{impute_sources[k_]}\t{value}")
    print()

    model_json["trained_parameters"]["impute_values"] = new_imputes

    # Writing the result back is intentionally disabled (dry run); uncomment
    # to persist the repaired model files.
    # save updated json
    # with open(model_filepath, "w") as f_:
    #     json.dump(model_json, f_, indent="\t")

    # print("Wrote file\n--------------------------------------\n")
