# Use this notebook to serialize/export models to ONNX

In [None]:
import pandas as pd
import numpy as np

# Load the results table, reading only the columns needed to rebuild each model.
df = pd.read_csv(
    "results.tsv",
    # quotechar="'",
    index_col=False,
    sep='\t',
    usecols=['Sport', 'Service', 'Style', 'Type', 'y', 'ModelType', 'Params'],
)

# Drop the separator row (the first row whose Sport starts with '*') and
# everything after it.
# na=False: a row with a missing Sport must not be mistaken for the separator
# (a NaN result would otherwise be truthy for np.where/flatnonzero).
_separator_rows = np.flatnonzero(df['Sport'].str.startswith('*', na=False))
# With no separator row there is nothing to drop, so keep the whole table.
seperator_idx = _separator_rows[0] if len(_separator_rows) else len(df)
df = df.iloc[:seperator_idx]

# Widen the display limits so long Params strings are shown untruncated.
with pd.option_context('display.max_rows', 1000, 'display.max_colwidth', 1000):
    display(df)

In [None]:
import ast

from skl2onnx import convert_sklearn
import numpy as np
from skl2onnx.common.data_types import FloatTensorType

from fantasy_py import ContestStyle, CLSRegistry, CONTEST_DOMAIN, lineup

from automl import create_automl_model, error_report, get_df_types
from generate_train_test import generate_train_test, load_csv


# Number of PCA components used for '-pca' model types when the model params
# do not supply 'n_components'.
DEFAULT_PCA_COMPONENTS = 5


def train_export(
    sport, service, style: str,
    contest_type: str, model_type: str,
    y_type, params,
    dryrun=False,
):
    """
    Train the model described by one results row and export it to ONNX.

    The exported file is written to '<full model name>.onnx' in the current
    working directory, where the full model name is built from sport,
    service, contest style, contest type, model type and y type.

    sport, service - passed to load_csv to select the training data
    style - name of a ContestStyle member (case-insensitive)
    contest_type - name used to look up the contest class in CLSRegistry
    model_type - must start with 'skautoml' or 'tpot'; a '-pca' suffix
        requests PCA (see the note in the tpot branch below)
    y_type - 'top' or 'last', selects which target is fitted
    params - string holding a Python dict literal. 'model_cols' and
        (for '-pca' models) 'n_components' are consumed here, all remaining
        keys are passed to create_automl_model
    dryrun - if true, build the model object but skip fit and export

    raises ValueError for empty data, a failed train/test split, an unknown
        model type or an unknown y type. Errors from parsing params are
        printed and re-raised.
    """
    contest_style = ContestStyle[style.upper()]
    contest_type_cls = CLSRegistry.get_class(CONTEST_DOMAIN, contest_type)
    full_model_name = f'{sport}_{service}_{contest_style}_{contest_type}_{model_type}_{y_type}'
    print(f"Exporting model '{full_model_name}'")

    data_df = load_csv(sport, service, contest_style, contest_type_cls)
    # explicit raise instead of assert so the check survives python -O
    if len(data_df) == 0:
        raise ValueError("CSV load returned no data")

    try:
        model_def: dict = ast.literal_eval(params)
    except Exception:
        print("Failed to parse params", params)
        raise

    # model_cols is consumed here so that it is not forwarded to
    # create_automl_model with the remaining params
    model_cols = model_def.pop('model_cols', None)
    train_test_data = generate_train_test(
        data_df,
        model_cols=model_cols,
        random_state=5,
    )
    if train_test_data is None:
        display("Failed to generate a train/test data set from...", data_df)
        # stop here, unpacking None below would only raise a confusing TypeError
        raise ValueError(
            f"Failed to generate a train/test data set for {full_model_name}"
        )
    (X_train, X_test, y_top_train, y_top_test,
     y_last_win_train, y_last_win_test) = train_test_data

    create_model_params = {}
    if model_type.endswith('-pca'):
        create_model_params['pca_components'] = model_def.pop(
            'n_components', DEFAULT_PCA_COMPONENTS
        )

    if model_type.startswith('skautoml'):
        create_model_params.update({
            'framework': 'skautoml',
            # 'overwrite': True,
        })
    elif model_type.startswith('tpot'):
        # NOTE(review): this assignment replaces the dict, so any
        # 'pca_components' set above is discarded for tpot models.
        # Confirm whether that is intended.
        create_model_params = {
            'framework': 'tpot',
        }
    else:
        raise ValueError(f"Don't know how to process model type {model_type}")

    if y_type == 'top':
        y_train = y_top_train
        y_test = y_top_test
    elif y_type == 'last':
        y_train = y_last_win_train
        y_test = y_last_win_test
    else:
        raise ValueError(f"Unexpected y of {y_type}")

    # add all remaining
    create_model_params.update(model_def)
    sk_model, fit_params = create_automl_model(
        full_model_name,
        seed=1,
        **create_model_params,
    )
    if dryrun:
        print(f"{dryrun=} skipping fit and model export for {full_model_name}")
        return

    print("Training model...")
    sk_model.fit(X_train, y_train, **fit_params)
    # report the popped model_cols value; the key no longer exists in
    # model_def, so indexing model_def here would raise KeyError
    error_report(sk_model, X_test, y_test,
                 full_model_name + f": model_cols={model_cols}")

    print(f"Exporting model to {full_model_name}.onnx")
    df_types = get_df_types(X_train)
    display(df_types)

    # the converted model declares one float output named 'variable1'
    # with shape [1, 1]
    onnx_model = convert_sklearn(sk_model, full_model_name,
                                 df_types,
                                 final_types=[('variable1', FloatTensorType([1, 1]))])
    with open(full_model_name + ".onnx", "wb") as f:
        f.write(onnx_model.SerializeToString())

In [None]:
from collections import namedtuple

# When true, train_export is called with dryrun=True for every row.
DRYRUN = True
# Bound before the loop so the name exists even if there are no rows to process.
row = None

# Every model in the results table except the nhl ones.
models = df.loc[df['Sport'] != 'nhl'].iterrows()

# Alternatives: export every row, or a single hand written model definition.
# models = df.iterrows()
# models_dict = {
#     'Sport': 'nhl', 
#     'Service': 'fanduel',
#     'Style': 'classic',
#     'Type': 'GPP', 
#     'ModelType': 'tpot', 
#     'y': 'top',
#     'Params': '{"generations": 100, "early_stop": 10, "population_size": 100, "n_jobs": 3}'
# }
# models = [
#     (None, namedtuple("test_model", models_dict.keys())(*models_dict.values()))
# ]

for _, row in models:
    try:
        train_export(
            sport=row.Sport,
            service=row.Service,
            style=row.Style,
            contest_type=row.Type,
            model_type=row.ModelType,
            y_type=row.y,
            params=row.Params,
            dryrun=DRYRUN,
        )
    except Exception:
        # show which row failed, then let the error stop the run
        display(
            f"Failed to train: {row.Sport=} {row.Service=} {row.Style=} "
            f"{row.Type=} {row.ModelType=} {row.y=} {row.Params=}"
        )
        raise

print("Done!")
