# Model Testing for Daily Fantasy Scores
Generate models that predict the minimum and maximum winning scores for a slate.
Log results to an evaluation results folder, including evaluation results, model descriptions,
and model serializations.

In [None]:
import logging
import log

# Set up logging through the `log` module imported above before creating the logger.
log.setup()

# Logger shared by the functions defined in the cells below.
LOGGER = logging.getLogger('Model.Testing')
LOGGER.info("logger ready")

In [None]:
from itertools import product
from datetime import datetime
import os
from typing import Literal
from pprint import pformat

import pandas as pd
from tqdm.notebook import tqdm, trange

from generate_train_test import generate_train_test, load_csv
from automl import create_automl_model, error_report
from serialize import serialize_model, get_tpot_export_code


# Columns (in output order) kept when evaluation results are written to csv.
EVAL_RESULT_COL_ORDER = [
    'Sport',
    'Service',
    'Type',
    'Style',
    'Target',
    'R2',
    'RMSE',
    'MAE',
    'ModelType',
    'Date',
    'Params',
]

def log_eval_results(eval_results: list[dict], name: str,
                     csv_folder: str = "eval_results") -> pd.DataFrame | None:
    """
    Write evaluation results to '<csv_folder>/<name>.csv' and return them as a dataframe.

    eval_results: list of result dicts; each must contain every key in EVAL_RESULT_COL_ORDER
    name: file name (without the '.csv' extension) of the file to write
    csv_folder: folder the csv is written to; created if it does not exist

    returns the dataframe that was written, restricted to the EVAL_RESULT_COL_ORDER
    columns and sorted by Sport, Service, Type, Style, Target and ModelType.
    If eval_results is empty a warning is logged, nothing is written and None is returned.
    """
    if not eval_results:
        LOGGER.warning("No evaluation results to save")
        return None

    df = pd.DataFrame(eval_results)[EVAL_RESULT_COL_ORDER] \
        .sort_values(['Sport', 'Service', 'Type', 'Style', 'Target', 'ModelType'])
    # makedirs copes with nested folders and with the folder already existing
    os.makedirs(csv_folder, exist_ok=True)
    results_filepath = os.path.join(csv_folder, name + ".csv")
    df.to_csv(results_filepath, index=False)

    LOGGER.info("Evaluation results written to '%s'", results_filepath)
    return df

def finalize_error_results(
    results: dict, 
    shared_results_dict, 
    target, 
    automl_params, 
    model_cols,
    X_train, y_train,
    error_results_folder: str = None
) -> dict:
    """ 
    update and return results to include model information for a single model evaluation 

    results: dict of results from a single model evaluation from create_automl_model()
    """
    finalized_results = results.copy()
    finalized_results['target'] = target
    finalized_results['Params'] = automl_params.copy()
    if model_cols:
        finalized_results['Params']['model_cols'] = model_cols
    finalized_results.update(shared_results_dict)

    if error_results_folder:
        serialize_model(
            results['model'], finalized_results['ModelType'], 
            X_train, y_train, 
            finalized_results['ModelType'] + "-" + target, 
            model_folder=error_results_folder,
            model_desc_folder=error_results_folder,
        )   
    return results


In [None]:
"""
Models that can be evaluated:
'all-top' - all available input data predicting the top score
'all-lws' - all available input data predicting the last winning score
'bps-top' - best possible score predicting the top score
'bps-lws' - best possible score predicting the last winning score
'pca-top' - PCA transformed input data predicting the top score
'pca-lws' - PCA transformed input data predicting the last winning score
"""
# Type alias listing every model name accepted in a `models_to_test` argument.
_VALID_MODELS = Literal[
    'all-top',
    'all-lws',
    'bps-top',
    'bps-lws',
    'pca-top',
    'pca-lws',
]


def evaluate_models(
    sport, service, style, contest_type, framework, automl_params, pbar,
    pca_components=5, data_folder="data",
    models_to_test: set[_VALID_MODELS] | None = None
) -> tuple[dict, list] | tuple[None, None]:
    """
    Train and evaluate models for one sport/service/style/contest type/framework combination.

    sport, service - used in model descriptions/results and passed to load_csv()
    style - contest style; its .name is used in model descriptions and results
    contest_type - contest type; its .NAME is used in model descriptions and results
    framework - automl framework name passed to create_automl_model()
    automl_params - keyword arguments for create_automl_model(); must include 'random_state'
    pbar - progress bar; set_postfix_str() and update() are called once per trained model
    pca_components - number of components requested for the pca models
    data_folder - folder passed to load_csv()
    models_to_test - set/list of the models to test. if None then all models are tested.
        See _VALID_MODELS for the possible models.
        NOTE(review): selection is by feature set only. Requesting any 'all-*' or 'pca-*'
        model trains both the top and last winning score models on all columns, any
        'bps-*' model trains both on the best possible score, and any 'pca-*' model
        additionally trains both pca models.

    returns tuple of (models, evaluation results) where models maps each model description
    to its trained model. Returns (None, None) as soon as a feature set has fewer than
    5 training cases.
    """
    models = {}
    eval_results = []
    shared_results_dict = {
        'Sport': sport,
        'Service': service,
        'Style': style.name,
        'Type': contest_type.NAME,
        'ModelType': framework,
        'Date': datetime.now().strftime("%Y%m%d"),
    }
    df = load_csv(sport, service, style, contest_type, data_folder=data_folder)

    if models_to_test is None:
        final_models_to_test = {'all-top', 'all-lws',
                                'bps-top', 'bps-lws',
                                'pca-top', 'pca-lws'}
    else:
        # the docstring allows a list; the set operations below need a set
        final_models_to_test = set(models_to_test)

    pca_requested = bool(final_models_to_test & {'pca-top', 'pca-lws'})

    # None means "use all feature columns"
    model_col_options: list[None | set[str]] = []
    if final_models_to_test & {'all-top', 'all-lws', 'pca-top', 'pca-lws'}:
        model_col_options.append(None)
    if final_models_to_test & {'bps-top', 'bps-lws'}:
        model_col_options.append({'best-possible-score'})

    # iterate over the model feature cols
    for model_cols in model_col_options:
        model_data = generate_train_test(
            df,
            model_cols=model_cols,
            random_state=automl_params['random_state'],
        )

        if model_data is None or len(model_data[0]) < 5:
            LOGGER.error(
                "Not enough training data available! Only found %i cases. Skipping all training",
                len(model_data[0]) if model_data else 0
            )
            return None, None

        (X_train, X_test, y_top_train, y_top_test,
         y_last_win_train, y_last_win_test) = model_data

        model_ys = [
            ('top-score', y_top_train, y_top_test),
            ('last-win-score', y_last_win_train, y_last_win_test),
        ]

        # models for top and last winning score
        for target, y_train, y_test in model_ys:
            LOGGER.info("training model=%s cols=%s", target, model_cols)
            model_desc = f"{sport}-{service}-{style.name}-{contest_type.NAME}-{target}-{framework}:{model_cols or 'all'}"
            pbar.set_postfix_str(model_desc)

            cam_result = create_automl_model(
                target,
                framework=framework,
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                model_desc=model_desc,
                **automl_params
            )

            pbar.update()

            # argument order must match finalize_error_results(); an extra positional
            # argument here would shift y_train into error_results_folder
            results = finalize_error_results(
                cam_result, shared_results_dict, target,
                automl_params, model_cols,
                X_train, y_train,
            )
            log_eval_results([results], model_desc)
            eval_results.append(results)
            models[model_desc] = cam_result['model']

        if not pca_requested:
            continue

        # pca models only when using multiple data columns and requested
        if model_cols is not None and len(model_cols) == 1:
            if model_cols != {'best-possible-score'}:
                LOGGER.debug(
                    "Skipping pca models due to lack of data columns. model_cols=%r",
                    model_cols
                )
            continue

        for target, y_train, y_test in model_ys:
            pca_target = target + '-pca'
            LOGGER.info("training model=%s cols=%s",
                        pca_target, model_cols)
            model_desc = f"{sport}-{service}-{style.name}-{contest_type.NAME}-{pca_target}-{framework}:{model_cols or 'all'}"
            pbar.set_postfix_str(model_desc)
            cam_result = create_automl_model(
                pca_target,
                pca_components=pca_components,
                framework=framework,
                X_train=X_train, y_train=y_train,
                X_test=X_test, y_test=y_test,
                model_desc=model_desc,
                **automl_params
            )
            pbar.update()

            results = finalize_error_results(
                cam_result, shared_results_dict, target,
                automl_params, model_cols,
                X_train, y_train,
            )
            # the pca variant is distinguished through its params and model type
            results['Params']['n_components'] = pca_components
            results['ModelType'] += '-pca'
            log_eval_results([results], model_desc)
            eval_results.append(results)
            models[model_desc] = cam_result['model']

    return models, eval_results


def multi_run(
    model_params: dict, styles, sports, services, contest_types,
    models_to_test: set[_VALID_MODELS] | None = None
) -> tuple[dict, list, BaseException | None]:
    """
    Run evaluate_models() for every combination of sport, service, style, contest type
    and framework.

    model_params - dict mapping framework name to the automl params for that framework
    styles, sports, services, contest_types - collections of values to combine
    models_to_test - models to evaluate for each combination, None for all models

    returns tuple of (models, evaluation results, exception). A missing data file is
    logged and that combination skipped. Any other exception (including a keyboard
    interrupt) stops the run and is returned as the third item together with whatever
    was produced up to that point; the third item is None if the run completed.
    """
    LOGGER.info("starting multirun")
    models = {}
    eval_results = []
    progress_total = len(sports) * len(styles) * len(services) * len(contest_types) * len(model_params) * \
        (len(models_to_test) if models_to_test else 6)
    pbar = tqdm(total=progress_total, desc="Modeling")
    try:
        for (
            sport, service, style, contest_type, (framework, params)
        ) in product(
            sports, services, styles, contest_types, model_params.items()
        ):
            try:
                (new_models, new_eval_results) = evaluate_models(
                    sport, service, style, contest_type, framework, params, pbar,
                    models_to_test=models_to_test)
                if new_models is None:
                    LOGGER.warning("No models generated for %s-%s-%s-%s",
                                   sport, service, style.name, contest_type.NAME)
                else:
                    models.update(new_models)
                    eval_results += new_eval_results
            except FileNotFoundError as ex:
                LOGGER.error(
                    "Data file required for modeling not found", exc_info=ex
                )
    except (Exception, KeyboardInterrupt) as ex:
        # deliberately best effort: hand back partial results along with the exception
        LOGGER.error("Unhandled exception! ", exc_info=ex)
        return models, eval_results, ex
    finally:
        # release the progress bar on both the normal and the failure path
        pbar.close()
    LOGGER.info("finished multirun")
    return models, eval_results, None


In [None]:
import pandas as pd

from fantasy_py import ContestStyle
from fantasy_py.lineup.strategy import GeneralPrizePool, FiftyFifty

# Run configuration: multi_run() evaluates every combination of sport, style,
# service and contest type below with every framework in AUTOML_PARAMS.
SPORTS = ['nhl', 'nfl', 'mlb', 'nba', 'lol']
STYLES = [ContestStyle.CLASSIC, ContestStyle.SHOWDOWN]
# Seed handed to each framework as 'random_state'; evaluate_models() also passes
# it to generate_train_test().
RANDOM_SEED = 0
SERVICES = ['fanduel', 'draftkings', 'yahoo']
CONTEST_TYPES = [FiftyFifty, GeneralPrizePool]
# Subset of the _VALID_MODELS names to evaluate in this run.
MODELS_TO_TEST = {
    'all-top', 'all-lws'
}

# Keyed by framework name; each value holds the keyword arguments that
# evaluate_models() forwards to create_automl_model().
# NOTE(review): the time limits are presumably in seconds - confirm against the automl module.
AUTOML_PARAMS = {
    'skautoml': {
        'per_run_time_limit': 120,
        'max_train_time': 900,
        'n_jobs': 4,
        'random_state': RANDOM_SEED,
    },
    'tpot': {
        'population_size': 100,
        'n_jobs': 4,
        'verbosity': 2,
        'max_train_time': 1200,
        'generations': 100,
        'early_stop': 15,
        'template': 'Selector-Transformer-Regressor',
        'random_state': RANDOM_SEED,
    }
}

# Run all combinations. unhandled_exception is None when the run completed,
# otherwise it is the exception that stopped it.
(models, eval_results, unhandled_exception) = multi_run(
    AUTOML_PARAMS, STYLES, SPORTS, SERVICES, CONTEST_TYPES, models_to_test=MODELS_TO_TEST
)


In [None]:
# Kept in case other cells refer to it; log_eval_results() applies the same
# column selection and sort order itself.
eval_result_col_order = [
    'Sport', 'Service', 'Type', 'Style', 'Target', 'R2',
    'RMSE', 'MAE', 'ModelType', 'Date', 'Params'
]

# Report an exception captured by multi_run() even when no results were produced.
if unhandled_exception:
    import traceback
    # traceback.format_exc() only reports the exception currently being handled, and
    # nothing is being handled here, so format the stored exception object instead.
    print("".join(traceback.format_exception(
        type(unhandled_exception), unhandled_exception,
        unhandled_exception.__traceback__
    )))

if len(eval_results):
    # writes 'all_eval_results.csv' and returns the ordered, sorted dataframe
    eval_results_df = log_eval_results(
        eval_results,
        "all_eval_results"
    )

    with pd.option_context(
        'display.max_rows', 1000,
        'display.max_columns', 100,
        'display.max_colwidth', None
    ):
        # display() is expected to be supplied by the notebook environment
        display(eval_results_df)

    # tab separated copy of the results
    print(eval_results_df.to_csv(index=False, sep="\t"))
