# Model Testing for Daily Fantasy Scores
Generate models that predict the minimum and maximum winning scores for a slate.
Log results to an evaluation result folder including evaluation results, model descriptions 
and model serializations.

In [None]:
import logging
import log

# Run the setup hook of the `log` helper module imported above
# (presumably configures logging handlers/levels - confirm in log.py).
log.setup()

# Notebook-wide logger; every function defined in later cells logs through it.
LOGGER = logging.getLogger('Model.Testing')
LOGGER.info("logger ready")

In [None]:
from itertools import product
from datetime import datetime
import os
from typing import Literal
from pprint import pformat

import pandas as pd
from tqdm.notebook import tqdm, trange
import onnx
from pypmml import Model

from generate_train_test import generate_train_test, load_csv
from automl import create_automl_model, error_report, JpmmlModel, PMMLFileFramework
from serialize import serialize_model, SerializeFailure


# Columns (and their order) selected from the evaluation results by
# log_eval_results() before the results are written to csv.
EVAL_RESULT_COL_ORDER = [
    'Sport', 'Service', 'Type', 'Style', 'Target', 'R2',
    'RMSE', 'MAE', 'ModelType', 'Date', 'Params'
]   

def log_eval_results(eval_results: list[dict], name: str,
                     csv_folder: str = "eval_results") -> pd.DataFrame | None:
    """
    Write evaluation results to '<csv_folder>/<name>.csv' and return them as a dataframe.

    eval_results: list of evaluation result dicts; every dict must contain all
        the columns named in EVAL_RESULT_COL_ORDER
    name: name (without extension) of the csv file to write
    csv_folder: folder for the csv file; created, including parents, if missing

    returns: the sorted dataframe that was written, or None when eval_results is empty
    """
    if not eval_results:
        LOGGER.warning("No evaluation results to save")
        return None

    df = pd.DataFrame(eval_results)[EVAL_RESULT_COL_ORDER] \
        .sort_values(['Sport', 'Service', 'Type', 'Style', 'Target', 'ModelType'])
    # makedirs copes with nested folders and with the folder already existing
    os.makedirs(csv_folder, exist_ok=True)
    results_filepath = os.path.join(csv_folder, name + ".csv")
    df.to_csv(results_filepath, index=False)

    LOGGER.info("Evaluation results written to '%s'", results_filepath)
    return df

def finalize_results(
    results: dict, 
    shared_results_dict, 
    target, 
    automl_params, 
    X_train, y_train,
    model_desc: str = None,
    results_folder: str = None,
    serialize_format: str | None = None
) -> tuple[dict, str]:
    """ 
    update and return error results to include model information for a single model evaluation and 
    serialize the model and model description

    results: dict of results from a single model evaluation from create_automl_model()
    """
    finalized_results = results['eval_result'].copy()
    finalized_results['Target'] = target
    finalized_results['Params'] = automl_params.copy()
    finalized_results.update(shared_results_dict)
    model_filepath = None
    if results_folder:
        if model_desc is None:
            raise ValueError("If result_folder is defined then model_desc must not be None")
        _, model_filepath = serialize_model(
            results['model'], finalized_results['ModelType'], 
            X_train, y_train, 
            model_desc,
            model_folder=results_folder,
            model_desc_folder=results_folder,
            output_format=serialize_format,
        )
    return finalized_results, model_filepath


# Framework used by validate_exported_model() to load exported PMML files.
# Only declared here (annotation without a value); the value is assigned in the
# run-configuration cell further down, before any model is validated.
PMML_FILE_FRAMEWORK: PMMLFileFramework


def validate_exported_model(
    filepath, cam_result, model_desc, y_fallback, X_test, y_test,
    r2_tolerance: float | None = None,
):
    """
    Load an exported model file and check that it scores like the in-memory model.

    filepath: path of the exported model; must end in 'onnx' or 'pmml'
    cam_result: result dict from create_automl_model(); its 'eval_result' and
        'predictions' entries are used for the comparison and failure details
    model_desc: model description used in failure messages
    y_fallback: forwarded to error_report() as y_fallback
    X_test, y_test: test data the exported model is scored against
    r2_tolerance: maximum allowed absolute difference between the in-memory and
        exported model R2. If None the R2 comparison is skipped (the model is
        still loaded and scored).

    raises:
        ValueError - unknown file extension or unsupported PMML_FILE_FRAMEWORK
        SerializeFailure - error metrics could not be calculated for the exported
            model, or the R2 difference exceeds r2_tolerance
    """
    LOGGER.info("loading exported model from '%s'", filepath)
    if filepath.endswith('onnx'):
        model = onnx.load(filepath)
        onnx.checker.check_model(model)
    elif filepath.endswith('pmml'):
        if PMML_FILE_FRAMEWORK == 'pypmml':
            model = Model.load(filepath)
        elif PMML_FILE_FRAMEWORK.startswith('jpmml'):
            model = JpmmlModel(filepath, PMML_FILE_FRAMEWORK)
        else:
            raise ValueError(f"Unsupported PMML file framework: {PMML_FILE_FRAMEWORK}")
    else:
        raise ValueError(f"Unknown file extension for model file '{filepath}'")

    file_results, file_predictions = error_report(model, X_test, y_test,
                                                  desc=filepath,
                                                  y_fallback=y_fallback,
                                                  show_results=False)

    if file_results is None:
        raise SerializeFailure({
            'cause': f"Error calculating error metrics for exported model at '{filepath}'", 
        })

    # Without a tolerance there is nothing to compare against; comparing a float
    # to None would raise a TypeError.
    if r2_tolerance is None:
        return

    r2_diff = abs(file_results['R2'] - cam_result['eval_result']['R2'])
    if r2_diff > r2_tolerance:
        raise SerializeFailure({
            'cause': f"R2 Difference between {model_desc} model in-memory and exported exceeds tolerance of {r2_tolerance}",
            "r2_diff": r2_diff,
            "eval_result": cam_result['eval_result'],
            "file_result": file_results,
            "eval_predictions": cam_result['predictions'],
            "file_predictions": file_predictions,
        })
    


In [None]:
from pathlib import Path
from glob import iglob

from sklearn.pipeline import Pipeline
from sklearn2pmml.decoration import ContinuousDomain
from sklearn_pandas import DataFrameMapper
from sklearn2pmml import make_pmml_pipeline
import numpy as np

"""
models that can be evaluated :
'all-top' - All available input data predicting the top score
'all-lws' - All available input data predicting last winning score
"""
_VALID_MODELS = Literal[
    'all-top', 'all-lws',
]

def targets_from_models_to_test(models_to_test: _VALID_MODELS) -> list[str]:
    targets = []
    for model_to_test in models_to_test:
        if model_to_test == 'all-top':
            targets.append('top-score')
        elif model_to_test == 'all-lws':
            targets.append('last-winning-score')
        else:
            raise ValueError(f"Unknown model to test: {model_to_test}")
    return targets

def evaluate_models(
    sport, service, style, contest_type, framework, automl_params, pbar,
    pca_components=5, data_folder="data",
    models_to_test: set[_VALID_MODELS] | None = None,
    results_folder: str = None,
    overwrite=False,
    serialize_format=None,
    r2_tolerance: float = None,
) -> tuple[dict | None, list | None, list[tuple[str, dict]]]:
    """
    Train, evaluate and optionally serialize the requested models for one
    sport / service / style / contest type / framework combination.

    sport, service: used to load the data file and to name models
    style, contest_type: their .name / .NAME attributes are used to name models
    framework: automl framework name, stored in results as 'ModelType'
    automl_params: keyword args forwarded to create_automl_model(); must
        contain 'random_state'
    pbar: progress bar, advanced once per model (including skipped models)
    pca_components: currently unused
    data_folder: folder the data csv is loaded from
    models_to_test - set/list of the models to test. if None then all models
        are tested. possible models are 'all-top' and 'all-lws'
    results_folder: if set, results and serialized models are written there and
        every exported model is validated against the in-memory model
    overwrite: if False, a model whose results file already exists in
        results_folder is not retrained and its stored results are reused
    serialize_format: export format; 'pmml' also enables the pmml feature
        engineering / post processing steps
    r2_tolerance: forwarded to validate_exported_model()

    returns tuple of (models, evaluation results, failed models). models and
    evaluation results are None if no data file was found or there was not
    enough training data. Each failed model is (model description, failure dict).

    raises: any exception other than SerializeFailure hit while finalizing,
        serializing or validating a model is re-raised after being logged
    """
    models = {}
    eval_results = []
    shared_results_dict = {
        'Sport': sport,
        'Service': service,
        'Style': style.name,
        'Type': contest_type.NAME,
        'ModelType': framework,
        'Date': datetime.now().strftime("%Y%m%d"),
    }
    failed_models: list[tuple[str, dict]] = []

    if models_to_test is None:
        final_models_to_test = {'all-top', 'all-lws'}
    else:
        final_models_to_test = models_to_test

    def skip_all_models(cause: str):
        """advance the progress bar past all requested models and report each as failed"""
        pbar.update(len(final_models_to_test))
        return None, None, [
            (f"{sport}-{service}-{style.name}-{contest_type.NAME}-{target}-{framework}", {'cause': cause})
            for target in targets_from_models_to_test(final_models_to_test)
        ]

    try:
        df = load_csv(sport, service, style, contest_type, data_folder=data_folder)
    except FileNotFoundError as ex:
        # the exception is passed as a lazy arg so a '%' in its text can't break formatting
        LOGGER.error(
            "Data file required for modeling not found %s. Skipping %s-%s-%s-%s",
            ex, sport, service, style.name, contest_type.NAME
        )
        return skip_all_models("No data file found")

    model_data = generate_train_test(
        df,
        model_cols=None,
        random_state=automl_params['random_state'],
    )

    if model_data is None or len(model_data[0]) < 5:
        LOGGER.error(
            "Not enough training data available! Only found %i training cases. Skipping all training",
            len(model_data[0]) if model_data else 0
        )
        return skip_all_models("Insufficient data")

    (X_train, X_test, y_top_train, y_top_test,
        y_last_win_train, y_last_win_test) = model_data

    model_ys = []
    if 'all-top' in final_models_to_test:
        model_ys.append(('top-score', y_top_train, y_top_test))
    if 'all-lws' in final_models_to_test:
        model_ys.append(('last-win-score', y_last_win_train, y_last_win_test))

    if not model_ys:
        raise ValueError(f"No models to test: {final_models_to_test}")

    # models for top and last winning score
    for target, y_train, y_test in model_ys:
        model_desc = f"{sport}-{service}-{style.name}-{contest_type.NAME}-{target}-{framework}"
        if results_folder:
            result_filepath = os.path.join(results_folder, model_desc + ".csv")
            if not overwrite and os.path.isfile(result_filepath):
                LOGGER.info("Found results file '%s', skipping", result_filepath)
                existing_result = pd.read_csv(result_filepath).to_dict('records')[0]
                eval_results.append(existing_result)
                pbar.update()
                continue
            # failure markers from an earlier run are stale once the model is retrained
            for old_failed_file in iglob(os.path.join(results_folder, model_desc + "*.failed")):
                LOGGER.info("deleting old failed file '%s'", old_failed_file)
                os.remove(old_failed_file)

        LOGGER.info("training model=%s", model_desc)
        pbar.set_postfix_str(model_desc)

        if serialize_format == 'pmml':
            # TODO: this should be in create_automl_model() or in some other preprocessing function
            # pre feature eng step
            feature_eng_pipeline = Pipeline([
                (
                    "mapper",
                    DataFrameMapper([
                        ([col], ContinuousDomain()) for col in X_train.columns
                    ])
                )
            ])
            # fit against the output of the transformation
            Xt = feature_eng_pipeline.fit_transform(X_train)
            Xt = Xt.astype(float)

            # TODO: this should be in serialize
            def post_process(model):
                """prepend the feature engineering steps and wrap the result as a pmml pipeline"""
                if framework == 'tpot':
                    model = model.fitted_pipeline_
                pipeline = Pipeline(feature_eng_pipeline.steps + model.steps)
                model = make_pmml_pipeline(
                    pipeline, active_fields=X_train.columns, target_fields=[y_train.name]
                )
                return model
        else:
            Xt = X_train
            post_process = None

        cam_result = create_automl_model(
            target,
            framework=framework,
            X_train=Xt,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test,
            model_desc=model_desc,
            target_output=serialize_format,
            post_process_model=post_process,
            **automl_params
        )

        models[model_desc] = cam_result['model']
        pbar.update()

        try:
            results, model_filepath = finalize_results(
                cam_result, shared_results_dict, target,
                automl_params,
                X_train, y_train,
                model_desc=model_desc,
                results_folder=results_folder,
                serialize_format=serialize_format,
            )
            if results_folder:
                log_eval_results([results], model_desc, csv_folder=results_folder)
                validate_exported_model(
                    model_filepath, cam_result, model_desc, np.median(y_train),
                    X_test, y_test, r2_tolerance=r2_tolerance,
                )
            eval_results.append(results)
        except Exception as e:
            LOGGER.error("Failure during serialization of model %s", model_desc, exc_info=e)
            # SerializeFailure carries a dict with a 'cause'. Any other exception
            # may have no args or a non-dict arg, so wrap it to keep the
            # (description, dict) shape of failed_models.
            if e.args and isinstance(e.args[0], dict):
                failure_info = e.args[0]
            else:
                failure_info = {'cause': str(e) or repr(e)}
            failed_models.append((model_desc, failure_info))
            if results_folder:
                if os.path.isfile(result_filepath):
                    pattern = os.path.join(results_folder, model_desc + "*")
                    LOGGER.info("Due to serialization failure, renaming model result files '%s' -> '%s.failed'",
                                pattern, pattern)
                    for failed_file in iglob(pattern):
                        if failed_file.endswith(".failed"):
                            continue
                        os.rename(failed_file, failed_file + ".failed")
                else:
                    LOGGER.info("Touching fail file '%s.failed'", result_filepath)
                    Path(result_filepath + ".failed").touch()
            if not isinstance(e, SerializeFailure):
                raise

    return models, eval_results, failed_models


def multi_run(
    model_params: dict, styles, sports, services, contest_types,
    models_to_test: set[_VALID_MODELS] | None = None,
    results_folder: str = None,
    serialize_format='pmml',
    r2_tolerance=None,
):
    """
    Run evaluate_models() for every combination of sport, service, style,
    contest type and automl framework.

    model_params: dict mapping framework name -> automl params for that framework
    styles, sports, services, contest_types: collections of values to combine
    models_to_test: models to evaluate per combination; None tests all models
    results_folder, serialize_format, r2_tolerance: forwarded to evaluate_models()

    returns: (models, evaluation results, unhandled exception or None, failed models).
    If an exception escapes evaluate_models() it is logged and returned along
    with everything gathered before it was raised.
    """
    LOGGER.info("starting multirun")
    models = {}
    eval_results = []
    # evaluate_models() evaluates two models ('all-top' and 'all-lws') when
    # models_to_test is not given
    models_per_run = len(models_to_test) if models_to_test else 2
    progress_total = len(sports) * len(styles) * len(services) * len(contest_types) * len(model_params) * \
        models_per_run
    pbar = tqdm(total=progress_total, desc="Modeling")
    all_failed_models = []
    try:
        for (
            sport, service, style, contest_type, (framework, params)
        ) in product(
            sports, services, styles, contest_types, model_params.items()
        ):
            (new_models, new_eval_results, failed_models) = evaluate_models(
                sport, service, style, contest_type, framework, params, pbar,
                models_to_test=models_to_test,
                results_folder=results_folder,
                serialize_format=serialize_format,
                r2_tolerance=r2_tolerance,
            )
            all_failed_models += failed_models
            if new_models is None:
                LOGGER.warning("No models generated for %s-%s-%s-%s",
                               sport, service, style.name, contest_type.NAME)
            else:
                models.update(new_models)
                eval_results += new_eval_results
    except Exception as ex:
        LOGGER.error("Unhandled exception! ", exc_info=ex)
        return models, eval_results, ex, all_failed_models
    finally:
        # close the progress bar on every exit path, including failures
        pbar.close()
    LOGGER.info("finished multirun.")
    return models, eval_results, None, all_failed_models


In [None]:
import pandas as pd

from fantasy_py import ContestStyle
from fantasy_py.lineup.strategy import GeneralPrizePool, FiftyFifty

# Run configuration: every combination of sport, service, style and contest
# type below is evaluated by multi_run(). The trailing comments list other
# values that have been used.
SPORTS = ['nfl']  # ['nhl', 'nfl', 'mlb', 'nba', 'lol']
STYLES = [ContestStyle.CLASSIC] # [ContestStyle.CLASSIC, ContestStyle.SHOWDOWN]
RANDOM_SEED = 0
SERVICES = ['fanduel'] # ['fanduel', 'draftkings', 'yahoo']
CONTEST_TYPES = [GeneralPrizePool] # [FiftyFifty, GeneralPrizePool]
MODELS_TO_TEST = {'all-top'} # {'all-top', 'all-lws'}
# export format for trained models
SERIALIZE_FORMAT = 'pmml'
# read as a global by validate_exported_model() when loading exported pmml files
PMML_FILE_FRAMEWORK = 'jpmml-file'
# max allowed absolute R2 difference between the in-memory and the exported model
R2_TOLERANCE = 0.25

# automl params keyed by framework name; each params dict is forwarded as
# keyword args to create_automl_model() and must include 'random_state'
AUTOML_PARAMS = {
    # TODO: try skautoml once onnx export is possible
    # 'skautoml': {
    #     'per_run_time_limit': 120,
    #     'max_train_time': 900,
    #     'n_jobs': 4,
    #     'random_state': RANDOM_SEED,
    # },
    'tpot': {
        'population_size': 100,
        'n_jobs': 6,
        'verbosity': 2,
        'max_train_time': 1200,
        'generations': 2, # 100,
        'early_stop': 15,
        'template': 'Selector-Transformer-Regressor',
        'random_state': RANDOM_SEED,
    }
}

# unhandled_exception is None unless an exception escaped model evaluation
(models, eval_results, unhandled_exception, failed_models) = multi_run(
    AUTOML_PARAMS, STYLES, SPORTS, SERVICES, CONTEST_TYPES, 
    models_to_test=MODELS_TO_TEST,
    results_folder='eval_results',
    serialize_format=SERIALIZE_FORMAT,
    r2_tolerance=R2_TOLERANCE,
)

In [None]:
import traceback

# Same columns log_eval_results() writes; the name is kept in case later cells use it.
eval_result_col_order = list(EVAL_RESULT_COL_ORDER)

print(f"{len(eval_results) + len(failed_models)} models evaluated, {len(failed_models)} failed, {len(eval_results)} successful")
for n, failure in enumerate(failed_models, start=1):
    print(f"failure #{n}: {failure[0]}\n\tcause='{failure[1]['cause']}'")

# Report an exception caught by multi_run() even when no model succeeded.
# format_exc() only works inside an except block, so format the captured
# exception object explicitly.
if unhandled_exception:
    print("".join(traceback.format_exception(
        type(unhandled_exception), unhandled_exception, unhandled_exception.__traceback__
    )))

if eval_results:
    # log_eval_results() selects and sorts the columns, writes the csv and
    # returns the dataframe
    eval_results_df = log_eval_results(
        eval_results,
        "all_eval_results"
    )

    print(f"{len(eval_results)} successfully serialized models")
    with pd.option_context(
        'display.max_rows', 1000,
        'display.max_columns', 100,
        'display.max_colwidth', None
    ):
        display(eval_results_df)

    print(eval_results_df.to_csv(index=False, sep="\t"))
