## Preparation
Run all cells.

In [1]:
import xgb_test as xt
import xgb_model as x
import pandas as pd
import pyinputplus as pyip

In [2]:
def load_and_run(**kwargs):
    """Load the data, add interaction features, then train and test a model.

    Args:
        **kwargs: Forwarded unchanged to ``xt.test_model_xgb``. This notebook
            passes the model hyperparameters here.

    Returns:
        tuple: ``(train, tourn, model, feature_names_orig)``. ``train``,
        ``tourn`` and ``model`` are whatever ``xt.test_model_xgb`` returns;
        ``feature_names_orig`` is the feature list loaded *before* the
        interaction features were added.
    """
    train, tourn = x.load_data()
    # Capture the original feature names while no interaction columns exist,
    # rather than slicing the expanded list with a hard-coded count ([:310]),
    # which breaks silently if the feature count or column order changes.
    feature_names_orig = x.load_features(train)
    train = x.feature_interactions_intel_dexte(train)
    tourn = x.feature_interactions_intel_dexte(tourn)
    # Reload so the training feature list includes the new interaction columns.
    feature_names = x.load_features(train)
    train, tourn, model = xt.test_model_xgb(train, tourn, feature_names, **kwargs)
    return train, tourn, model, feature_names_orig

In [3]:
def diagnostics_all(train, tourn, feature_names_orig):
    """Gather diagnostics at three neutralization levels into a one-row frame.

    ``xt.diagnostics`` is run first on the inputs as given, then on the
    results of ``x.neut_by_era`` with proportions 0.5 and 1. Metric keys are
    prefixed with ``0_neut_``, ``0.5_neut_`` and ``1_neut_`` respectively.

    Args:
        train: Training data passed to ``xt.diagnostics`` / ``x.neut_by_era``.
        tourn: Tournament data passed to the same helpers.
        feature_names_orig: Feature names handed to both helpers.

    Returns:
        pandas.DataFrame: A single row with one column per prefixed metric.
    """
    # (key prefix, proportion for x.neut_by_era); None means "use inputs as-is".
    levels = (('0', None), ('0.5', .5), ('1', 1))

    merged = {}
    for label, proportion in levels:
        if proportion is None:
            train_lvl, tourn_lvl = train, tourn
        else:
            train_lvl = x.neut_by_era(train, feature_names_orig, proportion)
            tourn_lvl = x.neut_by_era(tourn, feature_names_orig, proportion)
        metrics = xt.diagnostics(train_lvl, tourn_lvl, feature_names_orig)
        for metric, value in metrics.items():
            merged[f'{label}_neut_{metric}'] = value

    return pd.DataFrame([merged])

In [4]:
def combine_params_diagnostics(model, diag_df):
    """Place the model's parameters and its diagnostics side by side.

    Args:
        model: Object exposing ``get_params()`` that returns a dict.
        diag_df: DataFrame of diagnostics to append as extra columns.

    Returns:
        pandas.DataFrame: Parameter columns followed by the columns of
        ``diag_df``, concatenated along the column axis.
    """
    params_row = model.get_params()
    frames = [pd.DataFrame([params_row]), diag_df]
    return pd.concat(frames, axis='columns')

In [5]:
def append_cv_scores(params_diagnostics, path='params_diagnostics.csv'):
    """Append the new results to the history stored in a CSV file.

    Args:
        params_diagnostics: DataFrame holding the new row(s) to append.
        path: CSV file with earlier results, read with its first column as
            the index. Defaults to ``'params_diagnostics.csv'``.

    Returns:
        pandas.DataFrame: The stored rows followed by ``params_diagnostics``
        with a fresh 0..n-1 index, or ``params_diagnostics`` unchanged when
        the file does not exist.

    Raises:
        Exception: Any read error other than a missing file is propagated.
    """
    try:
        history = pd.read_csv(path, index_col=0)
    # Only a missing file means "no history yet". Swallowing every error here
    # (the former bare ``except:``) would return just the new row, and a later
    # export would then overwrite an existing but unreadable history file.
    except FileNotFoundError:
        print(f'{x.get_time()} No file named {path}')
        return params_diagnostics
    return pd.concat([history, params_diagnostics], axis=0).reset_index(drop=True)

In [6]:
def export_cv_scores(combined):
    """Ask for confirmation, then write ``combined`` to params_diagnostics.csv.

    Args:
        combined: Object with a ``to_csv`` method (a DataFrame in this
            notebook) to be written when the user answers ``y``.

    Returns:
        None. Progress messages are printed either way.
    """
    answer = pyip.inputChoice(['y', 'n'], prompt='Export to params_diagnostics.csv? y/n...')
    # Guard clause: anything other than a yes leaves the file untouched.
    if answer not in ('y', 'Y'):
        print(f'{x.get_time()} Not exporting.')
        return

    print(f'{x.get_time()} Exporting...', end='', flush=True)
    combined.to_csv('params_diagnostics.csv')
    print(f'{x.get_time()} Done.')

## Run diagnostics
1. Set the model parameters as keyword arguments in the `load_and_run()` call below.
1. Run all cells.

In [7]:
# Keyword arguments are forwarded by load_and_run() to xt.test_model_xgb.
train, tourn, model, feature_names_orig = load_and_run(
    n_estimators=4000,
    colsample_bytree=0.07,
    learning_rate=0.003,
    max_depth=5,
    subsample=0.75,
)

08:52:19 Loading data from round 263...08:53:03 Done.
08:53:03 Loaded 310 features.
08:53:03 Adding 2nd order interactions between intelligence and dexterity features...08:53:07 Done.
08:53:07 Adding 2nd order interactions between intelligence and dexterity features...08:53:20 Done.
08:53:20 Loaded 661 features.
08:53:20 Training model...
XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=0.07, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=0.003, max_delta_step=None, max_depth=5,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=4000, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=0.75, tree_method='gpu_hist',
             validate_parameters=None, verbosity=None)
08:58:42 Done.
08:58:42 Generat

In [8]:
# Diagnostics at each neutralization level, as a single-row frame.
diag_df = diagnostics_all(
    train=train,
    tourn=tourn,
    feature_names_orig=feature_names_orig,
)
# Prepend the model's parameters so each row records what produced the scores.
params_diagnostics = combine_params_diagnostics(model=model, diag_df=diag_df)
params_diagnostics

08:59:22 Calculating diagnostics...08:59:43 Done.
08:59:43 Neutralizing 50.0% by era...09:00:06 Done.
09:00:06 Neutralizing 50.0% by era...09:01:24 Done.
09:01:24 Calculating diagnostics...09:01:45 Done.
09:01:45 Neutralizing 100% by era...09:02:07 Done.
09:02:07 Neutralizing 100% by era...09:03:20 Done.
09:03:20 Calculating diagnostics...09:03:43 Done.


Unnamed: 0,objective,base_score,booster,colsample_bylevel,colsample_bynode,colsample_bytree,gamma,gpu_id,importance_type,interaction_constraints,...,1_neut_validation_corr_std,1_neut_validation_sharpe,1_neut_max_drawdown,1_neut_max_feature_exposure,1_neut_feature_neutral_mean,1_neut_validation_mmc_mean,1_neut_validation_mmc_sharpe,1_neut_corr_plus_mmc_mean,1_neut_corr_plus_mmc_sharpe,1_neut_corr_with_example_preds
0,reg:squarederror,0.5,gbtree,1,1,0.07,0,0,gain,,...,0.01493,1.404405,-0.025133,0.01591,0.017351,0.005682,0.45997,0.02665,1.090948,0.507508


In [9]:
# Merge this run with any previously saved results, then offer to save them.
combined = append_cv_scores(params_diagnostics=params_diagnostics)
export_cv_scores(combined=combined)

Export to params_diagnostics.csv? y/n...y
09:04:44 Exporting...09:04:44 Done.
