## Tune parameters for final models

## Code setup

In [1]:
%load_ext autoreload

  and should_run_async(code)


In [45]:
%autoreload 2

import numpy as np
import pandas as pd
from scipy import stats
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import RandomizedSearchCV

from augury.ml_estimators import BasicEstimator, ConfidenceEstimator
from augury.sklearn.metrics import match_accuracy_scorer, bits_scorer
from augury.sklearn.model_selection import year_cv_split
from augury.ml_data import MLData
from augury.settings import CV_YEAR_RANGE, SEED

# Pretty arbitrary, but a CV of the ConfidenceEstimator takes about 2 mins,
# so this would run for a bit under 2 hrs for that model.
N_ITER = 50

np.random.seed(SEED)

In [3]:
data = MLData(train_year_range=(max(CV_YEAR_RANGE),))
data.data

  and should_run_async(code)


2021-03-05 21:05:12,281 - kedro.io.data_catalog - INFO - Loading data from `full_data` (JSONDataSet)...


Unnamed: 0,Unnamed: 1,Unnamed: 2,team,oppo_team,round_type,venue,prev_match_oppo_team,oppo_prev_match_oppo_team,date,team_goals,team_behinds,score,...,oppo_rolling_prev_match_time_on_ground_skew,oppo_rolling_prev_match_time_on_ground_std,oppo_last_year_brownlow_votes_sum,oppo_last_year_brownlow_votes_max,oppo_last_year_brownlow_votes_min,oppo_last_year_brownlow_votes_skew,oppo_last_year_brownlow_votes_std,oppo_cum_matches_played,oppo_rolling_prev_match_goals_plus_rolling_prev_match_behinds,oppo_rolling_prev_match_goals_divided_by_rolling_prev_match_goals_plus_rolling_prev_match_behinds
Adelaide,1991,1,Adelaide,Hawthorn,Regular,Football Park,0,Melbourne,1991-03-22 03:56:00+00:00,24,11,155,...,0.0,0.0,72,15,0,1.565197,4.070433,80,1,0
Adelaide,1991,2,Adelaide,Carlton,Regular,Football Park,Hawthorn,Fitzroy,1991-03-31 03:56:00+00:00,12,9,81,...,0.0,0.0,51,16,0,2.449132,3.913203,60,1,0
Adelaide,1991,3,Adelaide,Sydney,Regular,S.C.G.,Carlton,Hawthorn,1991-04-07 03:05:00+00:00,19,18,132,...,0.0,0.0,33,7,0,1.403576,2.433862,92,1,0
Adelaide,1991,4,Adelaide,Essendon,Regular,Windy Hill,Sydney,North Melbourne,1991-04-13 03:30:00+00:00,6,11,47,...,0.0,0.0,71,13,0,1.262708,4.524495,69,1,0
Adelaide,1991,5,Adelaide,West Coast,Regular,Subiaco,Essendon,North Melbourne,1991-04-21 05:27:00+00:00,9,11,65,...,0.0,0.0,48,9,0,0.913203,3.218368,48,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Western Bulldogs,2021,19,Western Bulldogs,Adelaide,Regular,Eureka Stadium,Gold Coast,West Coast,2021-07-24 02:20:00+00:00,0,0,0,...,0.0,0.0,0,0,0,0.000000,0.000000,0,0,0
Western Bulldogs,2021,20,Western Bulldogs,Melbourne,Regular,M.C.G.,Adelaide,Gold Coast,2021-07-31 02:20:00+00:00,0,0,0,...,0.0,0.0,0,0,0,0.000000,0.000000,0,0,0
Western Bulldogs,2021,21,Western Bulldogs,Essendon,Regular,Docklands,Melbourne,Sydney,2021-08-07 02:20:00+00:00,0,0,0,...,0.0,0.0,0,0,0,0.000000,0.000000,0,0,0
Western Bulldogs,2021,22,Western Bulldogs,Hawthorn,Regular,York Park,Essendon,Collingwood,2021-08-14 02:11:00+00:00,0,0,0,...,0.0,0.0,0,0,0,0.000000,0.000000,0,0,0


In [9]:
X_train, y_train = data.train_data

## Tune margin estimator

In [30]:
basic_estimator = BasicEstimator()

[param for param in basic_estimator.get_params() if 'ridge__' in param]

['pipeline__ridge__alpha',
 'pipeline__ridge__copy_X',
 'pipeline__ridge__fit_intercept',
 'pipeline__ridge__max_iter',
 'pipeline__ridge__normalize',
 'pipeline__ridge__random_state',
 'pipeline__ridge__solver',
 'pipeline__ridge__tol']

In [31]:
BASIC_PARAM_GRID = {
    # Current param for tipresias_2020 is about 0.038
    'pipeline__pipeline__correlationselector__threshold': stats.uniform(0.01, 0.1),
    'pipeline__ridge__alpha': stats.uniform(0.0, 1.0),
}

basic_search = RandomizedSearchCV(
    basic_estimator,
    BASIC_PARAM_GRID,
    n_jobs=-1,
    n_iter=N_ITER,
    scoring=match_accuracy_scorer,
    cv=year_cv_split(X_train, CV_YEAR_RANGE),
    random_state=SEED,
    error_score='raise',
    verbose=5,
)

In [32]:
basic_search.fit(*data.train_data)

  and should_run_async(code)


Fitting 5 folds for each of 50 candidates, totalling 250 fits


RandomizedSearchCV(cv=[(array([ True,  True,  True, ..., False, False, False]),
                        array([False, False, False, ..., False, False, False])),
                       (array([ True,  True,  True, ..., False, False, False]),
                        array([False, False, False, ..., False, False, False])),
                       (array([ True,  True,  True, ..., False, False, False]),
                        array([False, False, False, ..., False, False, False])),
                       (array([ True,  True,  True, ..., False, Fal...
                                                                      Ridge())])),
                   n_iter=50, n_jobs=-1,
                   param_distributions={'pipeline__pipeline__correlationselector__threshold': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7ff659ea7f10>,
                                        'pipeline__ridge__alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7ff659e9ae50>},
                 

In [33]:
basic_search.cv_results_

  and should_run_async(code)


{'mean_fit_time': array([61.19071541, 15.5772696 , 11.55399632,  8.46737638,  7.99366012,
         8.62175016,  8.31077919,  7.69593253,  8.03070736,  7.98794603,
         7.55175753,  7.66267481,  8.48110085,  8.43280692,  8.10685267,
         8.07695284,  8.46409788,  8.05096731,  8.12596369,  8.28113399,
         7.9517839 ,  8.42065573,  7.72102222,  7.84796443,  9.22405729,
         8.02721276,  7.61375008,  7.80594668,  8.7439857 ,  7.9085381 ,
         7.99390111,  7.48742604,  7.78042955,  7.86460137,  7.75742908,
         8.10063801,  8.0745894 ,  7.90804677,  7.62098126,  8.38172541,
         8.07890477,  7.74743018,  7.81033301,  7.54784479,  8.34926653,
         8.36165237,  8.05687428,  8.27125311,  7.61538124,  7.50818133]),
 'std_fit_time': array([25.21537329,  3.27527582,  1.94142581,  0.62371607,  0.53091891,
         0.13587483,  0.4108918 ,  0.22610821,  0.41164112,  0.17654684,
         0.28929593,  0.42058448,  0.5192184 ,  0.13976022,  0.58350806,
         0.49955

In [34]:
basic_results = (
    pd
    .DataFrame(basic_search.cv_results_)
    .sort_values('rank_test_score')
    .filter(regex='mean_test_score|param_')
    .sort_index(axis=1)
)

basic_results.head(20)

Unnamed: 0,mean_test_score,param_pipeline__pipeline__correlationselector__threshold,param_pipeline__ridge__alpha
11,0.70409,0.039214,0.366362
32,0.703124,0.038093,0.542696
41,0.703119,0.04309,0.063558
39,0.703119,0.045847,0.115869
23,0.70022,0.041171,0.520068
42,0.70022,0.041098,0.325183
8,0.70022,0.040424,0.524756
18,0.70022,0.040461,0.097672
22,0.699259,0.035878,0.662522
30,0.697322,0.048868,0.271349


In [35]:
basic_results.to_json('4.0-ridge-param-scores.json', indent=2, orient='records')

  and should_run_async(code)


In [56]:
basic_results.iloc[0, :].to_dict()

{'mean_test_score': 0.7040898644528868,
 'param_pipeline__pipeline__correlationselector__threshold': 0.03921446485352182,
 'param_pipeline__ridge__alpha': 0.3663618432936917}

## Tune confidence estimator

In [42]:
data.label_col = 'result'
data._y_data = None

confidence_estimator = ConfidenceEstimator()

[param for param in confidence_estimator.get_params() if 'xgbclassifier__' in param]

['pipeline__xgbclassifier__objective',
 'pipeline__xgbclassifier__use_label_encoder',
 'pipeline__xgbclassifier__base_score',
 'pipeline__xgbclassifier__booster',
 'pipeline__xgbclassifier__colsample_bylevel',
 'pipeline__xgbclassifier__colsample_bynode',
 'pipeline__xgbclassifier__colsample_bytree',
 'pipeline__xgbclassifier__gamma',
 'pipeline__xgbclassifier__gpu_id',
 'pipeline__xgbclassifier__importance_type',
 'pipeline__xgbclassifier__interaction_constraints',
 'pipeline__xgbclassifier__learning_rate',
 'pipeline__xgbclassifier__max_delta_step',
 'pipeline__xgbclassifier__max_depth',
 'pipeline__xgbclassifier__min_child_weight',
 'pipeline__xgbclassifier__missing',
 'pipeline__xgbclassifier__monotone_constraints',
 'pipeline__xgbclassifier__n_estimators',
 'pipeline__xgbclassifier__n_jobs',
 'pipeline__xgbclassifier__num_parallel_tree',
 'pipeline__xgbclassifier__random_state',
 'pipeline__xgbclassifier__reg_alpha',
 'pipeline__xgbclassifier__reg_lambda',
 'pipeline__xgbclassifie

In [46]:
CONFIDENCE_PARAM_GRID = {
    # Current param for tipresias_2020 is about 0.038
    'pipeline__pipeline__correlationselector__threshold': stats.uniform(0.01, 0.1),
    'pipeline__xgbclassifier__booster': ['gbtree', 'gblinear', 'dart'],
    'pipeline__xgbclassifier__colsample_bylevel': stats.uniform(0.8, 0.2),
    'pipeline__xgbclassifier__colsample_bytree': stats.uniform(0.8, 0.2),
    'pipeline__xgbclassifier__learning_rate': stats.uniform(0.1, 0.04),
    'pipeline__xgbclassifier__max_depth': np.arange(2, 10),
    'pipeline__xgbclassifier__n_estimators': np.arange(75, 150),
    'pipeline__xgbclassifier__reg_alpha': stats.uniform(0.05, 0.05),
    'pipeline__xgbclassifier__reg_lambda': stats.uniform(0.1, 0.04),
    'pipeline__xgbclassifier__subsample': stats.uniform(0.8, 0.2),
}

confidence_search = RandomizedSearchCV(
    confidence_estimator,
    CONFIDENCE_PARAM_GRID,
    n_jobs=-1,
    n_iter=N_ITER,
    scoring=bits_scorer,
    cv=year_cv_split(X_train, CV_YEAR_RANGE),
    random_state=SEED,
    error_score='raise',
    verbose=5,
)

In [47]:
confidence_search.fit(*data.train_data)

  and should_run_async(code)


Fitting 5 folds for each of 50 candidates, totalling 250 fits


RandomizedSearchCV(cv=[(array([ True,  True,  True, ..., False, False, False]),
                        array([False, False, False, ..., False, False, False])),
                       (array([ True,  True,  True, ..., False, False, False]),
                        array([False, False, False, ..., False, False, False])),
                       (array([ True,  True,  True, ..., False, False, False]),
                        array([False, False, False, ..., False, False, False])),
                       (array([ True,  True,  True, ..., False, Fal...
                                        'pipeline__xgbclassifier__reg_alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7ff65a5b3640>,
                                        'pipeline__xgbclassifier__reg_lambda': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7ff65a5b3c40>,
                                        'pipeline__xgbclassifier__subsample': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7ff65a47d2

In [48]:
confidence_search.cv_results_

  and should_run_async(code)


{'mean_fit_time': array([ 77.27178001, 290.57223258,  94.59100313, 160.80090022,
         81.00646458,  27.84111538,  51.2287993 ,  22.47058172,
         32.65863657,  24.55940833,  23.40714278, 103.35066938,
        162.08751478,  37.25249996, 167.00573206, 115.31697125,
        197.58857031, 222.16056275,  73.72047381,  46.55870438,
         27.21914835, 155.02434616,  72.94724712, 140.42689795,
        134.67295499,  31.533287  , 144.57080059,  62.88905478,
         25.50690126,  25.4083766 , 130.01732993,  78.38074093,
        272.18774185, 291.94261866,  28.32142777, 265.6236598 ,
        247.0652854 ,  58.10893221, 102.06972313,  45.12688389,
         20.60446715,  29.00510812,  35.78216734, 122.16626205,
         29.01499357,  26.77970452,  59.71991382,  83.55425453,
         24.59276152,  77.01045671]),
 'std_fit_time': array([ 4.56927733, 10.65039577,  2.16079745,  4.75712407,  1.48392323,
         2.85065965,  4.89625545,  2.73954289,  2.74017923,  1.6539056 ,
         1.4971

In [49]:
confidence_results = (
    pd
    .DataFrame(confidence_search.cv_results_)
    .sort_values('rank_test_score')
    .filter(regex='mean_test_score|param_')
    .sort_index(axis=1)
)

confidence_results.head(20)

Unnamed: 0,mean_test_score,param_pipeline__pipeline__correlationselector__threshold,param_pipeline__xgbclassifier__booster,param_pipeline__xgbclassifier__colsample_bylevel,param_pipeline__xgbclassifier__colsample_bytree,param_pipeline__xgbclassifier__learning_rate,param_pipeline__xgbclassifier__max_depth,param_pipeline__xgbclassifier__n_estimators,param_pipeline__xgbclassifier__reg_alpha,param_pipeline__xgbclassifier__reg_lambda,param_pipeline__xgbclassifier__subsample
36,3.765773,0.045597,gbtree,0.824033,0.868376,0.103672,8,136,0.085183,0.118967,0.819567
14,3.635238,0.091801,gbtree,0.860956,0.832931,0.121364,7,111,0.084622,0.110776,0.848825
17,3.292308,0.108565,dart,0.934427,0.952324,0.109506,8,112,0.068389,0.125292,0.926706
23,2.396609,0.07919,gbtree,0.844854,0.942436,0.10949,6,122,0.087325,0.125985,0.969845
3,2.135429,0.061423,gbtree,0.80929,0.921509,0.106821,8,92,0.097444,0.138625,0.961679
16,1.946873,0.013689,gbtree,0.995323,0.882207,0.101322,9,75,0.061978,0.105796,0.897891
30,1.912266,0.060152,gbtree,0.838855,0.814188,0.115871,7,89,0.068779,0.103759,0.915656
11,1.873976,0.020789,dart,0.979153,0.895074,0.122531,5,97,0.080221,0.121594,0.840612
15,1.79087,0.026829,gbtree,0.880767,0.812978,0.110157,5,113,0.074862,0.112035,0.856968
24,1.722447,0.075761,gbtree,0.908508,0.901763,0.125453,7,104,0.098651,0.115724,0.978409


In [50]:
confidence_results.to_json('4.0-confidence-param-scores.json', indent=2, orient='records')

  and should_run_async(code)


In [54]:
confidence_results.iloc[0, :].to_dict()

{'mean_test_score': 3.7657729114107013,
 'param_pipeline__pipeline__correlationselector__threshold': 0.04559726786512616,
 'param_pipeline__xgbclassifier__booster': 'gbtree',
 'param_pipeline__xgbclassifier__colsample_bylevel': 0.8240329295611285,
 'param_pipeline__xgbclassifier__colsample_bytree': 0.8683759333432803,
 'param_pipeline__xgbclassifier__learning_rate': 0.10367196263253768,
 'param_pipeline__xgbclassifier__max_depth': 8,
 'param_pipeline__xgbclassifier__n_estimators': 136,
 'param_pipeline__xgbclassifier__reg_alpha': 0.0851828929690012,
 'param_pipeline__xgbclassifier__reg_lambda': 0.11896695316349301,
 'param_pipeline__xgbclassifier__subsample': 0.8195668321302003}