In [2]:
from setup import * #gets the real data



In [5]:
import numpy as np
import numpy.random as rn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree, dummy, preprocessing, ensemble
from sklearn.metrics import accuracy_score, roc_auc_score
import xgboost as xgb
import dtreeviz
import yellowbrick.model_selection as ms
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, pyll
from typing import Any, Dict, Union, Sequence
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
#also, must install graphviz for plotting to work with xgb

In [19]:
params={'random_state': 42}
rounds = [{'max_depth': hp.quniform('max_depth', 1, 8, 1), #tree
    'min_child_weight': hp.loguniform('min_child_weight',-2,3)},
    {'subsample': hp.uniform('subsample', 0.5, 1), #stochastic
    'colsample_bytree': hp.uniform('colsample_bytree', .5, 1)},
    {'reg_alpha': hp.uniform('reg_alpha', 0, 10),
    'reg_lambda': hp.uniform('reg_lambda',1,10)},
    {'gamma': hp.loguniform('gamma', -10, 10), #regularization
    'learning_rate': hp.loguniform('learning_rate',-7,0), #boosting
} ] #note these are grouped into dictionaries


In [20]:
def hyperparameter_tuning(space: Dict[str, Union[float, int]],\
    X_train: pd.DataFrame, y_train: pd.Series, X_test: pd.DataFrame,\
    y_test: pd.Series, early_stopping_rounds: int=50, metric:callable=accuracy_score) -> Dict[str, Any]:
    """
    Function for tuning hyperparameters. This is the wrong way to do it! Just a grid search/stepwise. 
    A better way is done in chapter 13. The better way takes a fraction of the time for the same parameters!
    """
    int_vals = ['max_depth','reg_alpha']
    space={k: (int(val) if k in int_vals else val) for k, val in space.items()}
    space['early_stopping_rounds'] = early_stopping_rounds
    model = xgb.XGBClassifier(**space)
    evaluation = [(X_train, y_train), (X_test, y_test)]
    model.fit(X_train, y_train, eval_set=evaluation, verbose=False)
    pred = model.predict(X_test)
    score = metric(y_test, pred)
    return {'loss': -score, 'status': STATUS_OK, 'model':model}

In [54]:
all_trials = []
counter = 0
param_dict = dict()
for round in rounds:
    params = {**params, **round} #putting these two together has the ones on the right overwrite the ones on the left!
    trials = Trials()
    best = fmin(fn=lambda space: hyperparameter_tuning(space, X_train, y_train, X_test, y_test), space=params, \
    algo=tpe.suggest, max_evals=20, trials=trials, timeout=60*5 ) #put max_evals way down. that's all.
    params = {**params, **best} #each round, update params with the best so far from this round
    param_dict[counter] = params
    counter += 1
    all_trials.append(trials)

# in the end, the params that comes out is the best params across all rounds!

100%|██████████| 20/20 [00:04<00:00,  4.18trial/s, best loss: -0.7635359116022099]
100%|██████████| 20/20 [00:04<00:00,  4.66trial/s, best loss: -0.7668508287292818]
100%|██████████| 20/20 [00:03<00:00,  5.28trial/s, best loss: -0.7690607734806629]
100%|██████████| 20/20 [00:03<00:00,  5.57trial/s, best loss: -0.7624309392265194]


In [55]:
param_dict

{0: {'random_state': 42,
  'max_depth': 4.0,
  'min_child_weight': 2.458212305327815,
  'subsample': 0.5759675916879781,
  'colsample_bytree': 0.6700438134020338,
  'reg_alpha': 3.2912936851132777,
  'reg_lambda': 1.6849564284757554,
  'gamma': 0.002619509439664793,
  'learning_rate': 0.27445215578940063},
 1: {'random_state': 42,
  'max_depth': 4.0,
  'min_child_weight': 2.458212305327815,
  'subsample': 0.8892271693608613,
  'colsample_bytree': 0.9301432941452263,
  'reg_alpha': 3.2912936851132777,
  'reg_lambda': 1.6849564284757554,
  'gamma': 0.002619509439664793,
  'learning_rate': 0.27445215578940063},
 2: {'random_state': 42,
  'max_depth': 4.0,
  'min_child_weight': 2.458212305327815,
  'subsample': 0.8892271693608613,
  'colsample_bytree': 0.9301432941452263,
  'reg_alpha': 0.8990193851150607,
  'reg_lambda': 3.5050157401650095,
  'gamma': 0.002619509439664793,
  'learning_rate': 0.27445215578940063},
 3: {'random_state': 42,
  'max_depth': 4.0,
  'min_child_weight': 2.4582123

In [58]:
{**params,**round} #putting these two together has the ones on the right overwrite the ones on the left!

{'random_state': 42,
 'max_depth': 4.0,
 'min_child_weight': 2.458212305327815,
 'subsample': 0.8892271693608613,
 'colsample_bytree': 0.9301432941452263,
 'reg_alpha': 0.8990193851150607,
 'reg_lambda': 3.5050157401650095,
 'gamma': <hyperopt.pyll.base.Apply at 0x7f55006c0c10>,
 'learning_rate': <hyperopt.pyll.base.Apply at 0x7f55006c1590>}

In [None]:
# above is the same as the following, except this does the change in place and that creates a new one.
#for items in round:
#    params[item] = round[item]

In [62]:
{**params}

{'random_state': 42,
 'max_depth': 4.0,
 'min_child_weight': 2.458212305327815,
 'subsample': 0.8892271693608613,
 'colsample_bytree': 0.9301432941452263,
 'reg_alpha': 0.8990193851150607,
 'reg_lambda': 3.5050157401650095,
 'gamma': 0.060498412496152204,
 'learning_rate': 0.16400037064490072}

In [60]:
{**round}

{'gamma': <hyperopt.pyll.base.Apply at 0x7f55006c0c10>,
 'learning_rate': <hyperopt.pyll.base.Apply at 0x7f55006c1590>}

In [23]:
def plot_3d_mesh(df: pd.DataFrame, x_col: str, y_col: str, z_col: str) -> go.Figure:
    fig = go.Figure(data=[go.Mesh3d(x=df[x_col], y=df[y_col], z=df[z_col], intensity=df[z_col]/ df[z_col].min(),
        hovertemplate=f"{z_col}: %{{z}}<br>{x_col}: %{{x}}<br>{y_col}: %{{y}}<extra></extra>")],
    )
    fig.update_layout(
        title=dict(text=f'{y_col} vs {x_col}'),
        scene=dict(
            xaxis_title=x_col,
            yaxis_title=y_col,
            zaxis_title=z_col),
        width=700,
        margin=dict(r=20, b=10, l=10, t=50)
    )
    return fig

In [24]:
def trial2df(trial: Sequence[Dict[str, Any]]) -> pd.DataFrame:
    """
    Convert a trial object (sequence of trial dictionaries) to a Pandas DataFrame.

    Parameters:
    trial: List[Dict[str, Any]]
        A list of trial dictionaries.

    Returns
    pd.DataFrame - A DataFrame with columns for the loss, trial id, and values from each trial dictionary.
    """
    vals = [] #will be a list of dictionaries, then a pd.DataFrame
    for t in trial:
        result = t['result']
        misc = t['misc']
        val = {k:(v[0] if isinstance(v, list) else v) for k, v in misc['vals'].items()
               } #dictionary.
        val['loss'] = result['loss']
        val['tid'] = t['tid']
        vals.append(val)
    return pd.DataFrame(vals)

In [25]:
plot_3d_mesh(trial2df(all_trials[2]),'reg_alpha','reg_lambda','loss')

In [37]:
all_trials[2].best_trial

{'state': 2,
 'tid': 4,
 'spec': None,
 'result': {'loss': -0.7624309392265194,
  'status': 'ok',
  'model': XGBClassifier(base_score=None, booster=None, callbacks=None,
                colsample_bylevel=None, colsample_bynode=None,
                colsample_bytree=0.8896302716546785, device=None,
                early_stopping_rounds=50, enable_categorical=False,
                eval_metric=None, feature_types=None, gamma=None,
                grow_policy=None, importance_type=None,
                interaction_constraints=None, learning_rate=None, max_bin=None,
                max_cat_threshold=None, max_cat_to_onehot=None,
                max_delta_step=None, max_depth=4, max_leaves=None,
                min_child_weight=15.207966409339303, missing=nan,
                monotone_constraints=None, multi_strategy=None, n_estimators=None,
                n_jobs=None, num_parallel_tree=None, random_state=42, ...)},
 'misc': {'tid': 4,
  'cmd': ('domain_attachment', 'FMinIter_Domain'),
  '

In [33]:
x = all_trials[3]

In [49]:
params

{'random_state': 42,
 'max_depth': 4.0,
 'min_child_weight': 15.207966409339303,
 'subsample': 0.5194636430874894,
 'colsample_bytree': 0.8896302716546785,
 'reg_alpha': 2.3415514873685828,
 'reg_lambda': 9.384235797771396,
 'gamma': 0.28111479690424057,
 'learning_rate': 0.550836065208579}

In [35]:

x.vals

{'gamma': [382.72769820941124,
  806.9142409241664,
  59.06405594422093,
  0.00039014557674611126,
  0.28111479690424057,
  19.061502286270322,
  10.737799574392632,
  414.99163244507065,
  5600.055679899932,
  0.0019292105293202107,
  4087.306032576545,
  1.169267863384736,
  0.03363152010259897,
  0.061911023241134185,
  1592.631834983687,
  0.0019130687504521607,
  18.179278138630384,
  19.62190378347311,
  943.6493783706082,
  0.0011189122127534242],
 'learning_rate': [0.011457219696628341,
  0.017656739002366927,
  0.043227926118063247,
  0.005768592990512203,
  0.550836065208579,
  0.032147388760611406,
  0.12335579318680964,
  0.007754135282312461,
  0.003578045066667875,
  0.06752695815870569,
  0.005065888756386213,
  0.09048011486099009,
  0.1343670170672564,
  0.20754102497349314,
  0.010465355860511357,
  0.001892850635380048,
  0.12981661322573268,
  0.009976361779017876,
  0.8034071641807218,
  0.07344767346228218]}

In [36]:

x.best_trial

{'state': 2,
 'tid': 4,
 'spec': None,
 'result': {'loss': -0.7602209944751381,
  'status': 'ok',
  'model': XGBClassifier(base_score=None, booster=None, callbacks=None,
                colsample_bylevel=None, colsample_bynode=None,
                colsample_bytree=0.8896302716546785, device=None,
                early_stopping_rounds=50, enable_categorical=False,
                eval_metric=None, feature_types=None, gamma=0.28111479690424057,
                grow_policy=None, importance_type=None,
                interaction_constraints=None, learning_rate=0.550836065208579,
                max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,
                max_delta_step=None, max_depth=4, max_leaves=None,
                min_child_weight=15.207966409339303, missing=nan,
                monotone_constraints=None, multi_strategy=None, n_estimators=None,
                n_jobs=None, num_parallel_tree=None, random_state=42, ...)},
 'misc': {'tid': 4,
  'cmd': ('domain_attachme

In [42]:

for tri in all_trials:
    print(tri.best_trial)

{'state': 2, 'tid': 14, 'spec': None, 'result': {'loss': -0.7668508287292818, 'status': 'ok', 'model': XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=50,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=4, max_leaves=None,
              min_child_weight=15.207966409339303, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=None,
              n_jobs=None, num_parallel_tree=None, random_state=42, ...)}, 'misc': {'tid': 14, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'workdir': None, 'idxs': {'max_depth': [14], 'min_child_weigh

In [47]:
for tri in all_trials:
    print(f'loss: { tri.best_trial['result']['loss'] }, best_trial_result: {tri.best_trial['result']}')

SyntaxError: f-string: unmatched '[' (1308022532.py, line 2)

In [48]:
for tri in all_trials:
    

KeyError: 0