## Importing Libraries

In [2]:
import boto3
import numpy as np
import pandas as pd
import sagemaker
import sagemaker.amazon.common as smac
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

# SageMaker API client built from the default boto3 session.
sage_client = boto3.Session().client('sagemaker')

##  HPO Tuning Analytics

In [10]:
# Name of the hyperparameter tuning job to inspect.
tuning_job_name = 'V3Blackbelt-201213-2200'

# Re-run this cell to refresh the current status of the hyperparameter tuning job.
tuning_job_result = sage_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name
)

status = tuning_job_result['HyperParameterTuningJobStatus']
if status != 'Completed':
    print('Reminder: the tuning job has not been completed.')

job_count = tuning_job_result['TrainingJobStatusCounters']['Completed']
print("%d training jobs have completed" % job_count)

# Look the objective definition up once; later cells use `is_minimize` to
# choose the sort direction and `objective_name` as an axis label.
tuning_objective = tuning_job_result['HyperParameterTuningJobConfig']['HyperParameterTuningJobObjective']
objective_name = tuning_objective['MetricName']
is_minimize = (tuning_objective['Type'] != 'Maximize')

20 training jobs have completed


## HPO Tuner and its best model

In [11]:
from pprint import pprint

# Pretty-print the best training job the tuner has reported, if there is one.
best_training_job = tuning_job_result.get('BestTrainingJob', None)
if not best_training_job:
    print("No training jobs have reported results yet.")
else:
    print("Best model found so far:")
    pprint(best_training_job)

Best model found so far:
{'CreationTime': datetime.datetime(2020, 12, 13, 22, 6, 26, tzinfo=tzlocal()),
 'FinalHyperParameterTuningJobObjectiveMetric': {'MetricName': 'test:binary_classification_accuracy',
                                                 'Value': 0.7124711275100708},
 'ObjectiveStatus': 'Succeeded',
 'TrainingEndTime': datetime.datetime(2020, 12, 13, 22, 11, 26, tzinfo=tzlocal()),
 'TrainingJobArn': 'arn:aws:sagemaker:us-east-1:719009365707:training-job/v3blackbelt-201213-2200-020-240888ce',
 'TrainingJobName': 'V3Blackbelt-201213-2200-020-240888ce',
 'TrainingJobStatus': 'Completed',
 'TrainingStartTime': datetime.datetime(2020, 12, 13, 22, 8, 31, tzinfo=tzlocal()),
 'TunedHyperParameters': {'bias_init_sigma': '0.00027401056839555956',
                          'bias_lr': '0.006550021000384885',
                          'bias_wd': '1.3494417573713629e-08',
                          'factors_init_sigma': '0.0006446642260676662',
                          'factors_lr':

## Tabulating HPO training job analytics

In [12]:
tuning_job_name = 'V3Blackbelt-201213-2200'
tunerresult = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)

# Tabular summary of the tuning job's training jobs (fetched once).
full_df = tunerresult.dataframe()

# Bind `df` up front so the final display line never raises NameError
# when the tuning job has not produced any rows yet.
df = full_df

if len(full_df) > 0:
    # Keep only jobs whose objective is a real number; NaN and -inf both fail this comparison.
    df = full_df[full_df['FinalObjectiveValue'] > -float('inf')]
    if len(df) > 0:
        # Best job first: ascending when minimizing, descending when maximizing.
        df = df.sort_values('FinalObjectiveValue', ascending=is_minimize)
        print("Number of training jobs with valid objective: %d" % len(df))
        print({"lowest":min(df['FinalObjectiveValue']),"highest": max(df['FinalObjectiveValue'])})
        # Don't truncate TrainingJobName. `None` is the supported "no limit"
        # value; the legacy -1 is deprecated since pandas 1.0.
        try:
            pd.set_option('display.max_colwidth', None)
        except ValueError:
            # Older pandas only accepts an integer here, where -1 meant "no limit".
            pd.set_option('display.max_colwidth', -1)
    else:
        print("No training jobs have reported valid results yet.")
else:
    print("No training jobs have reported valid results yet.")

df


Number of training jobs with valid objective: 20
{'lowest': 0.4839620292186737, 'highest': 0.7124711275100708}


Unnamed: 0,bias_init_sigma,bias_lr,bias_wd,factors_init_sigma,factors_lr,factors_wd,linear_init_sigma,linear_lr,linear_wd,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,0.0002740106,0.006550021,1.349442e-08,0.0006446642,0.001545676,0.0001114012,4.669172e-07,0.002052876,0.0002449683,V3Blackbelt-201213-2200-020-240888ce,Completed,0.712471,2020-12-13 22:08:31+00:00,2020-12-13 22:11:26+00:00,175.0
1,0.0005111076,0.02368308,1.166224e-06,0.0001273874,3.655298e-08,0.04483975,9.996352e-08,0.08952713,2.603324e-05,V3Blackbelt-201213-2200-019-875a149f,Completed,0.710675,2020-12-13 22:08:33+00:00,2020-12-13 22:11:37+00:00,184.0
8,0.07653696,0.08975183,3.912799,4.14064e-07,0.01189309,0.0007250407,0.07008308,0.1530352,2.713528e-06,V3Blackbelt-201213-2200-012-184aea36,Completed,0.706569,2020-12-13 22:08:03+00:00,2020-12-13 22:11:13+00:00,190.0
3,28.26342,0.008015635,0.925359,7.466552e-08,0.008554517,0.0001456824,4.8846e-05,0.0003360118,9.981838e-08,V3Blackbelt-201213-2200-017-32a11ab6,Completed,0.70535,2020-12-13 22:08:36+00:00,2020-12-13 22:11:47+00:00,191.0
12,0.0256124,0.03270305,0.0003998587,3.917142e-06,7.079697e-05,0.006770757,0.826838,0.2254987,2.399299e-05,V3Blackbelt-201213-2200-008-3d17c402,Completed,0.704196,2020-12-13 22:02:51+00:00,2020-12-13 22:05:43+00:00,172.0
16,4.187282,0.5984551,2.646901,0.2668094,1.291434e-05,20.72951,8.070802,0.09528089,3.502329e-06,V3Blackbelt-201213-2200-004-cb935d25,Completed,0.700282,2020-12-13 22:02:16+00:00,2020-12-13 22:05:10+00:00,174.0
4,0.5887826,2.994439,0.0003448528,8.378929e-07,1.244282e-07,0.0005955022,1.084503e-07,0.00556963,2.763498e-06,V3Blackbelt-201213-2200-016-f4f642ac,Completed,0.689184,2020-12-13 22:08:19+00:00,2020-12-13 22:11:27+00:00,188.0
14,9.02062e-08,2.619983e-08,11.45021,0.1166008,4.270428e-05,5.034846e-06,1.139001e-07,0.4053933,0.001434377,V3Blackbelt-201213-2200-006-be99fd47,Completed,0.647998,2020-12-13 22:02:41+00:00,2020-12-13 22:05:47+00:00,186.0
17,6.306584e-07,0.01085678,5.593909e-06,1.414644e-08,1.541493e-06,0.5599804,1.081003e-05,31.3699,4.961945e-08,V3Blackbelt-201213-2200-003-bf58c9da,Completed,0.644534,2020-12-13 22:02:37+00:00,2020-12-13 22:05:45+00:00,188.0
5,0.004949567,1.304413e-08,0.0004066204,0.2287576,0.02002409,1.402135e-05,0.01429673,5.53439,3.593777e-06,V3Blackbelt-201213-2200-015-6104aaba,Completed,0.640813,2020-12-13 22:08:15+00:00,2020-12-13 22:11:19+00:00,184.0


## Plotting the objective metric over training start time

In [13]:
import bokeh
import bokeh.io
# Direct Bokeh output to the notebook so that show() renders plots inline in the cell output.
bokeh.io.output_notebook()
from bokeh.plotting import figure, show
from bokeh.models import HoverTool

class HoverHelper():
    """Builds the Bokeh tool list, including a hover tooltip, for tuning-result plots."""

    def __init__(self, tuning_analytics):
        # Analytics object whose `tuning_ranges` mapping supplies the hyperparameter names.
        self.tuner = tuning_analytics

    def hovertool(self):
        """Return a HoverTool showing the objective, the job name and every tuned hyperparameter."""
        fixed_fields = [
            ("FinalObjectiveValue", "@FinalObjectiveValue"),
            ("TrainingJobName", "@TrainingJobName"),
        ]
        # One tooltip row per tuned hyperparameter; "@{name}" refers to the data column of that name.
        hyperparameter_fields = [
            (name, "@{%s}" % name) for name in self.tuner.tuning_ranges.keys()
        ]
        return HoverTool(tooltips=fixed_fields + hyperparameter_fields)

    def tools(self, standard_tools='pan,crosshair,wheel_zoom,zoom_in,zoom_out,undo,reset'):
        """Return a new hover tool followed by the standard tool string, for use as figure(tools=...)."""
        hover_tool = self.hovertool()
        return [hover_tool, standard_tools]

# Shared helper that supplies the hover tooltip and standard tools to every figure.
hover = HoverHelper(tunerresult)

# Objective value of each training job plotted against the time the job started.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=900, height=400, tools=hover.tools(), x_axis_type='datetime')
# scatter() draws screen-sized circle markers by default; circle() without an
# explicit size is a radius-based glyph in newer Bokeh releases.
p.scatter(source=df, x='TrainingStartTime', y='FinalObjectiveValue')
show(p)


## Plotting the objective metric against each tuned hyperparameter

In [14]:
# Imported explicitly rather than relying on `bokeh.layouts` having been
# loaded as a side effect of other Bokeh imports.
from bokeh.layouts import column


def is_num(x):
    """Return True when `x` can be converted to a float, False otherwise."""
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number"; anything else
        # (e.g. KeyboardInterrupt) should propagate.
        return False
    return True


ranges = tunerresult.tuning_ranges
figures = []
for hp_name, hp_range in ranges.items():
    categorical_args = {}
    vals = hp_range.get('Values')
    if vals:
        # This is marked as categorical.  Check if all options are actually numbers.
        if all(is_num(x) for x in vals):
            # Bokeh has issues plotting a "categorical" range that's actually numeric, so plot as numeric
            print("Hyperparameter %s is tuned as categorical, but all values are numeric" % hp_name)
        else:
            # Genuinely categorical: give the x axis the list of categories as its range.
            categorical_args['x_range'] = vals

    # One figure per hyperparameter: objective value against the hyperparameter value.
    # `width`/`height` replace `plot_width`/`plot_height`, which Bokeh 3.0 removed.
    p = figure(width=500, height=500,
               title="Objective vs %s" % hp_name,
               tools=hover.tools(),
               x_axis_label=hp_name, y_axis_label=objective_name,
               **categorical_args)
    # scatter() draws screen-sized circle markers by default.
    p.scatter(source=df, x=hp_name, y='FinalObjectiveValue')
    figures.append(p)

# Stack all figures vertically in a single layout.
show(column(*figures))