## Importing Libraries

In [3]:
import pandas as pd
import numpy as np
import sagemaker, boto3
import sagemaker.amazon.common as smac
# SageMaker API client created from a default boto3 session; used below to describe the tuning job.
sage_client = boto3.Session().client('sagemaker')
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

## HPO Tuning Analytics

In [4]:
# Name of the hyperparameter tuning job analysed throughout this notebook.
tuning_job_name = 'FM-classifier-201223-1311'

# Re-run this cell at any time to refresh the tuning job's current status.
tuning_job_result = sage_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name
)

# Remind the reader when the job has not reached the 'Completed' status yet.
status = tuning_job_result['HyperParameterTuningJobStatus']
if not status == 'Completed':
    print('Reminder: the tuning job has not been completed.')

job_count = tuning_job_result['TrainingJobStatusCounters']['Completed']
print("%d training jobs have completed" % job_count)

# Objective settings of the job: the direction drives the sort order of the
# results table and the metric name labels the y-axis of the plots.
tuning_objective = tuning_job_result['HyperParameterTuningJobConfig']['HyperParameterTuningJobObjective']
is_minimize = tuning_objective['Type'] != 'Maximize'
objective_name = tuning_objective['MetricName']

5 training jobs have completed


## HPO Tuner and its best model

In [5]:
from pprint import pprint

# 'BestTrainingJob' is absent (or empty) until at least one training job has
# reported an objective value, so look it up once and branch on its presence.
best_training_job = tuning_job_result.get('BestTrainingJob')
if not best_training_job:
    print("No training jobs have reported results yet.")
else:
    print("Best model found so far:")
    pprint(best_training_job)

Best model found so far:
{'CreationTime': datetime.datetime(2020, 12, 23, 13, 11, 14, tzinfo=tzlocal()),
 'FinalHyperParameterTuningJobObjectiveMetric': {'MetricName': 'test:binary_classification_accuracy',
                                                 'Value': 0.6117021441459656},
 'ObjectiveStatus': 'Succeeded',
 'TrainingEndTime': datetime.datetime(2020, 12, 23, 13, 17, 49, tzinfo=tzlocal()),
 'TrainingJobArn': 'arn:aws:sagemaker:us-east-1:719009365707:training-job/fm-classifier-201223-1311-003-d73a385e',
 'TrainingJobName': 'FM-classifier-201223-1311-003-d73a385e',
 'TrainingJobStatus': 'Completed',
 'TrainingStartTime': datetime.datetime(2020, 12, 23, 13, 13, 46, tzinfo=tzlocal()),
 'TunedHyperParameters': {'bias_init_sigma': '0.00017774647411181405',
                          'bias_lr': '2.836455267625859e-07',
                          'bias_wd': '19.542377833088086',
                          'factors_init_sigma': '0.17273521804549025',
                          'factors_lr'

## Tabulating HPO training job analytics

In [6]:
# Pull the per-training-job results of the tuning job into a DataFrame.
# `tuning_job_name` and `is_minimize` come from the tuning-status cell above.
tunerresult = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)
full_df = tunerresult.dataframe()

# Start from the unfiltered frame so that `df` is always defined for the
# display at the end of this cell and for the plotting cells further down,
# even when no training job has reported anything yet.
df = full_df

if len(full_df) > 0:
    # Keep only jobs with a usable objective value (the comparison drops both
    # -inf and NaN entries).
    df = full_df[full_df['FinalObjectiveValue'] > -float('inf')]
    if len(df) > 0:
        # Best job first: ascending when minimising, descending when maximising.
        df = df.sort_values('FinalObjectiveValue', ascending=is_minimize)
        print("Number of training jobs with valid objective: %d" % len(df))
        print({"lowest": float(df['FinalObjectiveValue'].min()),
               "highest": float(df['FinalObjectiveValue'].max())})
        # Don't truncate TrainingJobName. `None` is the supported "no limit"
        # value since pandas 1.0 (where -1 was deprecated); older releases only
        # accept an int, so fall back to the legacy -1 there.
        try:
            pd.set_option('display.max_colwidth', None)
        except ValueError:
            pd.set_option('display.max_colwidth', -1)
    else:
        print("No training jobs have reported valid results yet.")
else:
    print("No training jobs have reported results yet.")

df


Number of training jobs with valid objective: 5
{'lowest': 0.5195335745811462, 'highest': 0.6117021441459656}


Unnamed: 0,FinalObjectiveValue,TrainingElapsedTimeSeconds,TrainingEndTime,TrainingJobName,TrainingJobStatus,TrainingStartTime,bias_init_sigma,bias_lr,bias_wd,factors_init_sigma,factors_lr,factors_wd,linear_init_sigma,linear_lr,linear_wd
2,0.611702,243.0,2020-12-23 13:17:49-05:00,FM-classifier-201223-1311-003-d73a385e,Completed,2020-12-23 13:13:46-05:00,0.000178,2.836455e-07,19.542378,0.1727352,8.791259,5e-06,5.102413e-07,2.479965e-07,0.002566219
3,0.578405,237.0,2020-12-23 13:17:34-05:00,FM-classifier-201223-1311-002-7ca256b8,Completed,2020-12-23 13:13:37-05:00,1e-06,7.667707,1.3e-05,0.09128576,0.001972,2e-06,2.183154e-05,6.974528e-07,0.06674409
0,0.527527,228.0,2020-12-23 13:24:06-05:00,FM-classifier-201223-1311-005-382a5505,Completed,2020-12-23 13:20:18-05:00,2e-06,0.1175898,0.003464,1.124088,0.000372,0.000166,0.7724552,2.026263e-05,1.703505e-05
4,0.519624,220.0,2020-12-23 13:16:53-05:00,FM-classifier-201223-1311-001-db5c3b0a,Completed,2020-12-23 13:13:13-05:00,0.015279,0.0004246569,0.00037,1.325564e-08,0.000189,1.356414,5.455975e-08,3.811238e-08,5.528408e-08
1,0.519534,241.0,2020-12-23 13:23:48-05:00,FM-classifier-201223-1311-004-c1efa5a2,Completed,2020-12-23 13:19:47-05:00,2e-06,0.1175898,0.005366,1.74142,0.000577,0.000166,1.196675,3.139054e-05,1.703505e-05


## Plotting the objective value of each training job over time

In [8]:
# Install Bokeh into the environment of the running kernel (`%pip` targets the
# kernel, whereas `! pip` uses whichever pip is first on the shell PATH).
# The plotting cells below pass `plot_width` / `plot_height` to `figure`,
# which Bokeh 3 no longer accepts, so stay on a pre-3.0 release.
%pip install "bokeh<3"

Collecting bokeh
  Downloading bokeh-1.4.0.tar.gz (32.4 MB)
[K     |████████████████████████████████| 32.4 MB 16.0 MB/s eta 0:00:01
Building wheels for collected packages: bokeh
  Building wheel for bokeh (setup.py) ... [?25ldone
[?25h  Created wheel for bokeh: filename=bokeh-1.4.0-py3-none-any.whl size=23689200 sha256=e71070a7b7abde5704cb0162bc49e53c76e0cccabea0ae2e0dfdd4e341fe9cc5
  Stored in directory: /Users/rrevuru/Library/Caches/pip/wheels/87/c0/a6/b8e4c7f5067646c50a2ead3d637cf10249700200184f507073
Successfully built bokeh
Installing collected packages: bokeh
Successfully installed bokeh-1.4.0
You should consider upgrading via the '/Users/rrevuru/.pyenv/versions/3.5.4/bin/python3.5 -m pip install --upgrade pip' command.[0m


In [9]:
import bokeh
import bokeh.io
# Route Bokeh output to the notebook so the show() calls below render inline.
bokeh.io.output_notebook()
from bokeh.plotting import figure, show
from bokeh.models import HoverTool

class HoverHelper():
    """Build the Bokeh tool list (including a hover tool) for tuning-result plots.

    The hover tooltips show the objective value, the training job name and one
    entry per key of the analytics object's ``tuning_ranges``.
    """

    def __init__(self, tuning_analytics):
        """Remember the analytics object whose ``tuning_ranges`` name the tooltip fields."""
        self.tuner = tuning_analytics

    def hovertool(self):
        """Return a ``HoverTool`` with the fixed fields followed by one field per tuned hyperparameter."""
        fixed_fields = [
            ("FinalObjectiveValue", "@FinalObjectiveValue"),
            ("TrainingJobName", "@TrainingJobName"),
        ]
        # "@{name}" is the brace form of a column reference, so hyperparameter
        # names are inserted verbatim between the braces.
        hyperparameter_fields = [
            (name, "@{%s}" % name) for name in self.tuner.tuning_ranges.keys()
        ]
        return HoverTool(tooltips=fixed_fields + hyperparameter_fields)

    def tools(self, standard_tools='pan,crosshair,wheel_zoom,zoom_in,zoom_out,undo,reset'):
        """Return the value for ``figure(tools=...)``: the hover tool plus the comma-separated standard tools."""
        hover_tool = self.hovertool()
        return [hover_tool, standard_tools]

# Shared tool builder; the per-hyperparameter plots further down reuse `hover`.
hover = HoverHelper(tunerresult)

# One point per training job: start time on the x-axis, final objective on the y-axis.
p = figure(
    x_axis_type='datetime',
    tools=hover.tools(),
    plot_height=400,
    plot_width=900,
)
p.circle(x='TrainingStartTime', y='FinalObjectiveValue', source=df)
show(p)


## Plotting the objective value against each tuned hyperparameter

In [10]:
# Imported explicitly: `bokeh.layouts` is otherwise only reachable as a side
# effect of other Bokeh imports.
from bokeh.layouts import column


def is_num(x):
    """Return True when ``x`` can be converted to a float, False otherwise."""
    try:
        float(x)
    except (TypeError, ValueError):
        return False
    return True


# One scatter plot per tuned hyperparameter: hyperparameter value on the
# x-axis, final objective value on the y-axis.
ranges = tunerresult.tuning_ranges
figures = []
for hp_name, hp_range in ranges.items():
    categorical_args = {}
    vals = hp_range.get('Values')
    if vals:
        # This is marked as categorical.  Check if all options are actually numbers.
        if all(is_num(x) for x in vals):
            # Bokeh has issues plotting a "categorical" range that's actually numeric, so plot as numeric
            print("Hyperparameter %s is tuned as categorical, but all values are numeric" % hp_name)
        else:
            # Genuinely categorical: give the x-axis the list of categories.
            categorical_args['x_range'] = vals

    # Now plot it
    p = figure(plot_width=500, plot_height=500,
               title="Objective vs %s" % hp_name,
               tools=hover.tools(),
               x_axis_label=hp_name, y_axis_label=objective_name,
               **categorical_args)
    p.circle(source=df, x=hp_name, y='FinalObjectiveValue')
    figures.append(p)

# Stack all per-hyperparameter figures vertically in a single output.
show(column(figures))