Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.<br /><br />Licensed under the Amazon Software License (the "License"). You may not<br />use this file except in compliance with the License. A copy of the<br />License is located at:<br />   http://aws.amazon.com/asl/<br />or in the "license" file accompanying this file. This file is distributed<br />on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express<br />or implied. See the License for the specific language governing permissions<br />and limitations under the License.
# Analyze tuning job results in Notebook

In [None]:
import smhpolib.analysis

In [None]:
# AWS region used for the rest of this notebook
region = "us-west-2"

In [None]:
import os

# Publish the chosen region through the AWS_REGION environment variable
# (presumably read by smhpolib / the AWS SDK -- confirm).
os.environ.update({"AWS_REGION": region})

In [None]:
# Names of the tuning jobs to analyze; list as many as needed.
TUNING_JOB_NAMES = [
    "tuning-job-1",
    "tuning-job-2",
    "tuning-job-3",
    # ...
]

In [None]:
# Fetch every tuning job listed in TUNING_JOB_NAMES and stack the results
# into a single DataFrame `df`, tagging each row with the job it came from.
import pandas as pd

df_list = []
for tuning_job_name in TUNING_JOB_NAMES:
    tuning = smhpolib.analysis.TuningJob(tuning_job_name, max_training_jobs=500)
    df_this = tuning.hyperparam_dataframe()
    df_this['TuningJobName'] = tuning_job_name
    df_list += [df_this]
    # `df` is rebuilt on every pass, so it always holds everything fetched so far.
    df = pd.concat(df_list)
    progress = (tuning_job_name, len(df_this), len(df))
    print("Fetched %s: %d more total %d" % progress)

In [None]:
# Number of rows (one per training job, presumably) loaded for each tuning job
df.groupby('TuningJobName').size()

In [None]:
# Keep the unfiltered frame around as `all_df`, then narrow `df` down to the
# rows whose FinalObjectiveValue is greater than -inf (NaN rows fail the
# comparison and are dropped as well).
all_df = df
print("Total: %d" % len(all_df))

has_objective = all_df['FinalObjectiveValue'] > -float('inf')
df = all_df[has_objective]
print("Valid objective: %d" % len(df))

# Highest objective first, so the best rows show up at the top
df = df.sort_values('FinalObjectiveValue', ascending=False)

In [None]:
import pandas as pd
# Don't truncate TrainingJobName.
# pandas >= 1.0 spells "no limit" as None (-1 is deprecated and rejected by
# pandas 2.x); older releases only accept an int, so fall back to -1 there.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)
# Display the DataFrame as the cell output
df

## See TuningJob results vs time

In [None]:
import bokeh
import bokeh.io
import bokeh.palettes
from bokeh.plotting import figure, show

# Send Bokeh output to the notebook
bokeh.io.output_notebook()

In [None]:
def big_warp_palette(size, palette_func, warp=1):
    """Sample `size` colors from a 256-entry palette with a power-law warp.

    Entry i uses the fraction f = i / (size - 1), which runs from 0 to 1
    inclusive, raises it to the power `warp`, and picks palette index
    int(f ** warp * 255).

    Setting warp < 1 exaggerates the high end.
    Setting warp > 1 exaggerates the low end.

    Args:
        size: Number of colors to return.
        palette_func: Callable that takes a length and returns an indexable
            palette; it is called as palette_func(256).
        warp: Exponent applied to the 0-1 fraction before indexing.

    Returns:
        A list of `size` palette entries. A size of 1 yields the first
        palette entry (fraction 0) instead of dividing by zero; a size of 0
        or less yields an empty list.
    """
    p = palette_func(256)
    if size == 1:
        # A single point has no span to interpolate over; use fraction 0.
        return [p[0]]
    span = size - 1.0
    return [p[int((i / span) ** warp * 255)] for i in range(size)]

# One color per row of `df`, sampled from plasma with warp=0.5
palette = big_warp_palette(len(df), bokeh.palettes.plasma, warp=0.5)

In [None]:
# Final objective of each training job against the time it started;
# `palette` supplies one color per row of `df`.
p = figure(plot_width=900, plot_height=400)
p.circle(
    x=df['TrainingStartTime'],
    y=df['FinalObjectiveValue'],
    color=palette,
)
show(p)

In [None]:
# Smallest and largest final objective among the rows currently in `df`
objective_values = df['FinalObjectiveValue']
min(objective_values), max(objective_values)

## Look at correlation between the objective and individual hyperparameters

In [None]:
# Which hyperparameters to look for correlations for.
# NOTE(review): `tuning` is whichever job the loading loop fetched last, so
# only that job's ranges are consulted -- confirm that every job in
# TUNING_JOB_NAMES tunes the same hyperparameters.
all_hyperparameters = tuning.hyperparam_ranges().keys()
all_hyperparameters

In [None]:
# One scatter plot per hyperparameter: its value on x, final objective on y.
# Sort best-first so the rows are in descending-objective order when the
# per-row `palette` colors are applied.
df = df.sort_values('FinalObjectiveValue', ascending=False)
for hp in all_hyperparameters:
    p = figure(
        plot_width=500,
        plot_height=500,
        title="Final objective vs %s" % hp,
        x_axis_label=hp,
        y_axis_label="objective",
    )
    p.circle(x=df[hp], y=df['FinalObjectiveValue'], color=palette)
    show(p)


## Correlations between hyperparameters

In [None]:
# Use the full browser window.
# `IPython.display` is the public import location; importing these names
# from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import numpy as np
def warp_size(cnt,lo,hi,warp):
    """Return `cnt` values that fall from `hi` towards `lo` on a warped scale.

    Entry i is hi - (i / cnt) ** warp * (hi - lo), so the first entry equals
    `hi` and later entries approach `lo` without reaching it.

    Args:
        cnt: Number of values to produce.
        lo: Value the sequence approaches at its end.
        hi: Value of the first entry.
        warp: Exponent applied to the 0-1 fraction i / cnt.

    Returns:
        A list of `cnt` numbers; an empty list when `cnt` is 0 or negative.
    """
    if cnt <= 0:
        # Nothing to size; also avoids dividing by zero.
        return []
    # linspace takes an explicit count, so it always yields exactly `cnt`
    # fractions (arange with a float step can round up to cnt + 1).
    fractions = np.linspace(0.0, 1.0, num=cnt, endpoint=False)
    return list(hi - fractions**warp * (hi - lo))
# Plot the better points larger.  Good to have a lower warp here, so that the
# medium-good points are still large, but only the really good ones are large
# and purple.
sizes = warp_size(len(df), lo=2, hi=10, warp=0.5)
# Per-point opacity falling linearly from 1 towards 0.5
opacity = warp_size(len(df), lo=0.5, hi=1, warp=1)

In [None]:
# Scatter-plot matrix: one small plot for every (hp1, hp2) pair of
# hyperparameters, with hp1 on the x axis and hp2 on the y axis.
import bokeh.layouts
screen_width_px = 1200
# Split the assumed screen width evenly across the columns.
# NOTE(review): this divides by zero if all_hyperparameters is empty.
plot_sz = int(screen_width_px / len(all_hyperparameters))
figures = []
for j, hp2 in enumerate(all_hyperparameters):
    figure_row = []
    for i, hp1 in enumerate(all_hyperparameters):
        # Uncomment one of the next two lines to show only part of the matrix.
        #if i>=j: continue  # only show lower diagonal
        #if i>j: continue  # only lower diagonal, include self-corr
        p = figure(plot_width=plot_sz, plot_height=plot_sz, 
               x_axis_label=hp1, y_axis_label=hp2)
        # Hide the toolbar and the Bokeh logo on each small plot
        p.toolbar_location = None
        p.toolbar.logo = None
        # Per-point size and color come from `sizes` and `palette`.
        # NOTE(review): `opacity` is computed in an earlier cell but not used
        # here; alpha is fixed at 0.7 -- confirm whether that is intended.
        p.circle(list(df[hp1]),list(df[hp2]), alpha=0.7, size=sizes, color=palette)
        figure_row.append(p)
    figures.append(figure_row)
# `figures` is a list of rows, laid out as a grid
show(bokeh.layouts.gridplot(children=figures))