In [1]:
import sagemaker
import boto3
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

import numpy as np                                # For matrix operations and numerical processing
import pandas as pd                               # For munging tabular data
import os 
 
# S3 key prefix under which this notebook stores its data and outputs.
prefix = 'sagemaker/invoice-prediction'

# One boto3 session provides both the region name and the low-level SageMaker client.
boto_session = boto3.Session()
region = boto_session.region_name
smclient = boto_session.client('sagemaker')

# SageMaker SDK session and the execution role used when creating jobs.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

In [2]:
# Restore `bucket` from IPython's %store (it must have been saved earlier with `%store bucket`).
%store -r bucket

Now we'll copy the training and validation files to S3 for Amazon SageMaker training to pick up.

In [3]:
# Upload the local CSV files to s3://<bucket>/<prefix>/<channel>/<file>.
# S3 object keys always use '/' as the separator, so the key is built explicitly
# instead of with os.path.join (which uses the host OS separator, e.g. '\\' on Windows).
s3_bucket = boto3.Session().resource('s3').Bucket(bucket)
for channel, local_file in (('train', 'train.csv'), ('validation', 'validation.csv')):
    s3_bucket.Object('{}/{}/{}'.format(prefix, channel, local_file)).upload_file(local_file)

In [4]:
from sagemaker.amazon.amazon_estimator import get_image_uri

sess = sagemaker.Session()

# Look up the XGBoost container image for the current region.
container = get_image_uri(region, 'xgboost', repo_version='latest')

# S3 location passed to the estimator as its output path.
output_location = 's3://{}/{}/output'.format(bucket, prefix)

xgb = sagemaker.estimator.Estimator(
    container,
    role,
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    output_path=output_location,
    base_job_name='invoice-pred',
    sagemaker_session=sess,
)

# Hyperparameters set directly on the estimator.
static_hyperparameters = {
    'eval_metric': 'mae',
    'objective': 'reg:linear',
    'num_round': 100,
    'subsample': 0.7,
}
xgb.set_hyperparameters(**static_hyperparameters)

	get_image_uri(region, 'xgboost', '0.90-1').


In [5]:
# Search space handed to the tuner: one range object per hyperparameter name.
hyperparameter_ranges = dict(
    eta=ContinuousParameter(0, 1),
    min_child_weight=ContinuousParameter(1, 10),
    alpha=ContinuousParameter(0, 2),
    max_depth=IntegerParameter(10, 20),
)

In [6]:
# Name of the metric the tuner optimizes; 'mae' matches the eval_metric set on the estimator.
objective_metric_name = 'validation:mae'

In [7]:
# Tuning job definition: minimize the objective metric over the ranges above,
# running 2 training jobs in total with up to 2 at the same time.
tuner = HyperparameterTuner(
    estimator=xgb,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    objective_type='Minimize',
    max_jobs=2,
    max_parallel_jobs=2,
    base_tuning_job_name='hpo-invoice-pred',
)

## Launch_Hyperparameter_Tuning
Now we can launch a hyperparameter tuning job by calling the *fit()* function. After the hyperparameter tuning job is created, we can go to the SageMaker console to track the progress of the hyperparameter tuning job until it is completed.

In [8]:
# Input channels for training. Both URIs end with '/' so that each S3 prefix matches
# only the objects inside its own folder; a bare '.../train' prefix would also match
# sibling keys that merely start with 'train' (e.g. '.../train-old/...').
s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/train/'.format(bucket, prefix), content_type='csv')
s3_input_validation = sagemaker.s3_input(s3_data='s3://{}/{}/validation/'.format(bucket, prefix), content_type='csv')

# Launch the hyperparameter tuning job with the two channels defined above.
tuner.fit({'train': s3_input_train, 'validation': s3_input_validation}, include_cls_metadata=False)

In [13]:
# Show the name of the tuning job that was just launched.
print(tuner.latest_tuning_job.name)

hpo-invoice-pred-191210-1944


Once launched, you can check in the console the status of the hyperparameter tuning job and the status of each of the (in this case) 2 training jobs it creates. Once it has finished, you can check the name of the best training job, based on the MAE metric evaluated on the validation set.

### Here you need to wait until all the training jobs finish, to return the name of the best training job

Check the status of your training jobs at <a href='https://console.aws.amazon.com/sagemaker/home?region=us-east-1#/jobs'>the console</a>

In [16]:
# Ask the tuner for the name of its best training job and display it.
best_job = tuner.best_training_job()
print(best_job)

hpo-invoice-pred-191210-1944-001-3ea020c0


In [17]:
# Persist `best_job` with IPython's %store so another notebook/kernel can restore it via `%store -r best_job`.
%store best_job

Stored 'best_job' (str)
