In [1]:
import boto3
import pandas as pd
import os
import sagemaker
from sagemaker.estimator import Estimator
from sagemaker.parameter import IntegerParameter
from sagemaker.s3 import S3Uploader
from sagemaker.tuner import HyperparameterTuner
from sklearn.model_selection import train_test_split

# Resolve the AWS region from a default boto3 session, then create a
# SageMaker client bound to that region.
default_session = boto3.Session()
region = default_session.region_name
sm_client = boto3.client(service_name='sagemaker', region_name=region)

In [26]:
# Split the labelled data 80/20 into train/validation sets and stage both
# splits, together with the untouched test set, under one S3 prefix.
S3_DATA_PREFIX = 's3://sagemaker-input-test-1/data'
TEMP_SPLIT_FILES = ('train.csv', 'validation.csv')

df = pd.read_csv('../data/train.csv')
# A fixed random_state keeps the split identical across kernel restarts.
df_train, df_val = train_test_split(df, test_size=0.2, shuffle=True, random_state=444)

uploader = S3Uploader()
try:
    # The splits are written to the working directory only so they can be uploaded.
    df_train.to_csv('train.csv', index=False)
    df_val.to_csv('validation.csv', index=False)
    for local_path in ('train.csv', 'validation.csv', '../data/test.csv'):
        # upload() returns the resulting S3 URI; display it as a record of what was staged.
        display(uploader.upload(local_path, S3_DATA_PREFIX))
finally:
    # Clean up the temporary split files even when a write or an upload fails,
    # so a failed run does not leave stray CSVs next to the notebook.
    for temp_csv in TEMP_SPLIT_FILES:
        if os.path.exists(temp_csv):
            os.remove(temp_csv)

's3://sagemaker-input-test-1/data/train.csv'

's3://sagemaker-input-test-1/data/validation.csv'

's3://sagemaker-input-test-1/data/test.csv'

In [2]:
# Estimator for the custom training container, configured for remote training.
# To train in local mode instead, pass sagemaker_session=sagemaker.LocalSession(),
# image_uri='test-sagemaker-train-container:latest' and instance_type='local'.
# Keep the remote settings below when running the hyperparameter search.
estimator_config = dict(
    role='AmazonSageMaker-ExecutionRole-20210301T231771',
    image_uri='487576569445.dkr.ecr.eu-west-1.amazonaws.com/test-sagemaker-train-container:latest',
    instance_count=1,
    instance_type='ml.m5.large',
    hyperparameters={'n_estimators': 150},
)
estimator = Estimator(**estimator_config)

In [3]:
# Start the training job, passing one named input channel per data split.
training_channels = dict(
    train='s3://sagemaker-input-test-1/data/train.csv',
    validation='s3://sagemaker-input-test-1/data/validation.csv',
)
estimator.fit(training_channels)

2021-04-01 16:33:27 Starting - Starting the training job...
2021-04-01 16:33:51 Starting - Launching requested ML instancesProfilerReport-1617294805: InProgress
......
2021-04-01 16:34:52 Starting - Preparing the instances for training...
2021-04-01 16:35:32 Downloading - Downloading input data
2021-04-01 16:35:32 Training - Downloading the training image....[34m2021-04-01 16:36:08,188 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-04-01 16:36:14,433 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-04-01 16:36:14,445 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-04-01 16:36:14,456 sagemaker-training-toolkit INFO     Invoking user script
[0m
[34mTraining Env:
[0m
[34m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "validation": "/opt/ml/input/data/validation",
        "train": "/opt/ml/input/data/train"

In [15]:
# Build the S3 prefix under which the most recent training job stored its output.
# S3 URIs always use '/' as the separator, so join explicitly instead of using
# os.path.join (which would insert '\\' on Windows). The trailing '/' is stripped
# from output_path first so the result never contains a doubled slash.
# latest_training_job is used rather than jobs[0] so that re-running the fit
# cell does not leave this pointing at a stale, earlier job.
uri = '/'.join([
    estimator.output_path.rstrip('/'),
    estimator.latest_training_job.job_name,
    'output',
])

In [16]:
# Show the assembled output URI as the cell result.
uri

's3://sagemaker-eu-west-1-487576569445/test-sagemaker-train-container-2021-04-01-16-33-24-535/output'

In [14]:
# Hyperparameter search over n_estimators (integers 15..200) using the 'f1'
# objective metric; at most 3 jobs in total, run one at a time.
# The metric is described by a Name/Regex pair; the capture group in the regex
# holds the metric value.
f1_metric = {'Name': 'f1', 'Regex': '^F1=(.*?)$'}
n_estimators_range = IntegerParameter(min_value=15, max_value=200)

tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=f1_metric['Name'],
    hyperparameter_ranges={'n_estimators': n_estimators_range},
    metric_definitions=[f1_metric],
    max_jobs=3,
    max_parallel_jobs=1,
)

In [21]:
# Launch the tuning job with the same train/validation channels used for
# the single training run.
tuning_channels = dict(
    train='s3://sagemaker-input-test-1/data/train.csv',
    validation='s3://sagemaker-input-test-1/data/validation.csv',
)
tuner.fit(tuning_channels)

................................................................................................................................!


In [27]:
# Fetch the estimator that the tuner reports as best for the finished tuning run.
best_estimator = tuner.best_estimator()


2021-03-14 20:41:17 Starting - Preparing the instances for training
2021-03-14 20:41:17 Downloading - Downloading input data
2021-03-14 20:41:17 Training - Training image download completed. Training in progress.
2021-03-14 20:41:17 Uploading - Uploading generated training model
2021-03-14 20:41:17 Completed - Training job completed


In [28]:
# Show the hyperparameters attached to the best estimator as the cell result.
best_estimator.hyperparameters()

{'_tuning_objective_metric': 'f1', 'n_estimators': '22'}