In [1]:
%%time
import sagemaker
from sagemaker import get_execution_role

import boto3

role = get_execution_role()
print(role)

sess = sagemaker.Session()
bucket = 'sagemaker-multi-label-test'
prefix = 'ic-multilabel'

print('using bucket %s'%bucket)

arn:aws:iam::466363126778:role/AWSGlueServiceSageMakerNotebookRole
using bucket sagemaker-multi-label-test
CPU times: user 682 ms, sys: 100 ms, total: 782 ms
Wall time: 715 ms


In [2]:
s3train_lst = 's3://{}/{}/train_lst/'.format(bucket, prefix)
s3validation_lst = 's3://{}/{}/validation_lst/'.format(bucket, prefix)

s3train = 's3://{}/{}/train/'.format(bucket, prefix)
s3validation = 's3://{}/{}/validation/'.format(bucket, prefix)

In [3]:
from sagemaker.image_uris import retrieve, config_for_framework
training_image = retrieve('image-classification', sess.boto_region_name, )
print (training_image)



685385470294.dkr.ecr.eu-west-1.amazonaws.com/image-classification:1


In [4]:
s3_output_location = 's3://{}/{}/output'.format(bucket, prefix)

In [5]:
multilabel_ic = sagemaker.estimator.Estimator(
                                             training_image,
                                             role, 
                                             instance_count=1, 
                                             instance_type='ml.p3.2xlarge',
                                             volume_size = 5,
                                             max_run = 36000,
                                             input_mode= 'File',
                                             output_path=s3_output_location,
                                             sagemaker_session=sess
                                             )

In [6]:
multilabel_ic.set_hyperparameters(
                                 num_layers=18,
                                 image_shape = "3,224,224",
                                 num_classes=2,
                                 resize=256,
                                 epochs=60,
                                 top_k='2',
                                 num_training_samples=449,
                                 use_weighted_loss=1,
                                 augmentation_type = 'crop_color_transform',
                                 precision_dtype='float32',
                                 multi_label=1
                                )




In [7]:
from time import gmtime, strftime 
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

tuning_job_name = "imageclassif-job-{}".format(strftime("%d-%H-%M-%S", gmtime()))

hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.00001, 1.0),
                         'mini_batch_size': IntegerParameter(16, 64),
                         'optimizer': CategoricalParameter(['sgd', 'adam', 'rmsprop', 'nag'])}

objective_metric_name = 'validation:accuracy'

tuner = HyperparameterTuner(multilabel_ic, 
                            objective_metric_name, 
                            hyperparameter_ranges,
                            objective_type='Maximize', 
                            max_jobs=2, 
                            max_parallel_jobs=2)



In [7]:
train_data = sagemaker.inputs.TrainingInput(s3train, distribution='FullyReplicated', 
                        content_type='application/x-image', s3_data_type='S3Prefix')
validation_data = sagemaker.inputs.TrainingInput(s3validation, distribution='FullyReplicated', 
                             content_type='application/x-image', s3_data_type='S3Prefix')
train_data_lst = sagemaker.inputs.TrainingInput(s3train_lst, distribution='FullyReplicated', 
                        content_type='application/x-image', s3_data_type='S3Prefix')
validation_data_lst = sagemaker.inputs.TrainingInput(s3validation_lst, distribution='FullyReplicated', 
                             content_type='application/x-image', s3_data_type='S3Prefix')
data_channels = {'train': train_data, 'validation': validation_data, 'train_lst': train_data_lst, 
                 'validation_lst': validation_data_lst}

In [9]:
tuner.fit(data_channels, job_name=tuning_job_name, include_cls_metadata=False)
tuner.wait()

..............................................................!
!


In [11]:
tuner_metrics = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)
tuner_metrics.dataframe().sort_values(['FinalObjectiveValue'], ascending=False).head(5)

Unnamed: 0,learning_rate,mini_batch_size,optimizer,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,2.1e-05,54.0,adam,imageclassif-job-26-03-30-02-020-948db109,Completed,0.851852,2021-02-26 04:29:28+00:00,2021-02-26 04:31:57+00:00,149.0
12,0.000783,16.0,nag,imageclassif-job-26-03-30-02-008-45cf4bf0,Completed,0.839583,2021-02-26 03:54:38+00:00,2021-02-26 03:57:46+00:00,188.0
8,0.000879,16.0,nag,imageclassif-job-26-03-30-02-012-a37c1d32,Completed,0.829167,2021-02-26 04:06:27+00:00,2021-02-26 04:09:23+00:00,176.0
6,0.000557,16.0,nag,imageclassif-job-26-03-30-02-014-35ddf7fe,Completed,0.827083,2021-02-26 04:12:24+00:00,2021-02-26 04:15:21+00:00,177.0
5,2.5e-05,16.0,adam,imageclassif-job-26-03-30-02-015-579e9423,Completed,0.820833,2021-02-26 04:18:32+00:00,2021-02-26 04:21:14+00:00,162.0


In [12]:
total_time = tuner_metrics.dataframe()['TrainingElapsedTimeSeconds'].sum() / 3600
print("The total training time is {:.2f} hours".format(total_time))
tuner_metrics.dataframe()['TrainingJobStatus'].value_counts()

The total training time is 0.93 hours


Completed    20
Name: TrainingJobStatus, dtype: int64

In [13]:
best_job = tuner.best_estimator()


2021-02-26 04:31:57 Starting - Preparing the instances for training
2021-02-26 04:31:57 Downloading - Downloading input data
2021-02-26 04:31:57 Training - Training image download completed. Training in progress.
2021-02-26 04:31:57 Uploading - Uploading generated training model
2021-02-26 04:31:57 Completed - Training job completed


In [14]:
ic_classifier = best_job.deploy(initial_instance_count = 1, instance_type = 'ml.m4.xlarge')

---------------!

In [20]:
import json

endpoint = 'imageclassif-job-26-03-30-02-020-948db1-2021-02-26-04-50-29-889'
runtime = boto3.Session().client('sagemaker-runtime')

images = [
    'flower1.png', 'flower3.png', 'flower4.jpeg','flower5.jpeg', 'flower6.png', 'flower7.jpeg',
    'leaf-flower1.jpeg', 'leaf-flower2.jpeg', 'leaf-flower3.jpeg', 'leaf-flower4.jpeg', 'leaf-flower5.jpeg', 
    'leaf-flower6.png', 'leaf1.png', 'leaf2.jpeg', 'leaf3.jpeg', 'leaf4.jpeg', 'leaf5.jpeg', 'leaf6.png',
    'not1.png', 'not2.jpg', 'not5.png', 'not6.png'
]

for image in images:
    path = 'test-images/'+image
    with open(path, 'rb') as f:
        payload = f.read()
    response = runtime.invoke_endpoint(EndpointName=endpoint, ContentType='application/x-image', Body=payload)
    result = json.loads(response['Body'].read().decode())
    print(image, " :: ", result)

flower1.png  ::  [0.07953238487243652, 0.9750715494155884]
flower3.png  ::  [0.002297744620591402, 0.9825090169906616]
flower4.jpeg  ::  [0.011164083145558834, 0.9782940149307251]
flower5.jpeg  ::  [0.2722867727279663, 0.9165379405021667]
flower6.png  ::  [0.9429547190666199, 0.774402379989624]
flower7.jpeg  ::  [0.002597740851342678, 0.9664433002471924]
leaf-flower1.jpeg  ::  [0.43668413162231445, 0.998110294342041]
leaf-flower2.jpeg  ::  [0.24814531207084656, 0.9976128339767456]
leaf-flower3.jpeg  ::  [0.09307710081338882, 0.6431862711906433]
leaf-flower4.jpeg  ::  [0.9367368817329407, 0.9986779093742371]
leaf-flower5.jpeg  ::  [0.14500567317008972, 0.6142891049385071]
leaf-flower6.png  ::  [0.9397969841957092, 0.9578878283500671]
leaf1.png  ::  [0.2993389070034027, 0.37956634163856506]
leaf2.jpeg  ::  [0.5715838074684143, 0.6155316829681396]
leaf3.jpeg  ::  [0.7044222354888916, 0.6951624155044556]
leaf4.jpeg  ::  [0.747817873954773, 0.9176274538040161]
leaf5.jpeg  ::  [0.22423264384

In [6]:
multilabel_ic.set_hyperparameters(
                                 num_layers=18,
                                 image_shape = "3,224,224",
                                 num_classes=2,
                                 resize=256,
                                 epochs=100,
                                 top_k='2',
                                 num_training_samples=449,
                                 use_weighted_loss=1,
                                 augmentation_type = 'crop_color_transform',
                                 precision_dtype='float32',
                                 multi_label=1,
                                 optimizer='adam',
                                 learning_rate=0.000021,
                                 mini_batch_size=16,
                                )

In [8]:
multilabel_ic.fit(inputs=data_channels, logs=True)

2021-02-26 22:32:28 Starting - Starting the training job...
2021-02-26 22:32:50 Starting - Launching requested ML instancesProfilerReport-1614378747: InProgress
......
2021-02-26 22:33:51 Starting - Preparing the instances for training......
2021-02-26 22:34:52 Downloading - Downloading input data...
2021-02-26 22:35:12 Training - Downloading the training image...
2021-02-26 22:35:58 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34m[02/26/2021 22:36:02 INFO 140254078891136] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/image_classification/default-input.json: {u'beta_1': 0.9, u'gamma': 0.9, u'beta_2': 0.999, u'optimizer': u'sgd', u'use_pretrained_model': 0, u'eps': 1e-08, u'epochs': 30, u'lr_scheduler_factor': 0.1, u'num_layers': 152, u'image_shape': u'3,224,224', u'precision_dtype': u'float32', u'mini_batch_size': 32, u'weight_decay': 0.0001, u'learning_rate': 0.1, u'momentum

[34m[02/26/2021 22:36:24 INFO 140254078891136] Epoch[11] Validation-accuracy=0.680556[0m
[34m[02/26/2021 22:36:25 INFO 140254078891136] Epoch[12] Batch [20]#011Speed: 565.590 samples/sec#011accuracy=0.694940[0m
[34m[02/26/2021 22:36:25 INFO 140254078891136] Epoch[12] Train-accuracy=0.697545[0m
[34m[02/26/2021 22:36:25 INFO 140254078891136] Epoch[12] Time cost=0.755[0m
[34m[02/26/2021 22:36:25 INFO 140254078891136] Epoch[12] Validation-accuracy=0.656250[0m
[34m[02/26/2021 22:36:26 INFO 140254078891136] Epoch[13] Batch [20]#011Speed: 577.085 samples/sec#011accuracy=0.684524[0m
[34m[02/26/2021 22:36:26 INFO 140254078891136] Epoch[13] Train-accuracy=0.691964[0m
[34m[02/26/2021 22:36:26 INFO 140254078891136] Epoch[13] Time cost=0.745[0m
[34m[02/26/2021 22:36:26 INFO 140254078891136] Epoch[13] Validation-accuracy=0.699219[0m
[34m[02/26/2021 22:36:27 INFO 140254078891136] Epoch[14] Batch [20]#011Speed: 582.911 samples/sec#011accuracy=0.680060[0m
[34m[02/26/2021 22:36:27 I

[34m[02/26/2021 22:36:54 INFO 140254078891136] Saved checkpoint to "/opt/ml/model/image-classification-0038.params"[0m
[34m[02/26/2021 22:36:54 INFO 140254078891136] Epoch[38] Batch [20]#011Speed: 584.732 samples/sec#011accuracy=0.748512[0m
[34m[02/26/2021 22:36:55 INFO 140254078891136] Epoch[38] Train-accuracy=0.748884[0m
[34m[02/26/2021 22:36:55 INFO 140254078891136] Epoch[38] Time cost=0.737[0m
[34m[02/26/2021 22:36:55 INFO 140254078891136] Epoch[38] Validation-accuracy=0.687500[0m
[34m[02/26/2021 22:36:56 INFO 140254078891136] Epoch[39] Batch [20]#011Speed: 585.355 samples/sec#011accuracy=0.720238[0m
[34m[02/26/2021 22:36:56 INFO 140254078891136] Epoch[39] Train-accuracy=0.726562[0m
[34m[02/26/2021 22:36:56 INFO 140254078891136] Epoch[39] Time cost=0.731[0m
[34m[02/26/2021 22:36:56 INFO 140254078891136] Epoch[39] Validation-accuracy=0.684028[0m
[34m[02/26/2021 22:36:57 INFO 140254078891136] Epoch[40] Batch [20]#011Speed: 365.749 samples/sec#011accuracy=0.744048[

[34m[02/26/2021 22:37:29 INFO 140254078891136] Epoch[68] Batch [20]#011Speed: 588.001 samples/sec#011accuracy=0.785714[0m
[34m[02/26/2021 22:37:29 INFO 140254078891136] Epoch[68] Train-accuracy=0.782366[0m
[34m[02/26/2021 22:37:29 INFO 140254078891136] Epoch[68] Time cost=0.731[0m
[34m[02/26/2021 22:37:30 INFO 140254078891136] Epoch[68] Validation-accuracy=0.726562[0m
[34m[02/26/2021 22:37:30 INFO 140254078891136] Epoch[69] Batch [20]#011Speed: 573.645 samples/sec#011accuracy=0.796131[0m
[34m[02/26/2021 22:37:30 INFO 140254078891136] Epoch[69] Train-accuracy=0.789062[0m
[34m[02/26/2021 22:37:30 INFO 140254078891136] Epoch[69] Time cost=0.748[0m
[34m[02/26/2021 22:37:31 INFO 140254078891136] Epoch[69] Validation-accuracy=0.710938[0m
[34m[02/26/2021 22:37:31 INFO 140254078891136] Epoch[70] Batch [20]#011Speed: 557.336 samples/sec#011accuracy=0.799107[0m
[34m[02/26/2021 22:37:32 INFO 140254078891136] Epoch[70] Train-accuracy=0.791295[0m
[34m[02/26/2021 22:37:32 INFO 1

[34m[02/26/2021 22:37:54 INFO 140254078891136] Saved checkpoint to "/opt/ml/model/image-classification-0090.params"[0m
[34m[02/26/2021 22:37:54 INFO 140254078891136] Epoch[90] Batch [20]#011Speed: 578.013 samples/sec#011accuracy=0.812500[0m
[34m[02/26/2021 22:37:55 INFO 140254078891136] Epoch[90] Train-accuracy=0.814732[0m
[34m[02/26/2021 22:37:55 INFO 140254078891136] Epoch[90] Time cost=0.736[0m
[34m[02/26/2021 22:37:55 INFO 140254078891136] Epoch[90] Validation-accuracy=0.738281[0m
[34m[02/26/2021 22:37:55 INFO 140254078891136] Epoch[91] Batch [20]#011Speed: 579.835 samples/sec#011accuracy=0.811012[0m
[34m[02/26/2021 22:37:56 INFO 140254078891136] Epoch[91] Train-accuracy=0.820312[0m
[34m[02/26/2021 22:37:56 INFO 140254078891136] Epoch[91] Time cost=0.740[0m
[34m[02/26/2021 22:37:56 INFO 140254078891136] Epoch[91] Validation-accuracy=0.722222[0m
[34m[02/26/2021 22:37:57 INFO 140254078891136] Epoch[92] Batch [20]#011Speed: 572.461 samples/sec#011accuracy=0.819940[

In [9]:
ic_classifier = multilabel_ic.deploy(initial_instance_count = 1, instance_type = 'ml.m4.xlarge')

---------------!

In [10]:
print(ic_classifier)
import json

endpoint = 'image-classification-2021-02-26-19-23-01-757'
runtime = boto3.Session().client('sagemaker-runtime')

images = [
    'flower1.png', 'flower3.png', 'flower4.jpeg','flower5.jpeg', 'flower6.png', 'flower7.jpeg',
    'leaf-flower1.jpeg', 'leaf-flower2.jpeg', 'leaf-flower3.jpeg', 'leaf-flower4.jpeg', 'leaf-flower5.jpeg', 
    'leaf-flower6.png', 'leaf1.png', 'leaf2.jpeg', 'leaf3.jpeg', 'leaf4.jpeg', 'leaf5.jpeg', 'leaf6.png',
    'not1.png', 'not2.jpg', 'not5.png', 'not6.png'
]

for image in images:
    path = 'test-images/'+image
    with open(path, 'rb') as f:
        payload = f.read()
    response = runtime.invoke_endpoint(EndpointName=endpoint, ContentType='application/x-image', Body=payload)
    result = json.loads(response['Body'].read().decode())
    print(image, " :: ", result)

<sagemaker.predictor.Predictor object at 0x7fc329c79190>


ValidationError: An error occurred (ValidationError) when calling the InvokeEndpoint operation: Endpoint image-classification-2021-02-26-19-23-01-757 of account 466363126778 not found.