In [26]:
import time
import boto3
import sagemaker
import json
import numpy as np
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner
from sagemaker.model import Model
from sagemaker.predictor import RealTimePredictor

# Configuring the built-in Image Classification algorithm

## Configure Hyperparameters

In [27]:
# Dataset facts: number of output classes and number of samples in the training set
num_classes, num_training_samples = 2, 194266

# Depth of the underlying neural network (number of layers)
num_layers = 18

# Shape of the input images for the training data, and the augmentation type used
image_shape = '3,50,50'
augmentation_type = 'crop_color_transform'

# Optimisation settings: batch size for training, number of epochs, learning rate
mini_batch_size = 128
epochs = 5
learning_rate = 0.01

# Transfer learning switch (1 enables it)
use_pretrained_model = 1

## Create a unique job name 

In [28]:
job_name_prefix = 'breast-cancer-detection'
# Suffix built from the current UTC time, so every run gets a distinct job name
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = ''.join([job_name_prefix, timestamp])

## Specify the input paths for the job

In [29]:
bucket = 'nitinproject1'
input_prefix = 'breast-cancer-detection/input/recordio'

# Both channels share the same bucket and prefix; only the last folder differs
input_train, input_test = (
    's3://{}/{}/{}/'.format(bucket, input_prefix, channel_folder)
    for channel_folder in ('train', 'test')
)

## Specify the output path for the job

In [30]:
# Job output goes to the same bucket as the inputs, under its own prefix
output_prefix = 'breast-cancer-detection/output'
output_path = 's3://{bucket}/{prefix}/'.format(bucket=bucket, prefix=output_prefix)

## Configure training instances

In [31]:
# Training fleet: instance type, how many of them, and the volume size in GB
instance_type = 'ml.p2.xlarge'
instance_count = 1
volume_size_gb = 50

## Get the execution role and the training image URI for Image Classification

In [32]:
# Execution role as reported by the SageMaker SDK
role = get_execution_role()

# Image URI of the 'image-classification' algorithm for the boto3 session's region
region_name = boto3.Session().region_name
training_image = get_image_uri(region_name, 'image-classification')

## Configure train timeout

In [33]:
# Passed to the estimator as train_max_run; 100 * 60 * 60 == 360000
# NOTE(review): presumably seconds (i.e. 100 hours) -- confirm against the SDK docs
train_timeout = 100 * 60 * 60

## Creating an Estimator

In [34]:
sagemaker_session = sagemaker.Session()

# Keyword settings collected in one place, then handed to the generic Estimator
estimator_settings = dict(
    train_instance_count=instance_count,
    train_instance_type=instance_type,
    train_volume_size=volume_size_gb,
    train_max_run=train_timeout,
    output_path=output_path,
    sagemaker_session=sagemaker_session,
    input_mode='Pipe',
)
estimator = sagemaker.estimator.Estimator(training_image, role, **estimator_settings)

In [35]:
# Map each hyperparameter name to the value configured earlier in the notebook
hyperparameters = {
    'num_classes': num_classes,
    'num_training_samples': num_training_samples,
    'num_layers': num_layers,
    'mini_batch_size': mini_batch_size,
    'image_shape': image_shape,
    'augmentation_type': augmentation_type,
    'epochs': epochs,
    'learning_rate': learning_rate,
    'use_pretrained_model': use_pretrained_model,
}
estimator.set_hyperparameters(**hyperparameters)

# Create a training job

In [36]:
# Both channels are declared with the same content type
recordio_content_type = 'application/x-recordio'
s3_input_train = sagemaker.s3_input(s3_data=input_train,
                                    content_type=recordio_content_type)
s3_input_validation = sagemaker.s3_input(s3_data=input_test,
                                         content_type=recordio_content_type)

In [None]:
# Channel name -> input definition for the training job
training_channels = {'train': s3_input_train, 'validation': s3_input_validation}
estimator.fit(training_channels, job_name=job_name)

2019-06-13 14:10:43 Starting - Starting the training job...
2019-06-13 14:10:45 Starting - Launching requested ML instances......
2019-06-13 14:11:49 Starting - Preparing the instances for training.........
2019-06-13 14:13:15 Downloading - Downloading input data...
2019-06-13 14:13:49 Training - Downloading the training image.....
[31mDocker entrypoint called with argument(s): train[0m
[31m[06/13/2019 14:14:53 INFO 139767644493632] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/image_classification/default-input.json: {u'beta_1': 0.9, u'gamma': 0.9, u'beta_2': 0.999, u'optimizer': u'sgd', u'use_pretrained_model': 0, u'eps': 1e-08, u'epochs': 30, u'lr_scheduler_factor': 0.1, u'num_layers': 152, u'image_shape': u'3,224,224', u'precision_dtype': u'float32', u'mini_batch_size': 32, u'weight_decay': 0.0001, u'learning_rate': 0.1, u'momentum': 0}[0m
[31m[06/13/2019 14:14:53 INFO 139767644493632] Reading provided configuration from /opt/ml/input/config/hyperp

# Creating a tuning job

## Defining tuning configuration

In [20]:
# Search space: one continuous, one integer and one categorical hyperparameter
hyperparameter_ranges = dict(
    learning_rate=ContinuousParameter(0.001, 1.0),
    mini_batch_size=IntegerParameter(64, 128),
    optimizer=CategoricalParameter(['sgd', 'adam']),
)

# Metric the tuner optimises, and the direction it optimises it in
objective_metric_name = 'validation:accuracy'
objective_type = 'Maximize'

# Job budget: total number of jobs and how many may run at the same time
max_jobs = 2
max_parallel_jobs = 2

## Create a unique job name

In [15]:
job_name_prefix = 'bcd-tuning'
# Current UTC time rendered as "-YYYY-MM-DD-HH-MM-SS" and appended to the prefix
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = '{}{}'.format(job_name_prefix, timestamp)

## Creating a hyperparameter tuner

In [21]:
# Gather the tuner arguments first so the constructor call stays on one line
tuner_settings = dict(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    objective_type=objective_type,
    max_jobs=max_jobs,
    max_parallel_jobs=max_parallel_jobs,
)
tuner = HyperparameterTuner(**tuner_settings)

## Launch the tuning job

In [None]:
# Same two channels as the plain training job
tuning_channels = {'train': s3_input_train, 'validation': s3_input_validation}
tuner.fit(tuning_channels, job_name=job_name)
tuner.wait()

# Deploying the best model found by the tuning job

## Get the execution role and the hosting image URI for Image Classification

In [37]:
# Execution role as reported by the SageMaker SDK
role = get_execution_role()

# Image URI of the 'image-classification' algorithm for the boto3 session's region
hosting_region = boto3.Session().region_name
hosting_image = get_image_uri(hosting_region, 'image-classification')

## Configure hosting instances

In [38]:
# NOTE(review): these rebind the names used for the training instances earlier
# in the notebook; re-running the estimator cell after this one picks up these values.
instance_type = 'ml.m4.xlarge'
instance_count = 1

## Create a unique model name

In [39]:
model_name_prefix = 'bcd-image-classification-high-level'
# The UTC timestamp suffix keeps model names from colliding between runs
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
model_name = ''.join((model_name_prefix, timestamp))

## Create a Model object

In [40]:
# NOTE(review): this path is pinned to the artifacts of one specific, already
# finished tuning job; it is not derived from the `tuner` created above.
model_artifacts_s3_path = 's3://nitinproject1/breast-cancer-detection/output/bcd-tuning-2020-05-07-22-13-15-002-48683d09/output/model.tar.gz'


def _build_predictor(endpoint_name, sagemaker_session):
    """Wrap the given endpoint in a RealTimePredictor bound to the given session."""
    return RealTimePredictor(endpoint_name, sagemaker_session)


model = Model(
    model_data=model_artifacts_s3_path,
    image=hosting_image,
    role=role,
    name=model_name,
    predictor_cls=_build_predictor,
)

## Create a unique endpoint name

In [41]:
endpoint_name_prefix = 'breast-cancer-detection-ep'
# Endpoint name = fixed prefix + current UTC time
utc_now = time.gmtime()
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', utc_now)
endpoint_name = '{}{}'.format(endpoint_name_prefix, timestamp)

## Create a model, an endpoint configuration and an endpoint

In [42]:
# Deployment settings: endpoint name plus the hosting instance configuration
deploy_settings = {
    'endpoint_name': endpoint_name,
    'initial_instance_count': instance_count,
    'instance_type': instance_type,
}
predictor = model.deploy(**deploy_settings)

-----------------!

# Testing the deployed model

In [43]:
def predict_breast_cancer(image_path, endpoint_predictor=None):
    """Classify one image with the deployed endpoint and print the verdict.

    The raw bytes of the file at ``image_path`` are sent to the endpoint, the
    JSON reply is parsed into the per-class probabilities, and the index of
    the highest probability decides which message is printed (index 0 ->
    'Breast cancer not detected', any other index -> 'Breast cancer detected').

    Args:
        image_path: Path of the image file to classify.
        endpoint_predictor: Optional object exposing ``predict(payload)``.
            When omitted, the notebook-level ``predictor`` is used, exactly as
            before. Passing one explicitly removes the dependency on notebook
            state, so the function can be exercised without a live endpoint.

    Returns:
        None. The probabilities and the verdict are printed.
    """
    if endpoint_predictor is None:
        # Looked up at call time so a re-deployed `predictor` is picked up.
        endpoint_predictor = predictor

    with open(image_path, 'rb') as f:
        payload = bytearray(f.read())

    response = endpoint_predictor.predict(payload)
    # Decode explicitly: json.loads only accepts bytes on Python 3.6 and later.
    if isinstance(response, bytes):
        response = response.decode('utf-8')
    result = json.loads(response)
    print('Probabilities for all classes: ', result)

    predicted_class = np.argmax(result)
    if predicted_class == 0:
        print('Breast cancer not detected')
    else:
        print('Breast cancer detected')

In [44]:
# Image taken from the images/0 folder
class0_image_path = 'images/0/8975_idx5_x2851_y1201_class0.png'
predict_breast_cancer(class0_image_path)

Probabilities for all classes:  [0.7335057258605957, 0.2664942145347595]
Breast cancer not detected


In [45]:
# Image taken from the images/1 folder
class1_image_path = 'images/1/10253_idx5_x551_y651_class1.png'
predict_breast_cancer(class1_image_path)

Probabilities for all classes:  [0.25526249408721924, 0.7447375059127808]
Breast cancer detected


In [48]:
# Files under samples/, classified one after another in this order
sample_image_paths = [
    'samples/fake_images-main.png',
    'samples/fake_images-1.png',
    'samples/fake_images-2.png',
    'samples/fake_images-3.png',
    'samples/fake_images-4.png',
    'samples/fake_images-5.png',
    'samples/fake_images-6.png',
    'samples/real_images.png',
]

for sample_image_path in sample_image_paths:
    predict_breast_cancer(sample_image_path)

Probabilities for all classes:  [0.04449419304728508, 0.9555057883262634]
Breast cancer detected
Probabilities for all classes:  [0.04449419304728508, 0.9555057883262634]
Breast cancer detected
Probabilities for all classes:  [0.9847603440284729, 0.015239688567817211]
Breast cancer not detected
Probabilities for all classes:  [0.9931393265724182, 0.006860687397420406]
Breast cancer not detected
Probabilities for all classes:  [0.9712310433387756, 0.02876889891922474]
Breast cancer not detected
Probabilities for all classes:  [0.9872902035713196, 0.012709741480648518]
Breast cancer not detected
Probabilities for all classes:  [0.9855087399482727, 0.014491289854049683]
Breast cancer not detected
Probabilities for all classes:  [0.04571998491883278, 0.9542800188064575]
Breast cancer detected


# Deleting endpoint

In [12]:
# Delete the endpoint behind `predictor` through a fresh SageMaker session
endpoint_to_delete = predictor.endpoint
sagemaker.Session().delete_endpoint(endpoint_to_delete)