In [1]:
import boto3
import numpy as np
import os
import pathlib
import random
import tensorflow as tf
import time
import sagemaker

from keras.preprocessing.image import img_to_array, load_img
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.tensorflow import TensorFlow
from sklearn.model_selection import train_test_split
from sagemaker.tuner import ContinuousParameter, HyperparameterTuner
from sagemaker.tensorflow.serving import Model

Using TensorFlow backend.


# Converting images to the TFRecord format

## Get all image paths and shuffle them

In [2]:
data_root = pathlib.Path(os.path.join(os.getcwd(), 'images'))

In [3]:
all_image_paths = list(data_root.glob('*/*'))
all_image_paths = [str(path) for path in all_image_paths]
random.shuffle(all_image_paths)
image_count = len(all_image_paths)

In [4]:
all_image_paths[:10]

['/home/ec2-user/SageMaker/images/1/14154_idx5_x2151_y1401_class1.png',
 '/home/ec2-user/SageMaker/images/0/16165_idx5_x2751_y151_class0.png',
 '/home/ec2-user/SageMaker/images/0/16085_idx5_x501_y1701_class0.png',
 '/home/ec2-user/SageMaker/images/0/14192_idx5_x2501_y401_class0.png',
 '/home/ec2-user/SageMaker/images/1/10293_idx5_x901_y1901_class1.png',
 '/home/ec2-user/SageMaker/images/1/12822_idx5_x1301_y151_class1.png',
 '/home/ec2-user/SageMaker/images/0/10307_idx5_x2401_y651_class0.png',
 '/home/ec2-user/SageMaker/images/0/12891_idx5_x2851_y1001_class0.png',
 '/home/ec2-user/SageMaker/images/1/13462_idx5_x1501_y1601_class1.png',
 '/home/ec2-user/SageMaker/images/1/13692_idx5_x2251_y601_class1.png']

## Get all image labels

In [5]:
all_image_labels = [int(pathlib.Path(path).parent.name) for path in all_image_paths]

In [6]:
all_image_labels[:10]

[1, 0, 0, 0, 1, 1, 0, 0, 1, 1]

## Get a numpy array containing all images with their associated labels

In [7]:
channels = 3
image_height = 50
image_width = 50

In [8]:
dataset = np.ndarray(shape=(image_count, image_height, image_width, channels),
                     dtype=np.uint8)

i = 0
for file in all_image_paths:
    img = load_img(file)  # this is a PIL image
    img = img.resize((image_width, image_height))
    x = img_to_array(img, 'channels_last') 
    dataset[i] = x
    i += 1
    if i % 250 == 0:
        print("%d images to array" % i)
print("All images to array!")

250 images to array
500 images to array
750 images to array
1000 images to array
1250 images to array
1500 images to array
1750 images to array
2000 images to array
2250 images to array
2500 images to array
2750 images to array
3000 images to array
3250 images to array
3500 images to array
3750 images to array
4000 images to array
4250 images to array
4500 images to array
4750 images to array
5000 images to array
5250 images to array
5500 images to array
5750 images to array
6000 images to array
6250 images to array
6500 images to array
6750 images to array
7000 images to array
7250 images to array
7500 images to array
7750 images to array
8000 images to array
8250 images to array
8500 images to array
8750 images to array
9000 images to array
9250 images to array
9500 images to array
9750 images to array
10000 images to array
10250 images to array
10500 images to array
10750 images to array
11000 images to array
11250 images to array
11500 images to array
11750 images to array
12000 im

94000 images to array
94250 images to array
94500 images to array
94750 images to array
95000 images to array
95250 images to array
95500 images to array
95750 images to array
96000 images to array
96250 images to array
96500 images to array
96750 images to array
97000 images to array
97250 images to array
97500 images to array
97750 images to array
98000 images to array
98250 images to array
98500 images to array
98750 images to array
99000 images to array
99250 images to array
99500 images to array
99750 images to array
100000 images to array
100250 images to array
100500 images to array
100750 images to array
101000 images to array
101250 images to array
101500 images to array
101750 images to array
102000 images to array
102250 images to array
102500 images to array
102750 images to array
103000 images to array
103250 images to array
103500 images to array
103750 images to array
104000 images to array
104250 images to array
104500 images to array
104750 images to array
105000 image

183500 images to array
183750 images to array
184000 images to array
184250 images to array
184500 images to array
184750 images to array
185000 images to array
185250 images to array
185500 images to array
185750 images to array
186000 images to array
186250 images to array
186500 images to array
186750 images to array
187000 images to array
187250 images to array
187500 images to array
187750 images to array
188000 images to array
188250 images to array
188500 images to array
188750 images to array
189000 images to array
189250 images to array
189500 images to array
189750 images to array
190000 images to array
190250 images to array
190500 images to array
190750 images to array
191000 images to array
191250 images to array
191500 images to array
191750 images to array
192000 images to array
192250 images to array
192500 images to array
192750 images to array
193000 images to array
193250 images to array
193500 images to array
193750 images to array
194000 images to array
194250 imag

272750 images to array
273000 images to array
273250 images to array
273500 images to array
273750 images to array
274000 images to array
274250 images to array
274500 images to array
274750 images to array
275000 images to array
275250 images to array
275500 images to array
275750 images to array
276000 images to array
276250 images to array
276500 images to array
276750 images to array
277000 images to array
277250 images to array
277500 images to array
All images to array!


## Split dataset into train and test

In [9]:
dataset

array([[[[201, 151, 184],
         [172, 112, 159],
         [124,  77, 130],
         ...,
         [157, 111, 156],
         [194, 159, 188],
         [221, 206, 222]],

        [[183, 140, 184],
         [178, 123, 165],
         [206, 171, 195],
         ...,
         [128,  88, 134],
         [183, 130, 175],
         [173, 126, 167]],

        [[114,  77, 133],
         [162, 117, 160],
         [178, 146, 178],
         ...,
         [184, 138, 178],
         [164, 116, 160],
         [169, 110, 158]],

        ...,

        [[173, 121, 166],
         [185, 131, 173],
         [128,  81, 132],
         ...,
         [173, 108, 148],
         [201, 146, 181],
         [187, 107, 145]],

        [[166, 119, 162],
         [ 97,  61, 116],
         [142, 101, 148],
         ...,
         [192, 117, 159],
         [203, 116, 154],
         [153,  90, 136]],

        [[197, 141, 182],
         [128,  97, 145],
         [137,  92, 145],
         ...,
         [193, 143, 176],
        

In [10]:
X_train, X_test, y_train, y_test = train_test_split(dataset, all_image_labels, test_size=0.3, random_state=33)

## Convert images

In [11]:
def convert_to_tfrecord(images, labels, num_examples, name, directory):
    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    if images.shape[0] != num_examples:
        raise ValueError('Images size %d does not match label size %d.' % (images.shape[0], num_examples))
    rows = images.shape[1]
    cols = images.shape[2]
    depth = images.shape[3]

    filename = os.path.join(directory, name + '.tfrecords')
    print('Writing', filename)
    writer = tf.python_io.TFRecordWriter(filename)
    for index in range(num_examples):
        image_raw = images[index].tobytes()
        example = tf.train.Example(features=tf.train.Features(feature={
            'height': _int64_feature(rows),
            'width': _int64_feature(cols),
            'depth': _int64_feature(depth),
            'label': _int64_feature(labels[index]),
            'image_raw': _bytes_feature(image_raw)}))
        writer.write(example.SerializeToString())
    writer.close()

In [12]:
convert_to_tfrecord(X_train, y_train, len(y_train), 'images_train', os.getcwd())

Writing /home/ec2-user/SageMaker/images_train.tfrecords


In [13]:
convert_to_tfrecord(X_test, y_test, len(y_test), 'images_test', os.getcwd())

Writing /home/ec2-user/SageMaker/images_test.tfrecords


## Upload the train and test .tfrecords files to S3

In [1]:
%%bash

aws s3 cp images_train.tfrecords s3://nitinproject1/breast-cancer-detection/input/tfrecord/train/
aws s3 cp images_test.tfrecords s3://nitinproject1/breast-cancer-detection/input/tfrecord/test/

Completed 256.0 KiB/1.4 GiB (861.3 KiB/s) with 1 file(s) remainingCompleted 512.0 KiB/1.4 GiB (1.6 MiB/s) with 1 file(s) remaining  Completed 768.0 KiB/1.4 GiB (2.4 MiB/s) with 1 file(s) remaining  Completed 1.0 MiB/1.4 GiB (3.2 MiB/s) with 1 file(s) remaining    Completed 1.2 MiB/1.4 GiB (3.9 MiB/s) with 1 file(s) remaining    Completed 1.5 MiB/1.4 GiB (4.7 MiB/s) with 1 file(s) remaining    Completed 1.8 MiB/1.4 GiB (5.4 MiB/s) with 1 file(s) remaining    Completed 2.0 MiB/1.4 GiB (6.1 MiB/s) with 1 file(s) remaining    Completed 2.2 MiB/1.4 GiB (6.8 MiB/s) with 1 file(s) remaining    Completed 2.5 MiB/1.4 GiB (7.5 MiB/s) with 1 file(s) remaining    Completed 2.8 MiB/1.4 GiB (8.2 MiB/s) with 1 file(s) remaining    Completed 3.0 MiB/1.4 GiB (8.9 MiB/s) with 1 file(s) remaining    Completed 3.2 MiB/1.4 GiB (9.6 MiB/s) with 1 file(s) remaining    Completed 3.5 MiB/1.4 GiB (10.2 MiB/s) with 1 file(s) remaining   Completed 3.8 MiB/1.4 GiB (10.9 MiB/s) with 1 file(s) remainin

# Creating a model using Tensorflow

## Configure hyperparameters

In [12]:
# Number of output classes
num_classes = 2

# Batch size for training
mini_batch_size =  128

# Max steps for training
max_steps = 5000

# Learning rate
learning_rate = 0.01

## Create a unique job name 

In [13]:
job_name_prefix = 'breast-cancer-detection'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = job_name_prefix + timestamp

## Specify the input paths for the job

In [14]:
bucket = 'nitinproject1'
input_prefix = 'breast-cancer-detection/input/tfrecord'
input_train = 's3://{}/{}/train/'.format(bucket, input_prefix)
input_test = 's3://{}/{}/test/'.format(bucket, input_prefix)

## Specify the output path for the job

In [15]:
output_prefix = 'breast-cancer-detection/output'
output_path = 's3://{}/{}/'.format(bucket, output_prefix)

## Configure training instances

In [16]:
instance_count = 1
instance_type = 'ml.p2.xlarge'
volume_size_gb = 50

## Get the execution role

In [17]:
role = get_execution_role()

## Configure train timeout

In [18]:
train_timeout = 360000

## Specify the path to the training script

In [19]:
training_script_path = 'tensorflowScript.py'

## Create a sagemaker.TensorFlow estimator

In [20]:
estimator = TensorFlow(entry_point=training_script_path,
                       role=role,
                       train_instance_count=instance_count,
                       train_instance_type=instance_type,
                       train_volume_size=volume_size_gb,
                       train_max_run=train_timeout,
                       model_dir=output_path,
                       output_path=output_path,
                       framework_version='1.12.0',
                       py_version = 'py3',
                       hyperparameters = {
                           'num-classes': num_classes,
                           'mini-batch-size': mini_batch_size,
                           'max-steps': max_steps,
                           'learning-rate': learning_rate
                       },
                       metric_definitions = [
                           {
                               'Name': 'loss',
                               'Regex': 'loss = ([0-9\\.]+)'
                           }
                       ])

# Create a training job

In [18]:
estimator.fit({
    'train': input_train,
    'test': input_test
}, job_name = job_name)

2019-06-13 15:40:09 Starting - Starting the training job...
2019-06-13 15:40:10 Starting - Launching requested ML instances.........
2019-06-13 15:41:41 Starting - Preparing the instances for training......
2019-06-13 15:42:51 Downloading - Downloading input data......
2019-06-13 15:44:11 Training - Downloading the training image..
[31m2019-06-13 15:44:19,553 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[31m2019-06-13 15:44:20,276 sagemaker-containers INFO     Invoking user script
[0m
[31mTraining Env:
[0m
[31m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "test": "/opt/ml/input/data/test",
        "train": "/opt/ml/input/data/train"
    },
    "current_host": "algo-1",
    "framework_module": "sagemaker_tensorflow_container.training:main",
    "hosts": [
        "algo-1"
    ],
    "hyperparameters": {
        "num-classes": 2,
        "learning-rate": 0.01,
        "max-steps": 5000,
        "mode

# Creating a tuning job

## Defining tuning configuration

In [19]:
hyperparameter_ranges = {
    'learning_rate': ContinuousParameter(0.001, 1.0)
}
objective_metric_name = 'loss'
objective_type = 'Minimize'

max_jobs=2
max_parallel_jobs=2

## Create a unique job name

In [20]:
job_name_prefix = 'bcd-tuning'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = job_name_prefix + timestamp

## Creating a hyperparameter tuner

In [None]:
tuner = HyperparameterTuner(estimator=estimator, 
                            objective_metric_name=objective_metric_name, 
                            hyperparameter_ranges=hyperparameter_ranges,
                            objective_type=objective_type, 
                            max_jobs=max_jobs, 
                            max_parallel_jobs=max_parallel_jobs,
                            metric_definitions = [
                                {
                                   'Name': 'loss',
                                   'Regex': 'loss = ([0-9\\.]+)'
                                }
                            ])

## Launch the tuning job

In [29]:
tuner.fit({
    'train': input_train,
    'test': input_test
}, job_name = job_name)
tuner.wait()

# Deploying the best model found by the tuning job

## Get the execution role

In [21]:
role = get_execution_role()

## Configure hosting instances

In [22]:
instance_count = 1
instance_type = 'ml.m4.xlarge'

## Create a unique model name

In [23]:
model_name_prefix = 'bcd-image-classification-tensorflow'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
model_name = model_name_prefix + timestamp

## Create a Model object

In [24]:
model_artifacts_s3_path = 's3://nitinproject1/breast-cancer-detection/output/bcd-tuning-2020-05-07-22-59-02-001-03298e08/output/model.tar.gz'
model = Model(
    name=model_name,
    model_data=model_artifacts_s3_path,
    role=role
)

## Create a unique endpoint name

In [25]:
endpoint_name_prefix = 'breast-cancer-detection-ep'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
endpoint_name = endpoint_name_prefix + timestamp

## Create a model, an endpoint configuration and an endpoint

In [26]:
predictor = model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=instance_count,
    instance_type=instance_type
)

-------------!

# Testing the deployed model

In [27]:
def predict_breast_cancer(image):
    response = predictor.predict(image)
    print('Received response is: ', response)
    print('Probabilities for all classes: ', response['predictions'][0]['probabilities'])
    predicted_class = response['predictions'][0]['classes']
    if predicted_class == 0:
        print('Breast cancer not detected')
    else:
        print('Breast cancer detected')

## Get images for both classes

In [28]:
image_with_no_cancer = dataset[np.logical_not(all_image_labels)][0]

In [None]:
image_with_cancer = dataset[np.logical_not(all_image_labels)][1]

## Obtain predictions

In [30]:
predict_breast_cancer(image_with_no_cancer)

Received response is:  {'predictions': [{'probabilities': [1.0, 0.0], 'classes': 0}]}
Probabilities for all classes:  [1.0, 0.0]
Breast cancer not detected


In [35]:
predict_breast_cancer(image_with_cancer)

Received response is:  {'predictions': [{'probabilities': [1.0, 0.0], 'classes': 0}]}
Probabilities for all classes:  [1.0, 0.0]
Breast cancer not detected


# Deleting endpoint

In [46]:
sagemaker.Session().delete_endpoint(predictor.endpoint)