# Download data locally

In [None]:
# Mark the download script as executable, then run it with bash to fetch the data locally
!chmod a+x data_downloader.sh
!bash data_downloader.sh

### Perform EDA, Data Processing, Model Training locally

In [None]:
from glob import glob
import pandas as pd
import os

# Read the label metadata stored next to the images
base_dir = "data/"
labels_csv = os.path.join(base_dir, "labels.csv")
df = pd.read_csv(labels_csv)
print(df.shape)

# Index every JPEG by its file stem so it can be matched against image_id
image_dict = {
    os.path.splitext(os.path.basename(jpg_path))[0]: jpg_path
    for jpg_path in glob(os.path.join(base_dir, "images", "*.jpg"))
}
print(len(image_dict))
df['path'] = df['image_id'].map(image_dict.get)


# Full diagnosis names keyed by the short code found in the dx column
lesion_type_dict = dict(
    nv='Melanocytic nevi',
    mel='Melanoma',
    bkl='Benign keratosis-like lesions ',
    bcc='Basal cell carcinoma',
    akiec='Actinic keratoses',
    vasc='Vascular lesions',
    df='Dermatofibroma',
)

# Attach the full name and an integer class index to every row
df['cell_type'] = df['dx'].map(lesion_type_dict.get)
df['cell_type_idx'] = df['cell_type'].astype('category').cat.codes

In [None]:
from tensorflow.python.keras.preprocessing import image

# Preview one labelled sample: print its diagnosis and render it at 256x256
i = 11
sample = df.loc[i]
print(sample['cell_type'])
img = image.load_img(sample['path'], target_size=(256, 256))
img

### Turn images and the label vectors into numpy arrays

In [None]:
from tensorflow.python.keras.utils.np_utils import to_categorical 
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn import preprocessing
import numpy as np
import json

def load_images(path, target_size=(32, 32)):
    """Load one image from local disk, resize it and return it as an array.

    Parameters
    ----------
    path : str
        Location of the image file on the local filesystem.
    target_size : tuple of int, optional
        Size tuple forwarded to ``image.load_img``. Defaults to ``(32, 32)``,
        the size used for the training arrays in this notebook.

    Returns
    -------
    The array produced by ``image.img_to_array`` for the resized image.
    """
    # ---- Load image and resize
    img = image.load_img(path, target_size=target_size)

    # ---- Convert to array
    return image.img_to_array(img)

# Apply the image load function to all paths
df['image'] = df['path'].map(lambda x: load_images(x))

# Train/Test split
features = df['image']
target = df['cell_type_idx']
x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
cls_weight = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)


# Convert to numpy
y_train = to_categorical(y_train, num_classes=7)
y_test = to_categorical(y_test, num_classes=7)

x_train = np.asarray(x_train.tolist()).reshape((x_train.shape[0],*(32,32,3)))
x_test = np.asarray(x_test.tolist()).reshape((x_test.shape[0],*(32,32,3)))


# Save arrays to disk
!mkdir data/processed-data
!mkdir data/processed-data/train
!mkdir data/processed-data/test

np.save(os.path.join(base_dir, "processed-data/train/cls_weight.npy"), cls_weight)
np.save(os.path.join(base_dir, "processed-data/train/x_train.npy"), x_train)
np.save(os.path.join(base_dir, "processed-data/train/y_train.npy"), y_train)
np.save(os.path.join(base_dir, "processed-data/test/x_test.npy"), x_test)
np.save(os.path.join(base_dir, "processed-data/test/y_test.npy"), y_test)

!ls data/processed-data/train/

### Test-drive a tf.keras training script (again locally)

In [None]:
# Print train.py with syntax highlighting so the script can be reviewed before running it
!pygmentize 'train.py'

In [None]:
!mkdir models

%run -i train.py \
    --model_dir ./models/ \
    --train ./data/processed-data/train/ \
    --test ./data/processed-data/test/ \
    --model_version 2 \
    --batch_size 32 \
    --epochs 3

# SageMaker Data Processing, Training, Tuning, Hosting & Monitoring

Before we start using SageMaker, let's copy our dataset to S3. S3 is the main data store for SageMaker.

In [None]:
# Create S3 client
import boto3
region='us-east-1'
s3_client = boto3.client('s3', region_name=region)

# Create S3 Buckets for this project
account_id = boto3.client('sts').get_caller_identity().get('Account')
BUCKET = "skin-cancer-classifier-{account_id}".format(account_id=account_id)
s3_client.create_bucket(Bucket=BUCKET)

print("Project Bucket: {bucket}".format(bucket=BUCKET))

!aws s3 cp data/images/ {"s3://{}/raw-data/images/".format(BUCKET)} --recursive

!aws s3 cp data/labels.csv {"s3://{}/raw-data/labels.csv".format(BUCKET)}

!aws s3 ls {"s3://" + BUCKET}

It's good practice to tag all SageMaker jobs/artifacts with a date/time string.

In [None]:
from time import gmtime, strftime

# UTC timestamp captured once; later cells embed it in S3 prefixes and job names
run_started_utc = gmtime()
WORKFLOW_DATE_TIME = strftime("%Y-%m-%d-%H-%M-%S", run_started_utc)

## SageMaker Processing jobs

With Amazon SageMaker Processing jobs, you can leverage a simplified, managed experience to run data pre- or post-processing and model evaluation workloads on the Amazon SageMaker platform.

A processing job downloads input from Amazon Simple Storage Service (Amazon S3), then uploads outputs to Amazon S3 during or after the processing job.

<img src="README-IMAGES/Processing-1.jpg">

Note that:

    1. Common use case is to run a scikit-learn script that cleans, pre-processes, performs feature-engineering, and splits the input data into train and test sets.

    2. However, you can also run post-processing jobs on the test data to evaluate a trained model's performance.

    3. You can take advantage of SageMaker's pre-built scikit-learn, spark and popular deep learning containers or use your own custom container to run processing jobs with your own Python libraries and dependencies.

In [None]:
# Print processing.py with syntax highlighting so the script can be reviewed before it is submitted
!pygmentize 'processing.py'

### Create Processing Job Configs

In [None]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
# Outputs of this run are written under s3://<bucket>/<workflow timestamp>/data/
train_path = "s3://{}/{}/data/train".format(BUCKET, WORKFLOW_DATE_TIME)
test_path = "s3://{}/{}/data/test".format(BUCKET, WORKFLOW_DATE_TIME)

# Single input: the raw-data/ prefix, made available inside the container
raw_data_input = ProcessingInput(
    source="s3://{}/raw-data/".format(BUCKET),
    destination='/opt/ml/processing/input',
    s3_data_distribution_type='ShardedByS3Key',
)
inputs = [raw_data_input]

# Two outputs: container folders uploaded to the train/test S3 paths above
train_output = ProcessingOutput(
    output_name='train',
    destination=train_path,
    source='/opt/ml/processing/train',
)
test_output = ProcessingOutput(
    output_name='test',
    destination=test_path,
    source='/opt/ml/processing/test',
)
outputs = [train_output, test_output]

The `ScriptProcessor` class in the SageMaker SDK lets you run a command inside this container, which you can use to run your own script.

For a full list of available container URIs, see [Available Deep Learning Container Images](https://github.com/aws/deep-learning-containers/blob/master/available_images.md). For more information on using Docker containers, see Use Your Own Algorithms or Models with Amazon SageMaker.

In [None]:
from sagemaker.processing import ScriptProcessor
from sagemaker import get_execution_role
# Execution role of this notebook, passed to the SageMaker jobs created below
role = get_execution_role()

# Processor that runs a script with python3 inside the TensorFlow training image
processor_settings = dict(
    image_uri="763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training:1.15.2-cpu-py37-ubuntu18.04",
    instance_count=1,
    instance_type='ml.m5.xlarge',
    role=role,
    command=['python3'],
)
script_processor = ScriptProcessor(**processor_settings)

# Submit processing.py with the inputs/outputs configured in the previous cell
processing_job_name = "skin-cancer-processing-{}".format(WORKFLOW_DATE_TIME)
script_processor.run(
    code='processing.py',
    inputs=inputs,
    outputs=outputs,
    arguments=['--train-test-split-ratio', '0.2'],
    job_name=processing_job_name,
)

### Bring Your Own Container
This is the Dockerfile to create the processing container. Install `pandas`, `scikit-learn`, `Pillow` and `TensorFlow` into it. You can install your own dependencies.

```shell
!mkdir docker
```

```python
%%writefile docker/Dockerfile

FROM python:3.7-slim-buster

RUN pip3 install pandas==0.25.3 scikit-learn==0.21.3 Pillow==5.4.1 tensorflow==1.15.2 Keras==2.2.4 Keras-Applications==1.0.8 Keras-Preprocessing==1.1.0
ENV PYTHONUNBUFFERED=TRUE

ENTRYPOINT ["python3"]
```

This block of code builds the container uri in AWS ECR
```python
import boto3
region = "us-east-1"
account_id = boto3.client('sts').get_caller_identity().get('Account')
uri_suffix = 'amazonaws.com'
ecr_repository = 'sagemaker-processing-containers'
tag = ':latest'
processing_repository_uri = '{}.dkr.ecr.{}.{}/{}'.format(account_id, region, uri_suffix, ecr_repository + tag)
```

This block of code builds the container using the `docker` command, creates an Amazon Elastic Container Registry (Amazon ECR) repository, and pushes the image to Amazon ECR.
```shell
# Create ECR repository and push docker image
!docker build -t $ecr_repository docker
!$(aws ecr get-login --region $region --registry-ids $account_id --no-include-email)
!aws ecr create-repository --repository-name $ecr_repository
!docker tag {ecr_repository + tag} $processing_repository_uri
!docker push $processing_repository_uri
```

## SageMaker hosted training 
There are four modes to SageMaker:
* **Built-in**: Choose one of our 17 built-in algorithms and simply point SageMaker to your data on S3.
* **Script Mode**: Author your own model using SKlearn, Tensorflow, PyTorch or MXNet.
* **BYO Container**: Very similar to script-mode but with one additional parameter that tells SageMaker to use one of your own custom docker containers.
* **Marketplace**: Purchase an algorithm from 100s of third-party sellers and simply point SageMaker to your data on S3.
![modes](README-IMAGES/sagemaker-training.png)

## Script Mode
We provide SageMaker with a training script that simply loads the processed data, which has been copied to the container from S3, and fits a simple CNN multi-class classifier.

At the end of the training job we have added a step to export the trained model to the path stored in the environment variable **SM_MODEL_DIR**, which always points to **/opt/ml/model**. This is critical because SageMaker uploads all the model artifacts in this folder to S3 at end of training.

In [None]:
# Print train.py with syntax highlighting; this is the script submitted to SageMaker below
!pygmentize 'train.py'

We will submit job by creating a TensorFlow Estimator. The sagemaker.tensorflow.TensorFlow estimator handles locating the script mode container, uploading your script to a S3 location and creating a SageMaker training job. Let's call out a couple important parameters here:

* py_version is set to 'py3' to indicate that we are using Python 3 and script mode. 
* entry_point is set to the name of our python training script
* hyperparameters is a dictionary containing values for model hyperparameters and other arguments needed to run our script. Example: model_version is not a hyperparameter but a way to version our model.

To start a training job, we call estimator.fit(training_data_uri).

An S3 location is used here as the input. fit creates a default channel named 'train', which points to this S3 location. In the training script we can then access the training data from the location stored in SM_CHANNEL_TRAINING. fit accepts a couple other types of input as well. See the API doc [here](https://sagemaker.readthedocs.io/en/stable/estimators.html#sagemaker.estimator.EstimatorBase.fit) for details.

When training starts, the TensorFlow container executes train.py, passing hyperparameters and model_dir from the estimator as script arguments. 

When training is complete, the training job will upload the saved model for TensorFlow serving.

In [None]:
from sagemaker.tensorflow import TensorFlow

# Arguments handed to train.py (model_version is a version tag, not a tuned value)
training_hyperparameters = {
    'epochs': 10,
    'batch_size': 64,
    'model_version': '2',
}

# Model artifacts and uploaded source code are stored under this run's timestamp prefix
model_artifacts_uri = 's3://{}/{}'.format(BUCKET, WORKFLOW_DATE_TIME + '/model-artifacts')
source_code_uri = 's3://{}/{}'.format(BUCKET, WORKFLOW_DATE_TIME + '/source-code')

estimator = TensorFlow(
    entry_point='train.py',
    hyperparameters=training_hyperparameters,
    train_instance_count=1,
    train_instance_type='ml.m5.xlarge',
    output_path=model_artifacts_uri,
    code_location=source_code_uri,
    role=role,
    framework_version="2.1.0",
    py_version='py3',
    model_dir="/opt/ml/model",
)



In [None]:
# Launch the hosted training job on the processed train/test data and block until it finishes
training_channels = {'train': train_path, 'test': test_path}
training_job_name = "skin-cancer-{}".format(WORKFLOW_DATE_TIME)
estimator.fit(
    inputs=training_channels,
    job_name=training_job_name,
    wait=True,
    logs=False,
)

## Automatic Model Tuning

So far we have simply run one Local Mode training job and one Hosted Training job without any real attempt to tune hyperparameters to produce a better model, other than increasing the number of epochs. Selecting the right hyperparameter values to train your model can be difficult, and typically is very time consuming if done manually. The right combination of hyperparameters is dependent on your data and algorithm; some algorithms have many different hyperparameters that can be tweaked; some are very sensitive to the hyperparameter values selected; and most have a non-linear relationship between model fit and hyperparameter values. SageMaker Automatic Model Tuning helps automate the hyperparameter tuning process: it runs multiple training jobs with different hyperparameter combinations to find the set with the best model performance.

We begin by specifying the hyperparameters we wish to tune, and the range of values over which to tune each one. We also must specify an objective metric to be optimized: in this use case, we'd like to maximize the AUC that the training script reports in its logs.


In [None]:
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

# Search space explored by the tuning job
hyperparameter_ranges = dict(
    learning_rate=ContinuousParameter(0.001, 0.2, scaling_type="Logarithmic"),
    epochs=IntegerParameter(10, 50),
    batch_size=IntegerParameter(64, 256),
)

# Objective: maximize the AUC value parsed from the training logs by the regex below
objective_metric_name = 'AUC'
objective_type = 'Maximize'
metric_definitions = [
    {'Name': 'AUC', 'Regex': 'AUC: ([0-9\\.]+)'},
]

Next we specify a HyperparameterTuner object that takes the above definitions as parameters. Each tuning job must be given a budget: a maximum number of training jobs. A tuning job will complete after that many training jobs have been executed.

We also can specify how much parallelism to employ, in this case five jobs, meaning that the tuning job will complete after two series of five jobs in parallel have completed. For the default Bayesian Optimization tuning strategy used here, the tuning search is informed by the results of previous groups of training jobs, so we don't run all of the jobs in parallel, but rather divide the jobs into groups of parallel jobs. There is a trade-off: using more parallel jobs will finish tuning sooner, but likely will sacrifice tuning search accuracy.

Now we can launch a hyperparameter tuning job by calling the fit method of the HyperparameterTuner object. The tuning job may take around 10 minutes to finish. While you're waiting, the status of the tuning job, including metadata and results for individual training jobs within the tuning job, can be checked in the SageMaker console in the Hyperparameter tuning jobs panel.


In [None]:
# Tuning job: at most 10 training jobs in total, 5 running at a time
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type=objective_type,
    max_jobs=10,
    max_parallel_jobs=5,
)

# Start the tuning job on the same train/test channels and block until it completes
tuning_job_name = "skin-cancer-{}".format(WORKFLOW_DATE_TIME)
tuning_channels = {'train': train_path, 'test': test_path}
tuner.fit(inputs=tuning_channels, job_name=tuning_job_name)
tuner.wait()

After the tuning job is finished, we can use the HyperparameterTuningJobAnalytics object from the SageMaker Python SDK to list the top 10 training jobs with the best performance. Although the results vary from tuning job to tuning job, the best AUC from the tuning job (under the FinalObjectiveValue column) likely will be substantially higher than the AUC from the hosted training job above, where we did not perform any tuning other than manually increasing the number of epochs once.

In [None]:
import sagemaker
# Fetch the per-training-job results of the tuning job and show the 10 best by objective value
tuner_metrics = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)
tuning_results = tuner_metrics.dataframe()
tuning_results.sort_values(by='FinalObjectiveValue', ascending=False).head(10)

The total training time and training jobs status can be checked with the following lines of code. Because automatic early stopping is by default off, all the training jobs should be completed normally. For an example of a more in-depth analysis of a tuning job, see the SageMaker official sample [HPO_Analyze_TuningJob_Results.ipynb](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/hyperparameter_tuning/analyze_results/HPO_Analyze_TuningJob_Results.ipynb) notebook.

In [None]:
# Sum the elapsed seconds over all training jobs and report them in hours
jobs_summary = tuner_metrics.dataframe()
elapsed_seconds = jobs_summary['TrainingElapsedTimeSeconds'].sum()
total_time = elapsed_seconds / 3600
print("The total training time is {:.2f} hours".format(total_time))

# Count of training jobs per final status
jobs_summary['TrainingJobStatus'].value_counts()

### SageMaker hosted endpoint

Assuming the best model from the tuning job is better than the model produced by the individual Hosted Training job above, we could now easily deploy that model to production. A convenient option is to use a SageMaker hosted endpoint, which serves real time predictions from the trained model (Batch Transform jobs also are available for asynchronous, offline predictions on large datasets). The endpoint will retrieve the TensorFlow SavedModel created during training and deploy it within a SageMaker TensorFlow Serving container. This all can be accomplished with one line of code.

More specifically, by calling the deploy method of the HyperparameterTuner object we instantiated above, we can directly deploy the best model from the tuning job to a SageMaker hosted endpoint. It will take several minutes longer to deploy the model to the hosted endpoint compared to the Local Mode endpoint, which is more useful for fast prototyping of inference code.

In [None]:
# Deploy the tuner's model to a single-instance hosted endpoint
deploy_settings = dict(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    endpoint_type="tensorflow-serving",
    endpoint_name="skin-cancer-classifier",
)
tuning_predictor = tuner.deploy(**deploy_settings)

### Native support for data-capture
```python
from sagemaker.model_monitor import DataCaptureConfig

data_capture_config = DataCaptureConfig(
                        enable_capture = True,
                        sampling_percentage=50,
                        destination_s3_uri='s3://tf-dermatology/endpoint-traffic/',
                        kms_key_id=None,
                        capture_options=["REQUEST", "RESPONSE"],
                        csv_content_types=["text/csv"],
                        json_content_types=["application/json"]
)
```

### add the new configuration and wait for it to be applied
```python
from sagemaker import RealTimePredictor

predictor = RealTimePredictor(endpoint="tf-dermatology")
predictor.update_data_capture_config(data_capture_config=data_capture_config)
```

# Invoking the endpoint

The formats of the input and the output data correspond directly to the request and response formats of the Predict method in the [TensorFlow Serving REST API](https://www.tensorflow.org/serving/api_rest). SageMaker's TensorFlow Serving endpoints can also accept additional input formats that are not part of the TensorFlow REST API, including the simplified JSON format, line-delimited JSON objects ("jsons" or "jsonlines"), and CSV data.

In this example we are using a numpy array as input, which will be serialized into the simplified JSON format. In addition, TensorFlow Serving can also process multiple items at once, as you can see in the following code. You can find the complete documentation on how to make predictions against a TensorFlow Serving SageMaker endpoint [here](https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/tensorflow/deploying_tensorflow_serving.rst#making-predictions-against-a-sagemaker-endpoint).


In [None]:
# Rows of df whose images are sent to the endpoint
test_index = [1000,2000,3000,4000,5000]

# Reuse load_images so the request batch goes through the same 32x32 resize and
# array conversion as the training data. The original loop computed img_array
# but appended the PIL image instead.
imgs = [load_images(df['path'][i]) for i in test_index]
print(len(imgs))

# Stack into one (n_images, 32, 32, 3) batch
test = np.stack(imgs)
print(test.shape)

In [None]:
# Use a dedicated name for the request body: rebinding `inputs` here would
# overwrite the ProcessingInput list defined earlier in the notebook and break
# a later re-run of the processing cell.
request_payload = {
  'instances': test
}
result = tuning_predictor.predict(request_payload)
result

### Invoke the endpoint from anywhere! Using Lambda and API Gateway
[APIGateway](https://console.aws.amazon.com/apigateway/home?region=us-east-1#/apis/obrgi23zgl/resources/u208pq/methods/POST)

![modes](README-IMAGES/lambda-apigateway.png)

# Clean up

Let's delete the endpoint we just created to prevent incurring any extra costs.

In [None]:
# Tear down the hosted endpoint created by tuner.deploy so it stops incurring cost
sagemaker_session = sagemaker.Session()
sagemaker_session.delete_endpoint(tuning_predictor.endpoint)