In [1]:
import boto3
import os
from botocore.exceptions import ClientError
import sagemaker

### 1. Data Preparation

In [2]:
# Public S3 bucket from which the MNIST archives are fetched
public_bucket = "sagemaker-sample-files"

# Downloads one MNIST split (train or test) from the public S3 bucket
def download_from_s3(data_dir='/tmp/data', train=True):
    """Download the image and label archives of one MNIST split.

    The gzipped IDX files are stored unchanged in ``data_dir``; nothing is
    decompressed or parsed here. A file that already exists locally is
    skipped, so re-running the cell does not download it again.

    Args:
        data_dir: Local directory that receives the files. It is created
            (including parents) when missing.
        train: ``True`` fetches the training split (``train-*``),
            ``False`` fetches the test split (``t10k-*``).

    Returns:
        None.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(data_dir, exist_ok=True)

    if train:
        images_file = "train-images-idx3-ubyte.gz"
        labels_file = "train-labels-idx1-ubyte.gz"
    else:
        images_file = "t10k-images-idx3-ubyte.gz"
        labels_file = "t10k-labels-idx1-ubyte.gz"

    s3 = boto3.client('s3')
    for obj in (images_file, labels_file):
        # S3 keys always use '/' as separator. os.path.join would insert
        # '\\' on Windows and produce a key that does not exist.
        key = "/".join(("datasets/image/MNIST", obj))
        dest = os.path.join(data_dir, obj)
        if not os.path.exists(dest):
            s3.download_file(public_bucket, key, dest)

# Directory that receives the downloaded MNIST archives
local_data_dir = './mnist_data'

# Fetch the training split first, then the test split
for is_train_split in (True, False):
    download_from_s3(local_data_dir, is_train_split)

In [14]:
# NOTE(review): stale inspection cell. Its execution count (14) shows it was
# run after the upload cell further down, which is where `s3_location` is
# first assigned. On "Restart Kernel -> Run All" this line raises NameError;
# move it below the upload cell or drop it.
s3_location

's3://mnist-data-bucket/mnist'

In [3]:
bucket_name = 'mnist-data-bucket'
region = 'eu-west-1'

# Bind the resource to the same region that is used as LocationConstraint.
# With a different default session region S3 rejects the request because the
# constraint does not match the endpoint the call is sent to.
s3 = boto3.resource('s3', region_name=region)

# Create the bucket, tolerating the case where an earlier run of this cell
# already created it, so the cell can be re-executed safely.
try:
    s3.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={
            'LocationConstraint': region
        }
    )
except ClientError as err:
    # Only "we already own this bucket" is harmless; anything else (name
    # taken by another account, missing permissions, ...) is re-raised.
    if err.response['Error']['Code'] != 'BucketAlreadyOwnedByYou':
        raise

# Last expression: show the bucket handle, as the original cell did
s3.Bucket(bucket_name)

s3.Bucket(name='mnist-data-bucket')

In [4]:
# Key prefix under which the dataset is stored inside the bucket
prefix = 'mnist'
sagemaker_session = sagemaker.Session(default_bucket=bucket_name)

# Upload the local MNIST data directory to S3 and keep the resulting URI
s3_location = sagemaker_session.upload_data(
    local_data_dir,
    bucket=bucket_name,
    key_prefix=prefix,
)

# Both input channels point at the same S3 location
channels = dict.fromkeys(("training", "testing"), s3_location)

### 2. SageMaker Training/Hyperparameter Tuning Job Creation

In [5]:
from sagemaker.tensorflow import TensorFlow

In [25]:
# Execution role handed to the training job
sagemaker_role = sagemaker.get_execution_role()

# TensorFlow estimator that describes the SageMaker training job
sagemaker_estimator = TensorFlow(
    # Training script and the folder it lives in
    source_dir='code',
    entry_point='train.py',
    # Framework / Python version
    framework_version='2.3.0',
    py_version='py37',
    # Compute resources
    instance_count=1,
    instance_type='ml.m5.4xlarge',
    role=sagemaker_role,
    model_dir='/opt/ml/model',
    # Fixed hyperparameters forwarded to the training script
    hyperparameters={
        'batch-size': 512,
        'epochs': 4,
    },
)

In [26]:
from sagemaker.tuner import ContinuousParameter, HyperparameterTuner

In [27]:
# Hyperparameter tuner built on top of the estimator: searches the learning
# rate and minimises the 'average test loss' metric
sagemaker_tuner = HyperparameterTuner(
    estimator=sagemaker_estimator,
    objective_type='Minimize',
    objective_metric_name='average test loss',
    metric_definitions=[
        # The regex captures the value from the training log;
        # corresponding line in code/train.py (168)
        {'Name': 'average test loss', 'Regex': 'Test Loss: ([0-9\\.]+)'},
    ],
    hyperparameter_ranges={
        'learning-rate': ContinuousParameter(1e-4, 1e-3),
    },
    max_parallel_jobs=2,
    max_jobs=4,
)

# Start the tuning job on the uploaded input channels
sagemaker_tuner.fit(inputs=channels)

.............................................!


### 3. SageMaker Endpoint Deployment

In [28]:
# Deploys the model of the best training job found by the tuner to a
# real-time SageMaker endpoint.
# Endpoint names may only contain alphanumeric characters and hyphens;
# underscores are rejected by the CreateEndpoint API, hence the hyphens.
sagemaker_endpoint = sagemaker_tuner.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    endpoint_name='mnist-sagemaker-endpoint'
)


2021-01-29 20:23:42 Starting - Preparing the instances for training
2021-01-29 20:23:42 Downloading - Downloading input data
2021-01-29 20:23:42 Training - Training image download completed. Training in progress.
2021-01-29 20:23:42 Uploading - Uploading generated training model
2021-01-29 20:23:42 Completed - Training job completed

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.



-----------------!

### 4. Resource Cleanup

In [30]:
# Remove the SageMaker model that deploy() created as well as the endpoint,
# otherwise the model is left behind in the account.
# delete_model() runs first because it may need to look the model up through
# the still-existing endpoint.
sagemaker_endpoint.delete_model()
sagemaker_endpoint.delete_endpoint()