In [2]:
import boto3
import sagemaker

### Data Preparation

In [7]:
import os
import logging
from botocore.exceptions import ClientError

In [9]:
# Name of the public S3 bucket the MNIST files are downloaded from.
public_bucket = "sagemaker-sample-files"
# Relative local path where the downloaded files are stored.
local_data_dir = "sagemaker-tf-hpo/mnist/mnist_data"

# Download training and testing data from a public S3 bucket
def download_from_s3(data_dir='/tmp/data', train=True):
    """Download the gzipped MNIST image and label files from S3.

    The two files of the requested split are fetched from the bucket named
    by the module-level ``public_bucket`` and written to ``data_dir`` as-is
    (no decompression or conversion is performed). A file that already
    exists locally is not downloaded again.

    Args:
        data_dir (str): directory to save the data; created if missing
        train (bool): download the training set when True, otherwise the
            "t10k" test set

    Returns:
        None
    """
    # exist_ok avoids the check-then-create race and keeps re-runs harmless
    os.makedirs(data_dir, exist_ok=True)

    if train:
        images_file = "train-images-idx3-ubyte.gz"
        labels_file = "train-labels-idx1-ubyte.gz"
    else:
        images_file = "t10k-images-idx3-ubyte.gz"
        labels_file = "t10k-labels-idx1-ubyte.gz"

    # S3 object keys always use "/" as separator, independent of the local
    # OS, so they must not be built with os.path.join.
    key_prefix = "datasets/image/MNIST"

    # download objects
    s3 = boto3.client('s3')
    for obj in (images_file, labels_file):
        key = "/".join((key_prefix, obj))
        dest = os.path.join(data_dir, obj)
        # skip files that a previous run already fetched
        if not os.path.exists(dest):
            s3.download_file(public_bucket, key, dest)
    return


# Fetch both splits into the same local directory: training set first,
# then the test set.
download_from_s3(data_dir=local_data_dir, train=True)
download_from_s3(data_dir=local_data_dir, train=False)

In [3]:
bucket_name = 'mnist-data-bucket'
region = 'eu-west-1'

# Pin the resource to the target region so the request is sent to the
# endpoint that matches the LocationConstraint used below.
s3 = boto3.resource('s3', region_name=region)

try:
    s3.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={
            'LocationConstraint': region
        }
    )
except ClientError as err:
    # Re-running this cell must not fail just because the bucket was already
    # created by this account; any other error (e.g. the name being taken by
    # someone else) is still raised.
    if err.response['Error']['Code'] != 'BucketAlreadyOwnedByYou':
        raise

# Reuse bucket_name instead of repeating the literal so the two cannot drift
s3.Bucket(name=bucket_name)

In [14]:
# Execution role and session used for the SageMaker calls in this notebook.
sagemaker_role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session()

# Key prefix under which the local data directory is uploaded to the bucket.
prefix = 'mnist'
s3_location = sagemaker_session.upload_data(
    path=local_data_dir, bucket=bucket_name, key_prefix=prefix
)

# Both channels point at the same uploaded location.
channels = dict.fromkeys(("training", "testing"), s3_location)