In [1]:
import os
import boto3
import re
import copy
import time
from time import gmtime, strftime
from sagemaker import get_execution_role

role = get_execution_role()

region = boto3.Session().region_name

bucket='sagemaker-keras-sagemaker-train' # Put your s3 bucket name here
# customize to your bucket where you will store data
bucket_path = 'https://s3-{}.amazonaws.com/{}'.format(region,bucket)
print(bucket_path)

https://s3-us-east-1.amazonaws.com/sagemaker-keras-sagemaker-train


In [2]:
%%sh

# The name of our algorithm
algorithm_name=keras-sagemaker-train

chmod +x src/*

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-2}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.

aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly
$(aws ecr get-login --region ${region} --no-include-email)

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

# On a SageMaker Notebook Instance, the docker daemon may need to be restarted in order
# to detect your network configuration correctly.  (This is a known issue.)
if [ -d "/home/ec2-user/SageMaker" ]; then
  sudo service docker restart
fi

docker build  -t ${algorithm_name} -f Dockerfile.cpu .
docker tag ${algorithm_name} ${fullname}

docker push ${fullname}

Login Succeeded
Stopping docker: [  OK  ]
Starting docker:	.[  OK  ]
Sending build context to Docker daemon  125.4kB
Step 1/6 : FROM phenompeople/centos-python:3.6.3
 ---> e3d7d8ca4a30
Step 2/6 : ENV PATH="/opt/program:${PATH}"
 ---> Using cache
 ---> 1cbf2822780e
Step 3/6 : ADD requirements.txt /
 ---> Using cache
 ---> d25d6314f799
Step 4/6 : RUN pip3 install -r requirements.txt
 ---> Using cache
 ---> 096da37bff72
Step 5/6 : COPY src /opt/program
 ---> Using cache
 ---> 22d26313e1aa
Step 6/6 : WORKDIR /opt/program
 ---> Using cache
 ---> 6ae53be99130
Successfully built 6ae53be99130
Successfully tagged keras-sagemaker-train:latest
The push refers to repository [850021735523.dkr.ecr.us-east-1.amazonaws.com/keras-sagemaker-train]
bb428fe28fc2: Preparing
0dfc4f084219: Preparing
6f5c0c46682a: Preparing
952e0784686f: Preparing
65c06ae44bbd: Preparing
f194f1dd3e8f: Preparing
ea264623c568: Preparing
c4cd48200f79: Preparing
bcc97fbfc9e1: Preparing
ea264623c568: Waiting
c4cd48200f79: Wait

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



In [3]:
data_location = 's3://{}/data'.format(bucket)
print("data location - " + data_location)

output_location = 's3://{}/output'.format(bucket)
print("output location - " + output_location)

data location - s3://sagemaker-keras-sagemaker-train/data
output location - s3://sagemaker-keras-sagemaker-train/output


In [4]:
import sagemaker as sage
sess = sage.Session()

In [5]:
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = '{}.dkr.ecr.{}.amazonaws.com/keras-sagemaker-train'.format(account, region)

In [6]:
classifier = sage.estimator.Estimator(image_name=image, 
                                      role=role,
                                      train_instance_count=1, 
                                      train_instance_type='ml.c5.2xlarge',
                                      output_path=output_location,
                                      sagemaker_session=sess)

In [7]:
classifier.fit(data_location)

2019-06-07 07:37:31 Starting - Starting the training job...
2019-06-07 07:37:33 Starting - Launching requested ML instances......
2019-06-07 07:38:42 Starting - Preparing the instances for training...
2019-06-07 07:39:21 Downloading - Downloading input data...
2019-06-07 07:39:33 Training - Downloading the training image...
2019-06-07 07:40:31 Uploading - Uploading generated training model
[31mUsing TensorFlow backend.[0m
[31mInstructions for updating:[0m
[31mColocations handled automatically by placer.[0m
[31mInstructions for updating:[0m
[31mPlease use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.[0m
[31mInstructions for updating:[0m
[31mUse tf.cast instead.[0m
[31m2019-06-07 07:40:17.207731: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA[0m
[31m2019-06-07 07:40:17.249781: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] C


2019-06-07 07:40:37 Completed - Training job completed
Billable seconds: 76
