In [1]:
from sagemaker import get_execution_role
from sagemaker.session import Session

# S3 bucket for saving code and model artifacts.
# Feel free to specify a different bucket here if you wish.
bucket = 'car-classifier-us-east-2/python_readable_data/stanford_cars_dataset/SGM_data_and_models'

#IAM execution role that gives SageMaker access to resources in your AWS account.
role = get_execution_role()

In [2]:
from sagemaker.tensorflow import TensorFlow

car_classifier = TensorFlow(entry_point='CNN_TF_script.py',
                            role=role,
                            train_volume_size = 30,
                            train_max_run = 600, #seconds to run before terminating
                            py_version = 'py3',
                            train_instance_count=2,
                            train_instance_type='ml.m4.xlarge',                           
                            framework_version = '1.12.0',
                            distributions={'parameter_server': {'enabled': True}}
                           )

In [4]:
%%time
import boto3

# use the region-specific sample data bucket
train_data_location = 's3://{}/data/train_head.csv'.format(bucket)
test_data_location = 's3://{}/data/test.csv'.format(bucket)

car_classifier.fit({'train' : train_data_location,
                   'test' : test_data_location
                  })

INFO:sagemaker:Creating training-job with name: sagemaker-tensorflow-scriptmode-2019-03-20-02-45-41-768


2019-03-20 02:45:42 Starting - Starting the training job...
2019-03-20 02:45:46 Starting - Launching requested ML instances......
2019-03-20 02:46:50 Starting - Preparing the instances for training......
2019-03-20 02:48:04 Downloading - Downloading input data...
2019-03-20 02:48:27 Training - Training image download completed. Training in progress.
[32m2019-03-20 02:48:32,042 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[32m2019-03-20 02:48:32,047 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[32m2019-03-20 02:48:32,480 sagemaker_tensorflow_container.training INFO     Running distributed training job with parameter servers[0m
[32m2019-03-20 02:48:32,481 sagemaker_tensorflow_container.training INFO     Launching parameter server process[0m
[32m2019-03-20 02:48:32,481 sagemaker_tensorflow_container.training INFO     Running distributed training job with parameter servers[0m
[32m2019-03-20 02:48:3

[32mINFO:tensorflow:Calling model_fn.[0m
[32mINFO:tensorflow:Done calling model_fn.[0m
[32mINFO:tensorflow:Create CheckpointSaverHook.[0m
[31mINFO:tensorflow:loss = 0.775914, step = 0[0m
[31mINFO:tensorflow:loss = 0.775914[0m
[32mINFO:tensorflow:Graph was finalized.[0m
[32mINFO:tensorflow:Running local_init_op.[0m
[32mINFO:tensorflow:Done running local_init_op.[0m
[32mINFO:tensorflow:loss = 1.1265903, step = 2[0m
[32mINFO:tensorflow:loss = 1.1265903[0m
[32mINFO:tensorflow:global_step/sec: 4.81884[0m
[32mINFO:tensorflow:loss = 0.6088884, step = 15 (2.780 sec)[0m
[32mINFO:tensorflow:loss = 0.6088884 (2.780 sec)[0m
[32mINFO:tensorflow:global_step/sec: 5.66318[0m
[32mINFO:tensorflow:loss = 0.48403507, step = 28 (1.875 sec)[0m
[32mINFO:tensorflow:loss = 0.48403507 (1.875 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.42837[0m
[32mINFO:tensorflow:loss = 0.60450137, step = 40 (1.915 sec)[0m
[32mINFO:tensorflow:loss = 0.60450137 (1.915 sec)[0m
[32mINFO:te

[31mINFO:tensorflow:loss = 0.44741887, step = 492 (8.956 sec)[0m
[31mINFO:tensorflow:loss = 0.44741887 (8.956 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.60666[0m
[32mINFO:tensorflow:loss = 0.6363586, step = 501 (1.952 sec)[0m
[32mINFO:tensorflow:loss = 0.6363586 (1.952 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.46085[0m
[32mINFO:tensorflow:loss = 0.42816928, step = 513 (1.852 sec)[0m
[32mINFO:tensorflow:loss = 0.42816928 (1.852 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.37908[0m
[32mINFO:tensorflow:loss = 0.117971316, step = 525 (1.926 sec)[0m
[32mINFO:tensorflow:loss = 0.117971316 (1.926 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.33985[0m
[32mINFO:tensorflow:loss = 0.05122222, step = 538 (1.901 sec)[0m
[32mINFO:tensorflow:loss = 0.05122222 (1.901 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.40462[0m
[32mINFO:tensorflow:loss = 0.18116216, step = 550 (1.887 sec)[0m
[32mINFO:tensorflow:loss = 0.18116216 (1.887 sec)[0m
[32mINFO:tensorfl

[31mINFO:tensorflow:loss = 1.5970083, step = 1001 (8.945 sec)[0m
[31mINFO:tensorflow:loss = 1.5970083 (8.945 sec)[0m
[31mINFO:tensorflow:Saving checkpoints for 1002 into s3://sagemaker-us-east-2-050922845674/sagemaker-tensorflow-scriptmode-2019-03-20-02-45-41-768/model/model.ckpt.[0m
[31mINFO:tensorflow:Calling model_fn.[0m
[31mINFO:tensorflow:Done calling model_fn.[0m
[31mINFO:tensorflow:Starting evaluation at 2019-03-20-02:51:26[0m
[31mINFO:tensorflow:Graph was finalized.[0m
[31mINFO:tensorflow:Restoring parameters from s3://sagemaker-us-east-2-050922845674/sagemaker-tensorflow-scriptmode-2019-03-20-02-45-41-768/model/model.ckpt-1002[0m
[31mINFO:tensorflow:Running local_init_op.[0m
[31mINFO:tensorflow:Done running local_init_op.[0m
[31mINFO:tensorflow:Finished evaluation at 2019-03-20-02:51:27[0m
[31mINFO:tensorflow:Saving dict for global step 1002: eval_accuracy = 0.94222224, global_step = 1002, loss = 0.31109127[0m
[31mINFO:tensorflow:Saving 'checkpoint_pat

In [6]:
%%time
car_predictor = car_classifier.deploy(initial_instance_count=1,
                                      instance_type='ml.m4.xlarge')

INFO:sagemaker:Creating model with name: sagemaker-tensorflow-scriptmode-2019-03-20-02-45-41-768


ClientError: An error occurred (ValidationException) when calling the CreateModel operation: Could not find model data at s3://sagemaker-us-east-2-050922845674/sagemaker-tensorflow-scriptmode-2019-03-20-02-45-41-768/output/model.tar.gz.

In [5]:
from sagemaker.tensorflow.serving import Model

model = Model(model_data='s3://sagemaker-us-east-2-050922845674/' +
              'sagemaker-tensorflow-scriptmode-2019-03-20-02-45-41-768/source/sourcedir.tar.gz', role=role)

predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge')

INFO:sagemaker:Creating model with name: sagemaker-tensorflow-serving-2019-03-20-03-03-04-779
INFO:sagemaker:Creating endpoint with name sagemaker-tensorflow-serving-2019-03-20-03-03-04-779


--------------------------------------------------------------------------------------------------------------*

ValueError: Error hosting endpoint sagemaker-tensorflow-serving-2019-03-20-03-03-04-779: Failed Reason:  The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint.

In [None]:
with open('/home/ec2-user/train_head.csv') as f:
    for line in f:
        
    content = f.readlines()
# you may also want to remove whitespace characters like `\n` at the end of each line
content = [x.strip() for x in content] 

In [None]:
predictor

Register the model

In [11]:
import boto3
import sagemaker
from sagemaker import get_execution_role

client = boto3.client('sagemaker')
job_name = 'sagemaker-tensorflow-scriptmode-2019-03-17-23-24-01-675'
sess = sagemaker.Session()
role = get_execution_role()

#to specify a model to register, 
#you pass in the specs of the training job which created that model
info = client.describe_training_job(TrainingJobName=job_name)

model_data = info['ModelArtifacts']['S3ModelArtifacts']
training_image = info['AlgorithmSpecification']['TrainingImage']

primary_container = {
    'Image': training_image,
    'ModelDataUrl': 's3://sagemaker-us-east-2-050922845674/sagemaker-tensorflow-scriptmode-2019-03-17-20-56-23-190/source/sourcedir.tar.gz'
}

create_model_response = client.create_model(
    ModelName = job_name,
    ExecutionRoleArn = role,
    PrimaryContainer = primary_container)

print(create_model_response['ModelArn'])

arn:aws:sagemaker:us-east-2:050922845674:model/sagemaker-tensorflow-scriptmode-2019-03-17-23-24-01-675


In [12]:
create_model_response

{'ModelArn': 'arn:aws:sagemaker:us-east-2:050922845674:model/sagemaker-tensorflow-scriptmode-2019-03-17-23-24-01-675',
 'ResponseMetadata': {'RequestId': '68f7341c-e28b-4281-aed9-009edba990af',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '68f7341c-e28b-4281-aed9-009edba990af',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '117',
   'date': 'Wed, 20 Mar 2019 02:09:17 GMT'},
  'RetryAttempts': 0}}

Do a batch transform

In [13]:
import boto3
import sagemaker
import json
from urllib.parse import urlparse
from time import gmtime, strftime

batch_job_name = 'Batch-Transform-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
input_location = 's3://car-classifier-us-east-2/python_readable_data/stanford_cars_dataset/SGM_data_and_models/data/train_head.csv'
output_location = 's3://car-classifier-us-east-2/python_readable_data/stanford_cars_dataset/SGM_data_and_models/batch_output/'

### Create a transform job
sm = boto3.client('sagemaker')

request = \
{
    "TransformJobName": batch_job_name,
    "ModelName": 'sagemaker-tensorflow-scriptmode-2019-03-17-23-24-01-675',
    #"MaxConcurrentTransforms": 0,
    "MaxPayloadInMB": 6,
    "BatchStrategy": "MultiRecord",
    "TransformOutput": {
        "S3OutputPath": output_location
    },
    "TransformInput": {
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": input_location 
            }
        },
        "ContentType": "text/csv",
        "SplitType": "Line",
        "CompressionType": "None"
    },
    "TransformResources": {
            "InstanceType": "ml.m4.xlarge",
            "InstanceCount": 1
    }
}

sm.create_transform_job(**request)

print("Created Transform job with name: ", batch_job_name)

Created Transform job with name:  Batch-Transform-2019-03-20-02-12-08


In [None]:
%%time
iris_predictor = iris_estimator.deploy(initial_instance_count=1,
                                       instance_type='ml.m4.xlarge')

In [None]:
iris_predictor.predict([6.4, 3.2, 4.5, 1.5]) #expected label to be 1