In [1]:
from sagemaker import get_execution_role
from sagemaker.session import Session

# S3 bucket for saving code and model artifacts.
# Feel free to specify a different bucket here if you wish.
bucket = 'car-classifier-us-east-2/python_readable_data/stanford_cars_dataset/SGM_data_and_models'

#IAM execution role that gives SageMaker access to resources in your AWS account.
role = get_execution_role()

Define Model

In [2]:
from sagemaker.tensorflow import TensorFlow

car_classifier = TensorFlow(entry_point='CNN_TF_script.py',
                            role=role,
                            train_volume_size = 30,
                            train_max_run = 3600, #seconds to run before terminating
                            py_version = 'py3',
                            train_instance_count=3,
                            train_instance_type='ml.m4.xlarge',                           
                            framework_version = '1.12.0',
                            distributions={'parameter_server': {'enabled': True}}
                           )

Train

In [3]:
%%time
import boto3

# use the region-specific sample data bucket
train_data_location = 's3://{}/data/train.csv'.format(bucket)
test_data_location = 's3://{}/data/test.csv'.format(bucket)

car_classifier.fit({'train' : train_data_location,
                   'test' : test_data_location
                  })

INFO:sagemaker:Creating training-job with name: sagemaker-tensorflow-scriptmode-2019-03-21-19-19-15-422


2019-03-21 19:19:15 Starting - Starting the training job...
2019-03-21 19:19:17 Starting - Launching requested ML instances......
2019-03-21 19:20:46 Starting - Preparing the instances for training......
2019-03-21 19:21:41 Downloading - Downloading input data......
2019-03-21 19:22:47 Training - Downloading the training image..
[32m2019-03-21 19:22:54,158 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[32m2019-03-21 19:22:54,164 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[32m2019-03-21 19:22:54,424 sagemaker_tensorflow_container.training INFO     Running distributed training job with parameter servers[0m
[32m2019-03-21 19:22:54,424 sagemaker_tensorflow_container.training INFO     Launching parameter server process[0m
[32m2019-03-21 19:22:54,425 sagemaker_tensorflow_container.training INFO     Running distributed training job with parameter servers[0m
[32m2019-03-21 19:22:54,438 sagemaker_tenso


2019-03-21 19:22:50 Training - Training image download completed. Training in progress.[32mINFO:tensorflow:TF_CONFIG environment variable: {'cluster': {'master': ['algo-1:2222'], 'ps': ['algo-1:2223', 'algo-2:2223', 'algo-3:2223'], 'worker': ['algo-2:2222', 'algo-3:2222']}, 'environment': 'cloud', 'task': {'index': 0, 'type': 'worker'}}[0m
[32mINFO:tensorflow:Using config: {'_model_dir': 's3://sagemaker-us-east-2-050922845674/sagemaker-tensorflow-scriptmode-2019-03-21-19-19-15-422/model', '_tf_random_seed': None, '_save_summary_steps': 10, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': device_filters: "/job:ps"[0m
[32mdevice_filters: "/job:worker/task:0"[0m
[32mallow_soft_placement: true[0m
[32mgraph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }[0m
[32m}[0m
[32m, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 10, '_train_distribute': None, '_device_fn': None, '_protoco

[32mINFO:tensorflow:global_step/sec: 1.33483[0m
[32mINFO:tensorflow:loss = 0.69377303, step = 153 (24.720 sec)[0m
[32mINFO:tensorflow:loss = 0.69377303 (24.720 sec)[0m
[32mINFO:tensorflow:global_step/sec: 1.37584[0m
[31mINFO:tensorflow:loss = 0.69091845, step = 159 (20.359 sec)[0m
[31mINFO:tensorflow:loss = 0.69091845 (20.359 sec)[0m
[33mINFO:tensorflow:loss = 0.6888151, step = 165 (22.412 sec)[0m
[33mINFO:tensorflow:loss = 0.6888151 (22.412 sec)[0m
[32mINFO:tensorflow:global_step/sec: 1.38563[0m
[32mINFO:tensorflow:global_step/sec: 1.4308[0m
[31mINFO:tensorflow:loss = 0.68229043, step = 186 (20.435 sec)[0m
[31mINFO:tensorflow:loss = 0.68229043 (20.435 sec)[0m
[32mINFO:tensorflow:loss = 0.68549836, step = 187 (23.298 sec)[0m
[32mINFO:tensorflow:loss = 0.68549836 (23.298 sec)[0m
[33mINFO:tensorflow:loss = 0.6955304, step = 194 (20.388 sec)[0m
[33mINFO:tensorflow:loss = 0.6955304 (20.388 sec)[0m
[32mINFO:tensorflow:global_step/sec: 1.42363[0m
[32mINFO:t

[32mINFO:tensorflow:global_step/sec: 1.49625[0m
[33mINFO:tensorflow:loss = 0.6307268, step = 666 (23.874 sec)[0m
[33mINFO:tensorflow:loss = 0.6307268 (23.874 sec)[0m
[31mINFO:tensorflow:loss = 0.6668575, step = 668 (21.307 sec)[0m
[31mINFO:tensorflow:loss = 0.6668575 (21.307 sec)[0m
[32mINFO:tensorflow:loss = 0.6547414, step = 673 (21.902 sec)[0m
[32mINFO:tensorflow:loss = 0.6547414 (21.902 sec)[0m
[32mINFO:tensorflow:global_step/sec: 1.18226[0m
[32mINFO:tensorflow:global_step/sec: 1.28728[0m
[33mINFO:tensorflow:loss = 0.60304046, step = 696 (23.637 sec)[0m
[33mINFO:tensorflow:loss = 0.60304046 (23.637 sec)[0m
[31mINFO:tensorflow:loss = 0.5985784, step = 697 (22.069 sec)[0m
[31mINFO:tensorflow:loss = 0.5985784 (22.069 sec)[0m
[32mINFO:tensorflow:global_step/sec: 1.3914[0m
[32mINFO:tensorflow:loss = 0.63928807, step = 704 (22.281 sec)[0m
[32mINFO:tensorflow:loss = 0.63928807 (22.281 sec)[0m
[32mINFO:tensorflow:global_step/sec: 1.27772[0m
[32mINFO:tenso

[31mINFO:tensorflow:Evaluation [20/100][0m
[33m2019-03-21 19:38:30,444 sagemaker_tensorflow_container.training INFO     master algo-1 is still up, waiting for it to exit[0m
[32m2019-03-21 19:38:32,342 sagemaker_tensorflow_container.training INFO     master algo-1 is still up, waiting for it to exit[0m
[31mINFO:tensorflow:Evaluation [30/100][0m
[31mINFO:tensorflow:Finished evaluation at 2019-03-21-19:38:33[0m
[31mINFO:tensorflow:Saving dict for global step 1001: eval_accuracy = 0.7141604, global_step = 1001, loss = 0.62365764[0m
[31mINFO:tensorflow:Saving 'checkpoint_path' summary for global step 1001: s3://sagemaker-us-east-2-050922845674/sagemaker-tensorflow-scriptmode-2019-03-21-19-19-15-422/model/model.ckpt-1001[0m
[33m2019-03-21 19:38:40,675 sagemaker_tensorflow_container.training INFO     master algo-1 is still up, waiting for it to exit[0m
[32m2019-03-21 19:38:42,372 sagemaker_tensorflow_container.training INFO     master algo-1 is still up, waiting for it to exi

Deploy from object

In [5]:
%%time
car_predictor = car_classifier.deploy(initial_instance_count=1,
                                      instance_type='ml.m4.xlarge')

INFO:sagemaker:Creating model with name: sagemaker-tensorflow-scriptmode-2019-03-20-22-33-25-091
INFO:sagemaker:Creating endpoint with name sagemaker-tensorflow-scriptmode-2019-03-20-22-33-25-091


---------------------------------------------------!CPU times: user 274 ms, sys: 18.6 ms, total: 292 ms
Wall time: 4min 18s


Deploy from file

In [23]:
from sagemaker.tensorflow.serving import Model

model = Model(model_data='s3://sagemaker-us-east-2-050922845674/' +
              'sagemaker-tensorflow-scriptmode-2019-03-20-22-33-25-091/output/model.tar.gz', role=role)

predictor = model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

INFO:sagemaker:Creating model with name: sagemaker-tensorflow-serving-2019-03-20-22-59-50-675
INFO:sagemaker:Creating endpoint with name sagemaker-tensorflow-serving-2019-03-20-22-59-50-675


---------------------------------------------------------------!

Predict

In [12]:
import numpy as np

head_line = np.loadtxt('/home/ec2-user/SageMaker/train_head.csv', delimiter=',')

head_line = head_line[:, :-2].reshape((30, 200, 200, 1))

In [22]:
#object
car_predictor.predict(head_line[0,:])

{'predictions': [{'probabilities': [1.0, 0.0],
   'logits': [120.136, -124.957],
   'classes': 0}]}

In [24]:
#file
car_predictor.predict(head_line[0,:])

{'predictions': [{'probabilities': [1.0, 0.0],
   'logits': [120.136, -124.957],
   'classes': 0}]}

Clean up

In [1]:
import boto3

client = boto3.client('sagemaker')

In [5]:
client.delete_endpoint(EndpointName = 'sagemaker-tensorflow-scriptmode-2019-03-20-22-33-25-091')

{'ResponseMetadata': {'RequestId': 'fd14ade2-dfe2-4589-bf3f-d95f44b3746f',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'fd14ade2-dfe2-4589-bf3f-d95f44b3746f',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Wed, 20 Mar 2019 23:09:15 GMT'},
  'RetryAttempts': 0}}

In [6]:
client.delete_model(ModelName = 'sagemaker-tensorflow-serving-2019-03-20-22-59-50-675')

{'ResponseMetadata': {'RequestId': '0de5a353-b9ec-446a-9039-0a813fbda7bb',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '0de5a353-b9ec-446a-9039-0a813fbda7bb',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Wed, 20 Mar 2019 23:09:33 GMT'},
  'RetryAttempts': 0}}