In [1]:
from sagemaker import get_execution_role
from sagemaker.session import Session

# S3 bucket name plus key prefix under which this project's code, data and
# model artifacts are stored. Point this at a different bucket/prefix if you wish.
bucket = 'car-classifier-us-east-2/python_readable_data/stanford_cars_dataset/SGM_data_and_models'

# S3 URI where the results of model training are saved.
model_artifacts_location = 's3://' + bucket + '/artifacts'

# S3 URI handed to the estimator as `model_dir`.
# Presumably where the training script writes TensorBoard-readable output -- confirm
# against the training script.
model_dir = 's3://' + bucket + '/tensorboard'

# IAM execution role that gives SageMaker access to resources in your AWS account.
role = get_execution_role()

In [2]:
from sagemaker.tensorflow import TensorFlow

In [7]:
# TensorFlow estimator that runs CNN_TF_script.py as a SageMaker training job.
# Not passed: training_steps, evaluation_steps (both were commented out).
car_classifier = TensorFlow(
    # What to run, and under which IAM identity.
    entry_point='CNN_TF_script.py',
    role=role,
    framework_version='1.12.0',
    py_version='py3',
    # Training cluster: two ml.m4.xlarge instances with parameter-server
    # distribution enabled.
    train_instance_count=2,
    train_instance_type='ml.m4.xlarge',
    distributions={'parameter_server': {'enabled': True}},
    train_volume_size=30,
    train_max_run=600,  # seconds to run before terminating
    # S3 locations configured in the setup cell.
    output_path=model_artifacts_location,
    model_dir=model_dir,
)

In [8]:
%%time
import boto3

# use the region-specific sample data bucket
train_data_location = 's3://{}/data/train_head.csv'.format(bucket)
test_data_location = 's3://{}/data/test.csv'.format(bucket)

car_classifier.fit({'train' : train_data_location,
                   'test' : test_data_location
                  })

INFO:sagemaker:Creating training-job with name: sagemaker-tensorflow-scriptmode-2019-03-17-23-24-01-675


2019-03-17 23:24:02 Starting - Starting the training job...
2019-03-17 23:24:03 Starting - Launching requested ML instances...
2019-03-17 23:25:02 Starting - Preparing the instances for training.........
2019-03-17 23:26:15 Downloading - Downloading input data...
2019-03-17 23:26:54 Training - Training image download completed. Training in progress..
[31m2019-03-17 23:26:57,190 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[31m2019-03-17 23:26:57,198 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[31m2019-03-17 23:26:57,410 sagemaker_tensorflow_container.training INFO     Running distributed training job with parameter servers[0m
[31m2019-03-17 23:26:57,410 sagemaker_tensorflow_container.training INFO     Launching parameter server process[0m
[31m2019-03-17 23:26:57,410 sagemaker_tensorflow_container.training INFO     Running distributed training job with parameter servers[0m
[31m2019-03-17 23:26:

[31mINFO:tensorflow:loss = 0.32766607, step = 130[0m
[31mINFO:tensorflow:loss = 0.32766607[0m
[32mINFO:tensorflow:global_step/sec: 5.3142[0m
[32mINFO:tensorflow:loss = 0.36108345, step = 142 (2.316 sec)[0m
[32mINFO:tensorflow:loss = 0.36108345 (2.316 sec)[0m
[32mINFO:tensorflow:global_step/sec: 5.91215[0m
[32mINFO:tensorflow:loss = 0.7185116, step = 154 (1.831 sec)[0m
[32mINFO:tensorflow:loss = 0.7185116 (1.831 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.76731[0m
[32mINFO:tensorflow:loss = 0.047343705, step = 166 (1.807 sec)[0m
[32mINFO:tensorflow:loss = 0.047343705 (1.807 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.44613[0m
[32mINFO:tensorflow:loss = 0.18300012, step = 178 (1.870 sec)[0m
[32mINFO:tensorflow:loss = 0.18300012 (1.870 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.49103[0m
[32mINFO:tensorflow:loss = 0.81595933, step = 190 (1.863 sec)[0m
[32mINFO:tensorflow:loss = 0.81595933 (1.863 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.148

[32mINFO:tensorflow:global_step/sec: 7.07173[0m
[32mINFO:tensorflow:loss = 0.051930893, step = 649 (1.841 sec)[0m
[32mINFO:tensorflow:loss = 0.051930893 (1.841 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.52825[0m
[31mINFO:tensorflow:loss = 0.06497176, step = 651 (8.988 sec)[0m
[31mINFO:tensorflow:loss = 0.06497176 (8.988 sec)[0m
[32mINFO:tensorflow:loss = 2.369699, step = 661 (1.903 sec)[0m
[32mINFO:tensorflow:loss = 2.369699 (1.903 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.39077[0m
[32mINFO:tensorflow:loss = 6.675698e-06, step = 673 (1.905 sec)[0m
[32mINFO:tensorflow:loss = 6.675698e-06 (1.905 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.38209[0m
[32mINFO:tensorflow:global_step/sec: 6.40348[0m
[32mINFO:tensorflow:loss = 0.20648156, step = 686 (1.877 sec)[0m
[32mINFO:tensorflow:loss = 0.20648156 (1.877 sec)[0m
[32mINFO:tensorflow:global_step/sec: 6.11852[0m
[32mINFO:tensorflow:loss = 0.27345634, step = 698 (1.887 sec)[0m
[32mINFO:tensorflow:


2019-03-17 23:29:41 Completed - Training job completed
Billable seconds: 412
CPU times: user 1.07 s, sys: 60.4 ms, total: 1.13 s
Wall time: 6min 15s


In [None]:
%%time
iris_predictor = iris_estimator.deploy(initial_instance_count=1,
                                       instance_type='ml.m4.xlarge')

In [None]:
# NOTE(review): these four values look like an iris sample left over from a
# template notebook; confirm the payload matches the input expected by the model
# trained from CNN_TF_script.py before relying on the label noted below.
sample_features = [6.4, 3.2, 4.5, 1.5]
iris_predictor.predict(sample_features)  # expected label to be 1