### Local SageMaker

In [7]:
import os

import boto3
import keras
import numpy as np
import sagemaker
import tensorflow
from keras.datasets import fashion_mnist
from sagemaker.tensorflow import TensorFlow

Check the installed TensorFlow and Keras versions.

In [2]:
# Show the version of the TensorFlow package imported by this kernel.
print(tensorflow.__version__)

2.2.0


In [4]:
# Show the version of the Keras package imported by this kernel.
print(keras.__version__)

2.3.1


In [5]:
# Create the SageMaker session in the AWS region configured with the AWS CLI.
sess = sagemaker.Session()
# To target a specific region instead:
# sess = sagemaker.Session(boto3.session.Session(region_name='eu-west-1'))

# This doesn't work on your local machine because it doesn't have an IAM role :)
# role = sagemaker.get_execution_role()

# Use the SageMaker role you're already using, it will work just fine.
# Set the SAGEMAKER_ROLE environment variable to supply it without editing the
# notebook; otherwise replace the 'YOUR_ROLE' placeholder below.
role = os.environ.get('SAGEMAKER_ROLE', 'YOUR_ROLE')


In [8]:
# Load Fashion-MNIST as (images, labels) pairs for the training and validation splits.
train_split, val_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_val, y_val = val_split


Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading data from http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz


In [9]:
# Write each split to its own archive under ./data, storing the images under
# the 'image' key and the labels under the 'label' key.
os.makedirs("./data", exist_ok=True)
for archive_path, images, labels in (
    ('./data/training', x_train, y_train),
    ('./data/validation', x_val, y_val),
):
    np.savez(archive_path, image=images, label=labels)

In [10]:
# Train on local data. S3 URIs would work too.
training_input_path   = 'file://data/training.npz'
validation_input_path = 'file://data/validation.npz'
# Store model locally. A S3 URI would work too.
output_path           = 'file:///tmp/model/'

The notebook supports three different versions of the Keras script:
* mnist_keras_tf.py: Keras in symbolic mode with TensorFlow 1.15
* mnist_keras_tf20_compat.py: Keras in symbolic mode with TensorFlow 2.0
* mnist_keras_tf20_eager.py: Keras in eager mode with TensorFlow 2.0

Pick one as the estimator's `entry_point`; the only other change needed is to set the matching TensorFlow version (`framework_version`) when configuring the TensorFlow estimator.


In [15]:
# Configure a TensorFlow estimator that runs the Keras training script in
# SageMaker local mode. `TensorFlow` is imported in the notebook's import cell.
tf_estimator = TensorFlow(
    entry_point='mnist_keras_tf20_compat.py',  # training script to run
    role=role,
    instance_count=1,
    instance_type='local',  # Train on the local CPU ('local_gpu' if it has a GPU)
    framework_version='2.0',  # must match the TensorFlow version the entry point targets
    py_version='py3',
    hyperparameters={'epochs': 1},  # train for a single epoch
    output_path=output_path,
)

Windows Support for Local Mode is Experimental


In [16]:
# Train! The first run pulls the SageMaker CPU/GPU container for TensorFlow to your local machine.
# Docker must be running and docker-compose must be installed.

# Map each input channel name to the location of its dataset.
input_channels = {
    'training': training_input_path,
    'validation': validation_input_path,
}
tf_estimator.fit(input_channels)

Creating network "sagemaker-local" with the default driver
Creating 1nm7dhp11g-algo-1-5jdu7 ... 
Creating 1nm7dhp11g-algo-1-5jdu7 ... done
Docker Compose is now in the Docker CLI, try `docker compose up`

Attaching to 1nm7dhp11g-algo-1-5jdu7
[36m1nm7dhp11g-algo-1-5jdu7 |[0m 2021-06-26 09:55:54,141 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
[36m1nm7dhp11g-algo-1-5jdu7 |[0m 2021-06-26 09:55:54,151 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36m1nm7dhp11g-algo-1-5jdu7 |[0m 2021-06-26 09:55:54,200 botocore.credentials INFO     Found credentials in environment variables.
[36m1nm7dhp11g-algo-1-5jdu7 |[0m 2021-06-26 09:55:54,512 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36m1nm7dhp11g-algo-1-5jdu7 |[0m 2021-06-26 09:55:54,538 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36m1nm7dhp11g-algo-1-5jdu7 |[0m 2021-06-26

[36m1nm7dhp11g-algo-1-5jdu7 |[0m Collecting keras-metrics
[36m1nm7dhp11g-algo-1-5jdu7 |[0m   Downloading keras_metrics-1.1.0-py2.py3-none-any.whl (5.6 kB)
[36m1nm7dhp11g-algo-1-5jdu7 |[0m Installing collected packages: keras-metrics
[36m1nm7dhp11g-algo-1-5jdu7 |[0m Successfully installed keras-metrics-1.1.0
[36m1nm7dhp11g-algo-1-5jdu7 |[0m x_train shape: (60000, 28, 28, 1)
[36m1nm7dhp11g-algo-1-5jdu7 |[0m 60000 train samples
[36m1nm7dhp11g-algo-1-5jdu7 |[0m 10000 test samples
[36m1nm7dhp11g-algo-1-5jdu7 |[0m Model: "sequential"
[36m1nm7dhp11g-algo-1-5jdu7 |[0m _________________________________________________________________
[36m1nm7dhp11g-algo-1-5jdu7 |[0m Layer (type)                 Output Shape              Param #   
[36m1nm7dhp11g-algo-1-5jdu7 |[0m conv2d (Conv2D)              (None, 28, 28, 64)        640       
[36m1nm7dhp11g-algo-1-5jdu7 |[0m _________________________________________________________________
[36m1nm7dhp11g-algo-1-5jdu7 |[0m batch_norma

[36m1nm7dhp11g-algo-1-5jdu7 exited with code 0
[0mAborting on container exit...
===== Job Complete =====


In [19]:
# List the local Docker images; the TensorFlow training container pulled for the local job should be among them.
!docker images 

REPOSITORY                                                            TAG           IMAGE ID       CREATED        SIZE
763104351884.dkr.ecr.eu-central-1.amazonaws.com/tensorflow-training   2.0-cpu-py3   a19ddacd606a   3 months ago   1.97GB


In [18]:
# List the contents of the model archive that training wrote under the local output path (/tmp/model/).
!tar tvfz /tmp/model/model.tar.gz

drwxrwxrwx  0 0      0           0 Jun 26 11:57 model/
drwxrwxrwx  0 0      0           0 Jun 26 11:57 model/1/
-rw-rw-rw-  0 0      0      240772 Jun 26 11:57 model/1/saved_model.pb
drwxrwxrwx  0 0      0           0 Jun 26 11:57 model/1/variables/
-rw-rw-rw-  0 0      0    19520132 Jun 26 11:57 model/1/variables/variables.data-00000-of-00001
-rw-rw-rw-  0 0      0        1500 Jun 26 11:57 model/1/variables/variables.index
