In [5]:
!pip install tensorflow_datasets

Collecting tensorflow_datasets
  Downloading tensorflow_datasets-3.2.1-py3-none-any.whl (3.4 MB)
[K     |################################| 3.4 MB 8.0 MB/s eta 0:00:01
[?25hCollecting promise
  Downloading promise-2.3.tar.gz (19 kB)
Collecting tensorflow-metadata
  Downloading tensorflow_metadata-0.24.0-py3-none-any.whl (44 kB)
[K     |################################| 44 kB 144 kB/s  eta 0:00:01
Collecting dill
  Downloading dill-0.3.2.zip (177 kB)
[K     |################################| 177 kB 91.2 MB/s eta 0:00:01
Collecting future
  Downloading future-0.18.2.tar.gz (829 kB)
[K     |################################| 829 kB 67.1 MB/s eta 0:00:01
Collecting tqdm
  Downloading tqdm-4.49.0-py2.py3-none-any.whl (69 kB)
[K     |################################| 69 kB 309 kB/s  eta 0:00:01
Collecting googleapis-common-protos<2,>=1.52.0
  Downloading googleapis_common_protos-1.52.0-py2.py3-none-any.whl (100 kB)
[K     |################################| 100 kB 1.2 MB/s eta 0:00:01
Bu

In [None]:
import time, os, sys
import sagemaker, boto3

In [59]:
# Push the local MNIST files to S3 under the 'datasets/mnist' key prefix and
# keep the returned location string for use as the training-job input.
# NOTE: `sagemaker_session` is created in the next cell; run that cell first.
datasets = sagemaker_session.upload_data(
    path='tensorflow_datasets/mnist/3.0.1',
    key_prefix='datasets/mnist',
)

In [8]:
# AWS handles shared by the rest of the notebook. A single boto3 session backs
# both the SageMaker SDK session and the low-level 'sagemaker' client.
sess = boto3.Session()
sagemaker_session = sagemaker.Session(boto_session=sess)
sm = sess.client('sagemaker')

# Execution role that is later handed to the TensorFlow estimator as `role`.
role = sagemaker.get_execution_role()

In [6]:
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent

In [9]:
# Create the experiment that the MNIST trial below is attached to.
# NOTE(review): re-running this cell with the same experiment_name presumably
# fails once the experiment exists -- confirm before a Restart & Run All.
training_experiment = Experiment.create(
    experiment_name="sagemaker-simple-mnist-cnn",
    description="Experimenting-with-sagemaker",
    sagemaker_boto_client=sm,
)

In [37]:
# Hierarchy: experiment > trials.
# Create one trial and attach it to the experiment by name, using the same
# low-level SageMaker client as the experiment itself.
mnist_trial = Trial.create(
    trial_name='mnist-trial',
    experiment_name=training_experiment.experiment_name,
    sagemaker_boto_client=sm,
)

In [52]:
# Passed to tf_estimator.fit(...) so the training job is associated with the
# experiment and trial created in the earlier cells.
# NOTE(review): the display name appears to reject underscores and spaces
# (alphanumerics and hyphens only?) -- confirm against the ExperimentConfig docs.
experiment_config = {
    "ExperimentName": training_experiment.experiment_name,
    "TrialName": mnist_trial.trial_name,
    "TrialComponentDisplayName": "MnistCNN",
}

In [62]:
from sagemaker.tensorflow import TensorFlow

# Hyperparameters handed to the estimator (and so to the mnist.py entry point).
# NOTE(review): 'momentum' and 'weight-decay' are sent alongside
# optimizer='adam'; whether mnist.py uses them is not visible here.
hyperparams = {
    'epochs': 2,
    'learning-rate': 0.001,
    'batch-size': 256,
    'weight-decay': 2e-4,
    'momentum': 0.9,
    'optimizer': 'adam',
}


# Job outputs go under the 'jobs' prefix of the session's default bucket.
bucket_name = sagemaker_session.default_bucket()
output_path = f's3://{bucket_name}/jobs'

# One tracked metric, 'val_acc', captured with a regex whose single group
# matches a run of digits and dots following the text 'val_acc: '.
metric_definitions = [
    {
        'Name': 'val_acc',
        'Regex': 'val_acc: ([0-9\\.]+)',
    },
]

# Script-mode TensorFlow 1.15.2 / py3 estimator that runs mnist.py on a single
# ml.m4.xlarge instance. output_path and code_location both point at the same
# 'jobs' prefix (output_path with a trailing slash).
tf_estimator = TensorFlow(
    # what to run
    entry_point='mnist.py',
    hyperparameters=hyperparams,
    metric_definitions=metric_definitions,
    # runtime
    framework_version='1.15.2',
    py_version='py3',
    script_mode=True,
    # where to run
    role=role,
    sagemaker_session=sagemaker_session,
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    # where results and code are stored
    output_path=f'{output_path}/',
    code_location=output_path,
)

# Job name carries a UTC timestamp so each launch gets a distinct name.
launch_stamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
job_name = f'tensorflow-mnist-{launch_stamp}'

# All three input channels point at the same uploaded S3 location.
input_channels = dict.fromkeys(('training', 'validation', 'eval'), datasets)

# Launch the training job and record it under the experiment/trial.
tf_estimator.fit(
    input_channels,
    job_name=job_name,
    experiment_config=experiment_config,
)

INFO:sagemaker:Creating training-job with name: tensorflow-mnist-2020-09-24-22-03-51


2020-09-24 22:03:51 Starting - Starting the training job...
2020-09-24 22:04:02 Starting - Launching requested ML instances......
2020-09-24 22:05:20 Starting - Preparing the instances for training.........
2020-09-24 22:06:34 Downloading - Downloading input data...
2020-09-24 22:07:13 Training - Downloading the training image...
2020-09-24 22:07:53 Uploading - Uploading generated training model
2020-09-24 22:07:53 Completed - Training job completed
[0m
[34m2020-09-24 22:07:41,011 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2020-09-24 22:07:41,017 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-09-24 22:07:42,549 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-09-24 22:07:42,567 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-09-24 22:07:42,585 sagemaker-containers INFO     No GPUs detected (normal if no

Summary:
-make a session with sagemaker.Session() and boto3.Session()
-sagemaker_session.upload_data() uploads the local data to S3 and returns its location as a string (an S3 URI)
-create an experiment with smexperiments.experiment.Experiment
-create a trial with smexperiments.trial.Trial
-create an estimator with sagemaker.tensorflow.TensorFlow << pass it an entry-point script (here mnist.py) containing the model definition and training code
-fit the estimator (the data location must be passed as a string)
-check the experiment tab for graphs

In [64]:
# Display the S3 location string that upload_data returned for the MNIST files.
datasets

's3://sagemaker-us-east-1-638457420143/datasets/mnist'