In [None]:
!pip install sagemaker-experiments

In [None]:
from sagemaker import get_execution_role
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
import time
from sagemaker.tensorflow import TensorFlow

# Execution role obtained from the SageMaker SDK; it is passed to the
# TensorFlow estimator as `role=` when the training job is configured.
role = get_execution_role()

In [None]:
# Register the experiment that the trial created in the next cell attaches to.
# NOTE(review): presumably this call fails if the experiment already exists,
# so it may need to be skipped on a re-run — confirm.
our_experiment = Experiment.create(
    description="It's public",
    experiment_name="our-experiment-3",
)

In [None]:
# Name of the experiment the new trial is attached to.
experiment_name = "our-experiment-3"

# A Unix-timestamp suffix keeps the trial name distinct between runs;
# the same name is reused as the training job name further down.
trial_name = "someone-cnn-{}".format(int(time.time()))

trial = Trial.create(experiment_name=experiment_name, trial_name=trial_name)

In [None]:
%%writefile mnist_exp_cnn.py

import tensorflow as tf
import argparse
import os, time
import numpy as np
import json
import gzip, pickle

if __name__ == "__main__":
    
    start = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_dir', type=str)
    parser.add_argument('--sm-model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAINING'))
    parser.add_argument('--hosts', type=list, default=json.loads(os.environ.get('SM_HOSTS')))
    parser.add_argument('--current-host', type=str, default=os.environ.get('SM_CURRENT_HOST'))
    args, _ = parser.parse_known_args()
  
    input_path = os.path.join(args.train, 'dataset.pkl.gz')
    with gzip.open(input_path, 'rb') as f:
        train_data, train_label, test_data, test_label = pickle.load(f)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Reshape((28, 28, 1)),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    model.fit(train_data, train_label, epochs=2, verbose=2)
    model.evaluate(test_data, test_label, verbose=0)
    
    model.save(os.path.join(args.sm_model_dir, '000000001'), 'my_model.h5')
        
    print("training time: {}".format(time.time() - start))

In [None]:
# Configure the training job that runs mnist_exp_cnn.py on one ml.m5.xlarge
# instance. The metric regex captures the number from the
# "training time: <seconds>" line the script prints, under the metric name
# 'Training:seconds'.
estimator = TensorFlow(
    entry_point='mnist_exp_cnn.py',
    role=role,
    train_instance_type='ml.m5.xlarge',
    train_instance_count=1,
    framework_version='2.1.0',
    py_version='py3',
    metric_definitions=[
        dict(Name='Training:seconds', Regex='training time: ([0-9\\.]+)'),
    ],
)

# S3 location of the pickled dataset the training script reads.
training_data_uri = 's3://sagemaker-us-east-1-233037139193/mbp3/dataset/dataset.pkl.gz'

# Launch the job under the trial's name and attach it to that trial.
estimator.fit(
    inputs=training_data_uri,
    job_name=trial_name,
    experiment_config=dict(
        TrialName=trial.trial_name,
        TrialComponentDisplayName="Training",
    ),
)