In [2]:
import sys

In [3]:
# Upgrade pip itself, then boto3, sagemaker and tensorflow to their latest releases.
# Running pip as `{sys.executable} -m pip` installs into the interpreter that backs
# this kernel, rather than whichever `pip` happens to be first on PATH.
!{sys.executable} -m pip install --upgrade pip
!{sys.executable} -m pip install --upgrade boto3
!{sys.executable} -m pip install --upgrade sagemaker
!{sys.executable} -m pip install --upgrade tensorflow

Keyring is skipped due to an exception: 'keyring.backends'
[0mKeyring is skipped due to an exception: 'keyring.backends'
Collecting boto3
  Downloading boto3-1.26.45-py3-none-any.whl (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.7/132.7 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting botocore<1.30.0,>=1.29.45
  Downloading botocore-1.29.45-py3-none-any.whl (10.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.3/10.3 MB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: botocore, boto3
  Attempting uninstall: botocore
    Found existing installation: botocore 1.29.24
    Uninstalling botocore-1.29.24:
      Successfully uninstalled botocore-1.29.24
  Attempting uninstall: boto3
    Found existing installation: boto3 1.26.24
    Uninstalling boto3-1.26.24:
      Successfully uninstalled boto3-1.26.24
[31mERROR: pip's dependency resolver does not currently take

In [4]:
import json
import boto3
import sagemaker
from sagemaker.session import Session
from sagemaker import get_execution_role
from sagemaker.experiments.run import Run

In [5]:
# boto3 handles: the session, the region it resolved, and a SageMaker API client.
boto_sess = boto3.Session()
region = boto_sess.region_name
sm = boto_sess.client("sagemaker")

# SageMaker SDK handles: the session, the execution role, and the session's default bucket.
sagemaker_session = Session()
role = get_execution_role()
default_bucket = sagemaker_session.default_bucket()

### Prepare the data for training

In [6]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd

In [7]:
# Create the local directory the dataset files are downloaded into;
# `-p` makes this a no-op when the directory already exists.
!mkdir -p datasets

In [8]:
# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# Local destinations for the four MNIST NumPy files
train_path = "datasets/input_train.npy"
test_path = "datasets/input_test.npy"
train_labels_path = "datasets/input_train_labels.npy"
test_labels_path = "datasets/input_test_labels.npy"

# (S3 object key, local destination) for every file to fetch, in download order
mnist_downloads = (
    ("datasets/image/MNIST/numpy/input_train.npy", train_path),
    ("datasets/image/MNIST/numpy/input_test.npy", test_path),
    ("datasets/image/MNIST/numpy/input_train_labels.npy", train_labels_path),
    ("datasets/image/MNIST/numpy/input_test_labels.npy", test_labels_path),
)

# Download the train/test images and labels from S3
s3 = boto3.client("s3")
for s3_key, local_path in mnist_downloads:
    s3.download_file("sagemaker-sample-files", s3_key, local_path)

In [9]:
# Read the arrays from disk and reshape them to the expected sample counts;
# a file with a different number of elements makes the reshape raise.
x_train = np.load(train_path).reshape((60000, 28, 28))
x_test = np.load(test_path).reshape((10000, 28, 28))
y_train = np.load(train_labels_path).reshape((60000,))
y_test = np.load(test_labels_path).reshape((10000,))

# Scale images to the [0, 1] range, then append a trailing channel axis
# so each image has shape (28, 28, 1)
x_train = (x_train.astype("float32") / 255)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255)[..., np.newaxis]

print("x_train shape:", x_train.shape)
print(len(x_train), "train samples")
print(len(x_test), "test samples")

# One-hot encode the integer class labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)


x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


### Create a model

In [10]:
def get_model(dropout=0.5):
    """Build and compile a small convolutional classifier.

    The network stacks two Conv2D + MaxPooling2D stages, flattens the result,
    applies dropout and ends in a softmax layer with ``num_classes`` units.
    The input shape and the class count are read from the notebook-level
    ``input_shape`` and ``num_classes`` variables.

    Args:
        dropout: Rate passed to the ``Dropout`` layer that precedes the output layer.

    Returns:
        A ``keras.Sequential`` model compiled with categorical cross-entropy
        loss, the Adam optimizer and an accuracy metric.
    """
    model_input = keras.Input(shape=input_shape)

    conv_stages = [
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dropout(dropout),
        layers.Dense(num_classes, activation="softmax"),
    ]

    network = keras.Sequential([model_input, *conv_stages, *classifier_head])
    network.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    return network

### Define the Keras callback to log metrics to the run

In [11]:
class ExperimentCallback(keras.callbacks.Callback):
    """Keras callback that logs every epoch's metrics to an experiment run.

    At the end of each epoch, every entry of the ``logs`` dict supplied by
    Keras is sent to ``run.log_metric`` (with the epoch index as the step)
    and echoed to stdout.
    """

    def __init__(self, run, model, x_test, y_test):
        """Store the run, the model and the test arrays.

        Args:
            run: Object exposing ``log_metric(name=..., value=..., step=...)``.
            model: The model being trained.
            x_test: Test inputs; stored on the instance, not used by this class itself.
            y_test: Test targets; stored on the instance, not used by this class itself.
        """
        # Initialise the base Callback so its own bookkeeping attributes exist.
        super().__init__()
        self.run = run
        # Register the model through the base-class API instead of assigning
        # `self.model` directly: direct assignment can fail on Keras versions
        # where `model` is a property managed by the base class.
        self.set_model(model)
        self.x_test = x_test
        self.y_test = y_test

    def on_epoch_end(self, epoch, logs=None):
        """Log and print each metric reported for the finished epoch.

        Args:
            epoch: Index of the epoch that just ended; used as the metric step.
            logs: Mapping of metric name to value. ``None`` (the declared
                default) is treated as an empty mapping.
        """
        # `logs` defaults to None, so guard before iterating.
        for key, value in (logs or {}).items():
            self.run.log_metric(name=key, value=value, step=epoch)
            print("{} -> {}".format(key, value))

### Train and track it in an experiment

In [12]:
# Hyperparameters for this run
batch_size = 256
epochs = 5
dropout = 0.5

model = get_model(dropout)

experiment_name = "local-keras-experiment"
with Run(experiment_name=experiment_name, sagemaker_session=sagemaker_session) as run:
    # Record the hyperparameters on the run
    for param_name, param_value in (
        ("batch_size", batch_size),
        ("epochs", epochs),
        ("dropout", dropout),
    ):
        run.log_parameter(param_name, param_value)

    # Attach the dataset files to the run as inputs
    for dataset_file in (
        "datasets/input_train.npy",
        "datasets/input_test.npy",
        "datasets/input_train_labels.npy",
        "datasets/input_test_labels.npy",
    ):
        run.log_file(dataset_file, is_output=False)

    # Train locally; the callback forwards per-epoch metrics to the run
    metrics_callback = ExperimentCallback(run, model, x_test, y_test)
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.1,
        callbacks=[metrics_callback],
    )

    # Evaluate on the test arrays, then report and log the final numbers
    score = model.evaluate(x_test, y_test, verbose=0)
    final_loss, final_accuracy = score[0], score[1]
    print("Test loss:", final_loss)
    print("Test accuracy:", final_accuracy)

    run.log_metric(name="Final Test Loss", value=final_loss)
    run.log_metric(name="Final Test Accuracy", value=final_accuracy)

Epoch 1/5
accuracy -> 0.8456851840019226
val_loss -> 0.10680681467056274
val_accuracy -> 0.9728333353996277
Epoch 2/5
accuracy -> 0.9560185074806213
val_loss -> 0.07074430584907532
val_accuracy -> 0.981333315372467
Epoch 3/5
accuracy -> 0.9682962894439697
val_loss -> 0.06288964301347733
val_accuracy -> 0.9826666712760925
Epoch 4/5
accuracy -> 0.9725925922393799
val_loss -> 0.05115329846739769
val_accuracy -> 0.9861666560173035
Epoch 5/5
accuracy -> 0.9769814610481262
val_loss -> 0.048014428466558456
val_accuracy -> 0.9863333106040955
Test loss: 0.04682110249996185
Test accuracy: 0.9853000044822693
