In [1]:
from google.colab import drive
# Mount Google Drive into the runtime's filesystem at /content/drive.
drive.mount(mountpoint="/content/drive")

Mounted at /content/drive


In [None]:
import os
import sagemaker
from sagemaker import get_execution_role

# Key prefix under which this demo's objects are stored in S3.
prefix = "sagemaker/DEMO-pytorch-mnist"

# One SageMaker session is created here and reused for every call below.
sagemaker_session = sagemaker.Session()

# Default S3 bucket associated with that session.
bucket = sagemaker_session.default_bucket()

# IAM execution role, as resolved by the SageMaker SDK.
role = get_execution_role()

# AWS region, read from the session's underlying boto session.
region = sagemaker_session.boto_session.region_name

In [None]:
# NOTE: the torchvision module is `transforms` (plural); importing
# `transform` raises ImportError and leaves `transforms` undefined below.
from torchvision import transforms
from torchvision.datasets import MNIST

# Point the dataset class at the SageMaker sample-files mirror for downloads.
MNIST.mirrors = ["https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/MNIST/"]

# Download MNIST into ./data. The transform converts each image to a tensor
# and normalizes it with mean 0.1307 and std 0.3081.
MNIST(
    'data',
    download=True,
    transform=transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    ),
)

In [None]:
# Upload the local 'data' directory to S3; the returned location is what
# estimator.fit() receives as its training channel.
inputs = sagemaker_session.upload_data(
    path='data',
    bucket=bucket,
    key_prefix=prefix,
)
print(f'input spec (in this case, just an S3 path): {inputs}')

In [None]:
def _get_training_data_loader(batch_size, training_dir, is_distributed, **kwargs):
  """Build a DataLoader over the MNIST training split.

  Args:
    batch_size: Number of samples per batch.
    training_dir: Root directory handed to ``datasets.MNIST``; the dataset is
      downloaded there when it is not already present (``download=True``).
    is_distributed: When truthy, samples are drawn through a
      ``DistributedSampler`` and shuffling is left to that sampler; otherwise
      the loader shuffles itself.
    **kwargs: Extra keyword arguments forwarded unchanged to
      ``torch.utils.data.DataLoader``.

  Returns:
    A ``torch.utils.data.DataLoader`` over the normalized training images.
  """
  # Function-scope imports: no other cell in this notebook brings these names
  # into scope, so the function must be self-contained.
  import logging

  import torch
  import torch.utils.data
  import torch.utils.data.distributed
  from torchvision import datasets, transforms

  logging.getLogger(__name__).info("get train data loader")

  # Bind to a name other than `datasets`/`dataset` module aliases so the
  # module is not shadowed by the local being assigned.
  dataset = datasets.MNIST(
      training_dir,
      download=True,
      train=True,
      transform=transforms.Compose(
          [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
      ),
  )

  # NOTE(review): DistributedSampler with default arguments presumably needs
  # the process group to be initialized by the caller first -- confirm.
  train_sampler = (
      torch.utils.data.distributed.DistributedSampler(dataset) if is_distributed else None
  )

  # DataLoader rejects shuffle=True together with a sampler, so shuffle only
  # when no sampler is in use.
  return torch.utils.data.DataLoader(
      dataset,
      batch_size=batch_size,
      shuffle=train_sampler is None,
      sampler=train_sampler,
      **kwargs
  )

In [None]:
# The estimator class is spelled `PyTorch`; `Pytorch` does not exist in
# sagemaker.pytorch and raises ImportError.
from sagemaker.pytorch import PyTorch

# Estimator configuration: runs mnist.py on two ml.c5.2xlarge instances and
# passes the listed hyperparameters to the script.
estimator = PyTorch(
    entry_point='mnist.py',
    role=role,
    py_version='py3',
    framework_version='1.8.0',
    instance_count=2,
    instance_type='ml.c5.2xlarge',
    hyperparameters={
        'epochs': 1,
        'backend': 'gloo',
    },
)

# Train, feeding the uploaded S3 data as the 'training' channel.
estimator.fit({'training': inputs})

# Deploy the trained model behind a single-instance endpoint.
predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

In [None]:
# create a sample data for inference
import gzip
import numpy as np
import random
import os

data_dir = 'data/MNIST/raw'

# Read the gzipped test-image file. offset=16 skips the bytes that precede the
# pixel data; the remainder is reshaped to (num_images, 28, 28) float32.
with gzip.open(os.path.join(data_dir, 't10k-images-idx3-ubyte.gz'), 'rb') as f:
  images = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 28, 28).astype(np.float32)


# Randomly pick 16 distinct test-image indices.
mask = random.sample(range(len(images)), 16)
# `np.int` was removed in NumPy 1.24; the builtin `int` is what it aliased.
mask = np.array(mask, dtype=int)

# Input data: index (not call) the array to gather the selected images.
data = images[mask]

In [None]:
# Inference: add a new axis at position 1 to the sampled images and send the
# batch to the deployed endpoint.
model_input = np.expand_dims(data, axis=1)
response = predictor.predict(model_input)
print("Raw prediction result:")
print(response)

# Pair the class indices 0-9 with the values returned for the first sample.
first_sample_scores = response[0]
labeled_predictions = list(zip(range(10), first_sample_scores))
print("labeled predictions: ")
print(labeled_predictions)

# Order the pairs so the largest value comes first, then report that pair.
labeled_predictions.sort(key=lambda pair: 1.0 - pair[1])
print(f"Most likely answer: {labeled_predictions[0]}")