# Fashion-MNIST image classification with PyTorch and TensorBoard
Sources:
- https://tutorials.pytorch.kr/intermediate/tensorboard_tutorial.html
- https://github.com/aws/amazon-sagemaker-examples/blob/master/frameworks/pytorch/get_started_mnist_train.ipynb

## Prepare dataset

In [None]:
# imports
import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# Preprocessing pipeline: convert each image to a tensor, then normalize the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Fashion-MNIST train and test splits; files are downloaded under ./data
# when they are not already present.
trainset = torchvision.datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.FashionMNIST(
    root='./data', train=False, download=True, transform=transform)

## Set up the environment

In [31]:
import os
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker import get_execution_role

# Session handle and the execution role that is later passed to the estimator.
sagemaker_session = sagemaker.Session()
role = get_execution_role()

# Everything for this example lives under one prefix in the default bucket.
bucket = sagemaker_session.default_bucket()
prefix = "tensorboard_pytorch_fashion_mnist"

# Key point: this S3 location is handed to the training script through the
# "tensorboard-logs-path" hyperparameter.
tensorboard_logs_path = f"s3://{bucket}/{prefix}/logs"
output_path = f"s3://{bucket}/{prefix}/output"

print(f"Bucket: {bucket}")
print(f"SageMaker ver: {sagemaker.__version__}")
print(f"Tensorboard log path: {tensorboard_logs_path}")

Bucket: sagemaker-us-east-1-889750940888
SageMaker ver: 2.68.0
Tensorboard log path: s3://sagemaker-us-east-1-889750940888/tensorboard_pytorch_fashion_mnist/logs


## Uploading the data to S3

In [15]:
# Recursively copy the raw Fashion-MNIST files from ./data/FashionMNIST/raw
# to the data/ folder under this example's S3 prefix.
!aws s3 cp ./data/FashionMNIST/raw s3://{bucket}/{prefix}/data --recursive

upload: data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to s3://sagemaker-us-east-1-889750940888/tensorboard_pytorch_fashion_mnist/data/t10k-labels-idx1-ubyte.gz
upload: data/FashionMNIST/raw/t10k-labels-idx1-ubyte to s3://sagemaker-us-east-1-889750940888/tensorboard_pytorch_fashion_mnist/data/t10k-labels-idx1-ubyte
upload: data/FashionMNIST/raw/train-labels-idx1-ubyte to s3://sagemaker-us-east-1-889750940888/tensorboard_pytorch_fashion_mnist/data/train-labels-idx1-ubyte
upload: data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to s3://sagemaker-us-east-1-889750940888/tensorboard_pytorch_fashion_mnist/data/t10k-images-idx3-ubyte.gz
upload: data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to s3://sagemaker-us-east-1-889750940888/tensorboard_pytorch_fashion_mnist/data/train-labels-idx1-ubyte.gz
upload: data/FashionMNIST/raw/t10k-images-idx3-ubyte to s3://sagemaker-us-east-1-889750940888/tensorboard_pytorch_fashion_mnist/data/t10k-images-idx3-ubyte
upload: data/FashionMNIST/raw/train-images

In [16]:
# Both channels point at the same S3 prefix: the upload step copied the
# train-* and t10k-* files side by side into .../data.
train_location = test_location = f"s3://{bucket}/{prefix}/data"

In [18]:
# List every object stored under the training data location.
!aws s3 ls {train_location} --recursive

2021-12-09 06:08:56    7840016 tensorboard_pytorch_fashion_mnist/data/t10k-images-idx3-ubyte
2021-12-09 06:08:56    4422102 tensorboard_pytorch_fashion_mnist/data/t10k-images-idx3-ubyte.gz
2021-12-09 06:08:56      10008 tensorboard_pytorch_fashion_mnist/data/t10k-labels-idx1-ubyte
2021-12-09 06:08:56       5148 tensorboard_pytorch_fashion_mnist/data/t10k-labels-idx1-ubyte.gz
2021-12-09 06:08:56   47040016 tensorboard_pytorch_fashion_mnist/data/train-images-idx3-ubyte
2021-12-09 06:08:56   26421880 tensorboard_pytorch_fashion_mnist/data/train-images-idx3-ubyte.gz
2021-12-09 06:08:56      60008 tensorboard_pytorch_fashion_mnist/data/train-labels-idx1-ubyte
2021-12-09 06:08:56      29515 tensorboard_pytorch_fashion_mnist/data/train-labels-idx1-ubyte.gz


In [19]:
# List every object stored under the testing data location. test_location is
# built from the same bucket/prefix as train_location, so the listing matches.
!aws s3 ls {test_location} --recursive

2021-12-09 06:08:56    7840016 tensorboard_pytorch_fashion_mnist/data/t10k-images-idx3-ubyte
2021-12-09 06:08:56    4422102 tensorboard_pytorch_fashion_mnist/data/t10k-images-idx3-ubyte.gz
2021-12-09 06:08:56      10008 tensorboard_pytorch_fashion_mnist/data/t10k-labels-idx1-ubyte
2021-12-09 06:08:56       5148 tensorboard_pytorch_fashion_mnist/data/t10k-labels-idx1-ubyte.gz
2021-12-09 06:08:56   47040016 tensorboard_pytorch_fashion_mnist/data/train-images-idx3-ubyte
2021-12-09 06:08:56   26421880 tensorboard_pytorch_fashion_mnist/data/train-images-idx3-ubyte.gz
2021-12-09 06:08:56      60008 tensorboard_pytorch_fashion_mnist/data/train-labels-idx1-ubyte
2021-12-09 06:08:56      29515 tensorboard_pytorch_fashion_mnist/data/train-labels-idx1-ubyte.gz


## Local mode training

In [32]:
# Settings handed to the estimator, which forwards them to the training script.
hyperparameters = {
    "batch-size": 128,
    "epochs": 1,
    "learning-rate": 1e-3,
    "log-interval": 100,
    # S3 logs location built in the environment setup cell
    "tensorboard-logs-path": tensorboard_logs_path,
}

In [37]:
# Toggle: True runs the training script on the machine hosting this notebook
# (instance type "local"); False requests an ml.c4.xlarge instance instead.
local_mode = True
instance_type = "local" if local_mode else "ml.c4.xlarge"

# PyTorch estimator: code/train.py is the entry point, one instance is used,
# and results are written to output_path.
est = PyTorch(
    entry_point="train.py",
    source_dir="code",  # directory that holds the training script
    role=role,
    instance_type=instance_type,
    instance_count=1,
    framework_version="1.8.1",
    py_version="py3",
    hyperparameters=hyperparameters,
    output_path=output_path,
)

In [38]:
# Channel name -> S3 location; passed to est.fit() as its inputs.
channels = {
    "training": train_location,
    "testing": test_location,
}

In [41]:
# Start training with the two input channels.
# NOTE(review): the recorded output of this cell is a CalledProcessError from
# `docker pull` of the pytorch-training:1.8.1-cpu-py3 image. Presumably Docker
# must be available and authenticated against that ECR registry for local
# mode to work -- confirm before re-running.
est.fit(inputs=channels)

CalledProcessError: Command '['docker', 'pull', '763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-training:1.8.1-cpu-py3']' returned non-zero exit status 1.