# Image Classification - Bring Your Own Container

In [None]:
# Install the notebook's extra dependencies with the pip that belongs to the
# interpreter running this kernel (sys.executable), not whichever `pip` is on PATH.
import sys
import IPython
!{sys.executable} -m pip install sagemaker-studio-image-build ipywidgets opencv-python matplotlib
# NOTE(review): presumably the True argument requests a kernel restart so the
# freshly installed packages become importable; this interrupts a "Run All",
# so continue from the next cell once the kernel is back — confirm.
IPython.Application.instance().kernel.do_shutdown(True)

In [None]:
import os
import random

import cv2 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import confusion_matrix, classification_report

%matplotlib inline

In [None]:
%%sh
# Build the training image from container/training with sm-docker, using the
# repository:tag defined below.

# Abort on the first failing command so that a failed `cd` cannot make
# sm-docker build from the wrong directory.
set -e

# Repository name and tag of our algorithm image (repository:tag)
repository_name=sagemaker-tf-cifar10-latest:latest

cd container/training

sm-docker build . --file ./Dockerfile --repository "$repository_name"


In [None]:
# Local scratch location that the dataset archive is downloaded and unpacked into.
data_folder = '/tmp/data'

# Root of the unpacked dataset, with one sub-folder per split.
dataset_root = f'{data_folder}/cifar10'
train_data_dir = f'{dataset_root}/train'
test_data_dir = f'{dataset_root}/test'

In [None]:
!mkdir -p $data_folder
!aws s3 cp --no-sign-request s3://fast-ai-imageclas/cifar10.tgz $data_folder
!tar -zxvf $data_folder/cifar10.tgz -C $data_folder

In [None]:
# The ten class labels; a label's position in this list is its numeric class id.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Reverse lookup: class name -> numeric class id.
class_names_label = dict(zip(class_names, range(len(class_names))))

nb_classes = len(class_names)

# Image dimensions in pixels.
IMAGE_SIZE = (32, 32)

In [None]:
from PIL import Image

def get_image_array(image_path):
    """Load an image file and return its pixel data as a NumPy array.

    Args:
        image_path: Path of the image file to read.

    Returns:
        numpy.ndarray built from the decoded image.
    """
    # The context manager closes the underlying file as soon as the pixels have
    # been copied into the array; a bare Image.open() leaves the handle open,
    # which adds up when many images are loaded.
    with Image.open(image_path, 'r') as img:
        return np.array(img)

def display_examples(class_names, dataset_folder, n_examples=20, n_cols=5):
    """Plot a grid of randomly chosen images from a class-per-folder dataset.

    Each example picks a random class and then a random file inside
    ``{dataset_folder}/{class}``; the class name is shown under the image.

    Args:
        class_names: Sequence of class names; each must be a sub-folder of
            ``dataset_folder``.
        dataset_folder: Root folder holding one sub-folder per class.
        n_examples: Number of images to show (default 20).
        n_cols: Number of columns in the grid (default 5).

    Raises:
        ValueError: If a chosen class folder contains no files.
    """
    # Ceiling division so a partially filled last row still gets a grid row.
    n_rows = -(-n_examples // n_cols)

    fig = plt.figure(figsize=(10, 10))
    fig.suptitle('Some examples of images of the dataset', fontsize=16)

    for i in range(n_examples):
        image_class = random.choice(class_names)
        image_folder = f'{dataset_folder}/{image_class}'
        image_files = os.listdir(image_folder)
        if not image_files:
            raise ValueError(f'No images found in {image_folder}')
        image_file_path = f'{image_folder}/{random.choice(image_files)}'

        # Explicit axes instead of the pyplot state machine; ticks and grid are
        # hidden because they carry no meaning for an image thumbnail.
        ax = fig.add_subplot(n_rows, n_cols, i + 1)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(False)
        ax.imshow(get_image_array(image_file_path))
        ax.set_xlabel(image_class)
    plt.show()

In [None]:
# Preview a random sample of the training images with their class labels.
display_examples(class_names=class_names, dataset_folder=train_data_dir)

In [None]:
import sagemaker
import boto3
from sagemaker import get_execution_role

# SageMaker session and the execution role used for the jobs below.
session = sagemaker.Session()
role = get_execution_role()

# Region of the default boto3 session and the session's default S3 bucket.
region = boto3.Session().region_name
bucket = session.default_bucket()

# Everything this notebook stores in S3 lives under this key prefix.
prefix = "sagemaker/cifar10-byo"

# S3 locations of the two data channels.
data_path = f"s3://{bucket}/{prefix}/data"
training_data_path = f"{data_path}/training"
validation_data_path = f"{data_path}/validation"

In [None]:
train_data_dir, training_data_path

In [None]:
test_data_dir, validation_data_path

## Model Training with SageMaker

In [None]:

# Must match the repository name used in the sm-docker build cell.
algorithm_name = "sagemaker-tf-cifar10-latest"

# Region of the current boto3 session.
my_session = boto3.session.Session()
region = my_session.region_name

# Account id of the caller, looked up through STS.
client = boto3.client("sts")
account = client.get_caller_identity()["Account"]

# Image URI assembled from account, region and repository name, ":latest" tag.
ecr_image = "{}.dkr.ecr.{}.amazonaws.com/{}:latest".format(account, region, algorithm_name)
print(ecr_image)

In [None]:
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput

# Regexes applied to the training log to extract metrics. Example log line:
# Training results: loss=0.97792; accuracy=0.66220; val_loss=0.86919; val_accuracy=0.69960; lr=0.00000
# The \b anchors stop "loss=" / "accuracy=" from also matching inside
# "val_loss=" / "val_accuracy=": "_" is a word character, so there is no word
# boundary between "val_" and the metric name.
metric_definitions = [
    { "Name": "loss", "Regex": "\\bloss=([0-9\\.]+)" },
    { "Name": "accuracy", "Regex": "\\baccuracy=([0-9\\.]+)" },
    { "Name": "validation:loss", "Regex": "\\bval_loss=([0-9\\.]+)" },
    { "Name": "validation:accuracy", "Regex": "\\bval_accuracy=([0-9\\.]+)" },
]

# Instance type used for the training job.
instance_type = "ml.g4dn.xlarge"

# Hyperparameters handed to the training container.
hyperparameters = {"epochs": 20}

# Estimator for our own image: one instance, capped at 30 minutes of runtime.
estimator = Estimator(
    image_uri=ecr_image,
    role=role,
    instance_type=instance_type,
    instance_count=1,
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    max_run=30 * 60,  # seconds
)


In [None]:

# Channel name -> S3 prefix holding that channel's data.
channel_sources = {
    "training": training_data_path,
    "validation": validation_data_path,
}

# Both channels are S3 prefixes delivered to the container in File mode.
estimator.fit({
    channel: TrainingInput(s3_data=s3_uri, s3_data_type="S3Prefix", input_mode="File")
    for channel, s3_uri in channel_sources.items()
})

In [None]:
# Remember the name of the job that fit() just ran so it can be re-attached below.
training_job_name = estimator.latest_training_job.name

In [None]:
# Rebuild an Estimator object from the training job identified by its name.
attached_estimator = sagemaker.estimator.Estimator.attach(training_job_name)


In [None]:
# Show the model artifact location (model_data) of the attached estimator.
attached_estimator.model_data

In [None]:
# Keep the model artifact location for building the deployable model below.
model_data = attached_estimator.model_data

In [None]:
from sagemaker.tensorflow import TensorFlowModel

# Wrap the trained artifact in a TensorFlowModel (framework version 2.4) for deployment.
model = TensorFlowModel(
    framework_version='2.4',
    role=role,
    model_data=model_data,
)

In [None]:
# Deploy the model on a single ml.c5.xlarge instance and keep the returned predictor.
# NOTE(review): nothing in the visible notebook deletes this endpoint — confirm
# it is cleaned up (e.g. predictor.delete_endpoint()) once predictions are done.
predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge')

In [None]:
import cv2
import numpy as np

from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Class labels in class-id order; same names and order as the list defined
# earlier in the notebook, repeated here for the inference section.
class_names = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
]

In [None]:
image_path = "cat01.jpeg"

# cv2.imread does not raise on a missing or unreadable file — it returns None,
# which would otherwise surface as an obscure error inside cv2.resize.
image = cv2.imread(image_path, 1)
if image is None:
    raise FileNotFoundError(f"Could not read image file: {image_path}")

# resize, as our model is expecting images in 32x32.
image = cv2.resize(image, (32, 32))

# NOTE(review): cv2 loads channels in BGR order — confirm this matches the
# channel order the model was trained on.
# NOTE(review): "instances" receives the bare image (no outer batch list) —
# confirm this is the layout the serving signature expects.
data = {"signature_name": "serving_default", "instances": np.asarray(image).astype(float).tolist()}

# Send and receive JSON.
predictor.serializer = JSONSerializer()
predictor.deserializer = JSONDeserializer()

pred = predictor.predict(data)

print(pred)
# The predicted class id indexes into class_names to recover the label.
print(f"Class: {class_names[pred['predictions'][0]['classes']]}")