# AutoGluon Image Example
>__NOTE:__ Make sure to use the Python 3 (Data Science) Jupyter Kernel.

## Prerequisites

### Installing the Image Build CLI

In [None]:
%%capture
# The %%capture cell magic above hides everything this cell prints,
# including pip's installation log.
import sys
import warnings
# Ignore every Python warning raised from here on.
warnings.filterwarnings("ignore")
%matplotlib inline

# Invoke pip through the kernel's own interpreter (sys.executable) so the
# packages land in the environment this notebook is running in.
# NOTE(review): sagemaker-studio-image-build presumably provides the
# `sm-docker` command used by the Container Build Process cell -- confirm.
!{sys.executable} -m pip install -U pip sagemaker-studio-image-build

### Configuring the AutoGluon Training/Testing Script

In [None]:
%%writefile train.py
import os
import json
import boto3
import json
import warnings
import numpy as np
import pandas as pd
from autogluon.vision import ImagePredictor

# Root directory under which every path used by this script is built.
prefix = "/opt/ml"

# Locations the script reads: the hyperparameter file and the input data root.
param_path = os.path.join(prefix, "input/config/hyperparameters.json")
input_path = os.path.join(prefix, "input/data")

# Locations for results. `model_path` receives the saved predictor and fit
# summary; `output_path` is defined but not used elsewhere in this script.
model_path = os.path.join(prefix, "model")
output_path = os.path.join(prefix, "output")

# Keep DeprecationWarning messages out of the job output.
warnings.filterwarnings("ignore", category=DeprecationWarning)


def train(params):
    """Fit an AutoGluon ``ImagePredictor`` and persist it with its fit summary.

    The training images are loaded from the ``training`` sub-folder of
    ``input_path``. The fitted predictor is saved to
    ``<model_path>/ImagePredictor.Autogluon`` and the fit summary to
    ``<model_path>/FitSummary.json``.

    Args:
        params: Mapping of hyperparameters. ``params["time_limit"]`` must be
            convertible with ``int()``; ``params["presets"]`` is forwarded to
            ``ImagePredictor.fit`` as a string.

    Returns:
        The status string ``"AutoGluon Job Complete"``.

    Raises:
        KeyError: If ``"time_limit"`` or ``"presets"`` is missing.
        ValueError, TypeError: If ``time_limit`` cannot be converted to int.
    """
    time_limit = int(params["time_limit"])
    # Joining the stringified elements yields a plain ``str``; for a string
    # value this reproduces the same text.
    presets = "".join(str(part) for part in params["presets"])

    channel_name = "training"
    training_path = os.path.join(input_path, channel_name)
    training_dataset = ImagePredictor.Dataset.from_folder(training_path)
    predictor = ImagePredictor().fit(training_dataset, time_limit=time_limit, presets=presets)

    os.makedirs(model_path, exist_ok=True)
    # Save the model before the summary: if writing the summary fails, the
    # (expensive) trained predictor has already been persisted.
    predictor.save(os.path.join(model_path, "ImagePredictor.Autogluon"))
    with open(os.path.join(model_path, "FitSummary.json"), "w") as f:
        # default=str stringifies any value json cannot encode natively
        # instead of aborting the job with a TypeError.
        json.dump(predictor.fit_summary(), f, default=str)
    return "AutoGluon Job Complete"


if __name__ == "__main__":
    # Read the hyperparameters from the JSON file at `param_path`.
    print("Loading Parameters\n")
    with open(param_path) as param_file:
        params = json.load(param_file)

    # Run training and echo the status string it returns.
    print("Training Models\n")
    print(train(params))

### Container Image Build Instructions (Dockerfile)

In [None]:
%%writefile Dockerfile
# REGION is declared before FROM so that it can be substituted into the base
# image reference below; the build command passes it with --build-arg.
ARG REGION
FROM 763104351884.dkr.ecr.${REGION}.amazonaws.com/autogluon-training:0.3.1-gpu-py37-cu102-ubuntu18.04
# Upgrade the packaging tools before installing anything else.
RUN pip install -U pip wheel setuptools
# NOTE(review): this install is unpinned, so the autogluon version in the
# final image may differ from the 0.3.1 named in the base image tag, and
# train.py depends on autogluon.vision -- confirm, and consider pinning.
RUN pip install autogluon
# /opt/program holds the training script; /opt/ml is the root of the paths
# that train.py reads from and writes to.
RUN mkdir -p /opt/program
RUN mkdir -p /opt/ml
# /opt/program already exists as a directory, so train.py is copied into it.
COPY train.py /opt/program
WORKDIR /opt/program
# Starting the container runs the training script.
ENTRYPOINT ["python", "train.py"]

### Container Build Process

In [None]:
import boto3
import sagemaker

# Region of the current SageMaker session; passed as the REGION build argument
# that the Dockerfile substitutes into its FROM line.
aws_region = sagemaker.Session().boto_session.region_name
# Build the image from the current directory with the sm-docker CLI. Its
# output contains the Image URI to paste into the Experiment Parameters cell.
!sm-docker build --build-arg REGION={aws_region} .

---

## AutoGluon Experiment

### Download the Image Data

In [None]:
import io
import urllib.request  # a bare `import urllib` does not load the `request` submodule
import zipfile

# Archive with the example images. The upload step later reads "data/rps",
# so the archive is expected to unpack to an `rps` folder.
dataset_url = "https://storage.googleapis.com/laurencemoroney-blog.appspot.com/rps.zip"

# Download the whole archive into memory, then unpack it under ./data.
with urllib.request.urlopen(dataset_url) as rps_zipfile:
    with zipfile.ZipFile(io.BytesIO(rps_zipfile.read())) as z:
        z.extractall("data")

### Experiment Parameters

>__NOTE:__ Make sure to update the `image_uri` parameter with the _Image URI_ output by the __Container Build Process__.

In [None]:
import sagemaker
import datetime

# Placeholder: replace with the Image URI printed by the container build.
image_uri = "<Enter the Image URI from the sm-docker output>"

# SageMaker session, its default S3 bucket, and the execution role for the job.
session = sagemaker.session.Session()
bucket = session.default_bucket()
role = sagemaker.get_execution_role()

# Timestamp suffix with millisecond precision: %f yields six digits and the
# slice drops the last three.
job_version = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")[:-3]
job_name = "autogluon-image-" + job_version

### Create the AutoGluon Estimator

>__TIP:__ To leverage [Managed Spot Training](https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html) to further reduce training costs, uncomment the lines in the following code cell.

In [None]:
from sagemaker.estimator import Estimator

# Estimator that runs the custom container image as a training job.
autogluon = Estimator(
    image_uri=image_uri,
    role=role,
    # Job output goes beneath a per-job prefix in the session's default bucket.
    output_path=f"s3://{bucket}/{job_name}",
    base_job_name=job_name,
    instance_count=1,
    instance_type="ml.p2.xlarge",
    hyperparameters={
        # These two keys are the ones train.py reads.
        "presets": "medium_quality_faster_train",
        "time_limit": "600",
        # NOTE(review): the train.py written by this notebook does not read
        # the two keys below -- confirm whether they are still needed.
        "bucket": bucket,
        "training_job": job_name
    },
    volume_size=50,
    # Managed Spot Training (optional): uncomment all three lines together.
    # SageMaker requires max_wait >= max_run, so the wait budget is the
    # larger of the two values.
#     use_spot_instances=True,
#     max_run=3600,
#     max_wait=8*3600
)

### Execute the Experiment

In [None]:
# Upload the extracted images to S3 under this job's input prefix.
training_s3_uri = session.upload_data(
    "data/rps",
    bucket=bucket,
    key_prefix=f"{job_name}/input",
)

# Start the training job. The channel name "training" matches the folder
# name train.py appends to its input path.
autogluon.fit(inputs={"training": training_s3_uri})

### Experiment Results

#### Download Model Artifacts

In [None]:
!mkdir extract
sagemaker.s3.S3Downloader.download(autogluon.model_data, "./")
!tar xfz ./model.tar.gz -C extract

#### Review Model Summary

In [None]:
import json
# Load the fit summary that the training script wrote next to the model.
with open("extract/FitSummary.json", "r") as summary_file:
    fit_summary = json.load(summary_file)

# Show the full summary as indented JSON, then the two headline accuracies.
print(json.dumps(fit_summary, indent=4))
train_acc = fit_summary["train_acc"]
valid_acc = fit_summary["valid_acc"]
print(f"Best Model Training Accuracy: {train_acc} \nBest Model Validation Accuracy: {valid_acc}")