## Build Docker Container

In [None]:
# Switch the kernel's working directory to the project checkout so the relative
# paths used by later cells (Dockerfile build context, build_and_push.sh) resolve.
# The magic is written without a space after `%`, which is the canonical form.
%cd ~/SageMaker/aws-auto-training

In [None]:
# Print the current working directory to confirm where the following shell cells run.
!pwd

In [None]:
# Seoul Region (ap-northeast-2) login: fetch an ECR password with the AWS CLI and
# pipe it into `docker login` for the registry of account 763104351884.
# NOTE(review): presumably needed so `docker build` can pull its base image from
# that registry -- confirm against the Dockerfile.
! aws ecr get-login-password --region ap-northeast-2 \
    | docker login --username AWS --password-stdin 763104351884.dkr.ecr.ap-northeast-2.amazonaws.com

In [None]:
# Build the image from the Dockerfile in the current directory and tag it
# `pytorch-extended-container`, the name the local-mode estimator refers to.
! docker build --tag pytorch-extended-container .

## Extended Container - Local Test

In [None]:
import argparse
import os
import sagemaker
from sagemaker import get_execution_role
from sagemaker.pytorch import PyTorch

# Set up the Environment
sagemaker_session = sagemaker.Session()

# IAM role setting
role = get_execution_role()

# Create a training job using the PyTorch Estimator.
# `image_uri` is a SageMaker Python SDK v2 keyword, so the instance settings use
# the matching v2 names (`instance_count` / `instance_type`) rather than the
# deprecated v1 `train_instance_count` / `train_instance_type` spellings.
estimator = PyTorch(image_uri='pytorch-extended-container',  # locally built image tag
                    # Given relative to `source_dir`: the SDK expects the entry point
                    # to sit at the root of `source_dir`, and an absolute notebook
                    # path would not exist inside the training container.
                    entry_point='train.py',
                    source_dir='/home/ec2-user/SageMaker/aws-auto-training/',
                    role=role,
                    instance_count=1,
                    instance_type='local', # local, ml.c5.xlarge, ml.p2.xlarge
                    hyperparameters={
                        "epochs": 5,
                        "backend": "gloo"
                    })

# Calling `fit`: the S3 prefix is exposed to the training script as the "train" channel.
estimator.fit({"train": "s3://sagemaker-pytorch-metal/data/"})

## Extended Container - ECR Test

In [None]:
# Run the project's build_and_push.sh helper from the current directory.
# NOTE(review): presumably builds the image and pushes it to this account's ECR
# repository as `pytorch-extended-container:latest` -- confirm in the script.
!sh build_and_push.sh

In [None]:
import boto3

# AWS account ID of the credentials this notebook runs with (via STS).
client = boto3.client('sts')
account = client.get_caller_identity()['Account']

# Region configured for the default boto3 session.
my_session = boto3.session.Session()
region = my_session.region_name

# Assemble the ECR image URI: <account>.dkr.ecr.<region>.amazonaws.com/<repository>:latest
algorithm_name = "pytorch-extended-container"
ecr_uri_template = '{account}.dkr.ecr.{region}.amazonaws.com/{repository}:latest'
ecr_image = ecr_uri_template.format(account=account, region=region, repository=algorithm_name)

# Display the resulting URI as the cell output.
ecr_image

In [None]:
import argparse
import os
import sagemaker
from sagemaker import get_execution_role
from sagemaker.pytorch import PyTorch

# Set up the Environment
sagemaker_session = sagemaker.Session()

# IAM role setting
role = get_execution_role()

# Create a training job using the PyTorch Estimator.
# `ecr_image` must already be defined by the cell that assembles the ECR image URI.
# `image_uri` is a SageMaker Python SDK v2 keyword, so the instance settings use
# the matching v2 names (`instance_count` / `instance_type`) rather than the
# deprecated v1 `train_instance_count` / `train_instance_type` spellings.
estimator = PyTorch(image_uri=ecr_image,
                    # Given relative to `source_dir`: the SDK expects the entry point
                    # to sit at the root of `source_dir`, and an absolute notebook
                    # path would not exist inside the training container.
                    entry_point='train.py',
                    source_dir='/home/ec2-user/SageMaker/aws-auto-training/',
                    role=role,
                    instance_count=1,
                    instance_type='ml.c5.xlarge', # ml.c5.xlarge, ml.p2.xlarge,
                    base_job_name='pytorch-extended-container', # prefix for training job name
                    hyperparameters={
                        "epochs": 5,
                        "backend": "gloo"
                    })

# Calling `fit`: the S3 prefix is exposed to the training script as the "train" channel.
estimator.fit({"train": "s3://sagemaker-pytorch-metal/data/"})