## 準備

In [1]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker import get_execution_role
from sagemaker.s3 import parse_s3_url
import yaml,boto3, io
import numpy as np
from matplotlib import pyplot as plt
# Look up the SageMaker execution role used for the jobs launched from this notebook.
role = get_execution_role()

# Load the shared hands-on settings.
# yaml.safe_load replaces yaml.load: calling yaml.load without an explicit Loader is
# deprecated since PyYAML 5.1 and raises TypeError on PyYAML >= 6. safe_load also
# refuses to construct arbitrary Python objects from the file.
with open('./setting.yaml', 'r') as yml:
    config = yaml.safe_load(yml)

name = config['name']
zip_dataset_s3_uri = config['zip_dataset_s3_uri']
timestamp = config['timestamp']

# NOTE: the role ARN and the S3 URI printed below include the AWS account ID;
# clear this cell's output before sharing the notebook.
print(f'role: {role}')
print(f'name: {name}')
print(f'zip_dataset_s3_uri: {zip_dataset_s3_uri}')
print(f'timestamp: {timestamp}')

role: arn:aws:iam::815969174475:role/sageMaker-groundtruth-custom
name: taketosk
zip_dataset_s3_uri: s3://sagemaker-ap-northeast-1-815969174475/sagemaker-handson-taketosk/dataset-20210118134158/dataset.zip
timestamp: 20210118134158


## 前処理用コンテナの作成

In [2]:
# Display the container's Dockerfile. It has not been modified from the upstream version at
# https://github.com/HKT-SSA/yolov5-on-sagemaker/blob/master/2-yolov5-sm-container/container/Dockerfile
!cat ./container/Dockerfile

# ARG BASE_IMG=763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-training:1.5.1-gpu-py36-cu101-ubuntu16.04

ARG BASE_IMG=763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/pytorch-training:1.5.1-gpu-py36-cu101-ubuntu16.04

ARG BASE_IMG=${BASE_IMG}
FROM ${BASE_IMG} 

ENV PATH="/opt/code:${PATH}"

RUN apt-get update \
 && apt-get install -y --no-install-recommends --allow-unauthenticated \
    jq

## fix /usr/local/cuda-10.0/compat/libcuda.so
## RUN bash -c 'echo "/usr/local/cuda-10.0/compat" > /etc/ld.so.conf.d/cuda.conf'
RUN ldconfig -v
RUN pip install tensorboard torch torchvision --upgrade

RUN cd opt && git clone https://github.com/ultralytics/yolov5
RUN pip install -r /opt/yolov5/requirements.txt

ENV PATH="/opt/yolov5:${PATH}"
WORKDIR /opt/code
## https://github.com/aws/sagemaker-pytorch-training-toolkit/issues/143#issuecomment-566776288
## https://github.com/aws/sagemaker-pytorch-training-toolkit/blob/upgrade-training-toolkit/docker/build_artifacts/start_with_right_hostname.sh


In [5]:
# List the Docker images currently available on this machine.
!docker images
# Optional cleanup (disabled): uncomment to delete images by ID; the IDs below
# are examples from an earlier session and must be replaced with current ones.
#!docker rmi bc6ef2a0faf0 3cda59a5581c 

REPOSITORY                                                           TAG                                IMAGE ID            CREATED             SIZE
763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/pytorch-training   1.6.0-gpu-py36-cu101-ubuntu16.04   0d7223a296dd        2 months ago        8.16GB


In [6]:
# Make the helper script executable, then run it with the repository name "yolov5".
# NOTE(review): the script itself is not shown here; judging by its name and the log
# below it logs in to ECR, builds ./container/Dockerfile and pushes the image — confirm.
!chmod +x ./build-and-push.sh
! ./build-and-push.sh yolov5

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
base_img:763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/pytorch-training:1.6.0-gpu-py36-cu101-ubuntu16.04
Sending build context to Docker daemon  14.34kB
Step 1/15 : ARG BASE_IMG=763104351884.dkr.ecr.ap-northeast-1.amazonaws.com/pytorch-training:1.5.1-gpu-py36-cu101-ubuntu16.04
Step 2/15 : ARG BASE_IMG=${BASE_IMG}
Step 3/15 : FROM ${BASE_IMG}
 ---> 0d7223a296dd
Step 4/15 : ENV PATH="/opt/code:${PATH}"
 ---> Running in 5de52c109bd3
Removing intermediate container 5de52c109bd3
 ---> 56b414f32a0d
Step 5/15 : RUN apt-get update  && apt-get install -y --no-install-recommends --allow-unauthenticated     jq
 ---> Running in e47abc762af1
Ign:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64  InRelease
Ign:2 https://developer.download.nvidia.com/compute/machine-learning/repos/ub

In [7]:
# boto3 is re-imported here so that this cell can be run on its own.
import boto3

# Use boto3 to collect the pieces needed to build the ECR image URI.
account_id = boto3.client('sts').get_caller_identity().get('Account')
region = boto3.session.Session().region_name
if region is None:
    # Without a configured default region the URI would silently become
    # "<account>.dkr.ecr.None.amazonaws.com/..." and only fail later at job launch.
    raise RuntimeError(
        'AWS region is not configured; set AWS_DEFAULT_REGION or a default region for boto3.'
    )
tag = ':latest'

# Plain string literal: the original f-string had no placeholders.
ecr_repository = 'yolov5'
image_uri = f'{account_id}.dkr.ecr.{region}.amazonaws.com/{ecr_repository}{tag}'

print(f'コンテナは {image_uri} へ登録されています。')

コンテナは 815969174475.dkr.ecr.ap-northeast-1.amazonaws.com/yolov5:latest へ登録されています。


In [11]:
# ---------------------------------------------------------------------------
# Inputs: content copied from S3 into the container for inference.
# Each S3 source is listed together with the container directory it maps to.
# The entries in predict-args.json must match these container directories.
# ---------------------------------------------------------------------------

# Inference arguments file
predict_args_s3_path = 's3://20210118-yolov5/args/predict-args.json'
predict_args_dir = '/opt/ml/processing/data/cfg/'

# Model weights
model_s3_path = 's3://20210118-yolov5/weights/yolov5s.pt'
processing_weigth_dir = '/opt/ml/processing/data/weights'

# Images to run inference on
inference_s3_path = 's3://20210118-yolov5/test/'
processing_image_dir = '/opt/ml/processing/data/images/inference'

# ---------------------------------------------------------------------------
# Output: content copied from inside the container back to S3 after inference.
# ---------------------------------------------------------------------------
processing_output_dir = '/opt/ml/processing/data/images/inference/detect'
output_s3_path = 's3://20210118-yolov5/output/'

# Base name for the processing job, suffixed with the user name from the settings.
job_name = f'sagemaker-preprocess-handson-{name}'

In [12]:
from sagemaker.processing import (
    Processor,
    ScriptProcessor,
    ProcessingInput,
    ProcessingOutput,
)

# Processing job definition: runs the image at `image_uri` on a single ml.c5.xlarge instance.
processor = Processor(
    role=role,
    image_uri=image_uri,
    # The same command could instead be declared as ENTRYPOINT in the Dockerfile.
    entrypoint=["bash", "/opt/code/predict"],
    instance_type='ml.c5.xlarge',
    instance_count=1,
    base_job_name=job_name,
)

In [None]:
# (S3 source, container destination) pairs copied into the container for the job.
input_mappings = [
    (predict_args_s3_path, predict_args_dir),
    (model_s3_path, processing_weigth_dir),
    (inference_s3_path, processing_image_dir),
]
job_inputs = [
    ProcessingInput(source=s3_source, destination=container_dir)
    for s3_source, container_dir in input_mappings
]

# Single output: the container's detection directory is copied to the S3 output prefix.
job_outputs = [
    ProcessingOutput(
        output_name='output',
        source=processing_output_dir,
        destination=output_s3_path,
    )
]

processor.run(inputs=job_inputs, outputs=job_outputs)


Job Name:  sagemaker-preprocess-handson-taketosk-2021-01-20-04-51-20-060
Inputs:  [{'InputName': 'input-1', 'S3Input': {'S3Uri': 's3://20210118-yolov5/args/predict-args.json', 'LocalPath': '/opt/ml/processing/data/cfg/', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'input-2', 'S3Input': {'S3Uri': 's3://20210118-yolov5/weights/yolov5s.pt', 'LocalPath': '/opt/ml/processing/data/weights', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'input-3', 'S3Input': {'S3Uri': 's3://20210118-yolov5/test/', 'LocalPath': '/opt/ml/processing/data/images/inference', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'output', 'S3Output': {'S3Uri': 's3://20210118-yolov5/output/', 'LocalPath': '/opt/ml/processing/data/image