In [2]:
# 初期設定
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

import os, boto3, json, sagemaker
import numpy as np
from sagemaker.pytorch import PyTorchModel
from io import BytesIO

def make_dir(path):
    """Create the directory ``path`` if it does not already exist.

    Missing parent directories are created as well, and calling this on an
    existing directory is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: it has no
    # check-then-create race and also handles nested paths.
    os.makedirs(path, exist_ok=True)

# SageMaker session used below for S3 uploads.
sagemaker_session = sagemaker.Session()

# AWS settings
region = boto3.Session().region_name
role = 'han_s3_full_access'
bucket='sagemaker-han-batch'
# Alternative role/bucket pair (kept for reference):
# role = 'FullAccessHan'
# bucket='sagemaker-han'
prefix = 'batch-images'
bucket_path = 'https://s3-{}.amazonaws.com/{}'.format(region,bucket)
# S3 object keys always use '/' as the separator, so build them explicitly
# instead of with os.path.join (which would insert '\\' on Windows).
model_prefix = f'{prefix}/model'
input_prefix = f'{prefix}/inputs'
output_prefix = f'{prefix}/outputs'
inference_prefix = f'{prefix}/f_inference'

# Local settings: every working directory lives directly under the current
# working directory.
base_dir = os.getcwd()
data_dir, model_dir, input_dir, output_dir, inference_dir, source_dir = (
    os.path.join(base_dir, folder)
    for folder in ('data', 'model', 'inputs', 'outputs', 'inference', 'src')
)

# Create the working directories (data_dir is intentionally not in this list,
# matching the original behaviour).
for dir_name in (model_dir, input_dir, output_dir, source_dir, inference_dir):
    make_dir(dir_name)

In [116]:
# Build the inference data as JSON Lines. Per the original author's notes the
# steps are:
#   - sampling
#   - converting each image into bytes
#   - saving the result as jsonlines
# NOTE(review): image_to_bytes lives in src/utils.py, which is not shown here;
# confirm the exact meaning of its arguments against that module.
from src.utils import image_to_bytes

r_inference_path = os.path.join(base_dir, 'real_inference')
# 'real_inference' is not among the directories created in the setup cell, so
# make sure it exists before a file is written into it.
os.makedirs(r_inference_path, exist_ok=True)

n = 1000
json_name = os.path.join(r_inference_path, f'inf_data{n}.jsonl')

image_to_bytes(json_name, data_dir, inference_dir, n)

# Upload the whole local directory; the returned value is kept in f_inference.
f_inference = sagemaker_session.upload_data(path=r_inference_path, bucket=bucket, key_prefix='batch-images/r_inference')

# Batch Transformer

In [25]:
# Upload the local model directory to S3 under model_prefix and keep the
# value returned by upload_data.
model_path = sagemaker_session.upload_data(
    path=model_dir,
    bucket=bucket,
    key_prefix=model_prefix,
)

In [26]:
# Display the value returned by upload_data in the previous cell.
# The model archive itself is expected at:
# s3://sagemaker-han-batch/batch-images/model/model.tar.gz
model_path

's3://sagemaker-han-batch/batch-images/model'

In [27]:

from sagemaker.pytorch.model import PyTorchModel

# S3 location of the packaged model archive. Two locations were used by the
# author; the one labelled "home" is kept commented out for reference.
# home
# model_path = 's3://sagemaker-us-west-2-608095525235/pytorch-training-2022-08-22-14-02-01-637/model.tar.gz'
# fusic
model_path = 's3://sagemaker-han-batch/batch-images/model/model.tar.gz'

# Model definition: inference.py inside custom_model/code is the entry point.
pytorch_model = PyTorchModel(
    model_data=model_path,
    role=role,
    entry_point='inference.py',
    source_dir='custom_model/code',
    framework_version='1.12.0',
    py_version='py38',
)

In [23]:
# predictor = pytorch_model.deploy(instance_type='ml.m4.xlarge', initial_instance_count=1)

-------!

In [28]:
# Batch-transform settings. The same values are embedded in the output path
# so that runs with different settings are written under different prefixes.
max_concurrent_transforms = 2
max_payload = 1  # NOTE(review): the SageMaker SDK documents this value in MB - confirm
# Alternative values (kept for reference):
# strategy = 'SingleRecord'
# split_type = None
strategy = 'MultiRecord'
split_type = 'Line'

output_s3_path = (
    f's3://{bucket}/{output_prefix}/'
    f'bt_test_{max_concurrent_transforms}_{max_payload}_{strategy}_{split_type}'
)

# Transformer created from the PyTorch model; results are written to
# output_s3_path.
transformer = pytorch_model.transformer(
    instance_count=1,
    instance_type="ml.m5.xlarge",
    strategy=strategy,
    max_concurrent_transforms=max_concurrent_transforms,
    max_payload=max_payload,
    output_path=output_s3_path,
    accept="application/jsonlines",
    assemble_with="Line",
)


In [29]:

# S3 prefix holding the JSON Lines inference payload.
inference_path = 's3://{}/batch-images/r_inference'.format(bucket)

# Start the batch transform job and block until it finishes (wait=True).
# NOTE(review): the output recorded below this cell ends with an
# UnexpectedStatusException (AlgorithmError); the job logs are needed to find
# the root cause.
transformer.transform(
    data=inference_path,
    data_type="S3Prefix",
    content_type="application/jsonlines",
    split_type=split_type,
    join_source="Input",
    wait=True,
)

............................[34mCollecting omegaconf
  Downloading omegaconf-2.2.3-py3-none-any.whl (79 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 79.3/79.3 kB 4.3 MB/s eta 0:00:00[0m
[34mCollecting antlr4-python3-runtime==4.9.*
  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 117.0/117.0 kB 13.8 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'[0m
[34mBuilding wheels for collected packages: antlr4-python3-runtime
  Building wheel for antlr4-python3-runtime (setup.py): started
  Building wheel for antlr4-python3-runtime (setup.py): finished with status 'done'
  Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144554 sha256=b1e84db33ec9640b24a1f3e98208ca06dfb793951c17e3137b747174e263c63d
  Stored in directory: /root/.cache/pip/wheels/b1/a3/c2/6df046c09459b73cc9bb6c4401b0be6c47048baf9a1617c485[0m
[34m

UnexpectedStatusException: Error for Transform job pytorch-inference-2022-09-01-13-12-45-284: Failed. Reason: AlgorithmError: See job logs for more information

In [None]:
# transformer.stop_transform_job()

In [113]:
# import pprint as pp

# job_name = 'pytorch-inference-2022-08-21-03-13-37-242'
# sm_cli = sagemaker_session.sagemaker_client
# job_info = sm_cli.describe_transform_job(TransformJobName=job_name)
# pp.pprint(job_info)

{'CreationTime': datetime.datetime(2022, 8, 21, 12, 13, 37, 860000, tzinfo=tzlocal()),
 'DataProcessing': {'InputFilter': '$',
                    'JoinSource': 'None',
                    'OutputFilter': '$'},
 'FailureReason': 'AlgorithmError: See job logs for more information',
 'ModelName': 'pytorch-inference-2022-08-21-03-13-10-667',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '949',
                                      'content-type': 'application/x-amz-json-1.1',
                                      'date': 'Sun, 21 Aug 2022 05:10:13 GMT',
                                      'x-amzn-requestid': 'cd218913-c7fe-4e1f-9b31-2b2fa07372d4'},
                      'HTTPStatusCode': 200,
                      'RequestId': 'cd218913-c7fe-4e1f-9b31-2b2fa07372d4',
                      'RetryAttempts': 0},
 'TransformEndTime': datetime.datetime(2022, 8, 21, 12, 18, 21, 796000, tzinfo=tzlocal()),
 'TransformInput': {'CompressionType': 'None',
                    'ContentType'

In [77]:
from src.resnet_batch_transform import resnet20

# Local smoke test: run a freshly constructed resnet20 on a random batch of
# two 3x32x32 inputs three times, and serialise the collected class indices
# as {"predictions": [...]}.
tmp_model = resnet20()
tmp_model.eval()
tmp_input = torch.rand(2, 3, 32, 32)

preds = []
# Inference only: disable autograd so no graph is built for the forward passes.
with torch.no_grad():
    for _ in range(3):
        tmp_output = tmp_model(tmp_input)
        pred = torch.argmax(tmp_output, dim=1)
        print(f'PRED SHAPE: {pred.shape}')
        print(pred)
        # tolist() yields plain Python ints, so the list is JSON-serialisable
        # directly (no list of 0-d tensors, no numpy round-trip).
        preds.extend(pred.tolist())

p_return = {"predictions": preds}
json.dumps(p_return)
    

PRED SHAPE: torch.Size([2])
tensor([1, 1])
PRED SHAPE: torch.Size([2])
tensor([1, 1])
PRED SHAPE: torch.Size([2])
tensor([1, 1])


'{"predictions": [1, 1, 1, 1, 1, 1]}'

In [86]:
import boto3

# Low-level SageMaker client, used below to inspect transform jobs.
sm = boto3.client(service_name="sagemaker")
sm

<botocore.client.SageMaker at 0x290eb4a60>

In [88]:
# Names of the client attributes that contain "transform".
[attr_name for attr_name in dir(sm) if "transform" in attr_name]

['create_transform_job',
 'describe_transform_job',
 'list_transform_jobs',
 'stop_transform_job']

In [92]:
# Summaries of all transform jobs that are currently in progress.
# list_transform_jobs is paginated, so a single call only returns the first
# page; iterate over every page and let the service filter by status.
[
    job_summary
    for page in sm.get_paginator("list_transform_jobs").paginate(StatusEquals="InProgress")
    for job_summary in page["TransformJobSummaries"]
]

[]