In [32]:
%store -z
%store

Stored variables and their in-db values:


In [33]:
import os
import boto3
import time
from glob import glob
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import hvplot.pandas
from PIL import Image

import sagemaker
from sagemaker.workflow.pipeline_context import PipelineSession

sagemaker_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()
default_bucket = sagemaker_session.default_bucket()
model_package_group_name = f"CatBreedModelPackage"

os.environ["default_bucket"] = default_bucket
source_s3_bucket = "kefico-source/aws_study/dl-service"

In [34]:
input_data_uri = f"s3://{default_bucket}/cat-breed/"
print(input_data_uri)
os.makedirs("cat-breed", exist_ok=True)

s3://sagemaker-ap-northeast-2-242201274000/cat-breed/


In [35]:
import sagemaker
import sagemaker.workflow as workflow

from sagemaker.workflow.parameters import (
    ParameterString,
    ParameterInteger
)
from sagemaker.workflow.steps import CacheConfig

input_data = ParameterString(
    name='InputData',
    default_value=input_data_uri
)

processing_instance_count = ParameterInteger(
    name="ProcessingInstanceCount",
    default_value=1
)

processing_instance_type = ParameterString(
    name="ProcessingInstanceType",
    default_value='ml.m5.large'
)

training_instance_type = ParameterString(
    name="TrainingInstanceType",
    default_value="ml.m5.large"
)

training_instance_count = ParameterInteger(
    name="TrainingInstanceCount",
    default_value=1
)

evaluation_instance_type = ParameterString(
    name="EvaluationInstanceType",
    default_value="ml.m5.large"
)

evaluation_instance_count = ParameterInteger(
    name="EvaluationInstanceCount",
    default_value=1
)

inference_instance_type = ParameterString(
    name="InferenceInstanceType",
    default_value="ml.m5.large"
)

inference_instance_count = ParameterInteger(
    name="InferenceInstanceCount",
    default_value=1
)

# model_approval_status = ParameterString(
#     name="ModelApprovalStatus",
#     default_value="Approved"
# )

In [36]:
preprocessing_code = "cat-breed/preprocessing.py"
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

framework_version = "0.23-1"

sklearn_processor = SKLearnProcessor(
    framework_version=framework_version,
    instance_type=processing_instance_type,
    instance_count=processing_instance_count,
    base_job_name="sklearn-cat-breed-process",
    role=role
)
print('role:', role)

step_process = ProcessingStep(
    name="CatBreedProcess",
    processor=sklearn_processor,
    inputs = [ProcessingInput(source=input_data, destination="/opt/ml/processing/input")],
    outputs = [ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
              ProcessingOutput(output_name="test", source="/opt/ml/processing/test")],
    code=preprocessing_code
)

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


role: arn:aws:iam::242201274000:role/service-role/AmazonSageMaker-ExecutionRole-20240923T230631


In [37]:
from sagemaker import image_uris, model_uris, script_uris

model_id, model_version = "pytorch-ic-resnet18", "2.0.0"

# JumpStart 용 기본 모델의 S3 Uri 검색
base_model_uri = model_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    model_scope="training"
)

# 학습 스크립트 검색
training_script_uri = script_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    script_scope="training"
)

# 학습 Docker Image 검색
model_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="training",
    model_id=model_id,
    model_version=model_version,
    instance_type=training_instance_type
)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


In [38]:
model_path = f"s3://{default_bucket}/CatBreedTrain"

from sagemaker.estimator import Estimator
from sagemaker import hyperparameters

# JumpStart 용 기본 변수 가져오기
default_hyperparameters = hyperparameters.retrieve_default(
    model_id=model_id,
    model_version=model_version
)

# hyperparameter는 바꿔줄 수 있음
default_hyperparameters['epochs'] = 5
default_hyperparameters['batch-size'] = 8
default_hyperparameters['adam-learning-rate'] = "0.001"
default_hyperparameters['reinitialize-top-layer'] = "auto"
default_hyperparameters['train-only-yop-layer'] = "True"

print(default_hyperparameters)

{'epochs': 5, 'adam-learning-rate': '0.001', 'batch-size': 8, 'reinitialize-top-layer': 'auto', 'train-only-yop-layer': 'True'}


In [39]:
# Estimator 생성
estimator = Estimator(
    image_uri=model_image_uri,
    source_dir=training_script_uri,
    model_uri=base_model_uri,
    entry_point="transfer_learning.py",
    role=sagemaker.session.Session().get_caller_identity_arn(),
    hyperparameters=default_hyperparameters,
    instance_count=training_instance_count,
    instance_type=training_instance_type,
    enable_network_isolation=True,
    output_path=model_path,
    disable_profiler=True,
    metric_definitions=[{'Name': 'train:Loss', 'Regex': 'train Loss: (\S+)'},
                       {'Name': 'train:Acc', 'Regex': 'train Loss: \S+ Acc: (\S+)'},
                       {'Name': 'test:Loss', 'Regex': 'test Loss: (\S+)'},
                       {'Name': 'test:Acc', 'Regex': 'test Loss: \S+ Acc: (\S+)'},]
)

In [40]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

step_train = TrainingStep(
    name="CatBreedTrain",
    estimator=estimator,
    inputs = {
        'training': TrainingInput(
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs['train'].S3Output.S3Uri,
            content_type='application/x-image'
        )
    }
)

In [41]:
from sagemaker.processing import ScriptProcessor
from sagemaker.workflow.properties import PropertyFile

script_eval = ScriptProcessor(
    image_uri=model_image_uri,
    command=["python3"],
    instance_type=evaluation_instance_type,
    instance_count=evaluation_instance_count,
    base_job_name="script-cat-breed-eval",
    role=role
)

evaluation_report = PropertyFile(
    name="EvaluationReport",
    output_name="evaluation",
    path = "evaluation.json"
)

In [42]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

step_eval = ProcessingStep(
    name="CatBreedEval",
    processor=script_eval,
    inputs=[ProcessingInput(source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                            destination='/opt/ml/processing/model'),
           ProcessingInput(source=step_process.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
                           destination="/opt/ml/processing/test")],
    outputs=[ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation")],
    code="cat-breed/evaluation.py",
    property_files=[evaluation_report]
)

In [43]:
import logging
from sagemaker.model import Model
from sagemaker.pytorch import PyTorchModel
from sagemaker import image_uris, script_uris

inference_image_url = image_uris.retrieve(
    framework='pytorch',
    region='ap-northeast-2',
    version='1.10.2',
    py_version='py38',
    image_scope='inference',
    instance_type=inference_instance_type
)

print('inference_image_url: ', inference_image_url)

# 추론용 스크립트 uri 검색
deploy_source_uri = script_uris.retrieve(
    model_id=model_id,
    model_version=model_version, 
    script_scope="inference"
)

print(f"\ndeploy_source_uri: {deploy_source_uri}")



inference_image_url:  763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/pytorch-inference:1.10.2-cpu-py38

deploy_source_uri: s3://jumpstart-cache-prod-ap-northeast-2/source-directory-tarballs/pytorch/inference/ic/v2.0.0/sourcedir.tar.gz


In [44]:
from sagemaker.workflow.pipeline_context import PipelineSession

pipeline_session = PipelineSession()

model_name = "CatBreedClassification"
model = Model(
    name=model_name,
    image_uri=inference_image_url,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    # source_dir='inference_script1',
    # entry_point="inference.py",
    sagemaker_session=pipeline_session,
    role=role
)

In [45]:
from sagemaker.workflow.model_step import ModelStep

register_model_steps_args = model.register(
    content_types=["application/x-image"],
    response_types=["application/json"],
    inference_instances=["ml.m5.large"],
    transform_instances=["ml.m5.large"],
    model_package_group_name=model_package_group_name,
    approval_status="Approved"
)

step_register = ModelStep(
    name="CatBreedRegisterModel",
    step_args=register_model_steps_args
)



In [46]:
from iam_helper import create_lambda_role
import time
from sagemaker.workflow.lambda_step import LambdaStep, LambdaOutput, LambdaOutputTypeEnum
from sagemaker.lambda_helper import Lambda

lambda_role = create_lambda_role("lambda-deployment-role")
print(lambda_role)

# 현재 시간을 사용해 생성된 리소스에 고유한 이름 부여
current_time = time.strftime("%m-%d-%H-%M-%S", time.localtime())
deploy_model_name_prefix = model_name
endpoint_config_name_prefix = "cat-breed-ep-config"
endpoint_name = "cat-breed-ep-sync"
function_name = "sagemaker-cat-breed-lambda-step" + current_time
s3_prefix = "CatBreedPred"

# Lambda Helper 클래스를 사용해 Lambda 함수 생성
func = Lambda(
    function_name=function_name,
    execution_role_arn=lambda_role,
    script="cat-breed/lambda_deployer.py",
    handler="lambda_deployer.lambda_handler",
    timeout=600,
    memory_size=3008
)

lambda_create_res = func.create()

func_from_exist_lambda = Lambda(
    function_arn=lambda_create_res["FunctionArn"]
)

Using ARN from existing role: lambda-deployment-role
arn:aws:iam::242201274000:role/lambda-deployment-role


In [47]:
# Lambda Step 생성

# Lambda 함수의 Output: LambdaOutput에 의해 캡쳐되고 있음.
output_param_1 = LambdaOutput(output_name="statusCode",
                              output_type=LambdaOutputTypeEnum.String)

output_param_2 = LambdaOutput(output_name="body",
                              output_type=LambdaOutputTypeEnum.String)

# Lambda 함수에 제공된 입력 (event): Event 객체로 검색해라.
step_deploy_lambda = LambdaStep(
    name="CatBreedDeploy",
    lambda_func=func_from_exist_lambda,
    inputs = {
        "model_name": deploy_model_name_prefix,
        "endpoint_config_name": endpoint_config_name_prefix,
        "endpoint_name": endpoint_name,
        "model_package_arn": step_register.steps[-1].properties.ModelPackageArn,
        "role": role
    },
    outputs = [output_param_1, output_param_2]
)

In [48]:
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.condition_step import ConditionStep
from sagemaker.workflow.functions import JsonGet

cond_gte = ConditionGreaterThanOrEqualTo(
    left=JsonGet(
        step_name=step_eval.name,
        property_file=evaluation_report,
        json_path="classification_metrics.accuracy.value"
    ),
    right= 0.5
) # Left에 들어오는 것이 right에 설정한 값보다 크거나 같으면 참. 그렇지 않으면 거짓

In [49]:
step_cond = ConditionStep(
    name="CatBreedAccuracyCond",
    conditions=[cond_gte],
    if_steps=[step_register, step_deploy_lambda], # cond_gte가 참일 때 실행되는 step
    else_steps=[], # cond_gte가 거짓일 때 실행되는 step
)

In [50]:
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = f"Cat-Breed-Demo"
pipeline = Pipeline(
    name=pipeline_name,
    parameters=[
        input_data,
        processing_instance_count,
        processing_instance_type,
        training_instance_count,
        training_instance_type,
        evaluation_instance_count,
        evaluation_instance_type,
        inference_instance_count,
        inference_instance_type,
    ],
    steps=[step_process, step_train, step_eval, step_cond]
)

In [51]:
import json

json.loads(pipeline.definition())



{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'InputData',
   'Type': 'String',
   'DefaultValue': 's3://sagemaker-ap-northeast-2-242201274000/cat-breed/'},
  {'Name': 'ProcessingInstanceCount', 'Type': 'Integer', 'DefaultValue': 1},
  {'Name': 'ProcessingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.large'},
  {'Name': 'TrainingInstanceCount', 'Type': 'Integer', 'DefaultValue': 1},
  {'Name': 'TrainingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.large'},
  {'Name': 'EvaluationInstanceCount', 'Type': 'Integer', 'DefaultValue': 1},
  {'Name': 'EvaluationInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.large'},
  {'Name': 'InferenceInstanceCount', 'Type': 'Integer', 'DefaultValue': 1},
  {'Name': 'InferenceInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.large'}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Ste

In [52]:
pipeline.upsert(role_arn=role)
execution = pipeline.start()
execution.describe()
print("\n====Now Waiting for Pipeline Execution...====\n")
execution.wait()
execution.list_steps()




====Now Waiting for Pipeline Execution...====



[{'StepName': 'CatBreedDeploy',
  'StartTime': datetime.datetime(2024, 11, 18, 7, 9, 10, 526000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 11, 18, 7, 9, 13, 18000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'Lambda': {'Arn': 'arn:aws:lambda:ap-northeast-2:242201274000:function:sagemaker-cat-breed-lambda-step11-18-06-57-47',
    'OutputParameters': [{'Name': 'body', 'Value': '"Created Endpoint!"'},
     {'Name': 'statusCode', 'Value': '200.0'}]}},
  'AttemptCount': 1},
 {'StepName': 'CatBreedRegisterModel-RegisterModel',
  'StartTime': datetime.datetime(2024, 11, 18, 7, 9, 8, 274000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 11, 18, 7, 9, 10, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:ap-northeast-2:242201274000:model-package/CatBreedModelPackage/7'}},
  'AttemptCount': 1},
 {'StepName': 'CatBreedAccuracyCond',
  'StartTime': datetime.datetime(2024, 11, 18, 7, 9, 7, 775000, tz

In [None]:
# API Test
filename = "data/Bombay/16213718_8832.jpg"
with open(filename, "rb") as f:
    img_bytes = f.read()

endpoint_name = "cat-breed-ep-sync"

import json
import boto3
from IPython.core.display import HTML

def query_endpoint(img, endpoint_name):
    client = boto3.client("runtime.sagemaker")
    response = client.invoke_endpoint(EndpointName=endpoint_name,
                                     ContentType='application/x-image',
                                     Body=img,
                                     Accept='application/json;verbose')
    return response

def parse_prediction(query_response):
    model_predictions = json.loads(query_response['Body'].read())
    predicted_label = model_predictions['predicted_label']
    labels = model_predictions['label']
    probabilities = model_predictions['probabilities']
    return predicted_label, probabilities, labels

query_response = query_endpoint(img_bytes, endpoint_name)
predicted_label, prob, labels = parse_prediction(query_response)
print(predicted_label)
print(prob)
print(labels)
# display(HTML(f'<img src={filename} alt={filename} align="left" style="width: 250px;"/>' f""))