In [132]:
import sagemaker
import boto3
import os
import pandas as pd
from IPython.display import display as dp

project_prefix = "Kefico-Anomaly-Detection"

DEFAULT_BUCKET = True
if DEFAULT_BUCKET:
    sagemaker_session = sagemaker.session.Session()
    bucket = sagemaker_session.default_bucket()
else:
    bucket = "<사용자 버켓 이름>"
    
print("now using bucket: ", bucket)
%store bucket

now using bucket:  sagemaker-ap-northeast-2-242201274000
Stored 'bucket' (str)


In [133]:
region = boto3.Session().region_name
sagemaker_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()
print('SG Role: ', role)

SG Role:  arn:aws:iam::242201274000:role/service-role/AmazonSageMaker-ExecutionRole-20240923T230631


In [134]:
controller_name = "V2LC"
main_test_name = "HEV_P2_ACOverLoad_IG1_1"
sub_test_name = "Severe1_Above2.83s_1"

In [135]:
# Pipeline Parameter 정의
from sagemaker.workflow.parameters import ParameterInteger, ParameterString

input_data_uri = f's3://kefico-source/main_parquet_dir/{controller_name}/{main_test_name}/{sub_test_name}/'

input_data = ParameterString(
    name="InputData",
    default_value=input_data_uri
)

processing_instance_count = ParameterInteger(
    name='ProcessingInstanceCount',
    default_value=1
)

processing_instance_type = ParameterString(
    name="ProcessingInstanceType",
    default_value="ml.m4.10xlarge"
)

training_instance_type = ParameterString(
    name='TrainingInstanceType',
    default_value='ml.m4.4xlarge'
)

training_instance_count = ParameterInteger(
    name='TrainingInstanceCount',
    default_value=1
)

In [136]:
from sagemaker.sklearn.processing import SKLearnProcessor

framework_version = '1.0-1'

sklearn_processor = SKLearnProcessor(
    framework_version=framework_version,
    instance_type=processing_instance_type,
    instance_count=processing_instance_count,
    base_job_name='sklearn-AD-Demo-process',
    role=role
)

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3


In [137]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep

step_process = ProcessingStep(
    name='Kefico-AD-Preprocess',
    processor=sklearn_processor,
    inputs = [ProcessingInput(source=input_data_uri, destination='/opt/ml/processing/input/raw')],
    outputs = [ProcessingOutput(output_name='train', source='/opt/ml/processing/output')],
    code = 'src/ad_parquet_preprocessing.py'
)

In [138]:
max_run = 1*60*60

use_spot_instances = False
if use_spot_instances:
    max_wait = 1*60*60
else:
    max_wait = None

instance_type ='sagemaker'
if instance_type in ['local', 'local_gpu']:
    from sagemaker.local import LocalSession
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}
else:
    sagemaker_session = sagemaker.session.Session()

In [139]:
from sagemaker.pytorch import PyTorch

estimator_output_path = f"s3://{bucket}/{project_prefix}/training_jobs"

estimator = PyTorch(
    # framework_version='2.2.0',
    # py_version='py310',
    image_uri="763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/pytorch-training:2.4.0-cpu-py311-ubuntu22.04-sagemaker",
    entry_point='ad_train_and_evaluate.py',
    source_dir='src',
    output_path=estimator_output_path,
    code_location=estimator_output_path,
    role=role,
    sagemaker_session=sagemaker_session,
    instance_count=training_instance_count,
    instance_type=training_instance_type,
    hyperparameters={
        'num_epochs': 2,
        'batch_size': 1024
    },
    max_run=max_run,
    use_spot_instances=use_spot_instances,
    max_wait=max_wait
)

In [140]:
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep

step_train = TrainingStep(
    name="Kefico-AD-Train-And-Evaluate",
    estimator=estimator,
    inputs={
        "train": TrainingInput(
            s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                "train"
            ].S3Output.S3Uri,
        ),
    },
)

In [141]:
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = project_prefix
pipeline = Pipeline(
    name=pipeline_name,
    parameters=[
        processing_instance_type,
        processing_instance_count,
        input_data,
        training_instance_count,
        training_instance_type
    ],
    steps=[step_process, step_train]
)

In [142]:
import json

definition = json.loads(pipeline.definition())
definition



{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'ProcessingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m4.10xlarge'},
  {'Name': 'ProcessingInstanceCount', 'Type': 'Integer', 'DefaultValue': 4},
  {'Name': 'InputData',
   'Type': 'String',
   'DefaultValue': 's3://kefico-source/main_parquet_dir/V2LC/HEV_P2_ACOverLoad_IG1_1/Severe1_Above2.83s_1/'},
  {'Name': 'TrainingInstanceCount', 'Type': 'Integer', 'DefaultValue': 4},
  {'Name': 'TrainingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m4.4xlarge'}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'Kefico-AD-Preprocess',
   'Type': 'Processing',
   'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': {'Get': 'Parameters.ProcessingInstanceType'},
      'InstanceCount': {'Get': 'Parameters.ProcessingInstanceCount'},
      'VolumeSizeInGB': 30}},
    'AppSpecifi

In [143]:
# Pipeline 등록, 실행
pipeline.upsert(role_arn=role)
# execution = pipeline.start()



{'PipelineArn': 'arn:aws:sagemaker:ap-northeast-2:242201274000:pipeline/Kefico-Anomaly-Detection',
 'ResponseMetadata': {'RequestId': '08d025f2-5529-4dcc-a36c-e42b5e326119',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '08d025f2-5529-4dcc-a36c-e42b5e326119',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '97',
   'date': 'Fri, 15 Nov 2024 03:29:59 GMT'},
  'RetryAttempts': 0}}

In [144]:
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:ap-northeast-2:242201274000:pipeline/Kefico-Anomaly-Detection',
 'PipelineExecutionArn': 'arn:aws:sagemaker:ap-northeast-2:242201274000:pipeline/Kefico-Anomaly-Detection/execution/nak4boj3uqbo',
 'PipelineExecutionDisplayName': 'execution-1731639376305',
 'PipelineExecutionStatus': 'Succeeded',
 'PipelineExperimentConfig': {'ExperimentName': 'kefico-anomaly-detection',
  'TrialName': 'nak4boj3uqbo'},
 'CreationTime': datetime.datetime(2024, 11, 15, 2, 56, 16, 250000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 11, 15, 3, 12, 17, 150000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:ap-northeast-2:242201274000:user-profile/d-l0dltcg6j4kj/default-20240923T230629',
  'UserProfileName': 'default-20240923T230629',
  'DomainId': 'd-l0dltcg6j4kj',
  'IamIdentity': {'Arn': 'arn:aws:sts::242201274000:assumed-role/AmazonSageMaker-ExecutionRole-20240923T230631/SageMaker',
   'PrincipalId': 'AROATQZCSE2IHHIBWPD6G:SageM

In [145]:
execution.wait()

In [146]:
execution.list_steps()

[{'StepName': 'Kefico-AD-Train-And-Evaluate',
  'StartTime': datetime.datetime(2024, 11, 15, 3, 3, 54, 825000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 11, 15, 3, 12, 16, 954000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:ap-northeast-2:242201274000:training-job/pipelines-nak4boj3uqbo-Kefico-AD-Train-And--DXVPaOUvlF'}},
  'AttemptCount': 1},
 {'StepName': 'Kefico-AD-Preprocess',
  'StartTime': datetime.datetime(2024, 11, 15, 2, 56, 17, 609000, tzinfo=tzlocal()),
  'EndTime': datetime.datetime(2024, 11, 15, 3, 3, 54, 382000, tzinfo=tzlocal()),
  'StepStatus': 'Succeeded',
  'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:ap-northeast-2:242201274000:processing-job/pipelines-nak4boj3uqbo-Kefico-AD-Preprocess-9Q51yX4ExW'}},
  'AttemptCount': 1}]

In [147]:
import boto3

def get_proc_artifact(execution, client, kind):
    
    '''
    preprocess 후 전처리 결과물의 S3경로를 획득
    kind = 0 --> train
    kind = 1 --> test
    '''
    response = execution.list_steps()

    proc_arn = response[-1]['Metadata']['ProcessingJob']['Arn']
    # print(proc_arn)
    
    proc_job_name = proc_arn.split('/')[-1]
    # print(proc_job_name)

    response = client.describe_processing_job(ProcessingJobName = proc_job_name)
    print(response['ProcessingOutputConfig']['Outputs'])
    test_preprocessed_file = response['ProcessingOutputConfig']['Outputs'][kind]['S3Output']['S3Uri']
    return test_preprocessed_file


client = boto3.client("sagemaker")

train_preproc_dir_artifact = get_proc_artifact(execution, client, kind=0)
# test_preproc_dir_artifact = get_proc_artifact(execution, client, kind=1)

print('output-train: ', train_preproc_dir_artifact)
# print('output-test : ', test_preproc_dir_artifact)

[{'OutputName': 'train', 'S3Output': {'S3Uri': 's3://sagemaker-ap-northeast-2-242201274000/Kefico-Anomaly-Detection/nak4boj3uqbo/Kefico-AD-Preprocess/output/train', 'LocalPath': '/opt/ml/processing/output', 'S3UploadMode': 'EndOfJob'}, 'AppManaged': False}]
output-train:  s3://sagemaker-ap-northeast-2-242201274000/Kefico-Anomaly-Detection/nak4boj3uqbo/Kefico-AD-Preprocess/output/train
