In [1]:
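# One-time setup: uncomment to install the `import_ipynb` helper, which is
# imported below before the step notebooks are imported as modules.
# %pip install -q import-ipynb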

In [10]:
import sagemaker
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_definition_config import PipelineDefinitionConfig
from sagemaker.model_metrics import MetricsSource, ModelMetrics 
from sagemaker.workflow.functions import Join

In [11]:
import import_ipynb
import b4_pre_processing_step
import c2_training_step
import d2_post_processing_step

In [12]:
# Build the pipeline steps. Each factory comes from a sibling notebook that is
# imported as a module (b4_pre_processing_step, c2_training_step,
# d2_post_processing_step).
pre_processing_step = b4_pre_processing_step.create_pre_processing_step()

training_step = c2_training_step.create_training_step()
# Explicit ordering: training is declared to depend on pre-processing.
training_step.add_depends_on([pre_processing_step])

# The post-processing factory receives the training step object itself
# (presumably to reference its outputs -- confirm in d2_post_processing_step).
post_processing_step = d2_post_processing_step.create_post_processing_step(training_step)
# Explicit ordering: post-processing is declared to depend on training.
post_processing_step.add_depends_on([training_step])

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [13]:
# Build the model-statistics metrics source from the evaluation file located
# under the post-processing step's output location.
# NOTE(review): index 0 assumes the evaluation file lands in the step's first
# declared processing output -- confirm against d2_post_processing_step.
evaluation_output_uri = post_processing_step.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]

# Join defers the path concatenation, so the URI may itself be a pipeline variable.
s3_evaluation_uri = Join(
    on='/',
    values=[evaluation_output_uri, "0_evaluation.json"],
)
evaluation_source = MetricsSource(
    s3_uri=s3_evaluation_uri,
    content_type="application/json",
)
model_metrics = ModelMetrics(model_statistics=evaluation_source)



In [14]:
from sagemaker.workflow.step_collections import RegisterModel

# Registration step: registers the training step's model artifacts in the
# 'poc-model' package group with the evaluation metrics attached. It is
# declared to run after both training and post-processing.
register_model_kwargs = dict(
    name="Register_Model",
    estimator=training_step.estimator,
    model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
    content_types=["application/json"],
    response_types=["application/json"],
    # inference_instances=['ml.m5.large'],  # currently disabled
    model_package_group_name='poc-model',
    model_metrics=model_metrics,
    depends_on=[training_step, post_processing_step],
)
register_model_step = RegisterModel(**register_model_kwargs)

In [15]:
# Assemble the pipeline from the four steps built in the cells above.
pipeline_steps = [
    pre_processing_step,
    training_step,
    post_processing_step,
    register_model_step,
]
pipeline = Pipeline(name='poc-pipeline', steps=pipeline_steps)

# Render the pipeline definition (a JSON string) and print it for inspection.
pipeline_definition = pipeline.definition()
print(pipeline_definition)



{"Version": "2020-12-01", "Metadata": {}, "Parameters": [], "PipelineExperimentConfig": {"ExperimentName": {"Get": "Execution.PipelineName"}, "TrialName": {"Get": "Execution.PipelineExecutionId"}}, "Steps": [{"Name": "pre-processing", "Type": "Processing", "Arguments": {"ProcessingResources": {"ClusterConfig": {"InstanceType": "ml.m5.large", "InstanceCount": 1, "VolumeSizeInGB": 30}}, "AppSpecification": {"ImageUri": "141502667606.dkr.ecr.eu-west-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3", "ContainerArguments": ["--input_path", "/opt/ml/processing/input", "--input_file", "bank-additional-full.csv", "--output_path", "/opt/ml/processing/output"], "ContainerEntrypoint": ["python3", "/opt/ml/processing/input/code/b1_pre_processing.py"]}, "RoleArn": "arn:aws:iam::864814979818:role/service-role/AmazonSageMaker-ExecutionRole-20240820T163044", "ProcessingInputs": [{"InputName": "input-1", "AppManaged": false, "S3Input": {"S3Uri": "s3://ktzouvan-trading-point-sagemaker-poc/datasets/

In [16]:
# Upsert the pipeline definition to SageMaker under the current execution role.
# The call stays the cell's last expression so its response is displayed.
execution_role_arn = sagemaker.get_execution_role()
pipeline.upsert(role_arn=execution_role_arn)



{'PipelineArn': 'arn:aws:sagemaker:eu-west-1:864814979818:pipeline/poc-pipeline',
 'ResponseMetadata': {'RequestId': '0258a566-95a3-46ad-ad27-d8145cc23454',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '0258a566-95a3-46ad-ad27-d8145cc23454',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '80',
   'date': 'Tue, 03 Sep 2024 13:05:58 GMT'},
  'RetryAttempts': 0}}

In [9]:
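# Left disabled so that running the notebook only upserts the pipeline.
# Uncomment to start a pipeline execution.
# pipeline.start()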