# start 

In [2]:
import os

import sagemaker
from sagemaker import ScriptProcessor, ModelMetrics, MetricsSource, TrainingInput
from sagemaker.estimator import Estimator
from sagemaker.network import NetworkConfig
from sagemaker.processing import ProcessingInput, ProcessingOutput, ScriptProcessor
from sagemaker.sklearn import SKLearnProcessor, SKLearn
from sagemaker.model import Model
from sagemaker.workflow.condition_step import JsonGet, ConditionStep
from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.parameters import ParameterInteger, ParameterString, ParameterFloat
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.step_collections import RegisterModel
from sagemaker.workflow.steps import ProcessingStep, TrainingStep
from sagemaker.dataset_definition.inputs import (
    AthenaDatasetDefinition,
    DatasetDefinition,
)
from datetime import datetime
import time
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_experiment_config import PipelineExperimentConfig
from sagemaker.workflow.execution_variables import ExecutionVariables
from sagemaker.workflow.functions import Join
from sagemaker.transformer import Transformer
from sagemaker.inputs import TransformInput
from sagemaker.workflow.steps import TransformStep


In [3]:
import boto3
import sagemaker


def get_environment(project_name, ssm_params):
    """Collect environment settings for a SageMaker project into one flat dict.

    The dict is built from three sources, later ones overriding earlier ones:
      1. the ``describe_domain`` response of the Studio domain that created the
         project, with its ``DefaultUserSettings`` entries lifted to the top level;
      2. the ``EnvironmentName`` / ``EnvironmentType`` tags on that domain;
      3. one SSM parameter per entry of ``ssm_params``, read from
         ``<EnvironmentName>-<EnvironmentType>-<ParameterName>``.

    Args:
        project_name: name of the SageMaker project.
        ssm_params: iterable of dicts with keys ``VariableName`` (key to set in
            the result) and ``ParameterName`` (suffix of the SSM parameter name).

    Returns:
        dict with the merged settings. An SSM parameter that cannot be read is
        stored as an empty string (best effort); a warning is printed for it.
    """
    sm = boto3.client("sagemaker")
    ssm = boto3.client("ssm")

    domain_id = sm.describe_project(ProjectName=project_name)["CreatedBy"]["DomainId"]
    domain = sm.describe_domain(DomainId=domain_id)

    # Drop response bookkeeping and timestamps; only the settings are wanted.
    for unwanted in ("ResponseMetadata", "CreationTime", "LastModifiedTime"):
        domain.pop(unwanted, None)

    # Flatten the default user settings (ExecutionRole, SecurityGroups, ...) into the top level.
    user_settings = domain.pop("DefaultUserSettings", {})
    settings = {**domain, **user_settings}

    domain_tags = sm.list_tags(ResourceArn=settings["DomainArn"])["Tags"]
    env = {
        **settings,
        **{t["Key"]: t["Value"]
           for t in domain_tags
           if t["Key"] in ["EnvironmentName", "EnvironmentType"]}
    }

    for p in ssm_params:
        variable_name = p["VariableName"]
        try:
            parameter_name = f"{env['EnvironmentName']}-{env['EnvironmentType']}-{p['ParameterName']}"
            env[variable_name] = ssm.get_parameter(Name=parameter_name)["Parameter"]["Value"]
        except Exception as exc:
            # Best effort: keep going with an empty value, but do not hide the
            # reason (missing tag, missing parameter, access denied, ...).
            print(f"WARNING: could not load SSM parameter for {variable_name}: {exc!r}")
            env[variable_name] = ""

    return env


def get_session(region, default_bucket):
    """Create a SageMaker session (and its low-level client) for a region.

    Args:
        region: AWS region the boto3 session is bound to.
        default_bucket: bucket passed to the SageMaker session as its default bucket.

    Returns:
        A tuple ``(sagemaker.session.Session, sagemaker boto3 client)``.
    """
    aws_session = boto3.Session(region_name=region)

    # Both clients come from the same boto3 session so they share the region.
    sm_client = aws_session.client("sagemaker")
    sm_runtime_client = aws_session.client("sagemaker-runtime")

    print(f"Sarah: ml_pipelines > utiles > environments.py > sagemaker.__version__ is {sagemaker.__version__}")

    sm_session = sagemaker.session.Session(
        boto_session=aws_session,
        sagemaker_client=sm_client,
        sagemaker_runtime_client=sm_runtime_client,
        default_bucket=default_bucket,
    )
    return sm_session, sm_client


def environment_data(project_name):
    """Load the environment dict for a project, including its SSM-backed settings.

    Args:
        project_name: name of the SageMaker project.

    Returns:
        The dict from ``get_environment`` with ``ProcessingRole`` and
        ``TrainingRole`` both set to the domain's ``ExecutionRole``.
    """
    # (result key, SSM parameter-name suffix) pairs to resolve from the parameter store.
    ssm_lookup = (
        ("DataBucketName", "data-bucket-name"),
        ("ModelBucketName", "model-bucket-name"),
        ("S3KmsKeyId", "kms-s3-key-arn"),
        ("EbsKmsKeyArn", "kms-ebs-key-arn"),
        ("TrustedDefaultKinesisAccount", "TrustedDefaultKinesisAccount"),
    )
    ssm_parameters = [
        {"VariableName": variable, "ParameterName": parameter}
        for variable, parameter in ssm_lookup
    ]

    env_data = get_environment(project_name=project_name, ssm_params=ssm_parameters)

    # Processing and training jobs run under the domain's execution role.
    execution_role = env_data["ExecutionRole"]
    env_data.update(ProcessingRole=execution_role, TrainingRole=execution_role)
    return env_data


# get_latest_model_metadata

In [4]:
def get_latest_model_metadata(sm_client, model_package_group_name):
    """Describe the newest model package of a group that is pending manual approval.

    Args:
        sm_client: SageMaker client; must provide ``get_paginator`` and
            ``describe_model_package``.
        model_package_group_name: model package group to search.

    Returns:
        The ``describe_model_package`` response of the most recently created
        package whose approval status is ``PendingManualApproval``.

    Raises:
        IndexError: if the group contains no package with that status.
    """
    print("SARAH: Inference > get_latest_model_metadata()")
    pages = sm_client.get_paginator('list_model_packages').paginate(
        ModelPackageGroupName=model_package_group_name,
        ModelApprovalStatus='PendingManualApproval',
        SortBy="CreationTime",
        SortOrder="Descending",
    )
    # Results are sorted newest first, so the first summary seen is the one
    # wanted; there is no need to fetch the remaining pages.
    latest_package = next(
        (summary for page in pages for summary in page["ModelPackageSummaryList"]),
        None,
    )
    if latest_package is None:
        raise IndexError(
            f"No model package with approval status 'PendingManualApproval' "
            f"found in model package group '{model_package_group_name}'"
        )
    model_metadata = sm_client.describe_model_package(ModelPackageName=latest_package["ModelPackageArn"])
    print(model_metadata)
    print("SARAH: Inference > get_latest_model_metadata() END")
    return model_metadata


In [5]:
import os
import json



def list_files(startpath):
    """Print the directory tree under ``startpath``, indented four spaces per level.

    Each directory is printed as ``name/`` and its files are listed one level
    deeper. The depth is computed relative to the normalised start path, so a
    trailing separator on ``startpath`` does not shift the indentation.

    Args:
        startpath: directory to walk. An empty string prints nothing.
    """
    if not startpath:
        return

    base = os.path.normpath(startpath)
    for root, _dirs, files in os.walk(base):
        relative = os.path.relpath(root, base)
        # The start directory itself is level 0; each path component below it adds one.
        level = 0 if relative == os.curdir else relative.count(os.sep) + 1
        indent = ' ' * 4 * level
        print('{}{}/'.format(indent, os.path.basename(root)))
        subindent = ' ' * 4 * (level + 1)
        for f in files:
            print('{}{}'.format(subindent, f))


def get_pipeline(
        region,
        project_name=None,
        source_scripts_path="./",
        model_package_group_name="AbalonePackageGroup",
        pipeline_name="AbalonePipeline",
        base_job_prefix="Abalone",
        revision="no-revision-provided",):
    """Build the SageMaker inference pipeline for a project.

    Loads the project's environment data, opens a SageMaker session on the
    environment's data bucket, looks up the latest model package of the group
    and hands everything to ``standard_model_pipeline``.

    Args:
        region: AWS region to create and run the pipeline in.
        project_name: SageMaker project whose environment data is loaded.
        source_scripts_path: location of the processing source scripts.
        model_package_group_name: group the latest model package is read from.
        pipeline_name: name given to the pipeline.
        base_job_prefix: prefix for the job names created by the pipeline.
        revision: source revision identifier, forwarded unchanged.

    Returns:
        an instance of a pipeline
    """
    # Environment settings drive buckets, roles, networking and KMS keys.
    env_data = environment_data(project_name)
    print(f"Environment data:\n{json.dumps(env_data, indent=2)}")
    print(f"SARAH: get_pipeline() > source_scripts_path={source_scripts_path}")

    session, sm_client = get_session(region, env_data["DataBucketName"])
    bucket = session.default_bucket()
    working_dir = os.getcwd()

    print(f"Creating the pipeline '{pipeline_name}':")
    print(
        f"Parameters:{region}\n{env_data['SecurityGroups']}\n{env_data['SubnetIds']}\n{env_data['ProcessingRole']}\n"
        f"    {env_data['TrainingRole']}\n{env_data['DataBucketName']}\n{env_data['ModelBucketName']}\n{model_package_group_name}\n"
        f"    {pipeline_name}\n{base_job_prefix}\n{env_data['TrustedDefaultKinesisAccount']}"
    )

    model_metadata = get_latest_model_metadata(sm_client, model_package_group_name)
    print(f"SARAH: get_pipeline() > source_scripts_path={source_scripts_path}, model_metadata={model_metadata}")

    return standard_model_pipeline(
        base_job_prefix=base_job_prefix,
        default_bucket=bucket,
        env_data=env_data,
        model_package_group_name=model_package_group_name,
        pipeline_name=pipeline_name,
        region=region,
        sagemaker_session=session,
        base_dir=working_dir,
        source_scripts_path=source_scripts_path,
        model_metadata=model_metadata,
        project=project_name,
        revision=revision,
    )




# standard_model_pipeline

In [6]:
def standard_model_pipeline(base_job_prefix, default_bucket, env_data, model_package_group_name, pipeline_name, region,
                            sagemaker_session, base_dir, source_scripts_path, model_metadata, project="standard_model",
                            revision="none", purpose="p1033"):
    """Assemble the CVM batch-inference pipeline.

    The pipeline consists of four steps: preprocessing, model creation,
    batch transform and postprocessing. The transform step is made to depend
    on preprocessing, and postprocessing on the transform step.

    Args:
        base_job_prefix: prefix for the processing job names.
        default_bucket: only logged here.
        env_data: environment dict. Reads ``DataBucketName``, ``SubnetIds``
            (first two entries), ``SecurityGroups`` (first entry),
            ``EbsKmsKeyArn``, ``S3KmsKeyId`` and ``ProcessingRole``.
        model_package_group_name: only logged here.
        pipeline_name: name of the pipeline; also part of the S3 prefix.
        region: not used by this function.
        sagemaker_session: session used by the processors, model and pipeline.
        base_dir: only logged here.
        source_scripts_path: location of the preprocessing helper scripts.
        model_metadata: ``describe_model_package`` response; supplies the
            container image, model data and the ``preprocess`` / ``postprocess``
            script paths from ``CustomerMetadataProperties``.
        project: project name; part of the S3 prefix.
        revision: only logged here.
        purpose: default value of the ``Purpose`` pipeline parameter.

    Returns:
        The configured ``Pipeline`` instance (not yet upserted or started).
    """
    print(f"SARAH: base_job_prefix={base_job_prefix}")
    print(f"Sarah: default_bucket={default_bucket}")
    print(f"SArah: env_data={env_data}")
    print(f"sarah: model_package_group_name={model_package_group_name}")
    print(f"Sraah: pipeline_name={pipeline_name}")
    print(f"Sarah: base_dir={base_dir}")
    print(f"sarah: source_scripts_path={source_scripts_path}")
    print(f"sarah: model_metadata={model_metadata}")
    print(f"sarah: revision={revision}")

    # --- pipeline parameters (overridable per execution) ---
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(name="ProcessingInstanceType", default_value="ml.m5.xlarge")
    data_bucket = ParameterString(name="DataBucket", default_value=env_data["DataBucketName"])
    purpose_param = ParameterString(name="Purpose", default_value=purpose)
    # Expected to be supplied by the caller (e.g. the CodeBuild build id); "0000000000" is a placeholder.
    trigger_id = ParameterString(name="TriggerID", default_value="0000000000")
    database = ParameterString(name="DataBase", default_value="ml-se-sample-data_rl")
    table = ParameterString(name="AbaloneTable", default_value="master")
    filter_param = ParameterString(name="FilterRings", default_value="disabled")
    # NOTE: the default is the time this definition was built, not the time an execution starts.
    nowgmt = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
    execution_time = ParameterString(name="ExecutionTime", default_value=nowgmt)

    # --- S3 layout, resolved at execution time ---
    prefix_path = Join(on='/', values=["lifecycle/60d", project, pipeline_name, trigger_id, purpose_param])
    # 's3:/' joined with '/' yields the 's3://<bucket>/...' scheme prefix.
    data_base_path = Join(on='/', values=['s3:/', data_bucket, prefix_path])
    batch_data = Join(on='/', values=[data_base_path, 'model-input'])
    inference_output = Join(on='/', values=[data_base_path, 'results'])

    # --- fixed environment values (not exposed as pipeline parameters) ---
    subnet1 = env_data["SubnetIds"][0]
    subnet2 = env_data["SubnetIds"][1]
    securitygroup = env_data["SecurityGroups"][0]
    volume_kms_key = env_data["EbsKmsKeyArn"]
    output_kms_key = env_data["S3KmsKeyId"]
    processing_role = env_data["ProcessingRole"]

    # configure network for encryption, network isolation and VPC configuration
    # Since the preprocessor job takes the data from S3, enable_network_isolation must be set to False
    # see https://github.com/aws/amazon-sagemaker-examples/issues/1689
    network_config = NetworkConfig(
        enable_network_isolation=False,
        security_group_ids=[securitygroup],
        subnets=[subnet1, subnet2],
        encrypt_inter_container_traffic=True)

    vpc_config = {
        "Subnets": network_config.subnets,
        "SecurityGroupIds": network_config.security_group_ids
    }

    step_process = preprocessing(base_job_prefix=base_job_prefix,
                                 network_config=network_config,
                                 processing_instance_count=processing_instance_count,
                                 processing_instance_type=processing_instance_type,
                                 sagemaker_session=sagemaker_session,
                                 source_scripts_path=source_scripts_path,
                                 preprocess_script_path=model_metadata["CustomerMetadataProperties"]["preprocess"],
                                 batch_data=batch_data,
                                 database=database,
                                 table=table,
                                 filter=filter_param,
                                 volume_kms_key=volume_kms_key,
                                 output_kms_key=output_kms_key,
                                 processing_role=processing_role,
                                 execution_time=execution_time
    )

    step_create_model = create_model_tasks(
        sagemaker_session=sagemaker_session,
        vpc_config=vpc_config,
        model_metadata=model_metadata,
        processing_role=processing_role,
    )

    # Referencing the create-model step's ModelName property feeds its output into the transform step.
    step_inference = inference_tasks(
        model_name=step_create_model.properties.ModelName,
        batch_data=batch_data,
        output_data_path=inference_output,
        volume_kms_key=volume_kms_key,
        output_kms_key=output_kms_key,
        instance_type="ml.m5.large",
    )

    post_process = postprocessing(base_job_prefix=base_job_prefix,
                                  network_config=network_config,
                                  processing_instance_count=processing_instance_count,
                                  processing_instance_type=processing_instance_type,
                                  sagemaker_session=sagemaker_session,
                                  postprocess_script_path=model_metadata["CustomerMetadataProperties"]["postprocess"],
                                  volume_kms_key=volume_kms_key,
                                  output_kms_key=output_kms_key,
                                  processing_role=processing_role,
                                  trigger_id=trigger_id,
                                  inference_output=inference_output,
    )

    # Explicit ordering: preprocessing -> transform -> postprocessing.
    step_inference.add_depends_on([step_process])
    post_process.add_depends_on([step_inference])

    # pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_count,
            processing_instance_type,
            data_bucket,
            purpose_param,
            trigger_id,
            execution_time,
            database,
            table,
            filter_param
        ],
        steps=[step_process, step_create_model, step_inference, post_process],
        sagemaker_session=sagemaker_session,
    )
    return pipeline



# preprocessing

In [7]:
def preprocessing(base_job_prefix,
                  network_config,
                  processing_instance_count,
                  processing_instance_type,
                  sagemaker_session,
                  source_scripts_path,
                  preprocess_script_path,
                  batch_data,
                  volume_kms_key,
                  output_kms_key,
                  database,
                  table,
                  filter,
                  processing_role,
                  execution_time
                  ):
    """Create the ``PreprocessCVMData`` processing step (feature engineering).

    Args:
        base_job_prefix: prefix of the processing job name.
        network_config: network configuration for the processing job.
        processing_instance_count: number of processing instances.
        processing_instance_type: instance type of the processing job.
        sagemaker_session: session the processor runs with.
        source_scripts_path: directory holding ``preprocessing/utils/``.
        preprocess_script_path: script executed by the step.
        batch_data: S3 destination of the ``inference`` output.
        volume_kms_key: KMS key for the job's storage volume.
        output_kms_key: KMS key for the job's output.
        database: passed to the script as ``--database``.
        table: passed to the script as ``--table``.
        filter: passed to the script as ``--filter``.
        processing_role: IAM role of the processing job.
        execution_time: passed to the script as ``--executiontime``.

    Returns:
        The configured ``ProcessingStep``.
    """
    job_name = f"{base_job_prefix}/sklearn-cvm-preprocess"
    print(f"Sarah: inference > standard_model > preprocessing : base_job_name={job_name}")

    processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=job_name,
        sagemaker_session=sagemaker_session,
        role=processing_role,
        network_config=network_config,
        volume_kms_key=volume_kms_key,
        output_kms_key=output_kms_key
    )

    # Helper modules are mounted next to the script inside the container.
    utils_input = ProcessingInput(
        source=f'{source_scripts_path}/preprocessing/utils/',
        destination="/opt/ml/processing/input/code/utils/",
    )
    # The script's output directory is uploaded to the batch-transform input location.
    inference_data = ProcessingOutput(
        output_name="inference",
        source="/opt/ml/processing/inference-test/",
        destination=batch_data,
    )
    script_arguments = [
        "--context", "inference",
        "--executiontime", execution_time,
        "--database", database,
        "--table", table,
        "--filter", filter,
        "--ref_period", '2021-06-30',
        "--ref_date", '2021-06-30',
    ]

    return ProcessingStep(
        name="PreprocessCVMData",
        processor=processor,
        inputs=[utils_input],
        outputs=[inference_data],
        code=preprocess_script_path,
        job_arguments=script_arguments,
    )

# create_model_tasks

In [8]:
def create_model_tasks(sagemaker_session,
                       model_metadata, processing_role, vpc_config,
                       instance_type="ml.m5.large",
                       accelerator_type="ml.eia1.medium"):
    """Create the ``CVMCreateModel`` step from a model package description.

    Args:
        sagemaker_session: session the model is bound to.
        model_metadata: ``describe_model_package`` response; the image and the
            model data URL are taken from its first inference container.
        processing_role: IAM role assigned to the model.
        vpc_config: dict with ``Subnets`` and ``SecurityGroupIds``.
        instance_type: instance type given to the create-model input.
        accelerator_type: accelerator type given to the create-model input.

    Returns:
        The configured ``CreateModelStep``.
    """
    from sagemaker.inputs import CreateModelInput
    from sagemaker.workflow.steps import CreateModelStep

    container = model_metadata["InferenceSpecification"]["Containers"][0]
    image_uri = container["Image"]
    model_data = container["ModelDataUrl"]
    print(f"Sarah: inference > standard_model > create_model_tasks: model_data={model_data}")

    cvm_model = Model(
        image_uri=image_uri,
        model_data=model_data,
        sagemaker_session=sagemaker_session,
        role=processing_role,
        vpc_config=vpc_config
    )
    model_input = CreateModelInput(
        instance_type=instance_type,
        accelerator_type=accelerator_type,
    )
    step_create_model = CreateModelStep(
        name="CVMCreateModel",
        model=cvm_model,
        inputs=model_input,
    )

    print("sarah: inference > standard_model > create_model_tasks END")
    return step_create_model

In [9]:
# Show the notebook's current working directory.
os.getcwd()

'/root/sagemaker-customerone2-p-vrs1c6dm1yir-model-build-train/notebooks'

# inference_tasks

In [10]:
def inference_tasks(
                    model_name,
                    output_data_path,
                    batch_data,
                    volume_kms_key,
                    output_kms_key,
                    instance_type="ml.m5.large"
    ):
    """Create the ``CVMTransform`` batch-transform step.

    Args:
        model_name: name of the model to run (may be a step property).
        output_data_path: S3 location the transform results are written to.
        batch_data: S3 location of the CSV input data.
        volume_kms_key: KMS key for the transform job's storage volume.
        output_kms_key: KMS key for the transform output.
        instance_type: instance type of the transform job (one instance is used).

    Returns:
        The configured ``TransformStep``.
    """
    print("sarah: inference > standard_model > inference_tasks starts")
    transformer = Transformer(
        model_name=model_name,
        instance_type=instance_type,
        instance_count=1,
        output_path=output_data_path,
        volume_kms_key=volume_kms_key,
        output_kms_key=output_kms_key
    )

    step_inference = TransformStep(
        name="CVMTransform",
        transformer=transformer,
        inputs=TransformInput(data=batch_data, content_type="text/csv")
    )
    print("sarah: inference > standard_model > inference_tasks END")
    return step_inference

# postprocessing

In [11]:
def postprocessing(base_job_prefix,
                   network_config,
                   processing_instance_count,
                   processing_instance_type,
                   sagemaker_session,
                   postprocess_script_path,
                   volume_kms_key,
                   output_kms_key,
                   processing_role,
                   trigger_id,
                   inference_output,
                   # source_account
                   ):
    """Create the ``PostprocessCVMData`` processing step that runs after inference.

    Args:
        base_job_prefix: prefix of the processing job name.
        network_config: network configuration for the processing job.
        processing_instance_count: number of processing instances.
        processing_instance_type: instance type of the processing job.
        sagemaker_session: session the processor runs with.
        postprocess_script_path: script executed by the step.
        volume_kms_key: KMS key for the job's storage volume.
        output_kms_key: KMS key for the job's output.
        processing_role: IAM role of the processing job.
        trigger_id: passed to the script as ``--triggerid``.
        inference_output: passed to the script as ``--inferenceoutput``.

    Returns:
        The configured ``ProcessingStep``.
    """
    print("sarah: inference > standard_model > postprocessing start")

    processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-cvm-postprocess",
        sagemaker_session=sagemaker_session,
        role=processing_role,
        network_config=network_config,
        volume_kms_key=volume_kms_key,
        output_kms_key=output_kms_key
    )

    # The source-account argument is currently disabled, matching the signature above.
    script_arguments = [
        "--context", "postprocess",
        "--triggerid", trigger_id,
        "--inferenceoutput", inference_output,
    ]
    post_process = ProcessingStep(
        name="PostprocessCVMData",
        processor=processor,
        code=postprocess_script_path,
        job_arguments=script_arguments,
    )

    print("sarah: inference > standard_model > postprocessing END")
    return post_process

# get_pipeline

In [12]:
# Build the inference pipeline definition for the 'customerone-inf' project.
# NOTE(review): source_scripts_path is an absolute path to one specific checkout;
# adjust it when running from another location.
pipeline_cvu=get_pipeline(
        region='eu-north-1',
        project_name='customerone-inf',
        source_scripts_path="/root/sagemaker-customerone2-p-vrs1c6dm1yir-model-build-train/source_scripts/",
        model_package_group_name="customerone-inf-p-0mogq7hgpkye",
        pipeline_name="debugging-inf-cv2",
        base_job_prefix="CVM",
        revision="no-revision-provided",)

Environment data:
{
  "DomainArn": "arn:aws:sagemaker:eu-north-1:370702650160:domain/d-tdizim9qnor9",
  "DomainId": "d-tdizim9qnor9",
  "DomainName": "mlops-dev-eu-north-1-sagemaker-domain",
  "HomeEfsFileSystemId": "fs-03fc3d37f8623fea2",
  "Status": "InService",
  "AuthMode": "IAM",
  "AppNetworkAccessType": "VpcOnly",
  "SubnetIds": [
    "subnet-0724be5e7071e7070",
    "subnet-01def51ffe7467c71"
  ],
  "Url": "https://d-tdizim9qnor9.studio.eu-north-1.sagemaker.aws",
  "VpcId": "vpc-0459a28f3637e285c",
  "KmsKeyId": "f4664542-0f2e-42ca-b51f-2bec0ad62278",
  "ExecutionRole": "arn:aws:iam::370702650160:role/sm-mlops-env-EnvironmentIAM-SageMakerExecutionRole-14AU65MVMBUGO",
  "SecurityGroups": [
    "sg-041054ee4500f96f6"
  ],
  "JupyterServerAppSettings": {
    "DefaultResourceSpec": {
      "SageMakerImageArn": "arn:aws:sagemaker:eu-north-1:243637512696:image/jupyter-server-3",
      "InstanceType": "system",
      "LifecycleConfigArn": "arn:aws:sagemaker:eu-north-1:370702650160:stud

The input argument instance_type of function (sagemaker.image_uris.retrieve) is a pipeline variable (<class 'sagemaker.workflow.parameters.ParameterString'>), which is not allowed. The default_value of this Parameter object will be used to override it.
The input argument instance_type of function (sagemaker.image_uris.retrieve) is a pipeline variable (<class 'sagemaker.workflow.parameters.ParameterString'>), which is not allowed. The default_value of this Parameter object will be used to override it.


{'ModelPackageGroupName': 'customerone-inf-p-0mogq7hgpkye', 'ModelPackageVersion': 5, 'ModelPackageArn': 'arn:aws:sagemaker:eu-north-1:370702650160:model-package/customerone-inf-p-0mogq7hgpkye/5', 'CreationTime': datetime.datetime(2022, 10, 3, 14, 11, 11, 584000, tzinfo=tzlocal()), 'InferenceSpecification': {'Containers': [{'Image': '662702820516.dkr.ecr.eu-north-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3', 'ImageDigest': 'sha256:4b41b03ae858fca42720730b06e2803b3b046c676c143526cba9ed5a3483e4ad', 'ModelDataUrl': 's3://mlops-dev-370702650160-eu-north-1-data/lifecycle/60d/customerone-inf/601f446/2022_10_03_13_36_50/p1033/output/training/output/pipelines-u5lr8glo2o8k-ModelSelectionStep-MgOYTUvvLF/output/model.tar.gz'}], 'SupportedTransformInstanceTypes': ['ml.m5.xlarge'], 'SupportedRealtimeInferenceInstanceTypes': ['ml.t2.medium', 'ml.m5.xlarge'], 'SupportedContentTypes': ['text/csv'], 'SupportedResponseMIMETypes': ['text/csv']}, 'ModelPackageStatus': 'Completed', 'ModelPackageS

In [13]:
import json 
# Pretty-print the generated pipeline definition (JSON) for inspection.
parsed = json.loads(pipeline_cvu.definition())
print(json.dumps(parsed, indent=2, sort_keys=True))

# Or, for the raw JSON string:
# pipeline_cvu.definition()

{
  "Metadata": {},
  "Parameters": [
    {
      "DefaultValue": 1,
      "Name": "ProcessingInstanceCount",
      "Type": "Integer"
    },
    {
      "DefaultValue": "ml.m5.xlarge",
      "Name": "ProcessingInstanceType",
      "Type": "String"
    },
    {
      "DefaultValue": "mlops-dev-370702650160-eu-north-1-data",
      "Name": "DataBucket",
      "Type": "String"
    },
    {
      "DefaultValue": "p1033",
      "Name": "Purpose",
      "Type": "String"
    },
    {
      "DefaultValue": "0000000000",
      "Name": "TriggerID",
      "Type": "String"
    },
    {
      "DefaultValue": "2022-10-04 14:37:51",
      "Name": "ExecutionTime",
      "Type": "String"
    },
    {
      "DefaultValue": "ml-se-sample-data_rl",
      "Name": "DataBase",
      "Type": "String"
    },
    {
      "DefaultValue": "master",
      "Name": "AbaloneTable",
      "Type": "String"
    },
    {
      "DefaultValue": "disabled",
      "Name": "FilterRings",
      "Type": "String"
    }
  ],
  "Pi

# start the pipeline

In [14]:
# Create the pipeline, or update it if it already exists, under the pipeline execution role.
# NOTE(review): the role ARN is hard-coded for one account/environment.
upsert_response = pipeline_cvu.upsert(
            role_arn="arn:aws:iam::370702650160:role/sm-mlops-env-EnvironmentI-SageMakerPipelineExecuti-1AWTL5A5UKOHN"
        )

In [15]:
# Display the upsert response (pipeline ARN and HTTP metadata).
upsert_response

{'PipelineArn': 'arn:aws:sagemaker:eu-north-1:370702650160:pipeline/debugging-inf-cv2',
 'ResponseMetadata': {'RequestId': 'e9acc4c0-0970-4452-b0ac-cdc4e7ae2bca',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e9acc4c0-0970-4452-b0ac-cdc4e7ae2bca',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '86',
   'date': 'Tue, 04 Oct 2022 14:37:52 GMT'},
  'RetryAttempts': 0}}

In [16]:
# Start an execution of the pipeline using the parameters' default values.
pipeline_cvu.start()

_PipelineExecution(arn='arn:aws:sagemaker:eu-north-1:370702650160:pipeline/debugging-inf-cv2/execution/kpkfczubc531', sagemaker_session=<sagemaker.session.Session object at 0x7f2de622a950>)