In [1]:
import os

import sagemaker
from sagemaker import ScriptProcessor, ModelMetrics, MetricsSource, TrainingInput
from sagemaker.estimator import Estimator
from sagemaker.network import NetworkConfig
from sagemaker.processing import ProcessingInput, ProcessingOutput, ScriptProcessor
from sagemaker.sklearn import SKLearnProcessor, SKLearn
from sagemaker.workflow.condition_step import JsonGet, ConditionStep
from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo
from sagemaker.workflow.parameters import ParameterInteger, ParameterString, ParameterFloat
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.step_collections import RegisterModel
from sagemaker.workflow.steps import ProcessingStep, TrainingStep
from sagemaker.dataset_definition.inputs import (
    AthenaDatasetDefinition,
    DatasetDefinition,
)
from datetime import datetime
import time
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_experiment_config import PipelineExperimentConfig
from sagemaker.workflow.execution_variables import ExecutionVariables

In [2]:
import boto3
import sagemaker


def get_environment(project_name, ssm_params):
    sm = boto3.client("sagemaker")
    ssm = boto3.client("ssm")

    r = sm.describe_domain(
            DomainId=sm.describe_project(
                ProjectName=project_name
                )["CreatedBy"]["DomainId"]
        )
    del r["ResponseMetadata"]
    del r["CreationTime"]
    del r["LastModifiedTime"]
    r = {**r, **r["DefaultUserSettings"]}
    del r["DefaultUserSettings"]

    i = {
        **r,
        **{t["Key"]:t["Value"]
            for t in sm.list_tags(ResourceArn=r["DomainArn"])["Tags"]
            if t["Key"] in ["EnvironmentName", "EnvironmentType"]}
    }

    for p in ssm_params:
        try:
            i[p["VariableName"]] = ssm.get_parameter(Name=f"{i['EnvironmentName']}-{i['EnvironmentType']}-{p['ParameterName']}")["Parameter"]["Value"]
        except:
            i[p["VariableName"]] = ""

    return i


def get_session(region, default_bucket):
    """Gets the sagemaker session based on the region.

    Args:
        region: the aws region to start the session
        default_bucket: the bucket to use for storing the artifacts

    Returns:
        sagemaker.session.Session instance
    """

    boto_session = boto3.Session(region_name=region)

    sagemaker_client = boto_session.client("sagemaker")
    runtime_client = boto_session.client("sagemaker-runtime")
    print(f"Sarah: ml_pipelines > utiles > environments.py > sagemaker.__version__ is {sagemaker.__version__}")
    return sagemaker.session.Session(
        boto_session=boto_session,
        sagemaker_client=sagemaker_client,
        sagemaker_runtime_client=runtime_client,
        default_bucket=default_bucket,
    ), sagemaker_client


def environment_data(project_name):
    # Dynamically load environmental SSM parameters - provide the list of the variables to load from SSM parameter store
    ssm_parameters = [
        {"VariableName": "DataBucketName", "ParameterName": "data-bucket-name"},
        {"VariableName": "ModelBucketName", "ParameterName": "model-bucket-name"},
        {"VariableName": "S3KmsKeyId", "ParameterName": "kms-s3-key-arn"},
        {"VariableName": "EbsKmsKeyArn", "ParameterName": "kms-ebs-key-arn"},
        {"VariableName": "TrustedDefaultKinesisAccount", "ParameterName": "TrustedDefaultKinesisAccount"},
    ]
    env_data = get_environment(project_name=project_name, ssm_params=ssm_parameters)
    env_data["ProcessingRole"] = env_data["ExecutionRole"]
    env_data["TrainingRole"] = env_data["ExecutionRole"]
    
    return env_data


In [3]:
import os
import json



def list_files(startpath):
    for root, dirs, files in os.walk(startpath):
        level = root.replace(startpath, '').count(os.sep)
        indent = ' ' * 4 * (level)
        print('{}{}/'.format(indent, os.path.basename(root)))
        subindent = ' ' * 4 * (level + 1)
        for f in files:
            print('{}{}'.format(subindent, f))


def get_pipeline(
        region,
        project_name=None,
        source_scripts_path="./",
        model_package_group_name="AbalonePackageGroup",
        pipeline_name="AbalonePipeline",
        base_job_prefix="Abalone",
        revision="no-revision-provided",):
    """Gets a SageMaker ML Pipeline instance working with on abalone data.

    Args:
        region: AWS region to create and run the pipeline.
        @todo arg. definitions

    Returns:
        an instance of a pipeline
    """

    # get env data
    env_data = environment_data(project_name)
    print(f"Sarah: Environment data:\n{json.dumps(env_data, indent=2)}")

    sagemaker_session, sagemaker_client = get_session(region, env_data["DataBucketName"])
    default_bucket = sagemaker_session.default_bucket()
    base_dir = os.getcwd()
    print(f"Sarah: Creating the pipeline '{pipeline_name}':")
    print(f"Parameters:{region}\n{env_data['SecurityGroups']}\n{env_data['SubnetIds']}\n{env_data['ProcessingRole']}\n\
    {env_data['TrainingRole']}\n{env_data['DataBucketName']}\n{env_data['ModelBucketName']}\n{model_package_group_name}\n\
    {pipeline_name}\n{base_job_prefix}")

    pipeline = standard_model_pipeline(
        base_job_prefix=base_job_prefix,
        default_bucket=default_bucket,
        env_data=env_data,
        model_package_group_name=model_package_group_name,
        pipeline_name=pipeline_name,
        region=region,
        sagemaker_session=sagemaker_session,
        base_dir=base_dir,
        source_scripts_path=source_scripts_path,
        project=project_name,
        revision=revision)
    return pipeline




In [4]:
def standard_model_pipeline(base_job_prefix, default_bucket, env_data, model_package_group_name, pipeline_name,
                            region, sagemaker_session, base_dir, source_scripts_path, project="standard_model",
                            revision="none", purpose="p1033"):
    # parameters for pipeline execution
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1) # used in sample application
    processing_instance_type = ParameterString(name="ProcessingInstanceType", default_value="ml.m5.xlarge") # used in sample application
    training_instance_type = ParameterString(name="TrainingInstanceType", default_value="ml.m5.xlarge") # used in sample application
    training_instance_count = "1"
    inference_instance_type = ParameterString(name="InferenceInstanceType", default_value="ml.m5.large")
    hpo_tuner_instance_type = ParameterString(name="HPOTunerScriptInstanceType", default_value="ml.t3.medium")
    model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="PendingManualApproval") # used in sample application
    role = "arn:aws:iam::370702650160:role/sm-mlops-env-EnvironmentI-SageMakerPipelineExecuti-1AWTL5A5UKOHN"
    # role = "arn:aws:iam::370702650160:role/sm-mlops-env-EnvironmentIAM-SageMakerExecutionRole-14AU65MVMBUGO"
    #role=$SAGEMAKER_PIPELINE_ROLE_ARN

    default_bucket_data = ParameterString(name="DefaultS3BucketData", default_value="mlops-dev-370702650160-eu-north-1-data")
    default_bucket_models = ParameterString(name="DefaultS3BucketModels", default_value="mlops-dev-370702650160-eu-north-1-models")

    baseline_model_objective_value = ParameterFloat(name='BaselineModelObjectiveValue', default_value=0.6)

    bucket_prefix_data = ParameterString(name="S3BucketPrefixData", default_value="lifecycle/30d/customerone-dev-branch/") # lifecycle/30d/${SAGEMAKER_PROJECT_NAME}/
    bucket_prefix_models = ParameterString(name="S3BucketPrefixModels", default_value="lifecycle/max/customerone-dev-branch/") # lifecycle/max/${SAGEMAKER_PROJECT_NAME}/

    # image_uri = ParameterString(name="ImageURI")
    image_uri = "370702650160.dkr.ecr.eu-north-1.amazonaws.com/sagemaker-cross-validation-pipeline:latest"
    k = ParameterInteger(name="KFold", default_value=3)
    max_jobs = ParameterInteger(name="MaxTrainingJobs", default_value=3)
    max_parallel_jobs = ParameterInteger(name="MaxParallelTrainingJobs", default_value=1)
    min_c = ParameterInteger(name="MinimumC", default_value=0)
    max_c = ParameterInteger(name="MaximumC", default_value=1)
    min_gamma = ParameterFloat(name="MinimumGamma", default_value=0.0001)
    max_gamma = ParameterFloat(name="MaximumGamma", default_value=0.001)
    gamma_scaling_type = ParameterString(name="GammaScalingType", default_value="Logarithmic")

    # Variables / Constants used throughout the pipeline
    model_package_group_name="customerone-dev-branch-p-lwkq81p5gxnk"
    framework_version = "0.23-1"
    s3_bucket_base_path=f"s3://{default_bucket_data.default_value}/{bucket_prefix_data.default_value}"
    # s3_bucket_base_path_train = f"{s3_bucket_base_path}/train"
    # s3_bucket_base_path_test = f"{s3_bucket_base_path}/test"
    s3_bucket_base_path_train = f"{s3_bucket_base_path}train"
    s3_bucket_base_path_test = f"{s3_bucket_base_path}test"
    s3_bucket_base_path_evaluation = f"{s3_bucket_base_path}/evaluation"
    s3_bucket_base_path_jobinfo = f"{s3_bucket_base_path}/jobinfo"
    s3_bucket_base_path_output = f"{s3_bucket_base_path}/output"

    
    model_approval_status, processing_instance_count, processing_instance_type, training_instance_type, training_instance_count, hpo_tuner_instance_type = sagemaker_pipeline_parameters(data_bucket=default_bucket)
    # TODO: Sarah what are the following parameters? How do I set them dynamically? Shouldn't they go to the sagemaker_pipeline_parameters() method too?
    database = ParameterString(name="DataBase", default_value="customerone_mock_data_rl")
    table = ParameterString(name="AbaloneTable", default_value="master") # ??????
    filter = ParameterString(name="FilterRings", default_value="disabled")
    time_path = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    trigger_id = ParameterString(name="TriggerID", default_value="0000000000") #from codebuild - use CODEBUILD_BUILD_ID env variable parsed after ":" The CodeBuild ID of the build (for example, codebuild-demo-project:b1e6661e-e4f2-4156-9ab9-82a19EXAMPLE).
    nowgmt = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
    execution_time = ParameterString(name="ExecutionTime", default_value=nowgmt)
    image_uri = "370702650160.dkr.ecr.eu-north-1.amazonaws.com/sagemaker-cross-validation-pipeline:latest"
    framework_version = "0.23-1"

    network_config = NetworkConfig(
        enable_network_isolation=False,
        security_group_ids=env_data["SecurityGroups"],
        subnets=env_data["SubnetIds"],
        encrypt_inter_container_traffic=True)
#     print(f"Sarah: standard_model_pipeline > network_config: {network_config}") # <sagemaker.network.NetworkConfig object at 0x7f27da7ce100>
    data_base_path = "s3://{}/lifecycle/60d/{}/{}/{}/{}/output/training".format(env_data["DataBucketName"], project, revision, time_path, purpose)


    model_name = "xsell_cust_voice_to_fixed"
    # Specify the model path where you want to save the models from training:
    model_path = "s3://{}/lifecycle/max/{}/{}/{}/{}/output/training".format(env_data["ModelBucketName"], project, revision, model_name, time_path)
    evaluation_path = "s3://{}/lifecycle/max/{}/{}/{}/{}/output/evaluation".format(env_data["ModelBucketName"], project, revision, model_name, time_path)
    step_model_selection, step_cv_train_hpo, sklearn_estimator = lightgbm_training_tasks(base_job_prefix=base_job_prefix,
                                                    env_data=env_data,
                                                    image_uri=image_uri,
                                                    network_config=network_config,
                                                    sagemaker_session=sagemaker_session,
                                                    training_instance_type=training_instance_type,
                                                    training_instance_count=training_instance_count,
                                                    model_path=model_path,
                                                    data_base_path=data_base_path,
                                                    evaluation_path=evaluation_path,
                                                    hpo_tuner_instance_type=hpo_tuner_instance_type,
                                                    region=region,
                                                    framework_version=framework_version,
                                                    source_scripts_path=source_scripts_path,
                                                    )




    pipeline_name = f"CrossValidationTrainingPipeline"
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_count,
            processing_instance_type,
            training_instance_type,
#             training_instance_count,
            inference_instance_type,
            hpo_tuner_instance_type,
            model_approval_status,
            role,
            default_bucket_data,
            baseline_model_objective_value,
            bucket_prefix_data,
            image_uri,
#             k,
#             max_jobs,
#             max_parallel_jobs,
#             min_c,
#             max_c,
#             min_gamma,
#             max_gamma,
#             gamma_scaling_type
        ],    
        pipeline_experiment_config=PipelineExperimentConfig(
          ExecutionVariables.PIPELINE_NAME,
          ExecutionVariables.PIPELINE_EXECUTION_ID),
        steps=[step_cv_train_hpo],
    )
    return pipeline



In [12]:
def lightgbm_training_tasks(base_job_prefix, env_data, image_uri, network_config, sagemaker_session,
                            training_instance_type, training_instance_count, model_path, data_base_path,
                            evaluation_path, hpo_tuner_instance_type, region, framework_version, source_scripts_path):


    cross_validation_with_hpo_script = "{}/preprocessing/cross_validation_with_hpo.py".format(source_scripts_path)
    print(f"SARAH: lightgbm_training_tasks > cross_validation_with_hpo_script = {cross_validation_with_hpo_script}")

    s3_bucket_base_path_jobinfo = f"{data_base_path}/jobinfo" # TODO: SARAH: is this correct????
    # s3_bucket_base_path = f"s3://{default_bucket_data.default_value}/{bucket_prefix_data.default_value}"
    # s3_bucket_base_path_train = f"{s3_bucket_base_path}train"
    # bucket_prefix_data = ParameterString(name="S3BucketPrefixData", default_value="lifecycle/30d/customerone-dev-branch/")
    # s3_bucket_base_path = f"{data_base_path}/{bucket_prefix_data.default_value}"
    # s3_bucket_base_path_train = f"{s3_bucket_base_path}train"
    # s3_bucket_base_path_test = f"{s3_bucket_base_path}test"
    # s3_bucket_base_path_output = f"{s3_bucket_base_path}/output"
    s3_bucket_base_path_train = f"{data_base_path}train"
    s3_bucket_base_path_test = f"{data_base_path}test"
    s3_bucket_base_path_output = f"{data_base_path}/output"

    # print(f"SARAH: lightgbm_training_tasks > s3_bucket_base_path = {s3_bucket_base_path}")
    # print(f"SARAH: lightgbm_training_tasks > bucket_prefix_data={bucket_prefix_data.default_value}")
    print(f"SARAH: lightgbm_training_tasks > s3_bucket_base_path_train={s3_bucket_base_path_train}")
    print(f"SARAH: lightgbm_training_tasks > s3_bucket_base_path_test={s3_bucket_base_path_test}")
    print(F"SARAH: lightgbm_training_tasks > s3_bucket_base_path_output={s3_bucket_base_path_output}")
    print(f"SARAH: lightgbm_training_tasks > s3_bucket_base_path_jobinfo = {s3_bucket_base_path_jobinfo}")
    print(f"SARAH: lightgbm_training_task > env_data[TrainingRole] ={env_data['TrainingRole'],}")
    # SARAH: lightgbm_training_tasks > s3_bucket_base_path = s3://***/lifecycle/60d/customerone-dev-branch/dc8e2f8/2022_08_24_12_32_10/p1033/output/training/lifecycle/30d/customerone-dev-branch/
    # SARAH: lightgbm_training_tasks > bucket_prefix_data=lifecycle/30d/customerone-dev-branch/
    # SARAH: lightgbm_training_tasks > s3_bucket_base_path_train=s3://***/lifecycle/60d/customerone-dev-branch/dc8e2f8/2022_08_24_12_32_10/p1033/output/training/lifecycle/30d/customerone-dev-branch/train
    # SARAH: lightgbm_training_tasks > s3_bucket_base_path_test=s3://***/lifecycle/60d/customerone-dev-branch/dc8e2f8/2022_08_24_12_32_10/p1033/output/training/lifecycle/30d/customerone-dev-branch/test
    # SARAH: lightgbm_training_tasks > s3_bucket_base_path_output=s3://***/lifecycle/60d/customerone-dev-branch/dc8e2f8/2022_08_24_12_32_10/p1033/output/training/lifecycle/30d/customerone-dev-branch//output
    # SARAH: lightgbm_training_tasks > s3_bucket_base_path_jobinfo = s3://***/lifecycle/60d/customerone-dev-branch/dc8e2f8/2022_08_24_12_32_10/p1033/output/training/jobinfo
    # SARAH: lightgbm_training_tasks > from xgbm train =<sagemaker.inputs.TrainingInput object at 0x7f15f905faf0>

    # ## 2- Cross Validation Model Training Step


    evaluation_report = PropertyFile(name="EvaluationReport", output_name="evaluation", path="evaluation.json")
    jobinfo = PropertyFile(name="JobInfo", output_name="jobinfo", path="jobinfo.json")

    script_tuner = ScriptProcessor(
        image_uri=image_uri,
        command=["python3"],
        instance_type=hpo_tuner_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/KFoldCrossValidationHyperParameterTuner",
        role=env_data["TrainingRole"],
        sagemaker_session=sagemaker_session,
        # subnets=network_config.subnets,
        # security_group_ids=network_config.security_group_ids,
        # encrypt_inter_container_traffic=True,
        # enable_network_isolation=False,
        volume_kms_key=env_data["EbsKmsKeyArn"],
        output_kms_key=env_data["S3KmsKeyId"],
        network_config=network_config,
    )
    print("SARAH: lightgbm_training_tasks > script_tuner is created")
    
    k = "3"
    max_jobs = "3"
    max_parallel_jobs = "1"
    min_c = "0"
    max_c = "1"
    min_gamma = "0.0001"
    max_gamma = "0.001"
    gamma_scaling_type = "Logarithmic"

    step_cv_train_hpo = ProcessingStep(
        name="HyperParameterTuningStep",
        processor=script_tuner,
        code=cross_validation_with_hpo_script,
        outputs=[
            ProcessingOutput(output_name="evaluation",
                             source="/opt/ml/processing/evaluation",
                             destination=evaluation_path), # s3_bucket_base_path_evaluation
            ProcessingOutput(output_name="jobinfo",
                             source="/opt/ml/processing/jobinfo",
                             destination=s3_bucket_base_path_jobinfo)
        ],
        job_arguments=["-k", k,
                       "--image-uri", image_uri,
                       "--train", s3_bucket_base_path_train,
                       "--test", s3_bucket_base_path_test,
                       "--instance-type", training_instance_type,
                       "--instance-count", "1",
                       "--output-path", s3_bucket_base_path_output,
                       "--max-jobs", "3",
                       "--max-parallel-jobs", max_parallel_jobs,
                       "--min-c", min_c,
                       "--max-c", max_c,
                       "--min-gamma", min_gamma,
                       "--max-gamma", max_gamma,
                       "--gamma-scaling-type", gamma_scaling_type,
                       "--region", str(region)],
        property_files=[evaluation_report],
    )
    

    # ## 3- Model Selection Step
    sklearn_estimator = SKLearn("scikit_learn_iris.py",
                                framework_version=framework_version,
                                instance_type=training_instance_type,
                                py_version='py3',
                                source_dir="/opt/ml/code/",
                                output_path=s3_bucket_base_path_output,
                                role=env_data["TrainingRole"])  # what should be the role here?
    print("SARAH: lightgbm_training_tasks > sklearn_estimator is created")

    step_model_selection = TrainingStep(
        name="ModelSelectionStep",
        estimator=sklearn_estimator,
        inputs={
            "train": TrainingInput(
                s3_data='s3://mock1234567890/mock',
                content_type="text/csv"
            ),
            "jobinfo": TrainingInput(
                s3_data=f"{s3_bucket_base_path_jobinfo}",
                content_type="application/json"
            )
        }
    )
    print("SARAH: lightgbm_training_tasks > step_model_selection is created")
    return step_model_selection, step_cv_train_hpo, sklearn_estimator

In [13]:
def sagemaker_pipeline_parameters(data_bucket):
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    training_instance_count = "1"
    processing_instance_type = ParameterString(name="ProcessingInstanceType", default_value="ml.m5.xlarge")
    training_instance_type = ParameterString(name="TrainingInstanceType", default_value="ml.m5.xlarge")
    model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="PendingManualApproval")
    hpo_tuner_instance_type = ParameterString(name="HPOTunerScriptInstanceType", default_value="ml.t3.medium")

    return model_approval_status, processing_instance_count, processing_instance_type, training_instance_type, training_instance_count, hpo_tuner_instance_type


In [14]:
pipeline_cvu=get_pipeline(
        region='eu-north-1',
        project_name='customerone-dev-branch',
        source_scripts_path="/root/sagemaker-customerone2-p-vrs1c6dm1yir-model-build-train/source_scripts/",
        model_package_group_name="AbalonePackageGroup",
        pipeline_name="debugging-cv2",
        base_job_prefix="Abalone",
        revision="no-revision-provided",)



Sarah: Environment data:
{
  "DomainArn": "arn:aws:sagemaker:eu-north-1:370702650160:domain/d-tdizim9qnor9",
  "DomainId": "d-tdizim9qnor9",
  "DomainName": "mlops-dev-eu-north-1-sagemaker-domain",
  "HomeEfsFileSystemId": "fs-03fc3d37f8623fea2",
  "Status": "InService",
  "AuthMode": "IAM",
  "AppNetworkAccessType": "VpcOnly",
  "SubnetIds": [
    "subnet-0724be5e7071e7070",
    "subnet-01def51ffe7467c71"
  ],
  "Url": "https://d-tdizim9qnor9.studio.eu-north-1.sagemaker.aws",
  "VpcId": "vpc-0459a28f3637e285c",
  "KmsKeyId": "f4664542-0f2e-42ca-b51f-2bec0ad62278",
  "ExecutionRole": "arn:aws:iam::370702650160:role/sm-mlops-env-EnvironmentIAM-SageMakerExecutionRole-14AU65MVMBUGO",
  "SecurityGroups": [
    "sg-041054ee4500f96f6"
  ],
  "EnvironmentName": "mlops",
  "EnvironmentType": "dev",
  "DataBucketName": "mlops-dev-370702650160-eu-north-1-data",
  "ModelBucketName": "mlops-dev-370702650160-eu-north-1-models",
  "S3KmsKeyId": "arn:aws:kms:eu-north-1:370702650160:key/2851a68b-348e-

In [15]:
pipeline_cvu.definition()

'{"Version": "2020-12-01", "Metadata": {}, "Parameters": [{"Name": "ProcessingInstanceCount", "Type": "Integer", "DefaultValue": 1}, {"Name": "ProcessingInstanceType", "Type": "String", "DefaultValue": "ml.m5.xlarge"}, {"Name": "TrainingInstanceType", "Type": "String", "DefaultValue": "ml.m5.xlarge"}, {"Name": "InferenceInstanceType", "Type": "String", "DefaultValue": "ml.m5.large"}, {"Name": "HPOTunerScriptInstanceType", "Type": "String", "DefaultValue": "ml.t3.medium"}, {"Name": "ModelApprovalStatus", "Type": "String", "DefaultValue": "PendingManualApproval"}, {"Name": "DefaultS3BucketData", "Type": "String", "DefaultValue": "mlops-dev-370702650160-eu-north-1-data"}, {"Name": "BaselineModelObjectiveValue", "Type": "Float", "DefaultValue": 0.6}, {"Name": "S3BucketPrefixData", "Type": "String", "DefaultValue": "lifecycle/30d/customerone-dev-branch/"}], "PipelineExperimentConfig": {"ExperimentName": {"Get": "Execution.PipelineName"}, "TrialName": {"Get": "Execution.PipelineExecutionId"}

In [16]:
upsert_response = pipeline_cvu.upsert(
            role_arn="arn:aws:iam::370702650160:role/sm-mlops-env-EnvironmentI-SageMakerPipelineExecuti-1AWTL5A5UKOHN"
        )

In [17]:
upsert_response

{'PipelineArn': 'arn:aws:sagemaker:eu-north-1:370702650160:pipeline/crossvalidationtrainingpipeline',
 'ResponseMetadata': {'RequestId': '510e22f7-97c8-4cea-9441-44d73f399103',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '510e22f7-97c8-4cea-9441-44d73f399103',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '100',
   'date': 'Fri, 26 Aug 2022 09:09:48 GMT'},
  'RetryAttempts': 0}}

In [18]:
pipeline_cvu.start()

_PipelineExecution(arn='arn:aws:sagemaker:eu-north-1:370702650160:pipeline/crossvalidationtrainingpipeline/execution/tz2vc0z9ldqb', sagemaker_session=<sagemaker.session.Session object at 0x7fd5a051cad0>)