In [None]:
import os
import sys
from pathlib import Path
import time

from sagemaker.session import Session

from sagemaker.processing import ProcessingInput, ProcessingOutput

from sagemaker.debugger import DebuggerHookConfig, CollectionConfig, Rule, rule_configs
from sagemaker.estimator import Estimator

from sagemaker.parameter import IntegerParameter, ContinuousParameter
from sagemaker.tuner import HyperparameterTuner

from sagemaker.xgboost import XGBoostProcessor, XGBoostModel

from sagemaker.model_metrics import MetricsSource, ModelMetrics

from sagemaker.transformer import Transformer
from sagemaker.drift_check_baselines import DriftCheckBaselines

from sagemaker.workflow.pipeline_definition_config import PipelineDefinitionConfig
from sagemaker.workflow.pipeline_context import PipelineSession, LocalPipelineSession
from sagemaker.workflow.pipeline import Pipeline

from sagemaker.workflow.quality_check_step import ModelQualityCheckConfig
from sagemaker.workflow.condition_step import ConditionStep
from sagemaker.workflow.lambda_step import LambdaStep
from sagemaker.workflow.model_step import ModelStep
from sagemaker.workflow.fail_step import FailStep
from sagemaker.workflow.steps import CacheConfig, ProcessingStep, TrainingStep, TuningStep, TransformStep

from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.parameters import ParameterFloat, ParameterString
from sagemaker.workflow.functions import JsonGet, Join

from sagemaker.lambda_helper import Lambda
from sagemaker.inputs import TrainingInput

import logging
import warnings

logging.getLogger('sagemaker').setLevel(logging.ERROR)
warnings.filterwarnings('ignore')

# %load_ext autoreload
# %autoreload 2
%load_ext dotenv
%dotenv

In [None]:
bucket = os.environ["BUCKET"]
role_name = os.environ["ROLE_NAME"]
account_id = os.environ["ACCOUNT_ID"]
access_key = os.environ["ACCESS_KEY"]
secret_key = os.environ["SECRET_KEY"]
pipeline_name = os.environ["PIPELINE_NAME"]
region = os.environ["AWS_REGION"]
sagemaker_session = Session()

DUMMY_ROLE = "arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-11111111111111"
s3_location = f"s3://{bucket}"
LOCAL_MODE = False

architecture = !(uname -m)
IS_ARM64_ARCHITECTURE = architecture[0] == "arm64"

if LOCAL_MODE:
    config = {
        "session": LocalPipelineSession(default_bucket=bucket),
        "instance_type": "local",
        "image": "sagemaker-xgboost-training-toolkit-local" if IS_ARM64_ARCHITECTURE else None
    }
else:
    config = {
        "session": PipelineSession(default_bucket=bucket),
        "instance_type": "ml.m5.xlarge",
        "image": None,
    }

config["framework_version"] = "1.7-1"
config["py_version"] = "py310"

USE_TUNING_STEP = False and not LOCAL_MODE

CODE_FOLDER = Path("code")
CODE_FOLDER.mkdir(parents=True, exist_ok=True)
INFERENCE_CODE_FOLDER = CODE_FOLDER / "inference"
INFERENCE_CODE_FOLDER.mkdir(parents=True, exist_ok=True)

sys.path.extend([f"./{CODE_FOLDER}", f"./{INFERENCE_CODE_FOLDER}"])

In [None]:
from aws_football_predictor.program.code.config.AWSClientManager import AWSClientManager

# Project helper that hands out service clients. get_client is called with a
# service name and a role name; how the role is used is defined in
# AWSClientManager, which is not shown in this notebook.
aws_client_manager = AWSClientManager(
    region=region,
    account_id=account_id,
    access_key_id=access_key,
    secret_access_key=secret_key,
)

iam_client = aws_client_manager.get_client("iam", role_name)
sagemaker_client = aws_client_manager.get_client("sagemaker", role_name)

In [None]:
# Pipeline-wide settings: custom job prefixes are enabled, and every step that
# receives `cache_config` has caching turned on with an "15d" expiry.
pipeline_definition_config = PipelineDefinitionConfig(
    use_custom_job_prefix=True,
)
cache_config = CacheConfig(
    expire_after="15d",
    enable_caching=True,
)

In [None]:
# Repository names of the custom processing and training containers.
processor_image_name = 'sagemaker-processing-container'
train_image_name = 'xgb-clf-training-container'

# The registry host is built from the configured account and region. The region
# was previously hard-coded to "eu-north-1", which disagreed with the `region`
# value (AWS_REGION) used by the rest of the notebook whenever the two differed.
processor_image_uri = f'{account_id}.dkr.ecr.{region}.amazonaws.com/{processor_image_name}'
train_image_uri = f'{account_id}.dkr.ecr.{region}.amazonaws.com/{train_image_name}'

# Execution role ARN handed to processors, estimators, models and the pipeline.
role_arn = f'arn:aws:iam::{account_id}:role/{role_name}'

In [None]:
# Pipeline parameter holding the S3 location of the raw dataset; it defaults to
# the "data" prefix of the configured bucket.
dataset_location = ParameterString(
    name="dataset_location",
    default_value=f"{s3_location}/data",
)

from sagemaker.processing import ScriptProcessor

# Script processor that runs on the custom processing image with `python3`.
processor = ScriptProcessor(
    image_uri=processor_image_uri,
    command=['python3'],
    role=role_arn,
    instance_count=1,
    instance_type=config['instance_type'],
    sagemaker_session=config['session'],
)

# First pipeline step: runs preprocessor.py on the dataset. Each named output
# maps to the container folder /opt/ml/processing/<output name>.
split_and_transform_data_step = ProcessingStep(
    name="split-and-transform-data",
    cache_config=cache_config,
    step_args=processor.run(
        code=f"{CODE_FOLDER}/containers/preprocessor/preprocessor.py",
        inputs=[
            ProcessingInput(destination="/opt/ml/processing/input", source=dataset_location),
        ],
        outputs=[
            ProcessingOutput(output_name=output_name, source=f"/opt/ml/processing/{output_name}")
            for output_name in (
                "train",
                "validation",
                "test",
                "model",
                "train-baseline",
                "test-baseline",
            )
        ],
    ),
)

In [None]:
def create_training_step(estimator):
    """Build the "train-model" TrainingStep for the given estimator.

    The estimator is fitted on three channels taken from the outputs of
    `split_and_transform_data_step`: "train" and "validation" (both text/csv)
    and "pipeline", which points at the step's "model" output
    (application/tar+gzip). The step uses the notebook-level `cache_config`.
    """
    processed = split_and_transform_data_step.properties.ProcessingOutputConfig.Outputs

    def channel(output_name, content_type):
        # One training channel backed by a named processing output.
        return TrainingInput(
            s3_data=processed[output_name].S3Output.S3Uri,
            content_type=content_type,
        )

    channels = {
        "train": channel("train", "text/csv"),
        "validation": channel("validation", "text/csv"),
        "pipeline": channel("model", "application/tar+gzip"),
    }

    return TrainingStep(
        name="train-model",
        step_args=estimator.fit(inputs=channels),
        cache_config=cache_config,
    )

In [None]:
# Spot capacity is only requested outside local mode, and max_wait is only
# supplied together with spot instances.
use_spot_instances = True and not LOCAL_MODE
max_run = 500
max_wait = 800 if use_spot_instances else None

# Neither of these two names is referenced again in the cells shown in this notebook.
instance_type = config['instance_type']
save_interval = 2

# Debugger output goes under the "train_analyse" prefix of the bucket.
debugger_hook_config = DebuggerHookConfig(
    s3_output_path=f'{s3_location}/train_analyse',
)

# Estimator backed by the custom training image; BUCKET is passed into the
# training container's environment.
xgb_estimator = Estimator(
    image_uri=train_image_uri,
    role=role_arn,
    sagemaker_session=config["session"],
    instance_type=config["instance_type"],
    instance_count=2,
    py_version=config['py_version'],
    use_spot_instances=use_spot_instances,
    max_run=max_run,
    max_wait=max_wait,
    disable_profiler=False,
    debugger_hook_config=debugger_hook_config,
    environment={'BUCKET': bucket},
)

xgb_train_model_step = create_training_step(xgb_estimator)

In [None]:
# Tuning objective: maximise the "validation:auc" metric with Bayesian search.
strategy = "Bayesian"
objective_type = "Maximize"
metric_name = "validation:auc"

# Search space explored by the tuner.
hyperparameter_ranges = {
    'eta': ContinuousParameter(min_value=0.05, max_value=0.3, scaling_type="Logarithmic"),
    'max_depth': IntegerParameter(min_value=5, max_value=15, scaling_type="Auto"),
    'subsample': ContinuousParameter(min_value=0.7, max_value=1.0, scaling_type="Auto"),
    'colsample_bytree': ContinuousParameter(min_value=0.7, max_value=1.0, scaling_type="Logarithmic"),
    'lambda': ContinuousParameter(min_value=5, max_value=12, scaling_type="Logarithmic"),
    'alpha': ContinuousParameter(min_value=1, max_value=10, scaling_type="Logarithmic"),
    'min_child_weight': ContinuousParameter(min_value=0.4, max_value=1.0, scaling_type="Auto"),
    'scale_pos_weight': ContinuousParameter(min_value=0.8, max_value=2.2, scaling_type="Auto"),
}

# Regexes that pull metric values out of the training logs.
# NOTE(review): the validation patterns look for "validation_0-<metric>" while
# the train patterns look for "train-<metric>" — confirm both prefixes match
# what the training container actually prints.
metric_definitions = [
    {
        'Name': 'validation:logloss',
        'Regex': r".*\[[0-9]+\].*#011validation_0-logloss:([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*",
    },
    {
        'Name': 'validation:auc',
        'Regex': r".*\[[0-9]+\].*#011validation_0-auc:([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*",
    },
    {
        'Name': 'train:logloss',
        'Regex': r".*\[[0-9]+\].*#011train-logloss:([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*",
    },
    {
        'Name': 'train:auc',
        'Regex': r".*\[[0-9]+\].*#011train-auc:([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*",
    },
]

# Note that `strategy` is defined above but not passed to the tuner here.
tuner = HyperparameterTuner(
    estimator=xgb_estimator,
    base_tuning_job_name='xgboost-tuning',
    objective_metric_name=metric_name,
    objective_type=objective_type,
    metric_definitions=metric_definitions,
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=5,
    max_parallel_jobs=2,
    early_stopping_type='Auto',
)



In [None]:
# Tuning step: runs the HyperparameterTuner on the outputs of the
# split-and-transform step. It is only added to the pipeline when
# USE_TUNING_STEP is True.
tune_model_step = TuningStep(
    name="tune-model",
    step_args=tuner.fit(
        inputs={
            "train": TrainingInput(
                s3_data=split_and_transform_data_step.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            # The tuner optimises "validation:auc", so every tuning job gets the
            # same validation channel that create_training_step passes to the
            # plain training job. This channel was previously missing.
            "validation": TrainingInput(
                s3_data=split_and_transform_data_step.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            # Fitted preprocessing artifacts ("model" output of the processing step).
            "pipeline": TrainingInput(
                s3_data=split_and_transform_data_step.properties.ProcessingOutputConfig.Outputs[
                    "model"
                ].S3Output.S3Uri,
                content_type="application/tar+gzip",
            ),
        },
    ),
    cache_config=cache_config,
)

In [None]:
# Processor for the evaluation script. `config["image"]` is None unless the
# notebook runs in local mode on an "arm64" machine.
evaluation_processor = XGBoostProcessor(
    base_job_name="evaluation-processor",
    framework_version=config["framework_version"],
    image_uri=config["image"],
    role=role_arn,
    instance_count=1,
    instance_type=config["instance_type"],
    sagemaker_session=config["session"],
)

In [None]:
from sagemaker.workflow.properties import PropertyFile

# Property file that exposes evaluation.json from the "evaluation" output, so
# the pipeline condition can read values out of it.
evaluation_report = PropertyFile(
    name="evaluation-report",
    output_name="evaluation",
    path="evaluation.json",
)

In [None]:
# S3 location of the model artifacts: the top model of the tuning step when
# tuning is enabled, otherwise the artifacts of the plain training step.
if USE_TUNING_STEP:
    model_assets = tune_model_step.get_top_model_s3_uri(
        top_k=0,
        s3_bucket=config["session"].default_bucket(),
    )
else:
    model_assets = xgb_train_model_step.properties.ModelArtifacts.S3ModelArtifacts

In [None]:
# Evaluation step: runs evaluation.py with the test split, the trained model
# and the preprocessing artifacts mounted under /opt/ml/processing, and
# publishes the "evaluation" output together with its property file.
# NOTE(review): the script path is passed both to ProcessingStep(code=...) and
# to evaluation_processor.run(code=...) — confirm whether the outer one is needed.
evaluate_model_step = ProcessingStep(
    name="evaluate-model",
    code=(CODE_FOLDER / 'evaluate/evaluation.py').as_posix(),
    step_args=evaluation_processor.run(
        code=f"{CODE_FOLDER}/evaluate/evaluation.py",
        inputs=[
            ProcessingInput(
                destination="/opt/ml/processing/test",
                source=split_and_transform_data_step.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
            ),
            ProcessingInput(
                destination="/opt/ml/processing/model",
                source=model_assets,
            ),
            ProcessingInput(
                destination="/opt/ml/processing/pipeline",
                source=split_and_transform_data_step.properties.ProcessingOutputConfig.Outputs[
                    "model"
                ].S3Output.S3Uri,
            ),
        ],
        outputs=[
            ProcessingOutput(source="/opt/ml/processing/evaluation", output_name="evaluation"),
        ],
    ),
    property_files=[evaluation_report],
    cache_config=cache_config,
)

In [None]:
# Model Package Group name, read from the environment (KeyError if unset).
# Later cells use it for the quality-baseline steps, model registration, the
# deployment Lambda environment and the approval EventBridge rule.
MODEL_PACKAGE_GROUP = os.environ["MODEL_PACKAGE_GROUP"]

In [None]:
# Model metrics pointing at <evaluation output S3 URI>/evaluation.json.
# The registration cell in this notebook passes `model_quality_model_metrics`
# instead, so this object is not referenced by the cells shown here.
model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        content_type="application/json",
        s3_uri=Join(
            on="/",
            values=[
                evaluate_model_step.properties.ProcessingOutputConfig.Outputs["evaluation"].S3Output.S3Uri,
                "evaluation.json",
            ],
        ),
    ),
)

In [None]:
def create_registration_step(
        model,
        model_package_group_name,
        approval_status="Approved",
        content_types=None,
        response_types=None,
        model_metrics=None,
        drift_check_baselines=None,
):
    """Create the "register" ModelStep for the supplied model.

    Args:
        model: Model object whose ``register`` method produces the step arguments.
        model_package_group_name: Model Package Group to register the model in.
        approval_status: Approval status assigned to the new model package.
        content_types: Supported request MIME types. ``None`` selects the
            default ``["text/csv"]``.
        response_types: Supported response MIME types. ``None`` selects the
            default ``["text/csv"]``.
        model_metrics: Optional ModelMetrics attached to the model package.
        drift_check_baselines: Optional DriftCheckBaselines attached to the
            model package.

    Returns:
        A ModelStep named "register". Instance types and framework version are
        taken from the notebook-level ``config``.
    """
    # Build the default lists per call. The previous list defaults were single
    # objects shared by every call and handed straight to model.register().
    if content_types is None:
        content_types = ["text/csv"]
    if response_types is None:
        response_types = ["text/csv"]

    return ModelStep(
        name="register",
        step_args=model.register(
            model_package_group_name=model_package_group_name,
            approval_status=approval_status,
            model_metrics=model_metrics,
            drift_check_baselines=drift_check_baselines,
            content_types=content_types,
            response_types=response_types,
            inference_instances=[config["instance_type"]],
            transform_instances=[config["instance_type"]],
            framework_version=config["framework_version"],
            domain="MACHINE_LEARNING",
            task="CLASSIFICATION",
            framework="XGBOOST",
        ),
    )

In [None]:
# XGBoost model wrapping the selected model artifacts, with the custom
# inference.py entry point taken from the inference code folder.
custom_xgb_model = XGBoostModel(
    name="xgb_football",
    model_data=model_assets,
    source_dir=INFERENCE_CODE_FOLDER.as_posix(),
    entry_point="inference.py",
    framework_version=config["framework_version"],
    role=role_arn,
    sagemaker_session=config["session"],
)

In [None]:
# S3 prefixes used for monitoring artifacts, all under "<bucket>/monitoring".
GROUND_TRUTH_LOCATION = s3_location + "/monitoring/groundtruth"
DATA_QUALITY_LOCATION = s3_location + "/monitoring/data-quality"
MODEL_QUALITY_LOCATION = s3_location + "/monitoring/model-quality"

In [None]:
from sagemaker.model_monitor.dataset_format import DatasetFormat
from sagemaker.workflow.check_job_config import CheckJobConfig
from sagemaker.workflow.quality_check_step import (
    DataQualityCheckConfig,
    QualityCheckStep,
)

# The baseline is computed from the "train-baseline" output of the processing
# step, which is read as CSV with a header row.
data_quality_check_config = DataQualityCheckConfig(
    baseline_dataset=split_and_transform_data_step.properties.ProcessingOutputConfig.Outputs[
        "train-baseline"
    ].S3Output.S3Uri,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=DATA_QUALITY_LOCATION,
)

# Compute resources for the data-quality check job.
check_job_config = CheckJobConfig(
    role=role_arn,
    instance_type="ml.c5.xlarge",
    instance_count=1,
    volume_size_in_gb=20,
    sagemaker_session=config["session"],
)

# skip_check=True together with register_new_baseline=True: the step is set up
# to produce and register a new baseline rather than check against an old one.
data_quality_baseline_step = QualityCheckStep(
    name="generate-data-quality-baseline",
    quality_check_config=data_quality_check_config,
    check_job_config=check_job_config,
    model_package_group_name=MODEL_PACKAGE_GROUP,
    register_new_baseline=True,
    skip_check=True,
    cache_config=cache_config,
)

In [None]:
# Step that creates the SageMaker model from `custom_xgb_model`; its ModelName
# property is what the batch transformer refers to.
create_model_step = ModelStep(
    step_args=custom_xgb_model.create(instance_type=config["instance_type"]),
    name="create-model",
)

In [None]:
# Batch transformer bound to the model created by `create_model_step`.
# Results are written under the "transform" prefix of the bucket.
transformer = Transformer(
    model_name=create_model_step.properties.ModelName,
    sagemaker_session=config["session"],
    instance_count=1,
    instance_type=config["instance_type"],
    output_path=f"{s3_location}/transform",
    accept="text/csv",
    strategy="MultiRecord",
    assemble_with="Line",
)

In [None]:
# Transform step over the "test-baseline" output. Each input line is joined
# with its prediction (join_source="Input"), and output_filter "$[-3,-2]"
# selects which joined fields are kept.
generate_test_predictions_step = TransformStep(
    name="generate-test-predictions",
    cache_config=cache_config,
    step_args=transformer.transform(
        data=split_and_transform_data_step.properties.ProcessingOutputConfig.Outputs[
            "test-baseline"
        ].S3Output.S3Uri,
        content_type="text/csv",
        split_type="Line",
        join_source="Input",
        output_filter="$[-3,-2]",
    ),
)

In [None]:
# Model-quality baseline computed from the transform output (headerless CSV):
# column "_c0" is treated as ground truth and "_c1" as the model's inference.
# NOTE(review): problem_type is "MulticlassClassification" while the tuner
# tracks logloss/auc and tunes scale_pos_weight — confirm the label cardinality.
model_quality_baseline_step = QualityCheckStep(
    name="generate-model-quality-baseline",
    quality_check_config=ModelQualityCheckConfig(
        baseline_dataset=generate_test_predictions_step.properties.TransformOutput.S3OutputPath,
        dataset_format=DatasetFormat.csv(header=False),
        output_s3_uri=MODEL_QUALITY_LOCATION,
        problem_type="MulticlassClassification",
        inference_attribute="_c1",
        ground_truth_attribute="_c0",
    ),
    check_job_config=CheckJobConfig(
        role=role_arn,
        instance_type="ml.m5.xlarge",
        instance_count=2,
        volume_size_in_gb=20,
        sagemaker_session=config["session"],
    ),
    model_package_group_name=MODEL_PACKAGE_GROUP,
    register_new_baseline=True,
    skip_check=True,
    cache_config=cache_config,
)

In [None]:
# Metrics attached to the registered model: the statistics and constraints
# calculated by the model-quality and data-quality baseline steps.
model_quality_model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        content_type="application/json",
        s3_uri=model_quality_baseline_step.properties.CalculatedBaselineStatistics,
    ),
    model_constraints=MetricsSource(
        content_type="application/json",
        s3_uri=model_quality_baseline_step.properties.CalculatedBaselineConstraints,
    ),
    model_data_statistics=MetricsSource(
        content_type="application/json",
        s3_uri=data_quality_baseline_step.properties.CalculatedBaselineStatistics,
    ),
    model_data_constraints=MetricsSource(
        content_type="application/json",
        s3_uri=data_quality_baseline_step.properties.CalculatedBaselineConstraints,
    ),
)

# Drift-check baselines: the "BaselineUsedForDriftCheck*" properties of the
# same two baseline steps.
model_quality_drift_check_baselines = DriftCheckBaselines(
    model_statistics=MetricsSource(
        content_type="application/json",
        s3_uri=model_quality_baseline_step.properties.BaselineUsedForDriftCheckStatistics,
    ),
    model_constraints=MetricsSource(
        content_type="application/json",
        s3_uri=model_quality_baseline_step.properties.BaselineUsedForDriftCheckConstraints,
    ),
    model_data_statistics=MetricsSource(
        content_type="application/json",
        s3_uri=data_quality_baseline_step.properties.BaselineUsedForDriftCheckStatistics,
    ),
    model_data_constraints=MetricsSource(
        content_type="application/json",
        s3_uri=data_quality_baseline_step.properties.BaselineUsedForDriftCheckConstraints,
    ),
)

In [None]:
# Register the model as "PendingManualApproval", accepting CSV and JSON for
# both requests and responses, with the quality metrics and drift baselines.
register_model_step = create_registration_step(
    model=custom_xgb_model,
    model_package_group_name=MODEL_PACKAGE_GROUP,
    approval_status='PendingManualApproval',
    model_metrics=model_quality_model_metrics,
    drift_check_baselines=model_quality_drift_check_baselines,
    content_types=["text/csv", "application/json"],
    response_types=["text/csv", "application/json"],
)

In [None]:
# Pipeline parameter: minimum f1 value required to continue (default 0.6).
f1_threshold = ParameterFloat(default_value=0.6, name="f1_threshold")

# Step that ends the execution with a message containing the threshold.
fail_step = FailStep(
    name="fail",
    error_message=Join(
        values=[
            "Execution failed because the model's f1 result was lower than",
            f1_threshold,
        ],
        on=" ",
    ),
)

# Condition: metrics.f1.value from the evaluation report >= f1_threshold.
condition = ConditionGreaterThanOrEqualTo(
    right=f1_threshold,
    left=JsonGet(
        property_file=evaluation_report,
        step_name=evaluate_model_step.name,
        json_path="metrics.f1.value",
    ),
)

In [None]:
from aws_football_predictor.program.code.config.RoleManager import RoleManager

# USER_NAME is optional here: a missing variable yields None instead of raising.
username = os.environ.get('USER_NAME')
lambda_role_name = "lambda-role"

# Project helper that creates the execution role shared by both Lambda
# functions; what the role allows is defined in RoleManager (not shown here).
role_manager = RoleManager(account_id, username)
lambda_role_arn = role_manager.create_lambda_execution_role(
    lambda_role_name,
    bucket,
    region,
    pipeline_name,
    MODEL_PACKAGE_GROUP,
)

# Short pause after creating the role — presumably to let it propagate before
# it is used; confirm whether 5 seconds is sufficient.
time.sleep(5)

In [None]:
# Endpoint name and data-capture settings handed to the deployment Lambda.
ENDPOINT = "football-endpoint"
DATA_CAPTURE_PERCENTAGE = 100
DATA_CAPTURE_DESTINATION = f"{s3_location}/monitoring/data-capture"

# Lambda built from lambda_approve_model.py. Its settings travel as
# environment variables, so the percentage is passed as a string.
deploy_lambda_fn = Lambda(
    function_name="deployment_fn",
    script=(CODE_FOLDER / "lambda" / "lambda_approve_model.py").as_posix(),
    handler="lambda_approve_model.lambda_handler",
    runtime="python3.12",
    timeout=600,
    execution_role_arn=lambda_role_arn,
    session=sagemaker_session,
    environment={
        "Variables": {
            "ENDPOINT": ENDPOINT,
            "DATA_CAPTURE_DESTINATION": DATA_CAPTURE_DESTINATION,
            "DATA_CAPTURE_PERCENTAGE": f"{DATA_CAPTURE_PERCENTAGE}",
            "ROLE": lambda_role_arn,
            "MODEL_PACKAGE_GROUP": MODEL_PACKAGE_GROUP,
        },
    },
)

# Create or update the function; the response is used later for its
# FunctionArn and FunctionName.
deploy_lambda_fn_response = deploy_lambda_fn.upsert()

In [None]:
events_client = aws_client_manager.get_client("events", role_name)

# EventBridge rule that matches model packages of this group switching to "Approved".
rule_name = "PendingModelApprovedRule"
event_pattern_approve_model = f"""
{{
  "source": ["aws.sagemaker"],
  "detail-type": ["SageMaker Model Package State Change"],
  "detail": {{
    "ModelPackageGroupName": ["{MODEL_PACKAGE_GROUP}"],
    "ModelApprovalStatus": ["Approved"]
  }}
}}
"""

rule_response_approve_model = events_client.put_rule(
    Name=rule_name,
    Description='Rule to trigger Lambda to deploy model',
    EventPattern=event_pattern_approve_model,
    RoleArn=lambda_role_arn,
    State="ENABLED",
)

# Route matching events to the deployment Lambda.
events_client.put_targets(
    Rule=rule_name,
    Targets=[{"Id": "1", "Arn": deploy_lambda_fn_response["FunctionArn"]}],
)

print(f"EventBridge rule: {rule_name} created.")

In [None]:
deploy_lambda_function_name = deploy_lambda_fn_response["FunctionName"]

# NOTE(review): this client is requested with `lambda_role_name`, whereas the
# other clients in this notebook use `role_name` — confirm this is intended.
lambda_client = aws_client_manager.get_client("lambda", lambda_role_name)

# Allow EventBridge (restricted to the approval rule) to invoke the deployment
# function. A ResourceConflictException is treated as "already granted".
try:
    lambda_client.add_permission(
        FunctionName=deploy_lambda_function_name,
        StatementId="EventBridgeDeploy",
        Action="lambda:InvokeFunction",
        Principal="events.amazonaws.com",
        SourceArn=rule_response_approve_model["RuleArn"],
    )
except lambda_client.exceptions.ResourceConflictException:
    print(f'Function "{deploy_lambda_function_name}" already has the specified permission.')
else:
    print(f'Specified permission for "{deploy_lambda_function_name}" function.')

In [None]:
# Lambda built from lambda_new_data.py; it receives the pipeline name through
# its environment.
run_pipeline_lambda_fn = Lambda(
    function_name="run_pipeline_fn",
    script=(CODE_FOLDER / "lambda" / "lambda_new_data.py").as_posix(),
    handler="lambda_new_data.lambda_handler",
    runtime="python3.12",
    timeout=600,
    execution_role_arn=lambda_role_arn,
    session=sagemaker_session,
    environment={"Variables": {"PIPELINE_NAME": pipeline_name}},
)

# Create or update the function; the response is used later for its
# FunctionArn and FunctionName.
run_pipeline_lambda_fn_response = run_pipeline_lambda_fn.upsert()

In [None]:
# EventBridge rule that matches objects created under the "data/" prefix of the bucket.
object_created_rule = 'S3ObjectCreatedRule'
event_pattern_s3_object_created = f"""
{{
  "source": ["aws.s3"],
  "detail-type": ["Object Created"],
  "detail": {{
    "bucket": {{"name": ["{bucket}"]}},
    "object": {{"key": [{{
        "prefix": "data/"
    }}]}}
  }}
}}
"""

rule_response = events_client.put_rule(
    Name=object_created_rule,
    Description='Rule to trigger Lambda on S3 Object Creation',
    EventPattern=event_pattern_s3_object_created,
    RoleArn=lambda_role_arn,
    State='ENABLED',
)

# Route matching events to the run-pipeline Lambda.
events_client.put_targets(
    Rule=object_created_rule,
    Targets=[{'Id': '2', 'Arn': run_pipeline_lambda_fn_response['FunctionArn']}],
)

print(f"EventBridge rule: {object_created_rule} created.")

In [None]:
lambda_function_name = run_pipeline_lambda_fn_response["FunctionName"]

# Allow EventBridge (restricted to the object-created rule) to invoke the
# run-pipeline function. A ResourceConflictException is treated as "already granted".
try:
    lambda_client.add_permission(
        FunctionName=lambda_function_name,
        StatementId='EventBridgeObjectCreated',
        Action='lambda:InvokeFunction',
        Principal='events.amazonaws.com',
        SourceArn=rule_response['RuleArn'],
    )
except lambda_client.exceptions.ResourceConflictException:
    print(f'Function "{lambda_function_name}" already has the specified permission.')
else:
    print(f'Specified permission for "{lambda_function_name}" function.')

In [None]:
def create_deployment_step(register_model_step):
    """Build the LambdaStep named "deploy" that invokes `deploy_lambda_fn`.

    The Lambda receives the ModelPackageArn property of `register_model_step`
    under the input key "model_package_arn".
    """
    lambda_inputs = {
        "model_package_arn": register_model_step.properties.ModelPackageArn,
    }
    return LambdaStep(name="deploy", lambda_func=deploy_lambda_fn, inputs=lambda_inputs)


# Build the Lambda-backed deploy step for the registered model package.
# NOTE(review): deploy_step is not referenced by the condition step or the
# pipeline definition visible in this notebook — confirm whether it should be
# wired in or whether deployment relies solely on the EventBridge approval rule.
deploy_step = create_deployment_step(register_model_step)

In [None]:
# Gate on the f1 condition: when it holds, create the model, generate test
# predictions, build the model-quality baseline and register the model;
# otherwise run the fail step.
condition_step = ConditionStep(
    name="check-model-f1-score",
    conditions=[condition],
    else_steps=[fail_step],
    if_steps=[
        create_model_step,
        generate_test_predictions_step,
        model_quality_baseline_step,
        register_model_step,
    ],
)

In [None]:
# Assemble the pipeline. The second step is the tuning step when
# USE_TUNING_STEP is True and the plain training step otherwise.
session_pipeline = Pipeline(
    name=pipeline_name,
    sagemaker_session=config["session"],
    pipeline_definition_config=pipeline_definition_config,
    parameters=[dataset_location, f1_threshold],
    steps=[
        split_and_transform_data_step,
        (tune_model_step if USE_TUNING_STEP else xgb_train_model_step),
        evaluate_model_step,
        data_quality_baseline_step,
        condition_step,
    ],
)

# Create or update the pipeline definition under the execution role.
session_pipeline.upsert(role_arn=role_arn)

In [None]:
# Start an execution of the pipeline; no parameter overrides are passed here.
session_pipeline.start()