In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../..')

from sagemaker.processing import NetworkConfig, ProcessingInput, ProcessingOutput
from sagemaker.workflow.parameters import ParameterString
from sagemaker_rightline.model import Configuration
from sagemaker_rightline.validations import (
    StepImagesExist,
    StepKmsKeyIdAsExpected,
    PipelineParametersAsExpected,
    ContainerImage,
    StepNetworkConfigAsExpected,
    StepLambdaFunctionExists,
    StepRoleNameExists,
    StepRoleNameAsExpected,
    StepInputsAsExpected,
    StepOutputsAsExpected,
    StepOutputsMatchInputsAsExpected,
    StepCallbackSqsQueueExists,
)
from sagemaker_rightline.rules import Equals, Contains
from tests.utils import create_image
from tests.fixtures.pipeline import get_sagemaker_pipeline, DUMMY_BUCKET
from tests.fixtures.image_details import IMAGE_1_URI, IMAGE_2_URI
from moto import mock_ecr
import boto3

ModuleNotFoundError: No module named 'sagemaker'

In [9]:
sys.path.append('../..')


In [8]:
!python3 -m pip install "../.[dev]"

Processing /Users/stiebels/Desktop/projects/sagemaker-rightline
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[93 lines of output][0m
  [31m   [0m No `packages` or `py_modules` configuration, performing automatic discovery.
  [31m   [0m `flat-layout` detected -- analysing .
  [31m   [0m discovered packages -- ['sagemaker_rightline', 'create', 'sagemaker_rightline.cli', 'create.bin', 'create.include', 'create.lib']
  [31m   [0m Traceback (most recent call last):
  [31m   [0m   File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 353, in <module>
  [31m   [0m     main()
  [31m   [0m   File "/Library/Frameworks/Python.frame

In [4]:
from tests.fixtures.pipeline import get_sagemaker_pipeline, DUMMY_BUCKET


ModuleNotFoundError: No module named 'tests.fixtures'

## Fetching SM Pipeline

In [3]:
sm_pipeline = get_sagemaker_pipeline(script_path="../tests/fixtures/fake_processing_script.py")

NameError: name 'get_sagemaker_pipeline' is not defined

In [None]:
sm_pipeline.steps[2].inputs

## Defining function that validates the SM Pipeline

Note: In order to run this example offline/without AWS access, we're mocking ECR and publishing two mock images to it.

In [None]:
@mock_ecr
@create_image(
    boto3.client("ecr"), [ContainerImage(uri=IMAGE_1_URI), ContainerImage(uri=IMAGE_2_URI)]
)
def validate_sm_pipeline(sm_pipeline: "sagemaker.workflow.pipeline.Pipeline") -> "pandas.DataFrame":
    validations = [
        StepImagesExist(),
        PipelineParametersAsExpected(
            parameters_expected=[
                ParameterString(
                    name="parameter-1",
                    default_value="some-value",
                ),
            ],
            rule=Contains(),
        ),
        StepKmsKeyIdAsExpected(
            kms_key_id_expected="some/kms-key-alias",
            step_name="sm_training_step_sklearn",  # optional: if not set, will check all steps
            rule=Equals(),
        ),
        StepNetworkConfigAsExpected(
            network_config_expected=NetworkConfig(
                enable_network_isolation=False,
                security_group_ids=["sg-1234567890"],
                subnets=["subnet-1234567890"],
            ),
            rule=Equals(negative=True),
        ),
        StepLambdaFunctionExists(),
        StepRoleNameExists(),
        StepRoleNameAsExpected(
            role_name_expected="some-role-name",
            step_name="sm_training_step_sklearn",  # optional: if not set, will check all steps
            rule=Equals(),
        ),
        StepInputsAsExpected(
            inputs_expected=[
                ProcessingInput(
                    source=f"s3://{DUMMY_BUCKET}/input-1",
                    destination="/opt/ml/processing/input",
                    input_name="input-2",
                )
            ],
            step_type="Processing",  # either step_type or step_name must be set to filter
            rule=Contains(),
        ),
        StepOutputsAsExpected(
            outputs_expected=[
                ProcessingOutput(
                    source="/opt/ml/processing/output",
                    destination=f"s3://{DUMMY_BUCKET}/output-1",
                    output_name="output-1",
                )
            ],
            step_name="sm_processing_step_spark",  # optional
            rule=Contains(),
        ),
        StepOutputsMatchInputsAsExpected(
            inputs_outputs_expected=[
                {
                    "input": {
                        "step_name": "sm_processing_step_sklearn",
                        "input_name": "input-1",
                    },
                    "output": {
                        "step_name": "sm_processing_step_sklearn",
                        "output_name": "output-1",
                    },
                }
            ]
        ),
        StepCallbackSqsQueueExists(),
    ]
    cm = Configuration(
        validations=validations,
        sagemaker_pipeline=sm_pipeline,
    )
    return cm.run(return_df=True)

## Running the validation and printing the report

In [None]:
report = validate_sm_pipeline(sm_pipeline)

In [None]:
report