In [1]:
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.processing import FrameworkProcessor
from sagemaker.sklearn import SKLearn
from sagemaker import Session

import os

In [37]:
# Base path used below to build the ProcessingInput destination and the
# ProcessingOutput source inside the processing container.
PROCESSING_CONTAINER_DIR = "/opt/ml/processing"

# Local directory holding the processing code (including entrypoint.sh).
# Set the PREPROCESSING_COMPONENT_SOURCE_DIR environment variable to point at a
# different checkout; the default keeps the path this notebook was written with.
PREPROCESSING_COMPONENT_SOURCE_DIR = os.environ.get(
    "PREPROCESSING_COMPONENT_SOURCE_DIR",
    "/Users/knikitiuk/workspace/mlops-utilities/mlops_utilities/processing_code",
)

In [38]:
def create_processor(
    sagemaker_session,
    role: str = "arn:aws:iam::311638508164:role/AmazonSageMaker-ExecutionRole",
    instance_count: int = 1,
    instance_type: str = "ml.t3.medium",
    framework_version: str = "0.23-1",
) -> FrameworkProcessor:
    """Build a ``FrameworkProcessor`` that uses the ``SKLearn`` estimator class.

    Args:
        sagemaker_session: Session object forwarded to the processor.
        role: IAM role ARN passed to the processor.
        instance_count: Number of instances passed to the processor.
        instance_type: Instance type passed to the processor.
        framework_version: Framework version string passed to the processor.

    Returns:
        The configured ``FrameworkProcessor``.

    All keyword defaults equal the values that were previously hard-coded, so
    ``create_processor(session)`` behaves exactly as before.
    """
    return FrameworkProcessor(
        estimator_cls=SKLearn,
        framework_version=framework_version,
        role=role,
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
    )

In [39]:

# Session configured with an explicit default bucket.
sm_session = Session(default_bucket='kris-mlops-utilities-test')

# NOTE(review): newer SageMaker SDK releases document passing
# `step_args=processor.run(...)` (with a PipelineSession) instead of
# `processor=`/`code=` — confirm against the installed SDK version.
notebook_processing = ProcessingStep(
    "NotebookProcessing",
    processor=create_processor(sm_session),
    inputs=[
        ProcessingInput(
            input_name="code",
            source=PREPROCESSING_COMPONENT_SOURCE_DIR,
            # Container-side path: always joined with "/" so the result does
            # not depend on the path separator of the machine running this
            # notebook (os.path.join would use "\\" on Windows).
            destination=f"{PROCESSING_CONTAINER_DIR}/code",
        ),
    ],
    outputs=[
        ProcessingOutput(
            output_name="output-data",
            # Container-side path, same reasoning as the input destination.
            source=f"{PROCESSING_CONTAINER_DIR}/output-data",
        ),
    ],
    # Host-side path to the entry script, so the host's join rules apply.
    code=os.path.join(PREPROCESSING_COMPONENT_SOURCE_DIR, "entrypoint.sh"),
)

In [40]:

# Pipeline definition: no parameters, and the notebook processing step as its
# only step, bound to the same session the step's processor was created with.
pipeline = Pipeline(
    name="processing-notebook-pipeline",
    parameters=[],
    steps=[notebook_processing],
    sagemaker_session=sm_session,
)


In [41]:
# Upsert the pipeline definition under the given role; the response dict is
# displayed as the cell output.
pipeline.upsert(
    role_arn="arn:aws:iam::311638508164:role/AmazonSageMaker-ExecutionRole",
)

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:311638508164:pipeline/processing-notebook-pipeline',
 'ResponseMetadata': {'RequestId': '9074f41b-71fa-419c-b420-06c811db7a48',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '9074f41b-71fa-419c-b420-06c811db7a48',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '96',
   'date': 'Fri, 10 Feb 2023 13:23:20 GMT'},
  'RetryAttempts': 0}}

In [3]:
from actions import run_pipeline

# NOTE(review): this targets the pipeline named 'test-upsert', not the
# 'processing-notebook-pipeline' defined earlier in this notebook — confirm
# that is intended and that 'test-upsert' exists before this cell runs.
run_pipeline(
    pipeline_name="test-upsert",
    execution_name_prefix="test",
    pipeline_params={},
)

{'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:311638508164:pipeline/test-upsert/execution/wuiix8q6ji5x',
 'ResponseMetadata': {'RequestId': 'ed7701b7-4c3a-4c75-a742-08bd7e0c2d68',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ed7701b7-4c3a-4c75-a742-08bd7e0c2d68',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '111',
   'date': 'Mon, 13 Feb 2023 12:32:49 GMT'},
  'RetryAttempts': 0}}

In [2]:
from actions import upsert_notebook_pipeline
# Presumably creates or updates the 'test-upsert' pipeline from the processing
# code directory — verify against `actions.upsert_notebook_pipeline`.
# NOTE(review): the run_pipeline cell that uses this pipeline name appears
# earlier in the notebook; confirm the intended execution order.
upsert_notebook_pipeline(
    pipeline_name="test-upsert",
    notebook_path="/Users/knikitiuk/workspace/mlops-utilities/mlops_utilities/processing_code/",
    config_type="",
)

In [4]:
# IAM role ARN; same value that is passed to the processor and to
# pipeline.upsert elsewhere in this notebook.
pipeline_role = "arn:aws:iam::311638508164:role/AmazonSageMaker-ExecutionRole"

In [9]:
from omegaconf import OmegaConf

# Load the pipeline defaults from the YAML file into an OmegaConf config.
default_conf_path = "/Users/knikitiuk/workspace/mlops-utilities/mlops_utilities/processing_code/training_pipeline.defaults.yml"
default_conf = OmegaConf.load(default_conf_path)

# Not executed: sketch of layering overrides on top of the defaults.
# `args` is not defined in the visible cells of this notebook.
#   arg_conf = OmegaConf.create({'pipeline': {'role': pipeline_role}})
#   override_arg_conf = OmegaConf.from_dotlist(args)
#   merged_conf = OmegaConf.merge(default_conf, arg_conf, override_arg_conf)

In [10]:
# Display the loaded defaults. The '???' entries in the output are values the
# YAML leaves unset (OmegaConf's marker for mandatory values — see its docs),
# and '${pipeline.role}' entries are interpolations of the pipeline role.
default_conf

{'pipeline': {'default_bucket': '???', 'role': '???', 'cache_config': {'enable_caching': True, 'expire_after': 'p1d'}, 'model_package_group_name': '???'}, 'featurizing': {'instance_count': 1, 'instance_type': 'ml.t3.medium', 'role': '${pipeline.role}'}, 'training': {'instance_count': 1, 'instance_type': 'ml.m5.large', 'role': '${pipeline.role}'}, 'model': {'instance_count': 1, 'instance_type': 'ml.m5.large', 'role': '${pipeline.role}'}, 'clarify': {'instance_count': 1, 'instance_type': 'ml.m5.large', 'role': '${pipeline.role}'}, 'monitor': {'role': '${pipeline.role}'}}