In [1]:
import pandas as pd
import numpy as np
from sagemaker.workflow.pipeline import Pipeline

### A SageMaker Pipeline

The pipeline we create follows a typical machine learning application pattern: pre-processing, training, evaluation, and then conditional model registration and publication, which happen only if the quality of the model is sufficient.

![A typical ML Application pipeline](img/pipeline-full.png)

### Getting some constants

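We read the AWS region and the execution role from the local execution environment, and set the S3 bucket, the model package group name, and the pipeline name explicitly.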

In [2]:
import boto3
import sagemaker


# Region of the default boto3 session and the execution role this notebook
# runs under; both are read from the local execution environment.
region = boto3.Session().region_name
role = sagemaker.get_execution_role()

# Bucket passed to the pipeline factory. To use the SageMaker session's
# default bucket instead, replace the literal with:
#   sagemaker.session.Session().default_bucket()
default_bucket = "ars-mlops-projects"

# Change these to reflect your project/business name, or to keep your
# ModelPackageGroup/Pipeline separate from the rest of your team's.
model_package_group_name = "MOBPackageGroup-Example"
pipeline_name = "MobPricePipeline"

### Get the pipeline instance

Here we import `get_pipeline` from the project's `pipelines.pipeline` module and use it to build the pipeline instance that the rest of the notebook works with.

In [3]:
from pipelines.pipeline import get_pipeline

In [4]:
# One session pinned to the configured region; both clients are created from it.
boto_session = boto3.Session(region_name=region)

# Clients are built in the same order as before: "sagemaker" first,
# then "sagemaker-runtime".
sagemaker_client, runtime_client = (
    boto_session.client(service_name)
    for service_name in ("sagemaker", "sagemaker-runtime")
)

In [5]:
# Build the pipeline object from the settings defined in the cells above.
# The mapping is unpacked into keyword arguments, so get_pipeline receives
# exactly the same named arguments as a direct keyword call.
pipeline = get_pipeline(
    **{
        "region": region,
        "role": role,
        "default_bucket": default_bucket,
        "model_package_group_name": model_package_group_name,
        "pipeline_name": pipeline_name,
    }
)


In [6]:
import json

# Parse the pipeline's definition string as JSON so the cell displays it as a
# nested dict (parameters, experiment config, steps) rather than one long string.
json.loads(pipeline.definition())

{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'ProcessingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.xlarge'},
  {'Name': 'ProcessingInstanceCount', 'Type': 'Integer', 'DefaultValue': 1},
  {'Name': 'TrainingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.xlarge'},
  {'Name': 'ModelApprovalStatus',
   'Type': 'String',
   'DefaultValue': 'PendingManualApproval'},
  {'Name': 'InputData',
   'Type': 'String',
   'DefaultValue': 's3://ars-mlops-projects/mobile-price-prediction/data/raw_data/test.csv'}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'PreProcessMob',
   'Type': 'Processing',
   'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': {'Get': 'Parameters.ProcessingInstanceType'},
      'InstanceCount': {'Get': 'Parameters.ProcessingInstanceCount'},
      'VolumeSizeInGB': 30}},
    'AppSpecific

In [7]:
# Upsert the pipeline in SageMaker under the execution role. The displayed
# response includes the pipeline's ARN and the HTTP response metadata.
# NOTE(review): "upsert" presumably means create-or-update — confirm against
# the SageMaker SDK docs for Pipeline.upsert.
pipeline.upsert(role_arn=role)

{'PipelineArn': 'arn:aws:sagemaker:ap-south-1:832173187970:pipeline/mobpricepipeline',
 'ResponseMetadata': {'RequestId': '8c96aa16-687a-47e3-a9f6-f4b63ed65e11',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '8c96aa16-687a-47e3-a9f6-f4b63ed65e11',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '85',
   'date': 'Tue, 14 Jun 2022 16:58:34 GMT'},
  'RetryAttempts': 0}}

In [8]:
# Start a run of the pipeline (no arguments are passed here) and keep the
# returned execution object so its status can be inspected afterwards.
execution = pipeline.start()

In [9]:
# Display the execution's current metadata; PipelineExecutionStatus in the
# output shows whether the run is still executing.
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:ap-south-1:832173187970:pipeline/mobpricepipeline',
 'PipelineExecutionArn': 'arn:aws:sagemaker:ap-south-1:832173187970:pipeline/mobpricepipeline/execution/wxi19cf0kc7y',
 'PipelineExecutionDisplayName': 'execution-1655225924978',
 'PipelineExecutionStatus': 'Executing',
 'PipelineExperimentConfig': {'ExperimentName': 'mobpricepipeline',
  'TrialName': 'wxi19cf0kc7y'},
 'CreationTime': datetime.datetime(2022, 6, 14, 16, 58, 44, 912000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2022, 6, 14, 16, 58, 44, 912000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:ap-south-1:832173187970:user-profile/d-l3kyvi0vqpnz/arshad',
  'UserProfileName': 'arshad',
  'DomainId': 'd-l3kyvi0vqpnz'},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:ap-south-1:832173187970:user-profile/d-l3kyvi0vqpnz/arshad',
  'UserProfileName': 'arshad',
  'DomainId': 'd-l3kyvi0vqpnz'},
 'ResponseMetadata': {'RequestId': 'feb55440-ddcc-4548-9