In [1]:
import pandas as pd
import numpy as np
from sagemaker.workflow.pipeline import Pipeline

### A SageMaker Pipeline

The pipeline that we create follows a typical machine learning application pattern: pre-processing, training, evaluation, and, only if the quality of the model is sufficient, model registration and publication.

![A typical ML Application pipeline](img/pipeline-full.png)

### Getting some constants

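We read the AWS region and the SageMaker execution role from the local execution environment, and set the S3 bucket, model package group name, and pipeline name that the rest of the notebook uses.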

In [2]:
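# Optional one-off environment step, left disabled: uncomment to upgrade numpy in the kernel's environment.
# %pip install --upgrade numpy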

In [3]:
import boto3
import sagemaker


# Region and execution role are resolved from the environment this notebook runs in.
region = boto3.Session().region_name
role = sagemaker.get_execution_role()

# S3 bucket handed to get_pipeline() as `default_bucket`. To use the SageMaker
# session's default bucket instead of the project bucket, swap in:
#   default_bucket = sagemaker.session.Session().default_bucket()
default_bucket = "ars-mlops-projects"

# Change these to reflect your project/business name or if you want to separate ModelPackageGroup/Pipeline from the rest of your team
# (plain string literals: nothing is interpolated, so no f-prefix is needed).
model_package_group_name = "MOBPackageGroup-Example"
pipeline_name = "MobPricePipeline"

### Get the pipeline instance

Here we get the pipeline instance from your pipeline module so that we can work with it.

In [4]:
from pipelines.pipeline import get_pipeline

In [5]:
# A single boto3 session pinned to the configured region; the SageMaker control-plane
# client and the runtime client are both created from it, in that order.
boto_session = boto3.Session(region_name=region)
sagemaker_client, runtime_client = (
    boto_session.client("sagemaker"),
    boto_session.client("sagemaker-runtime"),
)

In [6]:
# Build the pipeline object with the project's get_pipeline() factory, passing the
# region, role, bucket and names configured earlier in this notebook.
pipeline = get_pipeline(
    region=region,
    role=role,
    default_bucket=default_bucket,
    model_package_group_name=model_package_group_name,
    pipeline_name=pipeline_name,
)


In [7]:
import json

# pipeline.definition() returns the pipeline definition as a JSON string; parsing it
# lets the notebook display it as a nested dict instead of one long string.
json.loads(pipeline.definition())

{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [{'Name': 'ProcessingInstanceType',
   'Type': 'String',
   'DefaultValue': 'ml.m5.xlarge'},
  {'Name': 'ProcessingInstanceCount', 'Type': 'Integer', 'DefaultValue': 1},
  {'Name': 'ModelApprovalStatus',
   'Type': 'String',
   'DefaultValue': 'PendingManualApproval'},
  {'Name': 'InputData',
   'Type': 'String',
   'DefaultValue': 's3://ars-mlops-projects/mobile-price-prediction/data/raw_data/train.csv'}],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'PreProcessMob',
   'Type': 'Processing',
   'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': 'ml.m5.xlarge',
      'InstanceCount': 1,
      'VolumeSizeInGB': 30}},
    'AppSpecification': {'ImageUri': '720646828776.dkr.ecr.ap-south-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3',
     'ContainerArguments': ['--input-data', {'Get': 'Para

In [8]:
# Submit the pipeline definition to SageMaker under the execution role. According to
# the SageMaker SDK docs, upsert creates the pipeline or updates it if it already exists.
pipeline.upsert(role_arn=role)

{'PipelineArn': 'arn:aws:sagemaker:ap-south-1:832173187970:pipeline/mobpricepipeline',
 'ResponseMetadata': {'RequestId': '8576d3bd-876c-4730-a319-446f904e9c87',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '8576d3bd-876c-4730-a319-446f904e9c87',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '85',
   'date': 'Thu, 14 Jul 2022 17:02:02 GMT'},
  'RetryAttempts': 0}}

In [9]:
# Start a pipeline execution without parameter overrides; the returned handle is
# inspected in the following cells.
execution = pipeline.start()

In [10]:
# Show the execution's current status and metadata.
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:ap-south-1:832173187970:pipeline/mobpricepipeline',
 'PipelineExecutionArn': 'arn:aws:sagemaker:ap-south-1:832173187970:pipeline/mobpricepipeline/execution/rbn4m2ra9np8',
 'PipelineExecutionDisplayName': 'execution-1657818123276',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2022, 7, 14, 17, 2, 3, 211000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2022, 7, 14, 17, 2, 3, 211000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:ap-south-1:832173187970:user-profile/d-l3kyvi0vqpnz/arshad',
  'UserProfileName': 'arshad',
  'DomainId': 'd-l3kyvi0vqpnz'},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:ap-south-1:832173187970:user-profile/d-l3kyvi0vqpnz/arshad',
  'UserProfileName': 'arshad',
  'DomainId': 'd-l3kyvi0vqpnz'},
 'ResponseMetadata': {'RequestId': 'b782f979-4eba-42f0-bc9d-a9264f8f5646',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'b782f979-4eba-42f0-bc

In [11]:
# List the steps of this execution (the list can still be empty right after start).
execution.list_steps()

[]

In [12]:
import os

# Show the kernel's current working directory. Explicit Python replaces the bare `pwd`
# automagic, which fails if automagic is disabled or a variable named `pwd` exists.
os.getcwd()


'/root/AWS Projects/AWS-Projects/Custom Pipelines/mobprice'

In [13]:
# Load a CSV into a DataFrame for ad-hoc inspection.
# NOTE(review): this is an absolute, machine-specific path and the file is not produced
# anywhere in this notebook — confirm the intended dataset location before sharing.
p = "/root/Practice/data/autos.csv"
df = pd.read_csv(p)

In [14]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,symboling,make,fuel_type,aspiration,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,...,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
0,3,alfa-romero,gas,std,2,convertible,rwd,front,88.6,168.8,...,130,mpfi,3.47,2.68,9,111,5000,21,27,13495
1,3,alfa-romero,gas,std,2,convertible,rwd,front,88.6,168.8,...,130,mpfi,3.47,2.68,9,111,5000,21,27,16500
2,1,alfa-romero,gas,std,2,hatchback,rwd,front,94.5,171.2,...,152,mpfi,2.68,3.47,9,154,5000,19,26,16500
3,2,audi,gas,std,4,sedan,fwd,front,99.8,176.6,...,109,mpfi,3.19,3.4,10,102,5500,24,30,13950
4,2,audi,gas,std,4,sedan,4wd,front,99.4,176.6,...,136,mpfi,3.19,3.4,8,115,5500,18,22,17450
