# RUN PIPELINE

### Import libraries

In [29]:
import os
import pytz
import boto3
import sagemaker
import sagemaker.session
from sagemaker.processing import (
    ProcessingInput,
    ProcessingOutput,
    ScriptProcessor,
)
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.steps import ProcessingStep


In [30]:
def get_session(region, default_bucket):
    """Build a SageMaker session bound to the given AWS region.

    Args:
        region: AWS region name used to create the underlying boto3 session.
        default_bucket: bucket handed to the SageMaker session as its
            default bucket for storing artifacts.

    Returns:
        A `sagemaker.session.Session` instance.
    """
    regional_session = boto3.Session(region_name=region)

    # Both service clients come from the same boto3 session so that they
    # share its region.
    service_clients = {
        "sagemaker_client": regional_session.client("sagemaker"),
        "sagemaker_runtime_client": regional_session.client("sagemaker-runtime"),
    }

    return sagemaker.session.Session(
        boto_session=regional_session,
        default_bucket=default_bucket,
        **service_clients,
    )

In [None]:
# Notebook-level settings. Replace each None with your own value before
# running the cells below; a bare `name = # comment` is a syntax error.
region = None  # TODO: define your region
role = None  # TODO: define IAM role ARN
default_bucket = None  # TODO: define a bucket
pipeline_name = None  # TODO: define a pipeline name

### Define the pipeline

In [32]:
def get_pipeline(
    region,
    role=None,
    default_bucket=None,
    pipeline_name=None,
):
    """Build a SageMaker pipeline with a single script-processing step.

    Args:
        region: AWS region used to create the SageMaker session.
        role: IAM role ARN for the processing job. When None, the execution
            role is looked up from the SageMaker session.
        default_bucket: the bucket to use for storing the artifacts.
        pipeline_name: name given to the pipeline. Required; it only has a
            default so that it can legally follow the optional parameters.

    Returns:
        A `Pipeline` instance containing the preprocessing step.

    Raises:
        ValueError: if `pipeline_name` is missing, or if any of the template
            settings at the top of the function has not been filled in.
    """
    if not pipeline_name:
        raise ValueError("pipeline_name is required")

    # ---- Template settings: fill these in before running ----------------
    input_data = None  # TODO: input data S3 path
    preprocessed_output1 = None  # TODO: S3 path to save output1
    preprocessed_output2 = None  # TODO: S3 path to save output2
    ecr_repository = None  # TODO: ECR repository name of the processing image
    instance_type = None  # TODO: processing instance type
    instance_count = 1

    # Fail fast with a readable message instead of sending unset values to AWS.
    template_settings = {
        "input_data": input_data,
        "preprocessed_output1": preprocessed_output1,
        "preprocessed_output2": preprocessed_output2,
        "ecr_repository": ecr_repository,
        "instance_type": instance_type,
    }
    missing = [name for name, value in template_settings.items() if not value]
    if missing:
        raise ValueError(
            "Fill in the template settings in get_pipeline: " + ", ".join(missing)
        )

    sagemaker_session = get_session(region, default_bucket)

    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)
    account_id = boto3.client("sts").get_caller_identity().get("Account")
    # Take the region from the session built above rather than from a fresh
    # default boto3 session, so the image URI matches the requested region.
    image_region = sagemaker_session.boto_region_name

    # ---- Preprocessing ---------------------------------------------------
    preprocessing_repository_uri = (
        f"{account_id}.dkr.ecr.{image_region}.amazonaws.com/{ecr_repository}:latest"
    )

    script_processor = ScriptProcessor(
        command=["python3"],
        image_uri=preprocessing_repository_uri,
        role=role,
        instance_count=instance_count,
        instance_type=instance_type,
        tags=[{"Key": "JobType", "Value": "Preprocessing"}],
        # Reuse the session created for this pipeline instead of letting the
        # processor fall back to a default one.
        sagemaker_session=sagemaker_session,
    )

    step_preprocess = ProcessingStep(
        name="preprocessing-step",
        processor=script_processor,
        code="Processing_job/preprocessing.py",
        inputs=[
            ProcessingInput(
                source=input_data,
                destination="/opt/ml/processing/input",
            ),
            # Ship the local helper package alongside the entry-point script.
            ProcessingInput(
                source="Processing_job/my_package/",
                destination="/opt/ml/processing/input/code/my_package/",
            ),
        ],
        outputs=[
            ProcessingOutput(
                output_name="output1",
                destination=preprocessed_output1,
                source="/opt/ml/processing/output1",
            ),
            ProcessingOutput(
                output_name="output2",
                destination=preprocessed_output2,
                source="/opt/ml/processing/output2",
            ),
        ],
    )

    # ---- Pipeline orchestration -------------------------------------------
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[],
        steps=[step_preprocess],
        sagemaker_session=sagemaker_session,
    )
    return pipeline


In [34]:
# Build the pipeline definition from the notebook-level settings.
# pipeline_name is passed explicitly; get_pipeline needs it to name the pipeline.
pipeline_def = get_pipeline(
    region,
    role,
    default_bucket,
    pipeline_name=pipeline_name,
)
# Create or update the pipeline, then start one execution of it.
pipeline_def.upsert(role_arn=role)
execution = pipeline_def.start()

# Delete pipeline

In [153]:
import uuid

# Uses the `region` and `pipeline_name` defined earlier in this notebook so
# the delete targets the same pipeline that was created above.
client = boto3.client("sagemaker", region_name=region)
response = client.delete_pipeline(
    PipelineName=pipeline_name,
    # Idempotency token for this request. A fresh UUID string is 36
    # characters of letters, digits and hyphens, generated per call instead
    # of reusing one hard-coded value.
    ClientRequestToken=str(uuid.uuid4()),
)