# Launch SageMaker Pipeline

In [1]:
import boto3
import sagemaker
from datetime import date
from sagemaker.workflow.parallelism_config import ParallelismConfiguration
import utils

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


## General parameters

In [2]:
 # By default
region = boto3.Session().region_name
role = sagemaker.get_execution_role()

# S3 locations declared for this notebook ("lf" presumably stands for
# load factor, judging by the prefix value -- TODO confirm).
s3_bucket_nps = 'iberia-data-lake'
s3_bucket_nps_prefix = 'customer/nps_explainability_model'

s3_bucket_lf = 'ibdata-prod-ew1-s3-customer'
s3_bucket_lf_prefix = 'customer/load_factor_to_s3_nps_model'

# Environment: "production" selects the "prod" pipeline suffix; ANY other
# value (including typos) falls back to "sbx".
environment = "production"
environment_ppl = "prod" if environment == "production" else "sbx"

# Config file
configuration = utils.read_config_data()

# Parameters
str_execution_date = '2024-05-09'  # Change for every run
str_intervals_starting_date = '2023-01-01'
is_last_date = "0"  # Optional
model_version = configuration.get("MODEL_VERSION")
vertical = configuration.get("VERTICAL")
# NOTE(review): this path hard-codes "prod" regardless of `environment` -- confirm
# that is intended before running with a non-production environment.
s3_path_write = "customer/nps_aggregated_explainability/prod"

insert_date_ci = '2024-05-06'
s3_path_read_nps = 'customer/nps_surveys/export_historic'
s3_path_read_lf = "customer/load_factor_to_s3_nps_model"

# Fail fast on missing configuration: both values are interpolated into the
# pipeline name later on, where a missing MODEL_VERSION would raise an opaque
# AttributeError and a missing VERTICAL would silently produce "ibdata-None-...".
for _config_key, _config_value in (
    ("MODEL_VERSION", model_version),
    ("VERTICAL", vertical),
):
    if not _config_value:
        raise ValueError(
            f"Configuration key {_config_key!r} is missing or empty; "
            "check the file loaded by utils.read_config_data()."
        )

# Fail fast on malformed dates so a typo is caught here rather than after the
# pipeline execution has already been launched.
for _date_name, _date_value in (
    ("str_execution_date", str_execution_date),
    ("str_intervals_starting_date", str_intervals_starting_date),
    ("insert_date_ci", insert_date_ci),
):
    try:
        date.fromisoformat(_date_value)
    except (TypeError, ValueError) as exc:
        raise ValueError(
            f"{_date_name}={_date_value!r} is not a valid ISO date (YYYY-MM-DD)."
        ) from exc



## Pipeline definition

### Create or update (upsert) the pipeline definition using the code from the sagemaker repository

In [3]:
from production.pipelines_code.pipeline import get_pipeline as pipeline_definition

# The pipeline name is derived from the config values and the environment
# suffix; the same string is reused as the prefix for the jobs it launches.
pipeline_name = f"ibdata-{vertical}-{model_version.lower()}-{environment_ppl}-ppl"
base_job_prefix = pipeline_name

# Bucket and prefix come from the parameters cell instead of duplicated
# literals, so editing the configuration there is enough. With the current
# settings they resolve to 'iberia-data-lake' and
# 'customer/nps_aggregated_explainability/prod/pipeline', as before.
pipe_definition = pipeline_definition(
    region=region,
    role=role,
    default_bucket=s3_bucket_nps,
    default_bucket_prefix=f"{s3_path_write}/pipeline",
    pipeline_name=pipeline_name,
    base_job_prefix=base_job_prefix,
)

# Create the pipeline, or update it if it already exists; the service
# response is shown as the cell output.
pipe_definition.upsert(role_arn=role)


INFO:sagemaker.processing:Uploaded None to s3://iberia-data-lake/customer/nps_aggregated_explainability/prod/pipeline/ibdata-customer-nps-aggregated-explaina-2024-05-10-13-44-49-862/source/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://iberia-data-lake/customer/nps_aggregated_explainability/prod/pipeline/ibdata-customer-nps-aggregated-explaina-2024-05-10-13-44-49-862/source/runproc.sh
INFO:sagemaker.processing:Uploaded None to s3://iberia-data-lake/customer/nps_aggregated_explainability/prod/pipeline/ibdata-customer-nps-aggregated-explaina-2024-05-10-13-44-50-299/source/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://iberia-data-lake/customer/nps_aggregated_explainability/prod/pipeline/ibdata-customer-nps-aggregated-explaina-2024-05-10-13-44-50-299/source/runproc.sh
INFO:sagemaker.processing:Uploaded None to s3://iberia-data-lake/customer/nps_aggregated_explainability/prod/pipeline/ibdata-customer-nps-aggregated-explaina-2024-05-10-13-44-5

{'PipelineArn': 'arn:aws:sagemaker:eu-west-1:077156906314:pipeline/ibdata-customer-nps-aggregated-explainability-prod-ppl',
 'ResponseMetadata': {'RequestId': 'c00dc895-fbe4-4027-b1f9-4435479adbfe',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'c00dc895-fbe4-4027-b1f9-4435479adbfe',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '122',
   'date': 'Fri, 10 May 2024 13:44:51 GMT'},
  'RetryAttempts': 0}}

In [4]:
# Display the pipeline name built in the previous cell.
pipeline_name

'ibdata-customer-nps-aggregated-explainability-prod-ppl'

### Execute pipeline with parameters

In [5]:
# Start one execution of the pipeline object created earlier in this notebook,
# forwarding the run parameters and allowing at most 4 steps in parallel.
execution = pipe_definition.start(
    parameters={
        "str_execution_date": str_execution_date,
        "str_intervals_starting_date": str_intervals_starting_date,
        "is_last_date": is_last_date,
        "s3_path_write": s3_path_write,
        "s3_path_read_nps": s3_path_read_nps,
        "s3_path_read_lf": s3_path_read_lf,
        "use_type": "predict_historic",
    },
    parallelism_config=ParallelismConfiguration(
        max_parallel_execution_steps=4
    ).to_request(),
)

# Show the description of the execution that was just started.
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:eu-west-1:077156906314:pipeline/ibdata-customer-nps-aggregated-explainability-prod-ppl',
 'PipelineExecutionArn': 'arn:aws:sagemaker:eu-west-1:077156906314:pipeline/ibdata-customer-nps-aggregated-explainability-prod-ppl/execution/h0dym9xe3wux',
 'PipelineExecutionDisplayName': 'execution-1715348692518',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2024, 5, 10, 13, 44, 52, 472000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 5, 10, 13, 44, 52, 472000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:077156906314:user-profile/d-8rnxtw3g36nb/customer-diego-rivera',
  'UserProfileName': 'customer-diego-rivera',
  'DomainId': 'd-8rnxtw3g36nb',
  'IamIdentity': {'Arn': 'arn:aws:sts::077156906314:assumed-role/ibdata-aip-role-sagemaker-customer-user/SageMaker',
   'PrincipalId': 'AROARD5XI6VFDJH3ORBAM:SageMaker'}},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west