# Launch SageMaker Pipeline

In [5]:
import boto3
import sagemaker
from datetime import date
from sagemaker.workflow.parallelism_config import ParallelismConfiguration
import utils

## General parameters

In [8]:
def read_config_data(config_file_path: str = "config.yml") -> dict:
    """Read the YAML configuration file associated with this notebook.

    Parameters
    ----------
    config_file_path : str, optional
        Path of the configuration file. Defaults to ``config.yml``, i.e. the
        file located in the same path the notebook runs from.

    Returns
    -------
        Dictionary with the configuration of the process.
    """
    # NOTE(review): ``safe_load`` is not imported in any visible cell;
    # presumably it is ``yaml.safe_load`` -- confirm and import it in the
    # imports cell so the notebook survives "Restart Kernel -> Run All".
    with open(config_file_path) as conf_file:
        return safe_load(conf_file.read())



# S3 bucket and prefix used for the NPS data
s3_bucket_nps = 'iberia-data-lake'
s3_bucket_nps_prefix = 'customer/nps_client_model'

# S3 bucket and prefix used for the load-factor data
s3_bucket_lf = 'ibdata-prod-ew1-s3-customer'
s3_bucket_lf_prefix = 'customer/load_factor_to_s3_nps_model/'

# Environment (sbx or prod pipeline): only "production" selects the prod pipeline
environment = "develop"
environment_ppl = "prod" if environment == "production" else "sbx"

# Config file
configuration = read_config_data()

# Parameters
# NOTE(review): the previous value '20234-03-13' had a five-digit year;
# '2024-03-13' is assumed from insert_date_ci below -- confirm the intended date.
str_execution_date = '2024-03-13'  # Change for every run
# Fail fast on a malformed date instead of shipping it to the pipeline.
# NOTE(review): assumes the pipeline expects ISO YYYY-MM-DD, like insert_date_ci -- confirm.
date.fromisoformat(str_execution_date)
is_last_date = "0"  # Optional
model_version = configuration.get("MODEL_VERSION")
vertical = configuration.get("VERTICAL")
s3_path_write = "sagemaker/sagemaker-template/prod/"

# Read locations, built from the bucket/prefix variables above so the
# literals are not duplicated (the resulting strings are unchanged).
insert_date_ci = '2024-02-12'
s3_path_read_nps = f's3://{s3_bucket_nps}/customer/nps_surveys/export_historic/insert_date_ci={insert_date_ci}/'
s3_path_read_lf = f"s3://{s3_bucket_lf}/{s3_bucket_lf_prefix}"


ValueError: Must setup local AWS configuration with a region supported by SageMaker.

## Pipeline definition

### Create the pipeline definition from the code in the SageMaker repository

In [4]:
from production.pipelines_code.pipeline import get_pipeline as pipeline_definition

# The pipeline name doubles as the base prefix passed to the pipeline builder.
pipeline_name = f"ibdata-{vertical}-{model_version.lower()}-{environment_ppl}-ppl"
base_job_prefix = pipeline_name

# NOTE(review): region, role, default_bucket and default_bucket_prefix are not
# assigned in any visible cell of this notebook -- confirm where they come from,
# otherwise this cell raises NameError on a fresh kernel.
pipe_definition = pipeline_definition(
    region=region,
    role=role,
    default_bucket=default_bucket,
    default_bucket_prefix=default_bucket_prefix,
    pipeline_name=pipeline_name,
    base_job_prefix=base_job_prefix,
)
# Upsert is currently disabled; `all_step` is not defined in any visible cell.
# all_step.upsert(role_arn=role)

This function has been deprecated and could break pipeline step caching. We recommend using the run() function directly with pipeline sessionsto access step arguments.


### Execute pipeline with parameters

In [5]:
# Start an execution of the pipeline built in the previous cell, allowing
# up to 4 steps to run in parallel.
# NOTE(review): s3_path_read and use_type are not assigned in any visible cell
# (only s3_path_read_nps / s3_path_read_lf are) -- confirm the parameter names
# the pipeline expects before running.
execution = pipe_definition.start(
    parallelism_config=ParallelismConfiguration(max_parallel_execution_steps=4).to_request(),
    parameters={
        "str_execution_date": str_execution_date,
        "is_last_date": is_last_date,
        "s3_path_write": s3_path_write,
        "s3_path_read": s3_path_read,
        "use_type": use_type,
    },
)
# Display the description of the execution that was just started.
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:eu-west-1:077156906314:pipeline/ibdata-customer-blv-sbx-ppl',
 'PipelineExecutionArn': 'arn:aws:sagemaker:eu-west-1:077156906314:pipeline/ibdata-customer-blv-sbx-ppl/execution/xiz1fh32n9pz',
 'PipelineExecutionDisplayName': 'execution-1672240944345',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2022, 12, 28, 15, 22, 24, 258000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2022, 12, 28, 15, 22, 24, 258000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:077156906314:user-profile/d-8rnxtw3g36nb/customer-esteban-sanchez',
  'UserProfileName': 'customer-esteban-sanchez',
  'DomainId': 'd-8rnxtw3g36nb'},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:077156906314:user-profile/d-8rnxtw3g36nb/customer-esteban-sanchez',
  'UserProfileName': 'customer-esteban-sanchez',
  'DomainId': 'd-8rnxtw3g36nb'},
 'ParallelismConfiguration': {'MaxParallelExecutionSteps': 4},
 