# Launch SageMaker Pipeline

In [2]:
import boto3
import sagemaker
from datetime import date
from sagemaker.workflow.parallelism_config import ParallelismConfiguration
import utils
import pandas as pd

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


## General parameters

In [3]:
 # By default
region = boto3.Session().region_name  # region of the default boto3 session
role = sagemaker.get_execution_role()  # execution role reported by the SageMaker SDK

# --- Deployment environment --------------------------------------------------
# Only "production" selects the "prod" pipeline suffix; every other value
# (including the current "develop") selects the sandbox suffix "sbx".
environment = "develop"
if environment == "production":
    environment_ppl = "prod"
else:
    environment_ppl = "sbx"

# --- Project configuration ---------------------------------------------------
# Loaded through the project-local `utils` helper; the model version and the
# vertical are read from it.
configuration = utils.read_config_data()
model_version = configuration.get("MODEL_VERSION")
vertical = configuration.get("VERTICAL")

# --- S3 buckets and prefixes -------------------------------------------------
s3_bucket_nps = "iberia-data-lake"
s3_bucket_nps_prefix = "customer/nps_explainability_model"

s3_bucket_lf = "ibdata-prod-ew1-s3-customer"
s3_bucket_lf_prefix = "customer/load_factor_to_s3_nps_model"

# --- S3 paths handed to the pipeline -----------------------------------------
s3_path_read_nps = "customer/nps_surveys/export_historic"
s3_path_read_lf = "customer/load_factor_to_s3_nps_model"
s3_path_write = "customer/simulations/sbx"

# --- Run dates ---------------------------------------------------------------
str_execution_date = "2024-07-02"  # change for every run
insert_date_ci = "2024-07-02"
is_last_date = "0"  # optional; note that it is a string, not an int

# --- Simulation scope --------------------------------------------------------
str_start_date = "2024-01-01"
str_end_date = "2024-01-31"
str_cabin = "Economy"
str_haul = "SH"
use_type = "Client"


# Target value for every feature of the simulated scenario, listed in the
# order in which the values must appear in the serialized string.
_target_values = (
    ("ticket_price", 100.0),
    ("load_factor", 88.95),
    ("pun_100_punctuality_satisfaction", 84.8),
    ("bkg_200_journey_preparation_satisfaction", 72.5),
    ("pfl_100_checkin_satisfaction", 78.6),
    ("pfl_200_security_satisfaction", 81.3),
    ("pfl_300_lounge_satisfaction", 76.1),
    ("pfl_500_boarding_satisfaction", 80.8),
    ("ifl_300_cabin_satisfaction", 79.5),
    ("ifl_200_flight_crew_annoucements_satisfaction", 79.4),
    ("ifl_600_wifi_satisfaction", 38.6),
    ("ifl_500_ife_satisfaction", 70.0),
    ("ifl_400_food_drink_satisfaction", 66.4),
    ("ifl_100_cabin_crew_satisfaction", 80.0),
    ("arr_100_arrivals_satisfaction", 81.9),
    ("con_100_connections_satisfaction", 71.1),
    ("loy_200_loyalty_programme_satisfaction", 76.2),
    ("img_310_ease_contact_phone_satisfaction", 59.3),
)

# One single-element column per feature, i.e. a one-row DataFrame.
data = {feature: [value] for feature, value in _target_values}
df = pd.DataFrame(data)

# Serialize the row as header-less, index-less CSV, then drop the surrounding
# whitespace and any newline so the result is one comma-separated string.
csv_text = df.to_csv(index=False, header=False)
df_targets = csv_text.strip().replace('\n', '')



In [4]:
# Show the serialized targets string built in the previous cell.
df_targets

'100.0,88.95,84.8,72.5,78.6,81.3,76.1,80.8,79.5,79.4,38.6,70.0,66.4,80.0,81.9,71.1,76.2,59.3'

## Pipeline definition

### Create or update the pipeline definition using the code in the SageMaker repository

In [5]:
# Project-local pipeline factory, imported under the alias used below.
from production.pipelines_code.pipeline import get_pipeline as pipeline_definition

# Pipeline name pattern: ibdata-<vertical>-<model version, lower-cased>-<sbx|prod>-ppl
pipeline_name = (
    f"ibdata-{vertical}-{model_version.lower()}-{environment_ppl}-ppl"
)
# The same string is handed to the factory as its `base_job_prefix`.
base_job_prefix = pipeline_name

pipe_definition = pipeline_definition(
    region=region,
    role=role,
    # Reuse the bucket and write path from the parameters cell instead of
    # repeating their literals here, so that editing the parameters cell also
    # moves the pipeline artifacts. With the current parameter values these
    # resolve to 'iberia-data-lake' and 'customer/simulations/sbx/pipeline'.
    default_bucket=s3_bucket_nps,
    default_bucket_prefix=f"{s3_path_write}/pipeline",
    pipeline_name=pipeline_name,
    base_job_prefix=base_job_prefix,
)
# Register the definition with SageMaker; the response is the cell output.
pipe_definition.upsert(role_arn=role)


INFO:sagemaker.processing:Uploaded None to s3://iberia-data-lake/customer/simulations/sbx/pipeline/ibdata-customer-simulations-sbx-ppl/fra-2024-10-08-07-36-35-508/source/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://iberia-data-lake/customer/simulations/sbx/pipeline/ibdata-customer-simulations-sbx-ppl/fra-2024-10-08-07-36-35-508/source/runproc.sh
INFO:sagemaker.processing:Uploaded None to s3://iberia-data-lake/customer/simulations/sbx/pipeline/ibdata-customer-simulations-sbx-ppl/fra-2024-10-08-07-36-35-937/source/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://iberia-data-lake/customer/simulations/sbx/pipeline/ibdata-customer-simulations-sbx-ppl/fra-2024-10-08-07-36-35-937/source/runproc.sh
INFO:sagemaker.processing:Uploaded None to s3://iberia-data-lake/customer/simulations/sbx/pipeline/ibdata-customer-simulations-sbx-ppl/fra-2024-10-08-07-36-36-072/source/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://iberia-data

{'PipelineArn': 'arn:aws:sagemaker:eu-west-1:077156906314:pipeline/ibdata-customer-simulations-sbx-ppl',
 'ResponseMetadata': {'RequestId': '369d9aa7-2d0e-4537-af7a-4294c4c9e7e0',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '369d9aa7-2d0e-4537-af7a-4294c4c9e7e0',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '103',
   'date': 'Tue, 08 Oct 2024 07:36:37 GMT'},
  'RetryAttempts': 0}}

In [6]:
# Show the pipeline name resolved in the previous cell.
pipeline_name

'ibdata-customer-simulations-sbx-ppl'

### Execute pipeline with parameters

In [7]:
# Start one execution of the pipeline defined in the previous section.

# Request-format parallelism settings: at most four steps run at the same time.
step_parallelism = ParallelismConfiguration(max_parallel_execution_steps=4).to_request()

# Pipeline parameters for this run, all taken from the parameters cell.
run_parameters = {
    "str_execution_date": str_execution_date,
    "is_last_date": is_last_date,
    "s3_path_write": s3_path_write,
    "s3_path_read_nps": s3_path_read_nps,
    "s3_path_read_lf": s3_path_read_lf,
    "str_start_date": str_start_date,
    "str_end_date": str_end_date,
    "str_cabin": str_cabin,
    "str_haul": str_haul,
    "df_targets": df_targets,
    "use_type": use_type,
}

execution = pipe_definition.start(
    parallelism_config=step_parallelism,
    parameters=run_parameters,
)
# The description of the new execution is the cell output.
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:eu-west-1:077156906314:pipeline/ibdata-customer-simulations-sbx-ppl',
 'PipelineExecutionArn': 'arn:aws:sagemaker:eu-west-1:077156906314:pipeline/ibdata-customer-simulations-sbx-ppl/execution/8nhh3obriz9u',
 'PipelineExecutionDisplayName': 'execution-1728372997720',
 'PipelineExecutionStatus': 'Executing',
 'CreationTime': datetime.datetime(2024, 10, 8, 7, 36, 37, 670000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 10, 8, 7, 36, 37, 670000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:077156906314:user-profile/d-8rnxtw3g36nb/customer-diego-rivera',
  'UserProfileName': 'customer-diego-rivera',
  'DomainId': 'd-8rnxtw3g36nb',
  'IamIdentity': {'Arn': 'arn:aws:sts::077156906314:assumed-role/ibdata-aip-role-sagemaker-customer-user/SageMaker',
   'PrincipalId': 'AROARD5XI6VFDJH3ORBAM:SageMaker'}},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:eu-west-1:077156906314:user-profile/d-8rnxtw3g3