# Prework
#### Importing Libraries and Configurations

In [2]:
import os
import glob
import json

# Load the pipeline build parameters from config.json. Later cells read bucket
# names, data URIs and instance settings from `build_parameters`.
# `json` is already imported at the top of this cell, so it is not imported again.
# The encoding is pinned because JSON is UTF-8 by specification, while the
# platform default encoding used by open() is not guaranteed to be UTF-8.
with open("config.json", encoding="utf-8") as config_file:
    build_parameters = json.load(config_file)

#### Setting Default Bucket and getting region and role

In [3]:
import boto3
import sagemaker

# Resolve the AWS context of this notebook: the region of the default boto3
# session and the execution role returned by the SageMaker SDK.
region = boto3.Session().region_name
role = sagemaker.get_execution_role()

# SageMaker session whose default bucket is the output bucket from config.json.
pipeline_output_bucket = build_parameters["output_bucket"]
sagemaker_session = sagemaker.session.Session(default_bucket=pipeline_output_bucket)

# Echo the resolved role and default bucket as a quick sanity check.
print(role)
print(sagemaker_session.default_bucket())



arn:aws:iam::720541911643:role/service-role/AmazonSageMaker-ExecutionRole-20230606T110107
demo-output-bucket


#### Input Data Location Parameters

In [4]:

from sagemaker.workflow.parameters import ParameterInteger, ParameterString

# Dataset locations read from config.json.
input_bucket = build_parameters["input_bucket"]
batch_data_uri = build_parameters["evaluation_data"]
feature_selection_file_uri = build_parameters["feature_selection"]

# Expose both data locations as pipeline parameters, using the configured
# URIs as their default values.
batch_data = ParameterString(
    name="BatchData",
    default_value=batch_data_uri,
)
feature_selection_file = ParameterString(
    name="FeatureSelectionFile",
    default_value=feature_selection_file_uri,
)


#### Handling Output Locations
See this link to learn more about pipeline execution variables: https://sagemaker.readthedocs.io/en/stable/workflows/pipelines/sagemaker.workflow.pipelines.html#sagemaker.workflow.execution_variables.ExecutionVariables
pipeline_start_time is an execution variable, so to create processing_output_path and inference_output_path we had to use sagemaker.workflow.functions.Join; Python f-strings could not be used.

In [5]:

# Import the names used below explicitly instead of reaching them through
# attribute access on `sagemaker`, which only works when some earlier import
# happens to have loaded the `sagemaker.workflow.execution_variables` submodule.
from sagemaker.workflow import functions
from sagemaker.workflow.execution_variables import ExecutionVariables

pipeline_output_bucket = build_parameters["output_bucket"]

# Execution variable holding the pipeline start time; it has no concrete value
# while the pipeline is being defined, so paths are assembled with Join below.
pipeline_start_time = ExecutionVariables.START_DATETIME

# Shared prefix of every output location of this pipeline:
#   s3://<output bucket>/Scoring_Pipeline_Output/<start time>
# The leading "s3:/" becomes "s3://" once the values are joined with "/".
_scoring_output_prefix = [
    "s3:/",
    pipeline_output_bucket,
    "Scoring_Pipeline_Output",
    pipeline_start_time,
]

processing_output_path = functions.Join(on="/", values=[*_scoring_output_prefix, "ProcessingOutput"])
inference_output_path = functions.Join(on="/", values=[*_scoring_output_prefix, "InferenceOutput"])


# Building the Pipeline Steps
### Step 1: Building the Preprocessing Component
#### Building the Processor

In [6]:
from sagemaker.sklearn.processing import SKLearnProcessor

# scikit-learn processor used by the preprocessing step. The framework version,
# instance type and instance count all come from config.json.
framework_version = build_parameters["sklearn_processor_framework_version"]

sklearn_processor = SKLearnProcessor(
    role=role,
    base_job_name="Churn-Inference-Preprocessing",
    framework_version=framework_version,
    instance_type=build_parameters["scoring_preprocessing_instance_type"],
    instance_count=build_parameters["scoring_preprocessing_instance_count"],
)

#### Building the Processing Step

In [7]:

from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep, TuningStep
    

# Inputs of the preprocessing job: the batch data and the feature-selection
# file, each mapped to a directory inside the processing container.
preprocessing_inputs = [
    ProcessingInput(source=batch_data, destination="/opt/ml/processing/input"),
    ProcessingInput(
        source=feature_selection_file,
        destination="/opt/ml/processing/input/feature_selection",
    ),
]

# Outputs of the preprocessing job. The "logss" directory name (sic) has to
# stay in sync with the --log_location argument passed to the script below.
# NOTE(review): "train" and "logs" are both written to processing_output_path,
# and the Inference step uses the "train" S3 URI as its data input, so that
# input presumably also contains the log files -- confirm this is intended.
preprocessing_outputs = [
    ProcessingOutput(
        output_name="train",
        source="/opt/ml/processing/train",
        destination=processing_output_path,
    ),
    ProcessingOutput(
        output_name="logs",
        source="/opt/ml/processing/logss",
        destination=processing_output_path,
    ),
]

# Command-line arguments handed to Scoring_Preprocessing.py, one flag/value
# pair per line.
preprocessing_job_arguments = [
    "--batch_data_location", "/opt/ml/processing/input",
    "--target_column", "Churn",
    "--feature_selection_file_location", "/opt/ml/processing/input/feature_selection",
    "--preprocessed_batch_data_location", "/opt/ml/processing/train",
    "--log_location", "/opt/ml/processing/logss",
]

step_process = ProcessingStep(
    name="Preprocessing",
    processor=sklearn_processor,
    inputs=preprocessing_inputs,
    outputs=preprocessing_outputs,
    code=os.path.join("Pipeline_Component_Codes", "Scoring", "1_Preprocessing", "Scoring_Preprocessing.py"),
    job_arguments=preprocessing_job_arguments,
)

### Step 2: Get Model Step
#### Building the Lambda Function

In [8]:
from sagemaker.lambda_helper import Lambda

# Lambda helper for the "get_model_from_registry" function. Its code comes from
# the 2_Get_Model/main.py script, `main.main` is the handler entry point, and
# the execution role is the one configured in config.json.
get_model_script = os.path.join("Pipeline_Component_Codes", "Scoring", "2_Get_Model", "main.py")

func = Lambda(
    function_name="get_model_from_registry",
    execution_role_arn=build_parameters["role_given_to_lambda"],
    script=get_model_script,
    handler="main.main",
)


#### Building the LambdaStep

In [9]:
from sagemaker.workflow.lambda_step import LambdaOutput, LambdaStep, LambdaOutputTypeEnum

# Declare the three string outputs of the Lambda step. Later steps read them
# through get_model_step.properties.Outputs[<output name>].
output_param_1, output_param_2, output_param_3 = (
    LambdaOutput(output_name=output_name, output_type=LambdaOutputTypeEnum.String)
    for output_name in ("image_uri", "model_data_uri", "instance_type")
)

# Pipeline step that invokes the Lambda with the configured model package
# group name as its only input.
get_model_step = LambdaStep(
    name="get_model",
    lambda_func=func,
    inputs={"model_package_group_name": build_parameters["model_package_group_name"]},
    outputs=[output_param_1, output_param_2, output_param_3],
)


### Step 3: Making Inference Step
As mentioned earlier, this step is created using a ProcessingStep.
#### Building the processor

In [10]:
from sagemaker.processing import Processor
# Generic processor for the inference job. The container image and the instance
# type are not fixed here: both are taken from the outputs of the get_model
# Lambda step.
model_image_uri = get_model_step.properties.Outputs["image_uri"]
model_instance_type = get_model_step.properties.Outputs["instance_type"]

processor = Processor(
    role=role,
    image_uri=model_image_uri,
    instance_type=model_instance_type,
    instance_count=1,
    base_job_name="inference-preprocessing",
)

#### Building the Inference Step

In [11]:
# Inputs of the inference job: the S3 location of the "train" output of the
# Preprocessing step, and the model data URI returned by the get_model step.
inference_inputs = [
    ProcessingInput(
        source=step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri,
        destination="/opt/ml/processing/input/data",
    ),
    ProcessingInput(
        source=get_model_step.properties.Outputs["model_data_uri"],
        destination="/opt/ml/processing/input/model_folder",
    ),
]

# Whatever the job leaves in /opt/ml/processing/train is uploaded to the
# inference output path of this pipeline execution.
inference_outputs = [
    ProcessingOutput(
        output_name="train",
        source="/opt/ml/processing/train",
        destination=inference_output_path,
    ),
]

# Processing step that runs scoring.py on the processor built from the Lambda
# outputs; no job arguments are passed to the script.
step_inference = ProcessingStep(
    name="Inference",
    processor=processor,
    inputs=inference_inputs,
    outputs=inference_outputs,
    code=os.path.join("Pipeline_Component_Codes", "Scoring", "3_Scoring", "scoring.py"),
)

# Building the Pipeline
#### Arranging the steps inside pipeline

In [12]:
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = "churn-scoring"

# Assemble the scoring pipeline from the two data-location parameters and the
# three steps defined above.
pipeline = Pipeline(
    name=pipeline_name,
    parameters=[batch_data, feature_selection_file],
    steps=[step_process, get_model_step, step_inference],
)

#### Uploading the Pipeline

In [13]:
# Create the pipeline in SageMaker, or update it if it already exists, using
# the notebook's execution role. The service response is displayed as the
# cell output.
upsert_response = pipeline.upsert(role_arn=role)
upsert_response

Popping out 'ProcessingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.
Popping out 'ProcessingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.
Popping out 'ProcessingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.
Popping out 'ProcessingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.


{'PipelineArn': 'arn:aws:sagemaker:ap-south-1:720541911643:pipeline/churn-scoring',
 'ResponseMetadata': {'RequestId': '8ebd137c-20fb-4dd4-a1c8-6b7bfd33199e',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '8ebd137c-20fb-4dd4-a1c8-6b7bfd33199e',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '82',
   'date': 'Thu, 17 Aug 2023 17:28:56 GMT'},
  'RetryAttempts': 0}}