# Prework
#### Importing Libraries and Configurations

In [1]:
import os
import glob
import json

## Load the pipeline build parameters from config.json.
# `json` is already imported at the top of this cell, so it is not imported again here.
# JSON text is UTF-8 by specification; the encoding is pinned so the load does not
# depend on the platform's default locale.
with open("config.json", encoding="utf-8") as config_file:
    build_parameters = json.load(config_file)

#### Setting Default Bucket and getting region and role

In [2]:
import boto3
import sagemaker

# Region configured for the default boto3 session.
region = boto3.Session().region_name

# The SageMaker session uses the output bucket named in config.json as its default bucket.
pipeline_output_bucket = build_parameters["output_bucket"]
sagemaker_session = sagemaker.session.Session(default_bucket=pipeline_output_bucket)

# Execution role passed to the processors and to `pipeline.upsert`.
# An optional "sagemaker_execution_role_arn" entry in config.json overrides the
# account-specific default below, so the notebook can target another account
# without a code change. `sagemaker.get_execution_role()` is an alternative
# source for this value.
role = build_parameters.get(
    "sagemaker_execution_role_arn",
    "arn:aws:iam::852619674999:role/service-role/AmazonSageMaker-ExecutionRole-20220427T124311",
)

print(role)
print(sagemaker_session.default_bucket())

arn:aws:iam::852619674999:role/service-role/AmazonSageMaker-ExecutionRole-20220427T124311
churn-output-bucket-us-east-1


#### Input Data Location Parameters

In [3]:

# Dataset-related settings read from config.json.
input_bucket = build_parameters["input_bucket"]
batch_data_uri = build_parameters["scoring_data_s3_location"]
feature_selection_file_uri = build_parameters["feature_selection"]

# Pipeline parameters for the data paths; the configured locations are their defaults.
from sagemaker.workflow.parameters import ParameterInteger, ParameterString

batch_data = ParameterString(
    name="BatchData",
    default_value=batch_data_uri,
)
feature_selection_file = ParameterString(
    name="FeatureSelectionFile",
    default_value=feature_selection_file_uri,
)


#### Handling Output Locations
See this link to learn more about pipeline execution variables: https://sagemaker.readthedocs.io/en/stable/workflows/pipelines/sagemaker.workflow.pipelines.html#sagemaker.workflow.execution_variables.ExecutionVariables
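`pipeline_start_time` is an execution variable, so its value is only known once a pipeline execution starts. To build `processing_output_path` and `inference_output_path` we therefore had to use `sagemaker.workflow.functions.Join`; Python f-strings cannot be used because they are evaluated when the notebook runs, before the variable has a value.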

In [6]:

from sagemaker.workflow import functions
# Import the class explicitly instead of reaching it through
# `sagemaker.workflow.execution_variables`, which only resolves if some other
# import has already loaded that submodule.
from sagemaker.workflow.execution_variables import ExecutionVariables

pipeline_output_bucket = build_parameters["output_bucket"]

# Placeholder for the start time of the pipeline execution.
pipeline_start_time = ExecutionVariables.START_DATETIME


def _scoring_output_path(stage_folder):
    """Return a Join for s3://<bucket>/Scoring_Pipeline_Output/<start time>/<stage_folder>.

    The first element is "s3:/" with a single slash because joining on "/"
    supplies the second one.
    """
    return functions.Join(
        on="/",
        values=[
            "s3:/",
            pipeline_output_bucket,
            "Scoring_Pipeline_Output",
            pipeline_start_time,
            stage_folder,
        ],
    )


processing_output_path = _scoring_output_path("ProcessingOutput")
inference_output_path = _scoring_output_path("InferenceOutput")


# Building the Pipeline Steps
### Step 1: Building the Preprocessing Component
#### Building the Processor

In [7]:
from sagemaker.sklearn.processing import SKLearnProcessor

# scikit-learn framework version for the processor, taken from config.json.
framework_version = build_parameters["sklearn_processor_framework_version"]

# Instance type and count also come from config.json.
# This processor is passed to both the Preprocessing and the Inference step.
sklearn_processor = SKLearnProcessor(
    role=role,
    base_job_name="Churn-Inference-Preprocessing",
    framework_version=framework_version,
    instance_type=build_parameters["scoring_preprocessing_instance_type"],
    instance_count=build_parameters["scoring_preprocessing_instance_count"],
)

#### Building the Processing Step

In [8]:

from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep, TuningStep
    

# Container-side directories of the preprocessing job. Each one is used twice:
# once as a ProcessingInput/ProcessingOutput path and once as a script argument.
_PREPROCESS_INPUT_DIR = "/opt/ml/processing/input"
_PREPROCESS_FEATURE_SELECTION_DIR = "/opt/ml/processing/input/feature_selection"
_PREPROCESS_OUTPUT_DIR = "/opt/ml/processing/train"
_PREPROCESS_LOG_DIR = "/opt/ml/processing/logss"  # spelling kept as in the original paths

step_process = ProcessingStep(
    name="Preprocessing",
    processor=sklearn_processor,
    inputs=[
        ProcessingInput(source=batch_data, destination=_PREPROCESS_INPUT_DIR),
        ProcessingInput(source=feature_selection_file, destination=_PREPROCESS_FEATURE_SELECTION_DIR),
    ],
    outputs=[
        # NOTE(review): "train" and "logs" are uploaded to the same S3 prefix, and the
        # Inference step reads the "train" output URI as its input data. Confirm that
        # scoring.py tolerates log files appearing next to the preprocessed data.
        ProcessingOutput(
            output_name="train",
            source=_PREPROCESS_OUTPUT_DIR,
            destination=processing_output_path,
        ),
        ProcessingOutput(
            output_name="logs",
            source=_PREPROCESS_LOG_DIR,
            destination=processing_output_path,
        ),
    ],
    code=os.path.join("Pipeline_Component_Codes", "Scoring", "1_Preprocessing", "Scoring_Preprocessing.py"),
    job_arguments=[
        "--batch_data_location", _PREPROCESS_INPUT_DIR,
        "--target_column", "Churn",
        "--feature_selection_file_location", _PREPROCESS_FEATURE_SELECTION_DIR,
        "--preprocessed_batch_data_location", _PREPROCESS_OUTPUT_DIR,
        "--log_location", _PREPROCESS_LOG_DIR,
    ],
)

### Step 2: Get Model Step
#### Building the Lambda Function

In [9]:
from sagemaker.lambda_helper import Lambda

# Lambda helper for the function "get_model_from_registry", packaged from
# 2_Get_Model/main.py with handler "main.main".
# An optional "lambda_execution_role_arn" entry in config.json overrides the
# account-specific default role below, so the notebook can target another
# account without a code change.
func = Lambda(
    function_name="get_model_from_registry",
    execution_role_arn=build_parameters.get(
        "lambda_execution_role_arn",
        "arn:aws:iam::852619674999:role/role_given_to_lambda",
    ),
    script=os.path.join("Pipeline_Component_Codes", "Scoring", "2_Get_Model", "main.py"),
    handler="main.main",
)


#### Building the LambdaStep

In [10]:
from sagemaker.workflow.lambda_step import LambdaOutput, LambdaStep, LambdaOutputTypeEnum

# The Lambda step declares three string outputs; later cells read them through
# get_model_step.properties.Outputs[<name>].
output_param_1, output_param_2, output_param_3 = (
    LambdaOutput(output_name=output_name, output_type=LambdaOutputTypeEnum.String)
    for output_name in ("image_uri", "model_data_uri", "instance_type")
)

# The model package group name from config.json is the only input sent to the Lambda.
get_model_step = LambdaStep(
    name="get_model",
    lambda_func=func,
    inputs={
        "model_package_group_name": build_parameters["model_package_group_name"],
    },
    outputs=[output_param_1, output_param_2, output_param_3],
)


### Step 3: Making Inference Step
As mentioned earlier, this step is created using a ProcessingStep.
#### Building the processor

In [11]:
from sagemaker.processing import Processor
# Processor configured with the image URI and instance type returned by the
# get_model Lambda step.
# NOTE(review): the Inference step currently passes `sklearn_processor` instead
# (its `processor=processor` line is commented out), so this object is not used
# by the pipeline as written — confirm whether that is intended.
processor = Processor(
    image_uri=get_model_step.properties.Outputs["image_uri"],
    instance_type=get_model_step.properties.Outputs["instance_type"],
    instance_count=1,
    base_job_name="inference-preprocessing",
    role=role,
)

#### Building the Inference Step

In [12]:
# Container-side directories of the scoring job.
_SCORING_DATA_DIR = "/opt/ml/processing/input/data"
_SCORING_MODEL_DIR = "/opt/ml/processing/input/model_folder"
_SCORING_OUTPUT_DIR = "/opt/ml/processing/train"
_SCORING_LOG_DIR = "/opt/ml/processing/logss"

# Inputs come from the two earlier steps: the "train" output of the
# Preprocessing step and the model artifact URI returned by the get_model step.
preprocessed_data_uri = step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri
model_data_uri = get_model_step.properties.Outputs["model_data_uri"]

step_inference = ProcessingStep(
    name="Inference",
    # The Processor built from the registry image is not used here; scoring.py
    # runs on the SKLearn processor instead.
    # processor=processor,
    processor=sklearn_processor,
    inputs=[
        ProcessingInput(source=preprocessed_data_uri, destination=_SCORING_DATA_DIR),
        ProcessingInput(source=model_data_uri, destination=_SCORING_MODEL_DIR),
    ],
    outputs=[
        ProcessingOutput(
            output_name="train",
            source=_SCORING_OUTPUT_DIR,
            destination=inference_output_path,
        ),
    ],
    code=os.path.join("Pipeline_Component_Codes", "Scoring", "3_Scoring", "scoring.py"),
    # NOTE(review): --log_location is passed, but no ProcessingOutput is declared for
    # that directory, so files written there are presumably not uploaded to S3 —
    # confirm this is intended.
    job_arguments=[
        "--batch_data_location", _SCORING_DATA_DIR,
        "--model_location", _SCORING_MODEL_DIR,
        "--predicted_data_location", _SCORING_OUTPUT_DIR,
        "--log_location", _SCORING_LOG_DIR,
    ],
)

In [8]:

# from sagemaker.transformer import Transformer


# transformer = Transformer(
#     model_name=step_create_model.properties.ModelName,
#     instance_type=build_parameters["scoring_instance_type"],
#     instance_count=1,
#     output_path=inference_output_path,
#     base_transform_job_name = "Churn-Transformation",
#     # max_concurrent_transforms = 1,
#     # strategy = "SingleRecord"
# )

# from sagemaker.inputs import TransformInput
# from sagemaker.workflow.steps import TransformStep


# step_transform = TransformStep(
#     name="Inference",
#     transformer=transformer,
#     inputs=TransformInput(data=step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri,
#                           # data_type = "text/csv"
#                          ),
#     depends_on  = [step_process]
# )

# Building the Pipeline
#### Arranging the steps inside pipeline

In [13]:
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = "Churn-Scoring"

# The pipeline exposes the two data-path parameters and contains the three steps
# built above; step_inference consumes properties of both earlier steps.
pipeline = Pipeline(
    name=pipeline_name,
    parameters=[batch_data, feature_selection_file],
    steps=[step_process, get_model_step, step_inference],
)

#### Uploading the Pipeline

In [14]:
# Upsert the pipeline definition under the execution role; the service response
# is shown as the cell output.
upsert_response = pipeline.upsert(role_arn=role)
upsert_response

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:852619674999:pipeline/churn-scoring',
 'ResponseMetadata': {'RequestId': '87462b97-1b04-4a14-b0ac-fe33ad8c0125',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '87462b97-1b04-4a14-b0ac-fe33ad8c0125',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '81',
   'date': 'Fri, 19 May 2023 10:27:18 GMT'},
  'RetryAttempts': 0}}