In [1]:
import boto3
import sagemaker

# Resolve the AWS region configured for the default boto3 session.
boto_session = boto3.Session()
region = boto_session.region_name

# SageMaker session built with library defaults.
# (A bucket-specific session would be
#  sagemaker.session.Session(default_bucket = pipeline_output_bucket).)
sagemaker_session = sagemaker.session.Session()

# The execution role ARN is pinned explicitly here rather than being
# looked up with sagemaker.get_execution_role().
role = "arn:aws:iam::852619674999:role/service-role/AmazonSageMaker-ExecutionRole-20220427T124311"

# Echo both values, one per line, so the cell output records what was used.
print(role, sagemaker_session, sep="\n")

arn:aws:iam::852619674999:role/service-role/AmazonSageMaker-ExecutionRole-20220427T124311
<sagemaker.session.Session object at 0x7f2c70abc940>


### Taking configuration parameter values from config.json

In [2]:
# Read the pipeline configuration from config.json, located one directory
# above this notebook. Every later cell looks its settings up in
# `build_parameters`.
import json

with open("../config.json") as config_file:
    build_parameters = json.load(config_file)

### Handling Input

In [3]:
from sagemaker.workflow.parameters import ParameterInteger, ParameterString

# S3 locations of the two pipeline inputs, taken from config.json.
batch_data_uri = build_parameters["scoring_data_s3_location"]
input_feature_selection_file_uri = build_parameters["feature_selection_file_s3_location"]

# Pipeline parameter: data to be scored. The configured URI is the default.
batch_data = ParameterString(
    name="BatchData",
    default_value=batch_data_uri,
)

# Pipeline parameter: basic feature selection file path.
feature_selection_file = ParameterString(
    name="FeatureSelectionFile",
    default_value=input_feature_selection_file_uri,
)

### Handling Output

In [4]:
from time import gmtime, strftime

# Bucket that receives every artefact produced by the scoring pipeline.
pipeline_output_bucket = build_parameters["output_bucket"]

# `Session.default_bucket` is a method, so assigning a string to it would only
# shadow the method on this instance (and break any later
# `sagemaker_session.default_bucket()` call). The supported way to choose the
# bucket is the `default_bucket` constructor argument, so rebuild the session.
sagemaker_session = sagemaker.session.Session(default_bucket=pipeline_output_bucket)

# UTC timestamp taken when this cell runs. The paths below are plain strings,
# so they are fixed at pipeline-definition time: every execution of the
# upserted definition writes under this same prefix until the notebook is
# re-run and the pipeline upserted again.
pipeline_start_time = strftime("%Y%m%d-%H-%M-%S", gmtime())

# Common S3 prefix for this pipeline definition's outputs.
pipeline_output_root = f"s3://{pipeline_output_bucket}/Scoring_Pipeline_Output/{pipeline_start_time}"

processing_output_path = f"{pipeline_output_root}/ProcessingOutput"
inference_output_path = f"{pipeline_output_root}/InferenceOutput"

### Building the Preprocessing Component

In [6]:
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep, TuningStep

# scikit-learn container version and instance sizing all come from config.json.
framework_version = build_parameters["sklearn_processor_framework_version"]

sklearn_processor = SKLearnProcessor(
    framework_version=framework_version,
    instance_type=build_parameters["scoring_preprocessing_instance_type"],
    instance_count=build_parameters["scoring_preprocessing_instance_count"],
    base_job_name="Churn-Inference-Preprocessing",
    role=role
)

# Preprocessing step: runs Scoring_Preprocessing.py on the batch data and the
# feature selection file, then uploads the preprocessed data and the logs.
# Both outputs are uploaded to the same S3 prefix (processing_output_path).
# NOTE(review): the Inference step reads the "train" output URI, which is that
# shared prefix - confirm the logs should not go to a separate prefix.
step_process = ProcessingStep(
    name="Preprocessing",
    processor=sklearn_processor,
    inputs=[
        ProcessingInput(source=batch_data, destination="/opt/ml/processing/input"),
        ProcessingInput(source=feature_selection_file, destination="/opt/ml/processing/input/feature_selection")
    ],
    outputs=[
        ProcessingOutput(output_name="train", source="/opt/ml/processing/train", destination=processing_output_path),
        ProcessingOutput(output_name="logs", source="/opt/ml/processing/logss", destination=processing_output_path)
    ],
    code="../SageMaker_Pipeline_Component_Codes/Scoring/Scoring_Preprocessing.py",
    # One "--flag", "value" pair per line. Every element needs its own trailing
    # comma: two adjacent string literals are silently concatenated by Python,
    # which previously glued the feature-selection path onto the
    # "--preprocessed_batch_data_location" flag.
    job_arguments=[
        "--batch_data_location", "/opt/ml/processing/input",
        "--target_column", "Churn",
        "--feature_selection_file_location", "/opt/ml/processing/input/feature_selection",
        "--preprocessed_batch_data_location", "/opt/ml/processing/train",
        "--log_location", "/opt/ml/processing/logss",
    ]
)

### Get Model Step

In [22]:
#### Obtaining the model from the SageMaker model registry.
import boto3

from sagemaker.inputs import CreateModelInput
from sagemaker.model import Model
from sagemaker.workflow.steps import CreateModelStep

package_group = build_parameters["model_package_group_name"]

client = boto3.client('sagemaker')

# Ask for newest-first explicitly so that index 0 is the most recently created
# package instead of depending on the service's default ordering.
model_packages = client.list_model_packages(
    ModelPackageGroupName=package_group,
    SortBy="CreationTime",
    SortOrder="Descending",
)

package_summaries = model_packages["ModelPackageSummaryList"]
if not package_summaries:
    # Fail with an actionable message instead of a bare IndexError.
    raise ValueError(f"No model packages found in model package group '{package_group}'")

# No approval-status filter is applied: the newest package is used whatever
# its ModelApprovalStatus is.
latest_package = package_summaries[0]
latest_package_arn = latest_package["ModelPackageArn"]

print(latest_package)
print(latest_package_arn)


latest_package_details = client.describe_model_package(ModelPackageName=latest_package_arn)

# The first container of the inference specification supplies both the serving
# image and the model artefact location.
primary_container = latest_package_details['InferenceSpecification']['Containers'][0]

inference_model = Model(image_uri = primary_container['Image'],
#                         entry_point="../" + build_parameters["scoring_code_loaction"],
                        model_data = primary_container["ModelDataUrl"],
                        role = role,
                        sagemaker_session = sagemaker_session
                       )

inputs = CreateModelInput(
    instance_type=build_parameters["scoring_instance_type"],
    # accelerator_type="ml.eia1.medium",
)

# Pipeline step that creates a SageMaker model from the registry package.
step_create_model = CreateModelStep(
    name="Get-Model",
    model=inference_model,
    inputs=inputs
)

{'ModelPackageGroupName': 'churn-packagegroup', 'ModelPackageVersion': 5, 'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:852619674999:model-package/churn-packagegroup/5', 'CreationTime': datetime.datetime(2022, 6, 30, 7, 40, 14, 674000, tzinfo=tzlocal()), 'ModelPackageStatus': 'Completed', 'ModelApprovalStatus': 'PendingManualApproval'}
arn:aws:sagemaker:us-east-1:852619674999:model-package/churn-packagegroup/5


In [None]:
# Display the describe_model_package response fetched in the "Get Model Step" cell.
latest_package_details

### Making Inference Step

In [13]:

from sagemaker.inputs import TransformInput
from sagemaker.transformer import Transformer
from sagemaker.workflow.steps import TransformStep

# Batch transformer bound to the model created by the "Get-Model" step.
# Results are written to inference_output_path.
transformer = Transformer(
    base_transform_job_name="Churn-Transformation",
    model_name=step_create_model.properties.ModelName,
    instance_count=1,
    instance_type=build_parameters["scoring_instance_type"],
    output_path=inference_output_path,
)

# S3 URI of the "train" output of the preprocessing step. It is resolved when
# the pipeline executes, which also makes this step depend on step_process.
preprocessed_batch_uri = (
    step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri
)

# Inference step: runs the batch transform over the preprocessed data.
# No other TransformInput options are set.
step_transform = TransformStep(
    name="Inference",
    transformer=transformer,
    inputs=TransformInput(data=preprocessed_batch_uri),
)

### Building the Pipeline

In [23]:
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = "Churn-Scoring"

# Parameters that can be supplied when the pipeline is executed.
scoring_pipeline_parameters = [batch_data, feature_selection_file]

# Steps included in the definition. step_transform is intentionally left out
# here, so only preprocessing and model creation are registered.
scoring_pipeline_steps = [
    step_process,
    step_create_model,
    # step_transform
]

pipeline = Pipeline(
    name=pipeline_name,
    parameters=scoring_pipeline_parameters,
    steps=scoring_pipeline_steps,
)

### Uploading the Pipeline

In [24]:
# Create or update the pipeline definition in SageMaker using the execution
# role; the service response is shown as the cell output.
pipeline.upsert(role_arn=role)

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:852619674999:pipeline/churn-scoring',
 'ResponseMetadata': {'RequestId': 'ce546728-853c-4adc-b60f-3bf4ab90b560',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ce546728-853c-4adc-b60f-3bf4ab90b560',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '81',
   'date': 'Thu, 30 Jun 2022 11:34:11 GMT'},
  'RetryAttempts': 0}}

### Building pipeline parameters (these values can be overridden when the pipeline is run)

In [None]:
from time import gmtime, strftime

# Default location of the data to be scored.
batch_data_uri = "s3://demo-bucket-test-mlop/Churn_Demo/churn-bigml-20.csv"

# `default_bucket` was never defined in this notebook, so the original line
# raised NameError. Use the output bucket read from config.json in the
# "Handling Output" cell instead.
output_path = f"s3://{pipeline_output_bucket}/ChurnTrain/" + strftime("%Y%m%d-%H-%M-%S", gmtime())
print(output_path)


# Pipeline parameters; each default can be overridden per execution.
processing_instance_count = ParameterInteger(
    name="ProcessingInstanceCount",
    default_value=1
)
processing_instance_type = ParameterString(
    name="ProcessingInstanceType",
    default_value="ml.m5.xlarge"
)
# NOTE(review): this parameter sizes the model/transform instances but is
# exposed as "TrainingInstanceType" - confirm whether the name is intended
# before renaming, since the name is part of the pipeline's external interface.
inference_instance_type = ParameterString(
    name="TrainingInstanceType",
    default_value="ml.m5.xlarge"
)
batch_data = ParameterString(
    name="BatchData",
    default_value=batch_data_uri,
)
pipeline_output_path = ParameterString(
    name="OutputPath",
    default_value=output_path,
)

### Building the Preprocessing component.

In [None]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.workflow.steps import ProcessingStep, TuningStep

# scikit-learn container version is pinned in this variant of the cell.
framework_version = "0.23-1"

# Processor sized by the ProcessingInstanceType / ProcessingInstanceCount
# pipeline parameters.
sklearn_processor = SKLearnProcessor(
    role=role,
    base_job_name="Churn-Inference-Preprocessing",
    framework_version=framework_version,
    instance_count=processing_instance_count,
    instance_type=processing_instance_type,
)

# Single input (the batch data) and single output (the preprocessed data).
raw_batch_input = ProcessingInput(source=batch_data, destination="/opt/ml/processing/input")
preprocessed_output = ProcessingOutput(output_name="train", source="/opt/ml/processing/train")

# NOTE(review): unlike the config-driven preprocessing cell, this code path
# has no "../" prefix - confirm which working directory this cell expects.
step_process = ProcessingStep(
    name="Preprocessing",
    processor=sklearn_processor,
    inputs=[raw_batch_input],
    outputs=[preprocessed_output],
    code="SageMaker_Pipeline_Component_Codes/Scoring/Scoring_Preprocessing.py",
)

### Get model step.

In [None]:
#### Obtaining the model from the SageMaker model registry.
from sagemaker.inputs import CreateModelInput
from sagemaker.model import Model
from sagemaker.workflow.steps import CreateModelStep

package_group = "ChurnPackageGroup"

# `client` is the boto3 SageMaker client created in the "Get Model Step" cell.
# Ask for newest-first explicitly so that index 0 is the most recently created
# package instead of depending on the service's default ordering.
model_packages = client.list_model_packages(
    ModelPackageGroupName=package_group,
    SortBy="CreationTime",
    SortOrder="Descending",
)

package_summaries = model_packages["ModelPackageSummaryList"]
if not package_summaries:
    # Fail with an actionable message instead of a bare IndexError.
    raise ValueError(f"No model packages found in model package group '{package_group}'")

latest_package = package_summaries[0]
latest_package_arn = latest_package["ModelPackageArn"]

print(latest_package)
print(latest_package_arn)


latest_package_details = client.describe_model_package(ModelPackageName=latest_package_arn)

# The first container of the inference specification supplies both the serving
# image and the model artefact location.
primary_container = latest_package_details['InferenceSpecification']['Containers'][0]

inference_model = Model(image_uri = primary_container['Image'],
                        entry_point='SageMaker_Pipeline_Component_Codes/Scoring/inference.py',
                        model_data = primary_container["ModelDataUrl"],
                        role = role,
                        sagemaker_session = sagemaker_session
                       )

# Instance type comes from the inference_instance_type pipeline parameter.
inputs = CreateModelInput(
    instance_type=inference_instance_type,
    # accelerator_type="ml.eia1.medium",
)

# Pipeline step that creates a SageMaker model from the registry package.
step_create_model = CreateModelStep(
    name="Get-Model",
    model=inference_model,
    inputs=inputs
)

### Making inference step.

In [None]:

from sagemaker.inputs import TransformInput
from sagemaker.transformer import Transformer
from sagemaker.workflow.steps import TransformStep

# Batch transformer bound to the model created by the "Get-Model" step.
# Instance type and output location are pipeline parameters.
transformer = Transformer(
    base_transform_job_name="Churn-Transformation",
    model_name=step_create_model.properties.ModelName,
    instance_count=1,
    instance_type=inference_instance_type,
    output_path=pipeline_output_path,
)

# S3 URI of the "train" output of the preprocessing step. It is resolved when
# the pipeline executes, which also makes this step depend on step_process.
preprocessed_batch_uri = (
    step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri
)

# Inference step: runs the batch transform over the preprocessed data.
# No other TransformInput options are set.
step_transform = TransformStep(
    name="Inference",
    transformer=transformer,
    inputs=TransformInput(data=preprocessed_batch_uri),
)

### Building the pipeline.

In [None]:
from sagemaker.workflow.pipeline import Pipeline

pipeline_name = "Churn-Scoring"

# Parameters that can be supplied when the pipeline is executed.
scoring_pipeline_parameters = [
    batch_data,
    pipeline_output_path,
    processing_instance_count,
    processing_instance_type,
    inference_instance_type,
]

# Full three-step flow: preprocess, create the model, run batch inference.
scoring_pipeline_steps = [step_process, step_create_model, step_transform]

pipeline = Pipeline(
    name=pipeline_name,
    parameters=scoring_pipeline_parameters,
    steps=scoring_pipeline_steps,
)

### Uploading the pipeline.

In [None]:
# Create or update the pipeline definition in SageMaker using the execution role.
pipeline.upsert(role_arn=role)