In [10]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep, TrainingStep
from sagemaker.workflow.step_collections import RegisterModel
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.estimator import Estimator
from sagemaker.model import Model
from sagemaker.inputs import TrainingInput
from sagemaker.model_monitor import DataCaptureConfig

from sagemaker import get_execution_role

# Shared settings used by every step below.
# To pin a specific IAM role instead of looking it up, assign its ARN string to `role`.
bucket = "feature-engineering-bucket-989220949c9c"  # holds processed data, model artifacts and monitoring output
region = "ap-south-1"  # same region that appears in image_uri
image_uri = "961807745392.dkr.ecr.ap-south-1.amazonaws.com/mlops-container:latest"  # image for processing, training and serving

# Execution role attached to the current SageMaker environment.
role = get_execution_role()

# Preprocessing step: runs preprocess.py with python3 inside the custom image.

# Raw CSV in S3 and the path it is mounted at inside the container.
raw_data_input = ProcessingInput(
    source="s3://feature-engineering-bucket-989220949c9c/Dataset/bank-additional-full.csv",
    destination="/opt/ml/processing/input/",
)

# Container directory that is uploaded to the S3 destination when the job ends.
processed_data_output = ProcessingOutput(
    output_name="processed_data",
    source="/opt/ml/processing/output",
    destination=f"s3://{bucket}/processed/",
)

preprocess_processor = ScriptProcessor(
    role=role,
    image_uri=image_uri,
    command=["python3"],
    instance_type="ml.t3.medium",
    instance_count=1,
)

preprocess_step = ProcessingStep(
    name="preprocess_step",
    processor=preprocess_processor,
    code="preprocess.py",
    inputs=[raw_data_input],
    outputs=[processed_data_output],
)


# Estimator: runs train.py (uploaded from the current directory at job start)
# inside the custom image and writes the model artifact under s3://{bucket}/models/.
estimator = Estimator(
    image_uri=image_uri,
    role=role,
    entry_point="train.py",             # training script passed at runtime
    source_dir=".",                     # folder where train.py is located
    instance_count=1,
    instance_type="ml.m5.large",
    base_job_name="mlops-train",
    output_path=f"s3://{bucket}/models/"  # where model.tar.gz will be saved
)

# Training step of the pipeline.
# The "train" channel points at a fixed S3 key rather than at a property of the
# preprocessing step, so the pipeline cannot infer any ordering between the two
# steps from data flow alone. Without an explicit dependency both steps may
# start at the same time and training could read a stale or missing file, so
# the dependency is declared with depends_on.
training_step = TrainingStep(
    name="TrainingStep",
    estimator=estimator,
    inputs={
        "train": TrainingInput(
            s3_data=f"s3://{bucket}/processed/bank-additional-processed.csv",
            content_type="text/csv"
        )
    },
    depends_on=[preprocess_step]
)


# Model registration.
# The artifact location is a pipeline property of the training step, so it is
# only resolved when the pipeline executes.
model_artifact = training_step.properties.ModelArtifacts.S3ModelArtifacts

model = Model(
    role=role,
    image_uri=image_uri,
    model_data=model_artifact,
)

# Adds the trained model to the "mlops-model-group" package group, already approved.
register_step = RegisterModel(
    name="RegisterModel",
    model=model,
    model_package_group_name="mlops-model-group",
    approval_status="Approved",
    content_types=["text/csv"],
    response_types=["application/json"],
    inference_instances=["ml.m5.large"],
    transform_instances=["ml.m5.large"],
)

# Assemble the three steps into a single pipeline definition.
pipeline = Pipeline(
    name="MLOpsPipeline",
    steps=[preprocess_step, training_step, register_step],
)

# Create or update the pipeline definition, then start one execution and
# block until that execution has finished.
pipeline.upsert(role_arn=role)
execution = pipeline.start()
execution.wait()


# --------------------------------------------------------------------------------
# 5. Deploy the Model to Endpoint
# --------------------------------------------------------------------------------

from sagemaker.model import Model
from sagemaker.model_monitor import DataCaptureConfig
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer


# Model built from the artifact of a specific, already finished training job.
# NOTE(review): the artifact path is tied to one past pipeline execution and
# must be updated by hand after every new run.
#
# predictor_cls is required here: a plain Model has no predictor class by
# default, and deploy() then returns None, which would make the
# predictor.predict(...) call below fail with AttributeError.
model = Model(
    image_uri=image_uri,
    model_data="s3://feature-engineering-bucket-989220949c9c/models/pipelines-80zrh3f0kyla-TrainingStep-ZYJh9bEUwc/output/model.tar.gz",  # use exact path
    role=role,
    predictor_cls=Predictor
)

# Deploy to a real-time endpoint and capture every request and response to S3
# so the monitoring schedule has data to analyse.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
    endpoint_name="mlops-prod-endpoint",
    # The payload below is a CSV row, so send it as text/csv.
    serializer=CSVSerializer(),
    data_capture_config=DataCaptureConfig(
        enable_capture=True,
        sampling_percentage=100,
        destination_s3_uri=f"s3://{bucket}/monitoring/data/",
        capture_options=["Input", "Output"]
    )
)

# Smoke test with a single example row; the response is printed as returned by
# the predictor's default deserializer.
response = predictor.predict("56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,261,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0")  # example input
print(response)



# --------------------------------------------------------------------------------
# 6. Monitor Performance Using Model Monitor
# --------------------------------------------------------------------------------


from sagemaker.model_monitor import DefaultModelMonitor


# Resources used by each monitoring job (one ml.m5.large instance, 20 GB
# volume, at most 30 minutes per run).
monitor = DefaultModelMonitor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800
)


# Attach a recurring monitoring schedule to the deployed endpoint.
monitor.create_monitoring_schedule(
    monitor_schedule_name="mlops-monitor",
    endpoint_input=predictor.endpoint_name,
    output_s3_uri=f"s3://{bucket}/monitoring/reports/",
    # Model Monitor schedules have a minimum granularity of one hour, so the
    # previous every-5-minutes expression is not accepted; run once per hour.
    schedule_cron_expression="cron(0 * ? * * *)"
)

# Monitoring reports are now written to the S3 output location each time the schedule runs


# --------------------------------------------------------------------------------
# 7. Automate Retraining (Optional)
# --------------------------------------------------------------------------------


# Use EventBridge or S3 trigger → Lambda → start pipeline
# Lambda code (Python) can call:
# import boto3
# sm = boto3.client('sagemaker')
# sm.start_pipeline_execution(PipelineName='MLOpsPipeline')


sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
sagemaker.config INFO - Applied value from config key = SageMaker.ProcessingJob.NetworkConfig.VpcConfig.Subnets
sagemaker.config INFO - Applied value from config key = SageMaker.ProcessingJob.NetworkConfig.VpcConfig.SecurityGroupIds
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
sagemaker.config INFO - Applied value from config key = SageMaker.TrainingJob.VpcConfig.Subn

In [12]:
from sagemaker.model import Model
from sagemaker.model_monitor import DataCaptureConfig

# Same artifact as before, deployed to a second endpoint name.
model = Model(
    role=role,
    image_uri=image_uri,
    model_data="s3://feature-engineering-bucket-989220949c9c/models/pipelines-80zrh3f0kyla-TrainingStep-ZYJh9bEUwc/output/model.tar.gz",  # use exact path
)

# Capture all request and response payloads to the monitoring prefix.
capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,
    destination_s3_uri=f"s3://{bucket}/monitoring/data/",
    capture_options=["Input", "Output"],
)

predictor = model.deploy(
    endpoint_name="mlops-prod-endpoint2",
    instance_type="ml.m5.large",
    initial_instance_count=1,
    data_capture_config=capture_config,
)


sagemaker.config INFO - Applied value from config key = SageMaker.Model.VpcConfig
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
----------------------------------------------*

In [5]:
# Steps belong to a pipeline *execution*; the Pipeline object itself has no
# list_steps() method. Look up the most recent execution of this pipeline and
# list the steps of that execution.
latest_executions = pipeline.list_executions(
    sort_by="CreationTime",
    sort_order="Descending",
    max_results=1,
)["PipelineExecutionSummaries"]

if latest_executions:
    execution_steps = pipeline.sagemaker_session.sagemaker_client.list_pipeline_execution_steps(
        PipelineExecutionArn=latest_executions[0]["PipelineExecutionArn"]
    )["PipelineExecutionSteps"]
else:
    # The pipeline has never been started, so there is nothing to list.
    execution_steps = []

execution_steps


In [2]:
from sagemaker.model import Model
# Model that ships serve.py from the current directory alongside the trained
# artifact.
# The artifact path includes the "/output/" segment, matching the other deploy
# cells in this notebook: the training job stores model.tar.gz under
# <output_path>/<job-name>/output/, so the path without that segment does not
# point at the artifact.
model = Model(
    image_uri=image_uri,
    model_data="s3://feature-engineering-bucket-989220949c9c/models/pipelines-80zrh3f0kyla-TrainingStep-ZYJh9bEUwc/output/model.tar.gz",
    role=role,
    entry_point="serve.py",      # send script at runtime
    source_dir="."               # folder containing serve.py
)

# Deploy to a real-time endpoint without data capture.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
    endpoint_name="mlops-endpoint"
)


sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
sagemaker.config INFO - Applied value from config key = SageMaker.Model.VpcConfig
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3Bucket
sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.Session.DefaultS3ObjectKeyPrefix
