## Implement Data Monitors

In [7]:
# Imports
import boto3
import botocore
import json
from sagemaker import get_execution_role, Session
from sagemaker.sklearn.model import SKLearnModel
from sagemaker.model_monitor import DefaultModelMonitor, CronExpressionGenerator, DataCaptureConfig

In [8]:
# Fixed project settings: AWS region and the S3 bucket used throughout the notebook
region = 'us-east-1'
bucket = 'sagemaker-us-east-1-531690656306'

# SageMaker session and the execution role returned by get_execution_role()
session = Session()
role = get_execution_role()

### Check endpoint and json files

In [9]:
# Build the boto3 clients used by the two checks below
region = 'us-east-1'
sm_client = boto3.client('sagemaker', region_name=region)
s3 = boto3.client('s3', region_name=region)

# --- Baseline File Check ---
# List every object stored under the baseline-results prefix
bucket = 'sagemaker-us-east-1-531690656306'
prefix = 'cardio_data/baseline-results/'

response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)

# The listing only carries a 'Contents' entry when at least one key matched
if 'Contents' not in response:
    print("No baseline files found.")
else:
    files_found = [entry['Key'] for entry in response['Contents']]
    print("Baseline files found in S3:")
    for key in files_found:
        print(key)

# --- Endpoint Status Check ---
endpoint_name = 'cardio-logistic-monitor-endpoint'

# Any client error from describe_endpoint is reported rather than raised
try:
    response = sm_client.describe_endpoint(EndpointName=endpoint_name)
except sm_client.exceptions.ClientError as err:
    print(f"Could not find endpoint '{endpoint_name}': {err}")
else:
    status = response['EndpointStatus']
    print(f"Endpoint '{endpoint_name}' found with status: {status}")

Baseline files found in S3:
cardio_data/baseline-results/constraints.json
cardio_data/baseline-results/statistics.json
Endpoint 'cardio-logistic-monitor-endpoint' found with status: InService


### Delete Existing Endpoint and Endpoint Config (before redeploying)

In [10]:
# Initialize SageMaker client
sm_client = boto3.client('sagemaker', region_name='us-east-1')

# Define your endpoint name
endpoint_name = 'cardio-logistic-monitor-endpoint'

# Delete the endpoint itself
try:
    sm_client.delete_endpoint(EndpointName=endpoint_name)
    # delete_endpoint only *starts* the deletion. Block until the endpoint is
    # really gone, otherwise the deploy cell that reuses this name can fail
    # because the old endpoint is still in the "Deleting" state.
    sm_client.get_waiter('endpoint_deleted').wait(EndpointName=endpoint_name)
    print(f"Deleted endpoint: {endpoint_name}")
except sm_client.exceptions.ClientError as e:
    print(f"Could not delete endpoint (might not exist): {e}")

# Delete the endpoint configuration (this cell assumes it shares the endpoint's name)
try:
    sm_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
    print(f"Deleted endpoint config: {endpoint_name}")
except sm_client.exceptions.ClientError as e:
    print(f"Could not delete endpoint config (might not exist): {e}")

Deleted endpoint: cardio-logistic-monitor-endpoint
Deleted endpoint config: cardio-logistic-monitor-endpoint


### Deploy Endpoint

In [11]:
# Location of the packaged model and the script that serves it
model_artifact = f's3://{bucket}/model/logistic_model.tar.gz'
entry_point = 'inference.py'

# Wrap the artifact in a scikit-learn serving model
model_settings = {
    'model_data': model_artifact,
    'role': role,
    'entry_point': entry_point,
    'framework_version': '0.23-1',
    'sagemaker_session': session,
}
sklearn_model = SKLearnModel(**model_settings)

# Capture 100% of requests and responses under the data-capture prefix
capture_settings = {
    'enable_capture': True,
    'sampling_percentage': 100,
    'destination_s3_uri': f's3://{bucket}/data-capture',
    'capture_options': ['Request', 'Response'],
}
data_capture_config = DataCaptureConfig(**capture_settings)

# Stand up a single-instance endpoint with data capture attached
deploy_settings = {
    'initial_instance_count': 1,
    'instance_type': 'ml.m5.xlarge',
    'endpoint_name': endpoint_name,
    'data_capture_config': data_capture_config,
}
sklearn_model.deploy(**deploy_settings)

print(f"Endpoint '{endpoint_name}' successfully redeployed.")

------!Endpoint 'cardio-logistic-monitor-endpoint' successfully redeployed.


In [13]:
# Verify that the endpoint exists and report its current status
describe_kwargs = {'EndpointName': endpoint_name}
response = sm_client.describe_endpoint(**describe_kwargs)
status = response['EndpointStatus']
print(f"Endpoint '{endpoint_name}' status: {status}")

Endpoint 'cardio-logistic-monitor-endpoint' status: InService


### Create Data Monitor Schedule

In [14]:
# Create monitor object
monitor = DefaultModelMonitor(
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=1200,  # Safe shorter run time
    sagemaker_session=session
)

# S3 paths, derived from the shared `bucket` setting instead of re-hardcoding
# the bucket name (the originals were f-strings with no placeholders).
baseline_output_uri = f's3://{bucket}/cardio_data/baseline-results'
monitor_output_uri = f's3://{bucket}/cardio_data/monitoring/reports'

# Monitoring schedule
schedule_name = 'cardio-data-monitor-schedule'

# Attach a daily data-quality schedule to the endpoint, comparing captured
# traffic against the baseline statistics/constraints files.
monitor.create_monitoring_schedule(
    monitor_schedule_name=schedule_name,
    endpoint_input=endpoint_name,
    output_s3_uri=monitor_output_uri,
    statistics=f'{baseline_output_uri}/statistics.json',
    constraints=f'{baseline_output_uri}/constraints.json',
    schedule_cron_expression=CronExpressionGenerator.daily(),
    enable_cloudwatch_metrics=True
)

print(f"Data monitor schedule '{schedule_name}' successfully created.")

Data monitor schedule 'cardio-data-monitor-schedule' successfully created.


## Infrastructure Monitoring

In [16]:
# Initialize CloudWatch client
cloudwatch = boto3.client('cloudwatch', region_name=region)
variant_name = 'AllTraffic'

# SageMaker publishes endpoint metrics under two different namespaces:
#   * instance metrics (CPU / memory / disk / GPU) -> '/aws/sagemaker/Endpoints'
#   * invocation metrics (Invocations, ModelLatency, ...) -> 'AWS/SageMaker'
# An alarm created in the wrong namespace never receives any data points.
INSTANCE_METRIC_NAMESPACE = '/aws/sagemaker/Endpoints'
INVOCATION_METRIC_NAMESPACE = 'AWS/SageMaker'
INSTANCE_METRICS = {
    'CPUUtilization',
    'MemoryUtilization',
    'DiskUtilization',
    'GPUUtilization',
    'GPUMemoryUtilization',
}


# Helper function to create alarms
def create_alarm(metric_name, threshold, comparison='GreaterThanThreshold', namespace=None):
    """Create (or overwrite) a CloudWatch alarm on one endpoint metric.

    The alarm is named "<endpoint_name>-<metric_name>-Alarm", evaluates the
    5-minute average of the metric for the endpoint/variant pair, and is
    created with actions disabled.

    Args:
        metric_name: CloudWatch metric name, e.g. 'CPUUtilization'.
        threshold: Value the averaged metric is compared against.
        comparison: CloudWatch comparison operator.
        namespace: CloudWatch namespace of the metric. When omitted it is
            chosen from the metric name: instance metrics use
            '/aws/sagemaker/Endpoints', everything else 'AWS/SageMaker'.
    """
    if namespace is None:
        if metric_name in INSTANCE_METRICS:
            namespace = INSTANCE_METRIC_NAMESPACE
        else:
            namespace = INVOCATION_METRIC_NAMESPACE

    alarm_name = f"{endpoint_name}-{metric_name}-Alarm"

    cloudwatch.put_metric_alarm(
        AlarmName=alarm_name,
        MetricName=metric_name,
        Namespace=namespace,
        Dimensions=[
            {'Name': 'EndpointName', 'Value': endpoint_name},
            {'Name': 'VariantName', 'Value': variant_name}
        ],
        Statistic='Average',
        Period=300,
        EvaluationPeriods=1,
        Threshold=threshold,
        ComparisonOperator=comparison,
        ActionsEnabled=False,
        AlarmDescription=f"Alarm when {metric_name} is above {threshold}%"
    )
    print(f"Alarm created for {metric_name}")

# Create CPU, Memory, Disk alarms
create_alarm('CPUUtilization', threshold=70)
create_alarm('MemoryUtilization', threshold=75)
create_alarm('DiskUtilization', threshold=80)

Alarm created for CPUUtilization
Alarm created for MemoryUtilization
Alarm created for DiskUtilization


### Create CloudWatch Dashboard for Endpoint Monitoring

In [22]:
# Initialize CloudWatch client
cloudwatch = boto3.client('cloudwatch', region_name=region)

# Set dashboard name and endpoint name
dashboard_name = "cardio_endpoint_dashboard"
endpoint_name = "cardio-logistic-monitor-endpoint"
variant_name = "AllTraffic"

# SageMaker publishes instance metrics (CPU / memory / disk) under
# '/aws/sagemaker/Endpoints' and invocation metrics under 'AWS/SageMaker'.
# A widget pointing at the wrong namespace renders with no data.
INSTANCE_NAMESPACE = "/aws/sagemaker/Endpoints"
INVOCATION_NAMESPACE = "AWS/SageMaker"


def _metric_widget(title, namespace, metric_name, stat, x, y):
    """Return one 12x6 metric widget for the monitored endpoint variant.

    Args:
        title: Widget title shown on the dashboard.
        namespace: CloudWatch namespace the metric is published under.
        metric_name: CloudWatch metric name.
        stat: Statistic to plot over each 300-second period.
        x, y: Top-left grid position of the widget.
    """
    return {
        "type": "metric",
        "x": x, "y": y, "width": 12, "height": 6,
        "properties": {
            "title": title,
            "metrics": [
                [namespace, metric_name, "EndpointName", endpoint_name, "VariantName", variant_name]
            ],
            "period": 300, "stat": stat, "region": region
        }
    }


# Define dashboard widgets (two per row)
widgets = [
    # 1. CPU Utilization
    _metric_widget("CPU Utilization", INSTANCE_NAMESPACE, "CPUUtilization", "Average", 0, 0),
    # 2. Memory Utilization
    _metric_widget("Memory Utilization", INSTANCE_NAMESPACE, "MemoryUtilization", "Average", 12, 0),
    # 3. Disk Utilization
    _metric_widget("Disk Utilization", INSTANCE_NAMESPACE, "DiskUtilization", "Average", 0, 6),
    # 4. Invocation Count
    _metric_widget("Invocation Count", INVOCATION_NAMESPACE, "Invocations", "Sum", 12, 6),
    # 5. Model Latency (CloudWatch reports ModelLatency in microseconds, not ms)
    _metric_widget("Model Latency (microseconds)", INVOCATION_NAMESPACE, "ModelLatency", "Average", 0, 12),
    # 6. 5XX Errors
    _metric_widget("❌ Model Errors (5XX)", INVOCATION_NAMESPACE, "Invocation5XXErrors", "Sum", 12, 12),
]

# Create or update the dashboard
response = cloudwatch.put_dashboard(
    DashboardName=dashboard_name,
    DashboardBody=json.dumps({"widgets": widgets})
)

print(f"Dashboard '{dashboard_name}' created and updated.")

Dashboard 'cardio_endpoint_dashboard' created and updated.


In [23]:
# Create a local .py file from the CloudWatch dashboard code.
# The script is self-contained: it builds the same six widgets as the notebook
# dashboard and pushes them with put_dashboard.
dashboard_code = """
import boto3
import json

client = boto3.client('cloudwatch', region_name='us-east-1')

dashboard_name = "cardio_endpoint_dashboard"
endpoint_name = "cardio-logistic-monitor-endpoint"
variant_name = "AllTraffic"
region = "us-east-1"

# SageMaker publishes instance metrics (CPU / memory / disk) under
# '/aws/sagemaker/Endpoints' and invocation metrics under 'AWS/SageMaker'.
INSTANCE_NAMESPACE = "/aws/sagemaker/Endpoints"
INVOCATION_NAMESPACE = "AWS/SageMaker"


def metric_widget(title, namespace, metric_name, stat, x, y):
    # One 12x6 metric widget for the monitored endpoint variant.
    return {
        "type": "metric",
        "x": x, "y": y, "width": 12, "height": 6,
        "properties": {
            "title": title,
            "metrics": [
                [ namespace, metric_name, "EndpointName", endpoint_name, "VariantName", variant_name ]
            ],
            "period": 300, "stat": stat, "region": region
        }
    }


widgets = [
    metric_widget("CPU Utilization", INSTANCE_NAMESPACE, "CPUUtilization", "Average", 0, 0),
    metric_widget("Memory Utilization", INSTANCE_NAMESPACE, "MemoryUtilization", "Average", 12, 0),
    metric_widget("Disk Utilization", INSTANCE_NAMESPACE, "DiskUtilization", "Average", 0, 6),
    metric_widget("Invocation Count", INVOCATION_NAMESPACE, "Invocations", "Sum", 12, 6),
    # CloudWatch reports ModelLatency in microseconds, not milliseconds
    metric_widget("Model Latency (microseconds)", INVOCATION_NAMESPACE, "ModelLatency", "Average", 0, 12),
    metric_widget("Model Errors (5XX)", INVOCATION_NAMESPACE, "Invocation5XXErrors", "Sum", 12, 12),
]

response = client.put_dashboard(
    DashboardName=dashboard_name,
    DashboardBody=json.dumps({"widgets": widgets})
)

print(f"Dashboard '{dashboard_name}' created and uploaded.")
"""

# Write to local file (explicit encoding so the result does not depend on the locale)
with open('cloudwatch_dashboard_setup.py', 'w', encoding='utf-8') as f:
    f.write(dashboard_code)

print("cloudwatch_dashboard_setup.py file created.")

cloudwatch_dashboard_setup.py file created.


### Save files into s3 bucket

In [26]:
# Your bucket and folder
bucket = 'sagemaker-us-east-1-531690656306'
folder = 'cardio_project/'

# Local file -> object name stored under `folder`
uploads = {
    # Save notebook (.ipynb)
    'cardio_model_monitoring.ipynb': 'cardio_data_quality_monitoring_schedule_v2.ipynb',
    # Save standalone python script (.py)
    'cloudwatch_dashboard_setup.py': 'cloudwatch_dashboard_setup.py',
}

# upload_file raises if the local file is missing or the transfer fails, so the
# success message below is only reached when both uploads really succeeded
# (a failing `!aws s3 cp` shell line would not have stopped the cell).
s3_uploader = boto3.client('s3', region_name='us-east-1')
for local_path, object_name in uploads.items():
    s3_uploader.upload_file(local_path, bucket, f"{folder}{object_name}")
    print(f"upload: ./{local_path} to s3://{bucket}/{folder}{object_name}")

print("Both .ipynb and .py files successfully saved to S3.")

upload: ./cardio_model_monitoring.ipynb to s3://sagemaker-us-east-1-531690656306/cardio_project/cardio_data_quality_monitoring_schedule_v2.ipynb
upload: ./cloudwatch_dashboard_setup.py to s3://sagemaker-us-east-1-531690656306/cardio_project/cloudwatch_dashboard_setup.py
Both .ipynb and .py files successfully saved to S3.
