## Implement Data Monitors (Real-Time)

In [32]:
# Imports
import boto3
import botocore
import json
import os
from sagemaker import get_execution_role, Session
from sagemaker.sklearn.model import SKLearnModel
from sagemaker.model_monitor import DefaultModelMonitor, CronExpressionGenerator, DataCaptureConfig
from datetime import datetime

In [22]:
# Endpoint that is already deployed and will be monitored
endpoint_name = 'cardio-logistic-monitor-endpoint'

# S3 layout: the baseline artifacts and the monitoring reports both live
# under s3://<bucket>/<prefix>/
bucket = 'sagemaker-us-east-1-531690656306'
prefix = 'cardio_data'
monitor_output_uri = f's3://{bucket}/{prefix}/monitoring/reports'
baseline_results_uri = f's3://{bucket}/{prefix}/baseline-results'

# SageMaker session, the region it is bound to, and the execution role
session = Session()
region = session.boto_region_name
role = get_execution_role()

In [23]:
# Verify that the endpoint exists and report its current status.
# The client is bound to the session's region (not a hard-coded one) so the
# lookup targets the same region in which the monitor will be created.
sm_client = boto3.client('sagemaker', region_name=region)

# describe_endpoint fails with a botocore ClientError if the endpoint name is
# unknown in this region, which stops the notebook before any schedule is created.
response = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = response['EndpointStatus']
print(f"Endpoint '{endpoint_name}' status: {status}")

Endpoint 'cardio-logistic-monitor-endpoint' status: InService


### Create Data Monitor Schedule

In [24]:
# Sizing and time limit for the jobs the monitor launches
monitor_job_settings = {
    'instance_type': 'ml.m5.xlarge',
    'instance_count': 1,
    'volume_size_in_gb': 20,
    'max_runtime_in_seconds': 1200,  # 20 minutes
}

# Monitor object bound to the notebook's execution role and SageMaker session
monitor = DefaultModelMonitor(
    role=role,
    sagemaker_session=session,
    **monitor_job_settings
)

In [25]:
# A timestamp suffix (second resolution, local time) gives every run its own
# schedule name, so re-running the cell does not collide with an earlier schedule
current_time = format(datetime.now(), '%Y-%m-%d-%H-%M-%S')
schedule_name = f"cardio-data-monitor-schedule-{current_time}"

# Baseline artifacts the scheduled runs are compared against
baseline_statistics_uri = f'{baseline_results_uri}/statistics.json'
baseline_constraints_uri = f'{baseline_results_uri}/constraints.json'

schedule_settings = {
    'monitor_schedule_name': schedule_name,
    'endpoint_input': endpoint_name,
    'output_s3_uri': monitor_output_uri,
    'statistics': baseline_statistics_uri,
    'constraints': baseline_constraints_uri,
    'schedule_cron_expression': CronExpressionGenerator.daily(),
    'enable_cloudwatch_metrics': True,
}
monitor.create_monitoring_schedule(**schedule_settings)

print(f"Real-time Data Quality Monitoring Schedule '{schedule_name}' successfully created!")

Real-time Data Quality Monitoring Schedule 'cardio-data-monitor-schedule-2025-06-11-02-20-29' successfully created!


In [38]:
# Verify schedule exists
sm_client = boto3.client('sagemaker', region_name=region)

# list_monitoring_schedules returns results one page at a time; iterate over
# every page so schedules beyond the first page are not silently left out.
schedule_pages = sm_client.get_paginator('list_monitoring_schedules').paginate()

print("\nActive Monitoring Schedules:")
for page in schedule_pages:
    for schedule in page['MonitoringScheduleSummaries']:
        print(schedule['MonitoringScheduleName'], ":", schedule['MonitoringScheduleStatus'])


Active Monitoring Schedules:
cardio-data-monitor-schedule-2025-06-11-02-20-29 : Scheduled


-----

## Infrastructure Monitoring

In [39]:
# Initialize Cloudwatch in the same region as the SageMaker session, so the
# alarms are created where the endpoint's metrics are published
cloudwatch = boto3.client('cloudwatch', region_name=region)
endpoint_name = 'cardio-logistic-monitor-endpoint'
variant_name = 'AllTraffic'

# SageMaker publishes endpoint metrics under two different namespaces:
# invocation metrics (e.g. Invocation5XXErrors) under 'AWS/SageMaker', and
# instance metrics (CPU / memory / disk / GPU utilization) under
# '/aws/sagemaker/Endpoints'. An alarm on the wrong namespace never gets data.
INVOCATION_METRIC_NAMESPACE = 'AWS/SageMaker'
INSTANCE_METRIC_NAMESPACE = '/aws/sagemaker/Endpoints'
INSTANCE_METRICS = frozenset({
    'CPUUtilization',
    'MemoryUtilization',
    'DiskUtilization',
    'GPUUtilization',
    'GPUMemoryUtilization',
})


# Alarm creation helper
def create_alarm(metric_name, threshold, comparison_operator='GreaterThanThreshold',
                 namespace=None, statistic='Average', unit=None):
    """Create (or overwrite) a CloudWatch alarm on one metric of the endpoint variant.

    Args:
        metric_name: CloudWatch metric name, e.g. 'CPUUtilization'.
        threshold: Value the metric statistic is compared against.
        comparison_operator: CloudWatch comparison operator name.
        namespace: Metric namespace. When None it is chosen from the metric
            name: instance metrics use '/aws/sagemaker/Endpoints', everything
            else uses 'AWS/SageMaker'.
        statistic: Statistic evaluated over each 5-minute period.
        unit: Metric unit filter. When None, 'Percent' is used for instance
            metrics and no unit filter is sent for other metrics, because a
            unit that does not match the published datapoints leaves the
            alarm without data.
    """
    is_instance_metric = metric_name in INSTANCE_METRICS
    if namespace is None:
        namespace = INSTANCE_METRIC_NAMESPACE if is_instance_metric else INVOCATION_METRIC_NAMESPACE
    if unit is None and is_instance_metric:
        unit = 'Percent'

    alarm_name = f"{endpoint_name}-{metric_name}-Alarm"

    alarm_settings = dict(
        AlarmName=alarm_name,
        MetricName=metric_name,
        Namespace=namespace,
        Dimensions=[
            {'Name': 'EndpointName', 'Value': endpoint_name},
            {'Name': 'VariantName', 'Value': variant_name}
        ],
        Statistic=statistic,
        Period=300,  # 5 minutes
        EvaluationPeriods=1,
        Threshold=threshold,
        ComparisonOperator=comparison_operator,
        ActionsEnabled=False,  # Can be toggled on later
        AlarmDescription=f"Alarm when {metric_name} exceeds {threshold}",
    )
    # Only send a unit filter when one applies to this metric
    if unit is not None:
        alarm_settings['Unit'] = unit

    cloudwatch.put_metric_alarm(**alarm_settings)
    print(f"Alarm created: {alarm_name}")


# Create alarms
# NOTE(review): per the SageMaker docs, CPUUtilization is summed across cores and
# can exceed 100% on multi-vCPU instances; confirm 70 is the intended threshold.
create_alarm('CPUUtilization', threshold=70)
create_alarm('MemoryUtilization', threshold=75)
create_alarm('DiskUtilization', threshold=80)
# Each 5XX response is reported as 1, so Sum is the error count in the period:
# this alarm fires on one or more server errors within 5 minutes.
create_alarm(
    'Invocation5XXErrors',
    threshold=1,
    comparison_operator='GreaterThanOrEqualToThreshold',
    statistic='Sum',
)

Alarm created: cardio-logistic-monitor-endpoint-CPUUtilization-Alarm
Alarm created: cardio-logistic-monitor-endpoint-MemoryUtilization-Alarm
Alarm created: cardio-logistic-monitor-endpoint-DiskUtilization-Alarm
Alarm created: cardio-logistic-monitor-endpoint-Invocation5XXErrors-Alarm


### Save the notebook to the S3 bucket

In [40]:
bucket = 'sagemaker-us-east-1-531690656306'
folder = 'cardio_project/'

# Upload the notebook file
upload_status = os.system(f"aws s3 cp cardio_data_and_infrastructure_monitors.ipynb s3://{bucket}/{folder}cardio_data_and_infrastructure_monitors.ipynb")

# os.system only reports failure through its return value; raise so a failed
# upload stops the notebook instead of passing as an unremarked non-zero number
if upload_status != 0:
    raise RuntimeError(f"Notebook upload to s3://{bucket}/{folder} failed with status {upload_status}")

# Keep the status as the cell's displayed result
upload_status

upload: ./cardio_data_and_infrastructure_monitors.ipynb to s3://sagemaker-us-east-1-531690656306/cardio_project/cardio_data_and_infrastructure_monitors.ipynb


0