## CloudWatch Dashboard Setup

In [32]:
# Import Libraries
import boto3
import json
import pandas as pd
from time import strftime
import os

In [22]:
# Initialize CloudWatch Client
region = 'us-east-1'
endpoint_name = 'cardio-logistic-monitor-endpoint'
variant_name = 'AllTraffic'
dashboard_name = f"cardio_monitoring_dashboard_{strftime('%Y-%m-%d-%H-%M-%S')}"
bucket = 'sagemaker-us-east-1-531690656306'
s3_folder = 'cardio_project'
cloudwatch = boto3.client('cloudwatch', region_name=region)

In [23]:
# Define widgets for CloudWatch dashboard
widgets = [
    {
        "type": "metric",
        "x": 0, "y": 0, "width": 12, "height": 6,
        "properties": {
            "title": "CPU Utilization",
            "metrics": [
                ["AWS/SageMaker", "CPUUtilization", "EndpointName", endpoint_name, "VariantName", variant_name]
            ],
            "period": 300, "stat": "Average", "region": region
        }
    },
    {
        "type": "metric",
        "x": 12, "y": 0, "width": 12, "height": 6,
        "properties": {
            "title": "Memory Utilization",
            "metrics": [
                ["AWS/SageMaker", "MemoryUtilization", "EndpointName", endpoint_name, "VariantName", variant_name]
            ],
            "period": 300, "stat": "Average", "region": region
        }
    },
    {
        "type": "metric",
        "x": 0, "y": 6, "width": 12, "height": 6,
        "properties": {
            "title": "Disk Utilization",
            "metrics": [
                ["AWS/SageMaker", "DiskUtilization", "EndpointName", endpoint_name, "VariantName", variant_name]
            ],
            "period": 300, "stat": "Average", "region": region
        }
    },
    {
        "type": "metric",
        "x": 12, "y": 6, "width": 12, "height": 6,
        "properties": {
            "title": "Invocation Count",
            "metrics": [
                ["AWS/SageMaker", "Invocations", "EndpointName", endpoint_name, "VariantName", variant_name]
            ],
            "period": 300, "stat": "Sum", "region": region
        }
    },
    {
        "type": "metric",
        "x": 0, "y": 12, "width": 12, "height": 6,
        "properties": {
            "title": "Model Latency (ms)",
            "metrics": [
                ["AWS/SageMaker", "ModelLatency", "EndpointName", endpoint_name, "VariantName", variant_name]
            ],
            "period": 300, "stat": "Average", "region": region
        }
    },
    {
        "type": "metric",
        "x": 12, "y": 12, "width": 12, "height": 6,
        "properties": {
            "title": "Invocation Errors (5XX)",
            "metrics": [
                ["AWS/SageMaker", "Invocation5XXErrors", "EndpointName", endpoint_name, "VariantName", variant_name]
            ],
            "period": 300, "stat": "Sum", "region": region
        }
    }
]

# Deploy dashboard
response = cloudwatch.put_dashboard(
    DashboardName=dashboard_name,
    DashboardBody=json.dumps({"widgets": widgets})
)

print(f"CloudWatch dashboard '{dashboard_name}' successfully created and deployed.")

CloudWatch dashboard 'cardio_monitoring_dashboard_2025-06-09-07-42-34' successfully created and deployed.


### Create .py file for Cloudwatch and save .ipynb to s3

In [24]:
# Write current dashboard creation code to file
py_filename = 'cloudwatch_dashboard_setup.py'

with open(py_filename, 'w') as f:
    f.write("# CloudWatch Dashboard Setup Code\n")
    f.write(f"region = '{region}'\n")
    f.write(f"endpoint_name = '{endpoint_name}'\n")
    f.write(f"variant_name = '{variant_name}'\n")
    f.write(f"dashboard_name = '{dashboard_name}'\n")
    f.write(f"bucket = '{bucket}'\n")
    f.write(f"s3_folder = '{s3_folder}'\n")
    f.write("# Full code omitted for brevity\n")

# Upload files to S3
s3_client = boto3.client('s3', region_name=region)

# Upload .ipynb
os.system(f"aws s3 cp cardio_model_monitoring.ipynb s3://{bucket}/{s3_folder}/cardio_cloudwatch.ipynb")

# Upload .py
s3_client.upload_file(py_filename, bucket, f"{s3_folder}/{py_filename}")

print("Both .ipynb and .py files successfully uploaded to S3.")

upload: ./cardio_model_monitoring.ipynb to s3://sagemaker-us-east-1-531690656306/cardio_project/cardio_cloudwatch.ipynb
Both .ipynb and .py files successfully uploaded to S3.


---------------

## Generate Model and Data Reports on SageMaker

In [25]:
# Initialize S3 client
s3 = boto3.client('s3', region_name='us-east-1')

# S3 Locations
bucket = 'sagemaker-us-east-1-531690656306'
baseline_results_prefix = 'cardio_data/baseline-results'

# Download statistics.json
statistics_file = f'{baseline_results_prefix}/statistics.json'
s3.download_file(bucket, statistics_file, 'statistics.json')

# Download constraints.json
constraints_file = f'{baseline_results_prefix}/constraints.json'
s3.download_file(bucket, constraints_file, 'constraints.json')

# Load files
with open('statistics.json', 'r') as f:
    statistics = json.load(f)

with open('constraints.json', 'r') as f:
    constraints = json.load(f)

# Display Statistics Summary
features = statistics.get('features', [])  # FIXED THIS LINE

print("\n=== Baseline Statistics Summary ===\n")
for feature in features:
    name = feature['name']
    stat = feature.get('statistics', {})
    print(f"Feature: {name}")
    print(f" - Mean: {stat.get('mean', 'N/A')}")
    print(f" - Std Dev: {stat.get('stddev', 'N/A')}")
    print(f" - Min: {stat.get('min', 'N/A')}")
    print(f" - Max: {stat.get('max', 'N/A')}\n")


=== Baseline Statistics Summary ===

Feature: _c0
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c1
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c2
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c3
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c4
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c5
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c6
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c7
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c8
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c9
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c10
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c11
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c12
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c13
 - Mean: N/A
 - Std Dev: N/A
 - Min: N/A
 - Max: N/A

Feature: _c14
 - Mea

In [26]:
# Display Constraints Summary
print("\n=== Baseline Constraints Summary ===\n")
for feature in constraints.get('features', []):
    name = feature['name']
    violations = feature.get('violations', [])
    if violations:
        print(f"Feature '{name}' has violations: {violations}")
    else:
        print(f"Feature '{name}' has no violations detected.")


=== Baseline Constraints Summary ===

Feature '_c0' has no violations detected.
Feature '_c1' has no violations detected.
Feature '_c2' has no violations detected.
Feature '_c3' has no violations detected.
Feature '_c4' has no violations detected.
Feature '_c5' has no violations detected.
Feature '_c6' has no violations detected.
Feature '_c7' has no violations detected.
Feature '_c8' has no violations detected.
Feature '_c9' has no violations detected.
Feature '_c10' has no violations detected.
Feature '_c11' has no violations detected.
Feature '_c12' has no violations detected.
Feature '_c13' has no violations detected.
Feature '_c14' has no violations detected.
Feature '_c15' has no violations detected.
Feature '_c16' has no violations detected.
Feature '_c17' has no violations detected.
Feature '_c18' has no violations detected.
Feature '_c19' has no violations detected.
Feature '_c20' has no violations detected.
Feature '_c21' has no violations detected.
Feature '_c22' has no vio

After analyzing all datasets, your cardio_engineered.csv contains the full cleaned and feature-engineered dataset with 68,385 rows and includes the target label (cardio), which makes it the most complete representation of your original data. The cardio_prod_split40.csv is a smaller 40% production slice (27,354 rows), still properly formatted and labeled with all engineered features plus the target — useful for partial or staged baselines if you want to simulate real-world production pipelines. On the other hand, cardio_prod_no_label.csv contains 27,353 rows but is entirely scaled/transformed and stripped of column names, making it poorly structured for direct baseline generation (all columns are numeric placeholders like _c0, and no label column exists). This structure is likely why your baseline stats returned all N/A values. Therefore, the correct file for baseline generation should be cardio_engineered.csv, as it fully preserves both feature names and distributions required by SageMaker Model Monitor to accurately compute statistics and constraints.

In [33]:
# First assign actual bucket and folder values
bucket = 'sagemaker-us-east-1-531690656306'
folder = 'cardio_project/'

# Then run using an f-string, with shell=False
os.system(f"aws s3 cp cardio_cloudwatch_no_label.ipynb s3://{bucket}/{folder}cardio_cloudwatch_no_label.ipynb")

upload: ./cardio_cloudwatch_no_label.ipynb to s3://sagemaker-us-east-1-531690656306/cardio_project/cardio_cloudwatch_no_label.ipynb


0