## CloudWatch Dashboard Setup

In [71]:
# Import Libraries
import boto3
import json
import os
import pandas as pd
import io
from io import StringIO
import tarfile
from sagemaker import Session
from datetime import datetime, timedelta

In [48]:
# S3 bucket and key details
session = Session()
region = session.boto_region_name
cloudwatch = boto3.client('cloudwatch', region_name=region)

endpoint_name = 'cardio-logistic-monitor-endpoint'
variant_name = 'AllTraffic'
dashboard_name = 'CardioMonitoringDashboard'
bucket = 'sagemaker-us-east-1-531690656306'
prefix = 'cardio_data/baseline-results/'

# Initialize S3 client
s3 = boto3.client('s3')

In [49]:
# Redeploy inference.py (if needed)
!tar -czvf logistic_model.tar.gz inference.py logistic_model.pkl

inference.py
logistic_model.pkl


In [50]:
# Load statistics.json from S3
stats_obj = s3.get_object(Bucket=bucket, Key=prefix + 'statistics.json')
stats_content = json.loads(stats_obj['Body'].read().decode('utf-8'))

# Convert features to DataFrame
stats_df = pd.DataFrame(stats_content['features'])
display(stats_df.head(10))  # or print(stats_df.head())

Unnamed: 0,name,inferred_type,numerical_statistics,string_statistics
0,age,Integral,"{'common': {'num_present': 68385, 'num_missing...",
1,gender,Integral,"{'common': {'num_present': 68385, 'num_missing...",
2,height_ft,Fractional,"{'common': {'num_present': 68385, 'num_missing...",
3,weight_lbs,Fractional,"{'common': {'num_present': 68385, 'num_missing...",
4,systolic_bp,Integral,"{'common': {'num_present': 68385, 'num_missing...",
5,diastolic_bp,Integral,"{'common': {'num_present': 68385, 'num_missing...",
6,cholesterol,Integral,"{'common': {'num_present': 68385, 'num_missing...",
7,gluc,Integral,"{'common': {'num_present': 68385, 'num_missing...",
8,smoke,Integral,"{'common': {'num_present': 68385, 'num_missing...",
9,alco,Integral,"{'common': {'num_present': 68385, 'num_missing...",


### Load and Parse Constraints and Statistics

In [51]:
# Load constraints.json
constraints_obj = s3.get_object(Bucket=bucket, Key=f'{prefix}constraints.json')
constraints = json.loads(constraints_obj['Body'].read().decode('utf-8'))

# Load statistics.json
statistics_obj = s3.get_object(Bucket=bucket, Key=f'{prefix}statistics.json')
statistics = json.loads(statistics_obj['Body'].read().decode('utf-8'))

### Display Constraint Violations Summary

In [52]:
# Baseline Constraints Summary
print("\nBaseline Constraints Summary\n")

for feature in constraints.get('features', []):
    name = feature['name']
    violations = feature.get('violations', [])
    
    if violations:
        print(f"Feature '{name}' has violations: {violations}")
    else:
        print(f"Feature '{name}' has no violations detected.")


Baseline Constraints Summary

Feature 'age' has no violations detected.
Feature 'gender' has no violations detected.
Feature 'height_ft' has no violations detected.
Feature 'weight_lbs' has no violations detected.
Feature 'systolic_bp' has no violations detected.
Feature 'diastolic_bp' has no violations detected.
Feature 'cholesterol' has no violations detected.
Feature 'gluc' has no violations detected.
Feature 'smoke' has no violations detected.
Feature 'alco' has no violations detected.
Feature 'active' has no violations detected.
Feature 'cardio' has no violations detected.
Feature 'bmi' has no violations detected.
Feature 'age_group' has no violations detected.
Feature 'cholesterol_label' has no violations detected.
Feature 'pulse_pressure' has no violations detected.
Feature 'chol_bmi_ratio' has no violations detected.
Feature 'height_in' has no violations detected.
Feature 'age_years' has no violations detected.
Feature 'is_hypertensive' has no violations detected.
Feature 'bp_

In [53]:
print("\nBaseline Statistics Summary \n")

for feature in statistics.get('features', []):
    name = feature['name']
    stats = feature.get('numerical_statistics', {})
    
    if stats:  # Only for numerical features
        print(f"Feature: {name}")
        print(f" - Mean: {stats.get('mean', 'N/A')}")
        print(f" - Std Dev: {stats.get('standard_deviation', 'N/A')}")
        print(f" - Min: {stats.get('min', 'N/A')}")
        print(f" - Max: {stats.get('max', 'N/A')}")
        print("----------------------------------------------------")
    else:
        print(f"Feature: {name} (non-numeric or no statistics)")


Baseline Statistics Summary 

Feature: age
 - Mean: 52.83139577392703
 - Std Dev: N/A
 - Min: 29.0
 - Max: 64.0
----------------------------------------------------
Feature: gender
 - Mean: 1.3487753162243181
 - Std Dev: N/A
 - Min: 1.0
 - Max: 2.0
----------------------------------------------------
Feature: height_ft
 - Mean: 5.393331139869341
 - Std Dev: N/A
 - Min: 3.28
 - Max: 6.79
----------------------------------------------------
Feature: weight_lbs
 - Mean: 163.42664692552245
 - Std Dev: N/A
 - Min: 66.14
 - Max: 440.92
----------------------------------------------------
Feature: systolic_bp
 - Mean: 126.64601886378591
 - Std Dev: N/A
 - Min: 90.0
 - Max: 200.0
----------------------------------------------------
Feature: diastolic_bp
 - Mean: 81.32382832492506
 - Std Dev: N/A
 - Min: 60.0
 - Max: 120.0
----------------------------------------------------
Feature: cholesterol
 - Mean: 1.3643635300138919
 - Std Dev: N/A
 - Min: 1.0
 - Max: 3.0
-------------------------------

The baseline statistics summary confirms that all 24 features in the dataset were processed correctly, with numerical statistics available for the quantitative variables and appropriately omitted for categorical features such as age_group, bp_category, and bmi_category. Key variables like age, systolic_bp, cholesterol, and bmi show realistic ranges, indicating clean and reliable data. The target variable cardio has a near-balanced mean of 0.49, suggesting an even class distribution. While the standard deviations were not included, the means, minimums, and maximums provide sufficient context to verify that the data is within logical bounds. No missing values were reported, and non-numeric features were correctly excluded from numerical summary computations. This confirms the dataset is properly structured for monitoring and serves as a valid baseline for real-time model quality tracking in SageMaker.

In [54]:
# Define S3 details
bucket_name = "sagemaker-us-east-1-531690656306"
file_key = "cardio_data/cardio_engineered_clean.csv"

# Use boto3 to get the file
s3 = boto3.client("s3")
response = s3.get_object(Bucket=bucket_name, Key=file_key)

# Read file content into a DataFrame
df = pd.read_csv(io.BytesIO(response['Body'].read()))

# Show the column names (features)
df.drop(columns=["cardio"], errors="ignore").columns.tolist()

['age',
 'gender',
 'height_ft',
 'weight_lbs',
 'systolic_bp',
 'diastolic_bp',
 'cholesterol',
 'gluc',
 'smoke',
 'alco',
 'active',
 'bmi',
 'age_group',
 'cholesterol_label',
 'pulse_pressure',
 'chol_bmi_ratio',
 'height_in',
 'age_years',
 'is_hypertensive',
 'bp_category',
 'bmi_category',
 'age_gluc_interaction',
 'lifestyle_score']

In [55]:
# S3 details
bucket_name = "sagemaker-us-east-1-531690656306"
file_key = "cardio_data/cardio_engineered_clean.csv"

# Download the file from S3 and load into DataFrame
s3 = boto3.client("s3")
response = s3.get_object(Bucket=bucket_name, Key=file_key)

df = pd.read_csv(io.BytesIO(response["Body"].read()))

# Show the first 5 rows of the DataFrame
df.head()

Unnamed: 0,age,gender,height_ft,weight_lbs,systolic_bp,diastolic_bp,cholesterol,gluc,smoke,alco,...,cholesterol_label,pulse_pressure,chol_bmi_ratio,height_in,age_years,is_hypertensive,bp_category,bmi_category,age_gluc_interaction,lifestyle_score
0,50,2,5.51,136.69,110,80,1,1,0,0,...,Normal,30,4.55,66.12,50,0,stage1,normal,50,-1
1,55,1,5.12,187.39,140,90,3,1,0,0,...,Well Above Normal,50,8.6,61.44,55,1,stage2,obese,55,-1
2,51,1,5.41,141.1,130,70,3,1,0,0,...,Well Above Normal,60,12.74,64.92,51,0,stage1,normal,51,0
3,48,2,5.54,180.78,150,100,1,1,0,0,...,Normal,50,3.48,66.48,48,1,stage2,overweight,48,-1
4,47,1,5.12,123.46,100,60,1,1,0,0,...,Normal,40,4.35,61.44,47,0,normal,normal,47,0


In [56]:
# S3 configuration
bucket = "sagemaker-us-east-1-531690656306"
key = "cardio_data/cardio_prod_no_label.csv"

# Load file from S3
s3 = boto3.client("s3")
response = s3.get_object(Bucket=bucket, Key=key)
content = response['Body'].read().decode('utf-8')

# Load into DataFrame
inference_df = pd.read_csv(StringIO(content), header=None)
inference_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,-0.860483,-0.32358,-0.914107,-0.410657,-0.147087,-0.538657,-0.390761,-0.312731,-0.23822,0.494625,...,-0.32358,-0.860483,-0.607947,-0.543807,-0.596864,0.0,2.0,0.0,1.0,1.0
1,-0.268438,1.21118,0.266123,-0.410657,-0.147087,-0.538657,-0.390761,-0.312731,-0.23822,-2.021734,...,1.21118,-0.268438,-0.607947,-0.422052,1.161966,1.0,2.0,2.0,2.0,1.0
2,0.915651,-1.052592,-0.983384,-0.410657,-0.147087,-0.538657,-0.390761,-0.312731,-0.23822,0.494625,...,-1.052592,0.915651,-0.607947,-0.178543,-0.596864,0.0,2.0,0.0,2.0,1.0
3,-0.712472,-0.822378,-1.191845,-0.410657,-0.147087,-0.538657,-0.390761,-0.312731,-0.23822,0.494625,...,-0.822378,-0.712472,-0.607947,-0.513368,-0.596864,0.0,2.0,0.0,1.0,1.0
4,0.323606,-0.170104,-0.358631,-0.410657,-0.147087,-0.538657,-0.390761,-0.312731,-0.23822,0.494625,...,-0.170104,0.323606,-0.607947,-0.300298,-0.596864,0.0,2.0,2.0,2.0,1.0


### Create Infrastructure CloudWatch Alarms

In [57]:
sm_client = boto3.client('sagemaker', region_name='us-east-1')
response = sm_client.describe_endpoint(EndpointName='cardio-logistic-monitor-endpoint')
print("Endpoint status:", response['EndpointStatus'])

Endpoint status: InService


In [58]:
# View current state of alarms
alarms = cloudwatch.describe_alarms()

print("\nCloudWatch Alarm States:")
for alarm in alarms['MetricAlarms']:
    if endpoint_name in alarm['AlarmName']:
        print(f"{alarm['AlarmName']} → State: {alarm['StateValue']}")


CloudWatch Alarm States:
cardio-logistic-monitor-endpoint-CPUUtilization-Alarm → State: INSUFFICIENT_DATA
cardio-logistic-monitor-endpoint-DiskUtilization-Alarm → State: INSUFFICIENT_DATA
cardio-logistic-monitor-endpoint-Invocation5XXErrors-Alarm → State: INSUFFICIENT_DATA
cardio-logistic-monitor-endpoint-MemoryUtilization-Alarm → State: INSUFFICIENT_DATA


In [59]:
# Add CloudWatch Dashboard
# Set up variables
session = Session()
region = session.boto_region_name
cloudwatch = boto3.client('cloudwatch', region_name=region)

endpoint_name = 'cardio-logistic-monitor-endpoint'
variant_name = 'AllTraffic'
dashboard_name = 'CardioMonitoringDashboard'

# Create CloudWatch dashboard
dashboard_body = {
    "widgets": [
        {
            "type": "metric",
            "x": 0,
            "y": 0,
            "width": 24,
            "height": 6,
            "properties": {
                "metrics": [
                    ["AWS/SageMaker", "CPUUtilization", "EndpointName", endpoint_name, "VariantName", variant_name],
                    ["AWS/SageMaker", "MemoryUtilization", "EndpointName", endpoint_name, "VariantName", variant_name],
                    ["AWS/SageMaker", "DiskUtilization", "EndpointName", endpoint_name, "VariantName", variant_name],
                    ["AWS/SageMaker", "Invocation5XXErrors", "EndpointName", endpoint_name, "VariantName", variant_name]
                ],
                "view": "timeSeries",
                "stacked": False,
                "region": region,
                "title": "Cardio Endpoint Monitoring - CPU, Memory, Disk, Errors"
            }
        }
    ]
}

# Create dashboard
cloudwatch.put_dashboard(
    DashboardName=dashboard_name,
    DashboardBody=json.dumps(dashboard_body)
)

print(f"CloudWatch Dashboard '{dashboard_name}' created successfully.")

CloudWatch Dashboard 'CardioMonitoringDashboard' created successfully.


In [60]:
# Initialize SageMaker client
sagemaker_client = boto3.client('sagemaker', region_name='us-east-1')

# List all endpoints
response = sagemaker_client.list_endpoints()

# Extract endpoint names
for ep in response['Endpoints']:
    print(f"Endpoint Name: {ep['EndpointName']} | Status: {ep['EndpointStatus']}")

Endpoint Name: cardio-logistic-monitor-endpoint | Status: InService


In [61]:
bucket = 'sagemaker-us-east-1-531690656306'
key = 'model/inference.py'

s3 = boto3.client('s3')
response = s3.get_object(Bucket=bucket, Key=key)

inference_code = response['Body'].read().decode('utf-8')
print(inference_code)


import pandas as pd
from io import StringIO
import joblib
import os

# Define feature columns matching your final dataset (23 columns)
FEATURE_COLUMNS = [
    'age', 'gender', 'height_ft', 'weight_lbs', 'systolic_bp', 'diastolic_bp',
    'cholesterol', 'gluc', 'smoke', 'alco', 'active',
    'bmi', 'age_group', 'cholesterol_label', 'pulse_pressure', 'chol_bmi_ratio',
    'height_in', 'age_years', 'is_hypertensive', 'bp_category', 'bmi_category',
    'age_gluc_interaction', 'lifestyle_score'
]

def model_fn(model_dir):
    return joblib.load(os.path.join(model_dir, "logistic_model.pkl"))

def input_fn(input_data, content_type):
    if content_type == "text/csv":
        df = pd.read_csv(StringIO(input_data), header=None)
        if df.shape[1] != len(FEATURE_COLUMNS):
            raise ValueError(f"Column mismatch: expected {len(FEATURE_COLUMNS)}, got {df.shape[1]}")
        df.columns = FEATURE_COLUMNS
        return df
    else:
        raise ValueError(f"Unsupported content type: {co

In [62]:
def input_fn(input_data, content_type):
    if content_type == "text/csv":
        df = pd.read_csv(StringIO(input_data), header=None)
        return df  # Do NOT assign column names

In [63]:
# Tigger Metric Activity
# Load the engineered dataset
file_path = 'cardio_engineered_clean.csv'  # Or full S3 path if local not available

try:
    df = pd.read_csv(file_path)
    print("File loaded successfully.")
except Exception as e:
    print(f"Failed to load file: {e}")
    raise
    
# Drop the label column (cardio) if it exists
if "cardio" in df.columns:
    df = df.drop(columns=["cardio"])

# Preview first row (excluding header)
sample_row = df.iloc[0]
print("\nSample input row (raw):")
print(sample_row)

# Convert to CSV-style payload
payload = ",".join(str(x) for x in sample_row.values)
print("\nPayload to send to endpoint:")
print(payload)

File loaded successfully.

Sample input row (raw):
age                         50
gender                       2
height_ft                 5.51
weight_lbs              136.69
systolic_bp                110
diastolic_bp                80
cholesterol                  1
gluc                         1
smoke                        0
alco                         0
active                       1
bmi                      21.98
age_group                  50s
cholesterol_label       Normal
pulse_pressure              30
chol_bmi_ratio            4.55
height_in                66.12
age_years                   50
is_hypertensive              0
bp_category             stage1
bmi_category            normal
age_gluc_interaction        50
lifestyle_score             -1
Name: 0, dtype: object

Payload to send to endpoint:
50,2,5.51,136.69,110,80,1,1,0,0,1,21.98,50s,Normal,30,4.55,66.12,50,0,stage1,normal,50,-1


In [64]:
print(f"Payload length: {len(payload.split(','))}")

Payload length: 23


In [65]:
print("Received input shape:", df.shape)
print("First few rows:\n", df.head())

Received input shape: (68385, 23)
First few rows:
    age  gender  height_ft  weight_lbs  systolic_bp  diastolic_bp  cholesterol  \
0   50       2       5.51      136.69          110            80            1   
1   55       1       5.12      187.39          140            90            3   
2   51       1       5.41      141.10          130            70            3   
3   48       2       5.54      180.78          150           100            1   
4   47       1       5.12      123.46          100            60            1   

   gluc  smoke  alco  ...  cholesterol_label  pulse_pressure chol_bmi_ratio  \
0     1      0     0  ...             Normal              30           4.55   
1     1      0     0  ...  Well Above Normal              50           8.60   
2     1      0     0  ...  Well Above Normal              60          12.74   
3     1      0     0  ...             Normal              50           3.48   
4     1      0     0  ...             Normal              40       

In [66]:
# Load a clean numeric-only dataset
df = pd.read_csv("cardio_prod_no_label.csv")

# Preview and format the first row
sample_row = df.iloc[0]
payload = ",".join(str(x) for x in sample_row.values)

print("Formatted Payload:\n", payload)

Formatted Payload:
 -0.2684383443805186,1.211180261502781,0.2661226839643628,-0.4106570556636961,-0.1470868943108747,-0.5386571007688755,-0.3907613525135796,-0.3127311730224256,-0.2382201119908524,-2.021734239721185,-0.3196689646266312,-0.4673074561443081,-0.4730751945221336,1.2111802615027842,-0.2684383443805186,-0.6079465096487536,-0.4220522955732228,1.1619656440612056,1.0,2.0,2.0,2.0,1.0


In [67]:
# Trigger Endpoint Inference
# Initialize runtime client
runtime = boto3.client("sagemaker-runtime", region_name="us-east-1")

# Your deployed SageMaker endpoint name
endpoint_name = "cardio-logistic-monitor-endpoint"

# Replace this with the actual formatted payload (already scaled & numeric)
payload = "-0.2684383443805186,1.211180261502781,0.2661226839643628,-0.4106570556636961,-0.1470868943108747,-0.5386571007688755,-0.3907613525135796,-0.3127311730224256,-0.2382201119908524,-2.021734239721185,-0.3196689646266312,-0.4673074561443081,-0.4730751945221336,1.2111802615027842,-0.2684383443805186,-0.6079465096487536,-0.4220522955732228,1.1619656440612056,1.0,2.0,2.0,2.0,1.0"

# Send multiple requests to generate CloudWatch metrics and monitor inference
for i in range(10):
    try:
        response = runtime.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType="text/csv",
            Body=payload
        )
        result = response["Body"].read().decode("utf-8")
        print(f"Request {i+1}: Prediction = {result.strip()}")
    except Exception as e:
        print(f"Request {i+1} failed: {e}")

Request 1: Prediction = 0
Request 2: Prediction = 0
Request 3: Prediction = 0
Request 4: Prediction = 0
Request 5: Prediction = 0
Request 6: Prediction = 0
Request 7: Prediction = 0
Request 8: Prediction = 0
Request 9: Prediction = 0
Request 10: Prediction = 0


In [72]:
# Confirm metrics
cloudwatch = boto3.client('cloudwatch', region_name='us-east-1')

metrics = cloudwatch.list_metrics(
    Namespace='AWS/SageMaker',
    Dimensions=[{'Name': 'EndpointName', 'Value': 'cardio-logistic-monitor-endpoint'}]
)

print("Metrics found:")
for metric in metrics['Metrics']:
    print(f"- {metric['MetricName']}")

Metrics found:
- Invocations
- InvocationsPerInstance
- Invocation4XXErrors
- ModelLatency
- Invocation4XXErrors
- InvocationModelErrors
- InvocationModelErrors
- Invocation5XXErrors
- Invocations
- OverheadLatency
- OverheadLatency
- InvocationsPerInstance
- Invocation5XXErrors
- ModelLatency
- ConcurrentRequestsPerModel


In [73]:
notebook_name = 'cardio_cloudwatch_and_data_reports.ipynb'
s3_path = f's3://{bucket}/cardio_project/{notebook_name}'

os.system(f"aws s3 cp {notebook_name} {s3_path}")
print(f"Notebook uploaded to: {s3_path}")

upload: ./cardio_cloudwatch_and_data_reports.ipynb to s3://sagemaker-us-east-1-531690656306/cardio_project/cardio_cloudwatch_and_data_reports.ipynb
Notebook uploaded to: s3://sagemaker-us-east-1-531690656306/cardio_project/cardio_cloudwatch_and_data_reports.ipynb
