# Assignment 5.1: ML System Observability - Exercise

## Section 1: Setup

### 1.1 Import Libraries

In [1]:
%%time

from datetime import datetime, timedelta, timezone
import json
import os
import boto3
from time import sleep
from threading import Thread

import pandas as pd

from sagemaker import get_execution_role, Session, image_uris
from sagemaker.s3 import S3Downloader, S3Uploader
from sagemaker.serializers import CSVSerializer
from sagemaker.model import Model
from sagemaker.model_monitor import (
    DataCaptureConfig,
    ModelQualityMonitor,
    ModelBiasMonitor,
    EndpointInput,
    CronExpressionGenerator,
)
from sagemaker.model_monitor.dataset_format import DatasetFormat
from sagemaker.predictor import Predictor
from sagemaker.clarify import BiasConfig, DataConfig, ModelConfig, ModelPredictedLabelConfig

# Default SageMaker session; later cells read the region, default bucket and SageMaker API client from it.
session = Session()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
CPU times: user 1.87 s, sys: 269 ms, total: 2.14 s
Wall time: 2.23 s


### 1.2 AWS Configuration

In [2]:
# Execution role, region and default bucket resolved for the active SageMaker session.
role = get_execution_role()
region = session.boto_region_name
bucket = session.default_bucket()

# Every artifact of this run is grouped under one UTC date-stamped key prefix.
prefix = f"sagemaker/Assignment5-1-Monitors-{datetime.now(timezone.utc):%Y%m%d}"

# Lab assets: prefer the folder beside the notebook, otherwise fall back to the
# copy under aai-540-labs/ (the fallback is used even if it does not exist either).
lab_path_candidates = (
    "lab-5-1-model-monitoring-with-sagemaker-and-cloudwatch",
    "aai-540-labs/lab-5-1-model-monitoring-with-sagemaker-and-cloudwatch",
)
lab_path = (
    lab_path_candidates[0]
    if os.path.exists(lab_path_candidates[0])
    else lab_path_candidates[1]
)

# Baselining inputs and outputs (key prefixes first, then the matching S3 URIs).
baseline_prefix = f"{prefix}/baselining"
baseline_data_prefix = f"{baseline_prefix}/data"
baseline_results_prefix = f"{baseline_prefix}/results"
baseline_data_uri = f"s3://{bucket}/{baseline_data_prefix}"
baseline_results_uri = f"s3://{bucket}/{baseline_results_prefix}"
model_bias_baseline_uri = f"{baseline_results_uri}/model_bias"

# Destination for the endpoint's captured traffic.
data_capture_prefix = f"{prefix}/datacapture"
s3_capture_upload_path = f"s3://{bucket}/{data_capture_prefix}"

# Ground-truth location, stamped to the second (UTC).
ground_truth_upload_path = f"s3://{bucket}/{prefix}/ground_truth_data/{datetime.now(timezone.utc):%Y-%m-%d-%H-%M-%S}"

print(
    f"Region: {region}\n"
    f"Bucket: {bucket}\n"
    f"Prefix: {prefix}\n"
    f"Lab path: {lab_path}"
)

Region: us-east-1
Bucket: sagemaker-us-east-1-838922747260
Prefix: sagemaker/Assignment5-1-Monitors-20260209
Lab path: aai-540-labs/lab-5-1-model-monitoring-with-sagemaker-and-cloudwatch


### 1.3 Verify S3 Access

In [3]:
# Smoke test: push a small file from the lab folder into the bucket.
# The confirmation below is only reached when the upload did not raise.
upload_test_file = f"{lab_path}/test_data/upload-test-file.txt"
upload_test_target = f"s3://{bucket}/test_upload"
S3Uploader.upload(upload_test_file, upload_test_target)
print("✓ S3 access verified.")

✓ S3 access verified.


## Section 2: Deploy Model with Data Capture

### 2.1 Upload Pre-trained Model

In [4]:
# Destination in S3 (a full s3:// URI prefix, despite the "key" in the name)
# and the local pre-trained model archive to copy there.
s3_model_key = f"s3://{bucket}/{prefix}/model"
model_local_path = f"{lab_path}/model/xgb-churn-prediction-model.tar.gz"

# Keep the value returned by the upload; it is printed here and used as the model's data location.
model_url = S3Uploader.upload(model_local_path, s3_model_key)
print(f"Model uploaded: {model_url}")

Model uploaded: s3://sagemaker-us-east-1-838922747260/sagemaker/Assignment5-1-Monitors-20260209/model/xgb-churn-prediction-model.tar.gz


### 2.2 Create and Deploy SageMaker Model

In [5]:
# Model and endpoint names both carry a UTC timestamp with minute resolution.
model_name = f"DEMO-xgb-churn-assign51-{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"
endpoint_name = f"DEMO-xgb-churn-assign51-ep-{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"

# Container image for the XGBoost framework in the session's region.
image_uri = image_uris.retrieve(framework="xgboost", region=region, version="0.90-1")

model = Model(
    name=model_name,
    role=role,
    image_uri=image_uri,
    model_data=model_url,
    sagemaker_session=session,
)

# Capture is enabled at 100% sampling and written under the data-capture S3 path.
data_capture_config = DataCaptureConfig(
    destination_s3_uri=s3_capture_upload_path,
    sampling_percentage=100,
    enable_capture=True,
)

print(f"Deploying model {model_name} to endpoint {endpoint_name}...")
model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.m5.large",
    initial_instance_count=1,
    data_capture_config=data_capture_config,
)
print("\n✓ Model deployed.")

Deploying model DEMO-xgb-churn-assign51-2026-02-09-1617 to endpoint DEMO-xgb-churn-assign51-ep-2026-02-09-1617...
-----!
✓ Model deployed.


## Section 3: Model Quality Monitor

### 3.1 Generate Predictions for Baseline

In [6]:
# Client for the deployed endpoint; request payloads are serialized as CSV.
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=session,
    serializer=CSVSerializer(),
)

# Scores strictly above this probability are written as prediction "1", otherwise "0".
churn_cutoff = 0.8
validation_file = f"{lab_path}/test_data/validation.csv"
predictions_file = f"{lab_path}/test_data/validation_with_predictions.csv"

# Maximum number of validation rows sent to the endpoint.
limit = 200
rows_sent = 0

# Open the input first so a missing validation file fails before the output file is truncated.
with open(validation_file, "r") as vf, open(predictions_file, "w") as f:
    f.write("probability,prediction,label\n")
    for row in vf:
        if rows_sent >= limit:
            break
        # Drop the line terminator so it is neither sent to the endpoint nor left in the last field.
        row = row.rstrip("\r\n")
        if not row.strip():
            # A blank line (e.g. a trailing newline at end of file) has no comma and
            # would make the two-way unpacking below raise ValueError; skip it and
            # do not count it against the limit.
            continue
        # First column is written out as the "label" column; the remainder is the request payload.
        label, input_cols = row.split(",", 1)
        prob = float(predictor.predict(input_cols))
        pred = "1" if prob > churn_cutoff else "0"
        f.write(f"{prob},{pred},{label}\n")
        rows_sent += 1
        print(".", end="", flush=True)
        sleep(0.5)  # pause between consecutive requests
print("\n✓ Predictions generated.")

........................................................................................................................................................................................................
✓ Predictions generated.


### 3.2 Run Model Quality Baseline Job

In [7]:
# Stage the predictions file in S3; the baseline job reads it from the returned location.
baseline_dataset_uri = S3Uploader.upload(predictions_file, baseline_data_uri)

# Monitor that runs the baseline processing job (single ml.m5.large, 30-minute cap).
model_quality_monitor = ModelQualityMonitor(
    role=role,
    sagemaker_session=session,
    instance_type="ml.m5.large",
    instance_count=1,
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
)

mq_baseline_job_name = f"mq-baseline-{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"

# The attribute names below match the header row written to the predictions CSV
# ("probability,prediction,label").
mq_job = model_quality_monitor.suggest_baseline(
    job_name=mq_baseline_job_name,
    problem_type="BinaryClassification",
    baseline_dataset=baseline_dataset_uri,
    dataset_format=DatasetFormat.csv(header=True),
    probability_attribute="probability",
    inference_attribute="prediction",
    ground_truth_attribute="label",
    output_s3_uri=baseline_results_uri,
)

# Block until the job finishes, without streaming its logs.
mq_job.wait(logs=False)
print("\n✓ Model Quality baseline completed.")

INFO:sagemaker:Creating processing-job with name mq-baseline-2026-02-09-1622


.........................................................................................!
✓ Model Quality baseline completed.


## Section 4: Model Bias Monitor

### 4.1 Prepare Bias Baseline Data

In [8]:
validation_dataset = f"{lab_path}/test_data/validation-dataset-with-header.csv"

# Column used as the bias facet; it must be present in the dataset header.
facet_header = "Account Length"

# Create validation-dataset-with-header.csv if it doesn't exist (from validation.csv)
if not os.path.exists(validation_dataset):
    val_file = f"{lab_path}/test_data/validation.csv"
    validation_raw = pd.read_csv(val_file, header=None)
    n_cols = validation_raw.shape[1]
    churn_headers = ["label", "Account Length", "VMail Message", "Day Mins", "Day Calls", "Day Charge",
                     "Eve Mins", "Eve Calls", "Eve Charge", "Night Mins", "Night Calls", "Night Charge",
                     "Intl Mins", "Intl Calls", "Intl Charge", "CustServ Calls"]
    # Columns beyond the named ones get positional placeholder names (f16, f17, ...).
    generated_headers = churn_headers + [f"f{i}" for i in range(len(churn_headers), n_cols)]
    # Copy instead of renaming in place, so the headerless frame stays untouched.
    validation_with_header = validation_raw.set_axis(generated_headers, axis=1)
    validation_with_header.to_csv(validation_dataset, index=False)
    print(f"Created {validation_dataset}")

# Always read the header back from the file on disk, so a pre-existing file is
# handled the same way as a freshly generated one.
with open(validation_dataset) as dataset_file:
    headers_line = dataset_file.readline().rstrip()
all_headers = headers_line.split(",")
label_header = all_headers[0]

# Fail fast here rather than inside the long-running bias baseline job.
assert facet_header in all_headers, (
    f"Facet column {facet_header!r} not found in header of {validation_dataset}"
)

print(f"Validation dataset: {validation_dataset}")
print(f"Label column: {label_header}")
print(f"Facet column: {facet_header}")

bias_baseline_dataset_uri = S3Uploader.upload(
    validation_dataset,
    f"{baseline_data_uri}/bias"
)
print(f"Bias baseline data uploaded: {bias_baseline_dataset_uri}")

Validation dataset: aai-540-labs/lab-5-1-model-monitoring-with-sagemaker-and-cloudwatch/test_data/validation-dataset-with-header.csv
Label column: label
Facet column: Account Length
Bias baseline data uploaded: s3://sagemaker-us-east-1-838922747260/sagemaker/Assignment5-1-Monitors-20260209/baselining/data/bias/validation-dataset-with-header.csv


### 4.2 Configure and Run Bias Baseline Job

In [9]:
# Dataset description: CSV input with the header names and label column read earlier;
# results are written under the model-bias baseline URI.
model_bias_data_config = DataConfig(
    dataset_type="text/csv",
    headers=all_headers,
    label=label_header,
    s3_data_input_path=bias_baseline_dataset_uri,
    s3_output_path=model_bias_baseline_uri,
)

# Bias definition: label value 1, with "Account Length" as the facet and 100 as its threshold.
model_bias_config = BiasConfig(
    facet_name="Account Length",
    facet_values_or_threshold=[100],
    label_values_or_threshold=[1],
)

# Use the same probability cutoff that was applied when labelling predictions.
model_predicted_label_config = ModelPredictedLabelConfig(
    probability_threshold=churn_cutoff,
)

# Model the job queries for predictions (CSV in, CSV out).
model_config = ModelConfig(
    model_name=model_name,
    instance_type="ml.m5.large",
    instance_count=1,
    accept_type="text/csv",
    content_type="text/csv",
)

# Monitor that runs the baseline processing job (single ml.m5.large, 60-minute cap).
model_bias_monitor = ModelBiasMonitor(
    role=role,
    sagemaker_session=session,
    instance_type="ml.m5.large",
    instance_count=1,
    volume_size_in_gb=20,
    max_runtime_in_seconds=3600,
)

bias_baseline_job_name = f"bias-baseline-{datetime.now(timezone.utc):%Y-%m-%d-%H%M}"

model_bias_monitor.suggest_baseline(
    job_name=bias_baseline_job_name,
    data_config=model_bias_data_config,
    bias_config=model_bias_config,
    model_config=model_config,
    model_predicted_label_config=model_predicted_label_config,
)

print(
    f"Model Bias baseline job started: {bias_baseline_job_name}",
    "Waiting for completion (typically 10–15 minutes)...",
    sep="\n",
)

# Block on the job the monitor just launched, without streaming its logs.
baseline_job = model_bias_monitor.latest_baselining_job
baseline_job.wait(logs=False)

print("\n✓ Model Bias baseline completed!")

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.clarify:Analysis Config: {'dataset_type': 'text/csv', 'headers': ['label', 'Account Length', 'VMail Message', 'Day Mins', 'Day Calls', 'Day Charge', 'Eve Mins', 'Eve Calls', 'Eve Charge', 'Night Mins', 'Night Calls', 'Night Charge', 'Intl Mins', 'Intl Calls', 'Intl Charge', 'CustServ Calls', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27', 'f28', 'f29', 'f30', 'f31', 'f32', 'f33', 'f34', 'f35', 'f36', 'f37', 'f38', 'f39', 'f40', 'f41', 'f42', 'f43', 'f44', 'f45', 'f46', 'f47', 'f48', 'f49', 'f50', 'f51', 'f52', 'f53', 'f54', 'f55', 'f56', 'f57', 'f58', 'f59', 'f60', 'f61', 'f62', 'f63', 'f64', 'f65', 'f66', 'f67', 'f68', 'f69'], 'label': 'label', 'label_values_or_threshold': [1], 'facet': [{'name_or_index': 'Account Length', 'value_or_threshold': [100]}], 'methods': {'report': {'name': 'report', 'title'

Model Bias baseline job started: bias-baseline-2026-02-09-1630
Waiting for completion (typically 10–15 minutes)...
.......................................................................................................................!
✓ Model Bias baseline completed!


## Section 5: Model Bias Report

In [10]:
# Summarise the most recent bias baselining job: status, suggested constraints,
# the analysis.json report and the list of files it produced.
baseline_job = model_bias_monitor.latest_baselining_job
job_desc = baseline_job.describe()

banner = "=" * 70
print(banner, "MODEL BIAS MONITOR RESULTS", banner, sep="\n")
print(f"Job Name: {job_desc['ProcessingJobName']}")
print(f"Status: {job_desc['ProcessingJobStatus']}\n")

if job_desc['ProcessingJobStatus'] != 'Completed':
    # Unsuccessful job: show the failure reason when the description carries one.
    if 'FailureReason' in job_desc:
        print(f"Failure Reason: {job_desc['FailureReason']}")
    print("Job did not complete successfully.")
else:
    # The report location is taken from the job's first configured output.
    output_config = job_desc['ProcessingOutputConfig']
    output_path = output_config['Outputs'][0]['S3Output']['S3Uri']
    print(f"Output Location: {output_path}\n")

    # Each section is best-effort: a failure is reported and the next section still runs.
    try:
        suggested_constraints = baseline_job.suggested_constraints().body_dict
        print("--- SUGGESTED BIAS CONSTRAINTS ---")
        print(json.dumps(suggested_constraints, indent=2), end="\n\n")
    except Exception as err:
        print(f"Could not load constraints: {err}")

    try:
        analysis_path = f"{output_path.rstrip('/')}/analysis.json"
        analysis_content = S3Downloader.read_file(analysis_path)
        analysis_data = json.loads(analysis_content)
        print("--- ANALYSIS REPORT ---")
        print(json.dumps(analysis_data, indent=2))
    except Exception as err:
        print(f"Could not load analysis.json: {err}")

    output_files = S3Downloader.list(output_path)
    print("\nOutput Files:")
    for output_file in output_files:
        print(f"  - {output_file}")

print(f"\n{banner}")

MODEL BIAS MONITOR RESULTS
Job Name: bias-baseline-2026-02-09-1630
Status: Completed

Output Location: s3://sagemaker-us-east-1-838922747260/sagemaker/Assignment5-1-Monitors-20260209/baselining/results/model_bias

--- SUGGESTED BIAS CONSTRAINTS ---
{
  "version": "1.0",
  "post_training_bias_metrics": {
    "label": "label",
    "facets": {
      "Account Length": [
        {
          "value_or_threshold": "(100, 225]",
          "metrics": [
            {
              "name": "AD",
              "description": "Accuracy Difference (AD)",
              "value": 0.03416521605801226
            },
            {
              "name": "CDDPL",
              "description": "Conditional Demographic Disparity in Predicted Labels (CDDPL)",
              "value": null,
              "error": "Group variable is empty or not provided"
            },
            {
              "name": "DAR",
              "description": "Difference in Acceptance Rates (DAR)",
              "value": 0.0
        

## Section 6: Cleanup

In [11]:
from botocore.exceptions import ClientError


def _delete_if_present(delete_call, success_message, missing_message, **request):
    """Run one SageMaker delete call, tolerating a resource that is already gone.

    Args:
        delete_call: Bound client method to invoke (e.g. ``sm_client.delete_model``).
        success_message: Text printed when the call returns without error.
        missing_message: Text printed when the call fails with ``ValidationException``,
            which this notebook treats as "already deleted or not found".
        **request: Keyword arguments forwarded unchanged to ``delete_call``.

    Raises:
        ClientError: Re-raised for any error code other than ``ValidationException``.
    """
    try:
        delete_call(**request)
    except ClientError as e:
        if e.response['Error']['Code'] != 'ValidationException':
            raise
        print(missing_message)
    else:
        print(success_message)


print("Deleting endpoint and model...")

sm_client = session.sagemaker_client

# 1. Delete endpoint
_delete_if_present(
    sm_client.delete_endpoint,
    f"✓ Endpoint {endpoint_name} deleted",
    "Endpoint already deleted or not found",
    EndpointName=endpoint_name,
)

# 2. Delete endpoint config (looked up under the same name as the endpoint)
_delete_if_present(
    sm_client.delete_endpoint_config,
    "✓ Endpoint config deleted",
    "Endpoint config already deleted or not found",
    EndpointConfigName=endpoint_name,
)

# 3. Delete model
_delete_if_present(
    sm_client.delete_model,
    f"✓ Model {model_name} deleted",
    "Model already deleted or not found",
    ModelName=model_name,
)

print("✓ Cleanup complete. Remember to delete S3 data if desired.")

Deleting endpoint and model...
✓ Endpoint DEMO-xgb-churn-assign51-ep-2026-02-09-1617 deleted
✓ Endpoint config deleted
✓ Model DEMO-xgb-churn-assign51-2026-02-09-1617 deleted
✓ Cleanup complete. Remember to delete S3 data if desired.
