# Amazon SageMaker Model Bias Monitor


## Section 1: Setup <a id='setup'></a>

### Import necessary libraries

In [2]:
import json
import random
import time
import pandas as pd

from datetime import datetime, timedelta

from sagemaker import get_execution_role, image_uris, Session
from sagemaker.clarify import (
    BiasConfig,
    DataConfig,
    ModelConfig,
    ModelPredictedLabelConfig,
    SHAPConfig,
)
from sagemaker.model import Model
from sagemaker.model_monitor import (
    BiasAnalysisConfig,
    CronExpressionGenerator,
    DataCaptureConfig,
    EndpointInput,
    ExplainabilityAnalysisConfig,
    ModelBiasMonitor,
    ModelExplainabilityMonitor,
)
from sagemaker.s3 import S3Downloader, S3Uploader

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


### Initialize SageMaker Role and Session

In [3]:
# IAM role this notebook runs as; it is passed to the model and the monitor below.
role = get_execution_role()
print(f"RoleArn: {role}")

# A single shared session supplies the SageMaker clients, the region and the default bucket.
sagemaker_session = Session()
sagemaker_client = sagemaker_session.sagemaker_client
sagemaker_runtime_client = sagemaker_session.sagemaker_runtime_client

region = sagemaker_session.boto_region_name
print(f"AWS region: {region}")

# A different bucket can be used, but make sure the role for this notebook has
# the s3:PutObject permissions. This is the bucket into which the data is captured.
# The session created above is reused here rather than constructing a second Session().
bucket = sagemaker_session.default_bucket()
print(f"Demo Bucket: {bucket}")
prefix = "sagemaker/DEMO-ClarifyModelMonitor-20240615"
s3_key = f"s3://{bucket}/{prefix}"
print(f"S3 key: {s3_key}")

# Every artifact of this demo lives under s3_key; the ground-truth folder additionally
# carries the (local-time) moment this cell ran, so each run writes to a fresh folder.
s3_capture_upload_path = f"{s3_key}/datacapture"
ground_truth_upload_path = f"{s3_key}/ground_truth_data/{datetime.now():%Y-%m-%d-%H-%M-%S}"
s3_report_path = f"{s3_key}/reports"

print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")
print(f"Report path: {s3_report_path}")

baseline_results_uri = f"{s3_key}/baselining"
print(f"Baseline results uri: {baseline_results_uri}")

# Endpoint sizing (also reused for the baselining ModelConfig) and the monitor schedule.
endpoint_instance_count = 1
endpoint_instance_type = "ml.m5.large"
schedule_expression = CronExpressionGenerator.hourly()

RoleArn: arn:aws:iam::741135916424:role/LabRole
AWS region: us-east-1
Demo Bucket: sagemaker-us-east-1-741135916424
S3 key: s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615
Capture path: s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/datacapture
Ground truth path: s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/ground_truth_data/2024-06-17-01-22-05
Report path: s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/reports
Baseline results uri: s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/baselining


In [4]:
# Local CSV inputs: the test file is replayed against the endpoint as traffic,
# the validation file is the input of the baselining job.
test_dataset = "test_data/test.csv"
validation_dataset = "test_data/validation.csv"
dataset_type = "text/csv"

# Only the first line of the validation file is read: it holds the comma-separated
# column names, and the first of those columns is used as the label.
with open(validation_dataset) as header_source:
    headers_line = header_source.readline().rstrip()
all_headers = headers_line.split(",")
label_header = all_headers[0]

### Define the Model Artifact S3 Path

In [5]:
# S3 location of the trained model artifact (model.tar.gz) that the endpoint will serve.
# NOTE(review): the URI is hard-coded to one specific bucket and training run, and this cell
# does not upload anything despite the wording of the message below; adjust before reuse.
model_url = "s3://sagemaker-us-east-1-741135916424/demo-sagemaker-xgboost-Diabetes-prediction/output/xgb-2024-06-16-13-27-27/xgb-2024-06-16-13-27-27/output/model.tar.gz"
print(f"Model file has been uploaded to {model_url}")

Model file has been uploaded to s3://sagemaker-us-east-1-741135916424/demo-sagemaker-xgboost-Diabetes-prediction/output/xgb-2024-06-16-13-27-27/xgb-2024-06-16-13-27-27/output/model.tar.gz


### Model and Endpoint Configuration

In [6]:
# Model Name and Endpoint Name
# Format one UTC timestamp and reuse it, so that both names always carry the same suffix
# even when this cell happens to run across a minute boundary.
resource_timestamp = f"{datetime.utcnow():%Y-%m-%d-%H%M}"
model_name = f"DEMO-xgb-diabetes-pred-model-monitor-{resource_timestamp}"
print("Model name: ", model_name)
endpoint_name = f"DEMO-xgb-diabetes-model-monitor-{resource_timestamp}"
print("Endpoint name: ", endpoint_name)

Model name:  DEMO-xgb-diabetes-pred-model-monitor-2024-06-17-0122
Endpoint name:  DEMO-xgb-diabetes-model-monitor-2024-06-17-0122


In [7]:
# Image URI and Model Definition
# Look up the XGBoost container image for this region, requesting version "0.90-1".
image_uri = image_uris.retrieve("xgboost", region, "0.90-1")
print(f"XGBoost image uri: {image_uri}")
# Pair the container image with the trained artifact referenced by model_url.
model = Model(
    role=role,
    name=model_name,
    image_uri=image_uri,
    model_data=model_url,
    sagemaker_session=sagemaker_session,
)

# Deploy Model to Endpoint
# Data capture is switched on with sampling_percentage=100 and writes under
# s3_capture_upload_path; the capture cells further down read from that location.
data_capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,
    destination_s3_uri=s3_capture_upload_path,
)
print(f"Deploying model {model_name} to endpoint {endpoint_name}")
model.deploy(
    initial_instance_count=endpoint_instance_count,
    instance_type=endpoint_instance_type,
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config,
)

XGBoost image uri: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:0.90-1-cpu-py3
Deploying model DEMO-xgb-diabetes-pred-model-monitor-2024-06-17-0122 to endpoint DEMO-xgb-diabetes-model-monitor-2024-06-17-0122
----------!

### Send Test Traffic

In [8]:
print(f"Sending test traffic to the endpoint {endpoint_name}. \nPlease wait", end="")
# After the loop this holds the total number of rows in the test file (0 for an empty
# file), which is more than the number of rows actually sent from this cell.
test_dataset_size = 0
with open(test_dataset, "r") as traffic_file:
    for test_dataset_size, row in enumerate(traffic_file, start=1):
        if test_dataset_size > 120:
            # Only the first 120 rows are sent; the remaining rows are just counted.
            continue
        payload = row.rstrip("\n")
        response = sagemaker_runtime_client.invoke_endpoint(
            EndpointName=endpoint_name,
            Body=payload,
            ContentType=dataset_type,
        )
        prediction = response["Body"].read()
        print(".", end="", flush=True)
        time.sleep(0.5)

print()
print("Done!")


Sending test traffic to the endpoint DEMO-xgb-diabetes-model-monitor-2024-06-17-0122. 
Please wait........................................................................................................................
Done!


## Section 2: Data Capture and Ground Truth

### Wait for Data Capture

In [9]:
# Poll the capture prefix once per second, for up to 30 attempts, until at least one
# capture file is listed for this endpoint.
print("Waiting 30 seconds for captures to show up", end="")
for _ in range(30):
    capture_files = sorted(S3Downloader.list(f"{s3_capture_upload_path}/{endpoint_name}"))
    if capture_files:
        break
    print(".", end="", flush=True)
    time.sleep(1)
else:
    # The loop ran out without finding anything. Stop here with an explicit message
    # rather than letting the following cells fail on an empty list.
    print()
    raise TimeoutError(
        f"No capture files found under {s3_capture_upload_path}/{endpoint_name} "
        "after 30 seconds; re-run this cell once data capture has been written."
    )
print()
print("Found Capture Files:")
# Show at most the five last entries of the sorted listing.
print("\n ".join(capture_files[-5:]))

Waiting 30 seconds for captures to show up.....
Found Capture Files:
s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/datacapture/DEMO-xgb-diabetes-model-monitor-2024-06-17-0122/AllTraffic/2024/06/17/01/27-39-410-871ad531-14d0-4765-8fa9-fa13d77030d6.jsonl


### Check for Captured Files

In [10]:
# Read the last file of the sorted capture listing and keep the lines selected by
# [-10:-1]: at most nine lines, excluding the final element of the split.
# NOTE(review): presumably that final element is the empty string left by a trailing
# newline -- confirm against an actual capture file.
capture_file = S3Downloader.read_file(capture_files[-1]).split("\n")[-10:-1]
# Print the last kept record as raw JSON text.
print(capture_file[-1])

{"captureData":{"endpointInput":{"observedContentType":"text/csv","mode":"INPUT","data":"23,31,156.6,84,161.5,96,294.6,107,9.4,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1","encoding":"CSV"},"endpointOutput":{"observedContentType":"text/csv; charset=utf-8","mode":"OUTPUT","data":"0.02614814229309559","encoding":"CSV"}},"eventMetadata":{"eventId":"fbf985cd-edfe-488d-9344-4890f55c1d71","inferenceTime":"2024-06-17T01:28:39Z"},"eventVersion":"0"}


In [11]:
# Same record as in the previous cell, re-serialized with indentation for readability.
print(json.dumps(json.loads(capture_file[-1]), indent=2))

{
  "captureData": {
    "endpointInput": {
      "observedContentType": "text/csv",
      "mode": "INPUT",
      "data": "23,31,156.6,84,161.5,96,294.6,107,9.4,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1",
      "encoding": "CSV"
    },
    "endpointOutput": {
      "observedContentType": "text/csv; charset=utf-8",
      "mode": "OUTPUT",
      "data": "0.02614814229309559",
      "encoding": "CSV"
    }
  },
  "eventMetadata": {
    "eventId": "fbf985cd-edfe-488d-9344-4890f55c1d71",
    "inferenceTime": "2024-06-17T01:28:39Z"
  },
  "eventVersion": "0"
}


## Section 3: Model Bias Monitor

In [12]:
import threading


class WorkerThread(threading.Thread):
    """Thread that calls a function over and over until it is told to terminate.

    The function receives the thread's termination event as its only argument, so a
    long-running call can poll the event and return early.
    """

    def __init__(self, do_run, *args, **kwargs):
        """Store the work function; remaining arguments are forwarded to ``Thread``.

        Args:
            do_run: callable taking one ``threading.Event`` argument.
        """
        super().__init__(*args, **kwargs)
        self.__work = do_run
        self.__stop_requested = threading.Event()

    def terminate(self):
        """Ask the thread to stop; takes effect the next time the event is checked."""
        self.__stop_requested.set()

    def run(self):
        """Invoke the work function repeatedly until termination has been requested."""
        while True:
            if self.__stop_requested.is_set():
                return
            self.__work(self.__stop_requested)

In [13]:
def invoke_endpoint(terminate_event):
    """Send every row of the test dataset to the endpoint, one request per second.

    Each request carries the zero-based row number as its ``InferenceId``. The pass over
    the file is abandoned as soon as ``terminate_event`` is found set after a request.

    Args:
        terminate_event: ``threading.Event`` checked after every request.
    """
    with open(test_dataset, "r") as rows:
        for row_number, line in enumerate(rows):
            reply = sagemaker_runtime_client.invoke_endpoint(
                EndpointName=endpoint_name,
                ContentType="text/csv",
                Body=line.rstrip("\n"),
                InferenceId=str(row_number),  # unique ID per row
            )
            # Drain the response body before pausing.
            reply["Body"].read()
            time.sleep(1)
            if terminate_event.is_set():
                return


# Keep invoking the endpoint with test data
# The worker re-runs invoke_endpoint in a background thread until
# invoke_endpoint_thread.terminate() is called (see the Cleanup section).
invoke_endpoint_thread = WorkerThread(do_run=invoke_endpoint)
invoke_endpoint_thread.start()

In [14]:
import random

def ground_truth_with_id(inference_id):
    """Build one synthetic ground-truth record for the given inference ID.

    The label is derived deterministically from ``inference_id``, so the same ID always
    yields the same record. A private generator is used so that the process-wide
    ``random`` state is left untouched; this function is called from a background thread.

    Args:
        inference_id: ID used both as the random seed and as the record's ``eventId``.

    Returns:
        dict with ``groundTruthData``, ``eventMetadata`` and ``eventVersion`` entries.
    """
    # Seeding a dedicated Random instance gives the same first draw as seeding the
    # module-level generator, without the global side effect.
    rand = random.Random(inference_id).random()
    # format required by the merge container
    return {
        "groundTruthData": {
            "data": "1" if rand < 0.7 else "0",  # randomly generate positive labels 70% of the time
            "encoding": "CSV",
        },
        "eventMetadata": {
            "eventId": str(inference_id),
        },
        "eventVersion": "0",
    }


def upload_ground_truth(upload_time):
    """Upload one JSON Lines file of synthetic ground truth to S3.

    One record is produced for each ID in ``range(test_dataset_size)``. The object key
    under ``ground_truth_upload_path`` is built from ``upload_time`` as
    ``YYYY/MM/DD/HH/MMSS.jsonl``.

    Args:
        upload_time: ``datetime`` that determines the object key.
    """
    json_lines = [
        json.dumps(ground_truth_with_id(inference_id))
        for inference_id in range(test_dataset_size)
    ]
    body = "\n".join(json_lines)
    target_s3_uri = f"{ground_truth_upload_path}/{upload_time:%Y/%m/%d/%H/%M%S}.jsonl"
    print(f"Uploading {len(json_lines)} records to", target_s3_uri)
    S3Uploader.upload_string_as_file_body(body, target_s3_uri)

In [15]:
# Generate data for the last hour
# The timestamp is shifted back one hour, so the file lands in the previous hour's folder.
upload_ground_truth(datetime.utcnow() - timedelta(hours=1))

Uploading 334 records to s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/ground_truth_data/2024-06-17-01-22-05/2024/06/17/00/2849.jsonl


In [16]:
# Generate data once an hour
def generate_fake_ground_truth(terminate_event):
    """Upload ground truth for the current UTC time, then idle for up to an hour.

    Intended as the work function of a ``WorkerThread``, which calls it again after it
    returns; the result is one upload per hour until termination is requested.

    Args:
        terminate_event: ``threading.Event`` that ends the idle period early when set.
    """
    upload_ground_truth(datetime.utcnow())
    for _ in range(0, 60):
        # Event.wait acts as an interruptible one-minute sleep: it returns True as soon
        # as the event is set, so termination does not have to wait out the full minute.
        if terminate_event.wait(timeout=60):
            break


# Run the hourly ground-truth upload in a background thread; it keeps going until
# ground_truth_thread.terminate() is called (see the Cleanup section).
ground_truth_thread = WorkerThread(do_run=generate_fake_ground_truth)
ground_truth_thread.start()

Uploading 334 records to s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/ground_truth_data/2024-06-17-01-22-05/2024/06/17/01/2849.jsonl


### Configure and Suggest Baseline

In [17]:
# Monitor object used below for the baselining job and for the monitoring schedule.
# max_runtime_in_seconds=1800 is 30 minutes.
model_bias_monitor = ModelBiasMonitor(
    role=role,
    sagemaker_session=sagemaker_session,
    max_runtime_in_seconds=1800,
)

In [18]:
# Baselining results are written under <baseline_results_uri>/model_bias.
model_bias_baselining_job_result_uri = f"{baseline_results_uri}/model_bias"
# Input of the baselining job: the validation CSV, with its header names and the label
# column taken from the file's first line.
# NOTE(review): validation_dataset is a local path, not an s3:// URI -- confirm the SDK
# uploads it as expected.
model_bias_data_config = DataConfig(
    s3_data_input_path=validation_dataset,
    s3_output_path=model_bias_baselining_job_result_uri,
    label=label_header,
    headers=all_headers,
    dataset_type=dataset_type,
)

In [19]:
# Bias settings used by the baselining job and, when no baselining job exists, by the
# monitoring schedule's analysis config.
# NOTE(review): presumably label value 1 marks the positive outcome and the facet is the
# column named "47" split at threshold 100 -- confirm against the dataset headers.
model_bias_config = BiasConfig(
    label_values_or_threshold=[1],
    facet_name="47",
    facet_values_or_threshold=[100],
)

In [20]:
# The endpoint returns a single score per row (see the captured output above).
# NOTE(review): presumably scores above 0.8 are treated as the positive label; the same
# 0.8 value is repeated in the EndpointInput of the monitoring schedule -- keep in sync.
model_predicted_label_config = ModelPredictedLabelConfig(
    probability_threshold=0.8,
)

In [21]:
# Model settings for the baselining job: the model deployed above, the same instance
# count and type as the endpoint, and CSV for both request and response.
model_config = ModelConfig(
    model_name=model_name,
    instance_count=endpoint_instance_count,
    instance_type=endpoint_instance_type,
    content_type=dataset_type,
    accept_type=dataset_type,
)

In [22]:
# Launch the baselining job from the four configs defined above.
# NOTE(review): nothing in this notebook waits for the job to finish before the
# monitoring schedule is created -- confirm that is intended.
model_bias_monitor.suggest_baseline(
    model_config=model_config,
    data_config=model_bias_data_config,
    bias_config=model_bias_config,
    model_predicted_label_config=model_predicted_label_config,
)
print(f"ModelBiasMonitor baselining job: {model_bias_monitor.latest_baselining_job_name}")

INFO:sagemaker:Creating processing-job with name baseline-suggestion-job-2024-06-17-01-28-50-177


ModelBiasMonitor baselining job: baseline-suggestion-job-2024-06-17-01-28-50-177


### Create Monitoring Schedule

In [23]:
# An explicit analysis config is built only when the monitor has no baselining job;
# otherwise None is passed to create_monitoring_schedule.
model_bias_analysis_config = None
if not model_bias_monitor.latest_baselining_job:
    model_bias_analysis_config = BiasAnalysisConfig(
        model_bias_config,
        headers=all_headers,
        label=label_header,
    )
# Schedule the monitor against the live endpoint using the hourly cron expression;
# reports go to s3_report_path and ground truth is read from ground_truth_upload_path.
model_bias_monitor.create_monitoring_schedule(
    analysis_config=model_bias_analysis_config,
    output_s3_uri=s3_report_path,
    endpoint_input=EndpointInput(
        endpoint_name=endpoint_name,
        destination="/opt/ml/processing/input/endpoint",
        # ISO-8601 duration offsets: from one hour back ("-PT1H") up to "-PT0H".
        start_time_offset="-PT1H",
        end_time_offset="-PT0H",
        # Same 0.8 value as ModelPredictedLabelConfig.probability_threshold above.
        probability_threshold_attribute=0.8,
    ),
    ground_truth_input=ground_truth_upload_path,
    schedule_cron_expression=schedule_expression,
)
print(f"Model bias monitoring schedule: {model_bias_monitor.monitoring_schedule_name}")

INFO:sagemaker.model_monitor.clarify_model_monitoring:Uploading analysis config to {s3_uri}.
INFO:sagemaker.model_monitor.model_monitoring:Creating Monitoring Schedule with name: monitoring-schedule-2024-06-17-01-28-51-052


Model bias monitoring schedule: monitoring-schedule-2024-06-17-01-28-51-052


### Wait for Execution and Retrieve Results

In [24]:
def wait_for_execution_to_start(model_monitor):
    """Block until the schedule's latest execution exists and is no longer "Pending".

    The schedule is polled every 60 seconds until its description contains a
    ``LastMonitoringExecutionSummary``, then every 10 seconds while that execution's
    status is exactly ``"Pending"``.

    Args:
        model_monitor: object whose ``describe_schedule()`` returns the schedule
            description as a dict.
    """
    print(
        "A hourly schedule was created above and it will kick off executions ON the hour (plus 0 - 20 min buffer)."
    )

    print("Waiting for the first execution to happen", end="")
    schedule_desc = model_monitor.describe_schedule()
    while "LastMonitoringExecutionSummary" not in schedule_desc:
        print(".", end="", flush=True)
        # Sleep first and refresh afterwards, so the loop ends as soon as the refreshed
        # description contains an execution instead of sleeping one extra interval.
        time.sleep(60)
        schedule_desc = model_monitor.describe_schedule()
    print()
    print("Done! Execution has been created")

    print("Now waiting for execution to start", end="")
    # Exact comparison: a substring test against "Pending" would also match an empty
    # or truncated status and could loop forever.
    while schedule_desc["LastMonitoringExecutionSummary"]["MonitoringExecutionStatus"] == "Pending":
        print(".", end="", flush=True)
        time.sleep(10)
        schedule_desc = model_monitor.describe_schedule()

    print()
    print("Done! Execution has started")

In [25]:
# Blocks until the first scheduled execution of the bias monitor has left "Pending".
wait_for_execution_to_start(model_bias_monitor)

A hourly schedule was created above and it will kick off executions ON the hour (plus 0 - 20 min buffer).
Waiting for the first execution to happen.................................
Done! Execution has been created
Now waiting for execution to start.
Done! Execution has started


In [26]:
# Stop the schedule once the first execution has started; the following cells wait for
# that execution to finish and then inspect its report.
model_bias_monitor.stop_monitoring_schedule()

INFO:sagemaker:Stopping Monitoring Schedule with name: monitoring-schedule-2024-06-17-01-28-51-052


In [27]:
# Waits for the schedule to have last execution in a terminal status.
def wait_for_execution_to_finish(model_monitor):
    """Poll once a minute until the schedule's latest execution reaches a terminal status.

    Prints "Last execution not found" and returns right away when the schedule
    description has no ``LastMonitoringExecutionSummary``.

    Args:
        model_monitor: object whose ``describe_schedule()`` returns the schedule
            description as a dict.
    """
    terminal_statuses = ("Completed", "CompletedWithViolations", "Failed", "Stopped")

    latest = model_monitor.describe_schedule().get("LastMonitoringExecutionSummary")
    if latest is None:
        print("Last execution not found")
        return

    print("Waiting for execution to finish", end="")
    while True:
        if latest["MonitoringExecutionStatus"] in terminal_statuses:
            break
        print(".", end="", flush=True)
        time.sleep(60)
        latest = model_monitor.describe_schedule()["LastMonitoringExecutionSummary"]
    print()
    print("Done! Execution has finished")


In [29]:
# Blocks until the latest execution of the bias monitor is in a terminal status.
wait_for_execution_to_finish(model_bias_monitor)

Waiting for execution to finish....
Done! Execution has finished


### Review Monitoring Results

In [30]:
# Inspect the report of the latest execution, but only if that execution completed
# (with or without violations); otherwise tell the reader to stop here.
schedule_desc = model_bias_monitor.describe_schedule()
execution_summary = schedule_desc.get("LastMonitoringExecutionSummary")
execution_completed = bool(execution_summary) and execution_summary[
    "MonitoringExecutionStatus"
] in ("Completed", "CompletedWithViolations")

if not execution_completed:
    last_model_bias_monitor_execution = None
    print(
        "====STOP==== \n No completed executions to inspect further. Please wait till an execution completes or investigate previously reported failures."
    )
else:
    # The last entry of list_executions() is taken as the execution to inspect.
    last_model_bias_monitor_execution = model_bias_monitor.list_executions()[-1]
    last_model_bias_monitor_execution_report_uri = (
        last_model_bias_monitor_execution.output.destination
    )
    print(f"Report URI: {last_model_bias_monitor_execution_report_uri}")
    last_model_bias_monitor_execution_report_files = sorted(
        S3Downloader.list(last_model_bias_monitor_execution_report_uri)
    )
    print("Found Report Files:")
    print("\n ".join(last_model_bias_monitor_execution_report_files))

Report URI: s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/reports/DEMO-xgb-diabetes-model-monitor-2024-06-17-0122/monitoring-schedule-2024-06-17-01-28-51-052/2024/06/17/02
Found Report Files:
s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/reports/DEMO-xgb-diabetes-model-monitor-2024-06-17-0122/monitoring-schedule-2024-06-17-01-28-51-052/2024/06/17/02/analysis.json
 s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/reports/DEMO-xgb-diabetes-model-monitor-2024-06-17-0122/monitoring-schedule-2024-06-17-01-28-51-052/2024/06/17/02/constraint_violations.json
 s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/reports/DEMO-xgb-diabetes-model-monitor-2024-06-17-0122/monitoring-schedule-2024-06-17-01-28-51-052/2024/06/17/02/report.html
 s3://sagemaker-us-east-1-741135916424/sagemaker/DEMO-ClarifyModelMonitor-20240615/reports/DEMO-xgb-diabetes-model-monitor-2024-0

In [31]:
# Print the constraint violations of the inspected execution, if there are any.
if last_model_bias_monitor_execution:
    model_bias_violations = last_model_bias_monitor_execution.constraint_violations()
    if model_bias_violations:
        # NOTE(review): D is a copy of the violations dict that no visible cell reads
        # afterwards -- confirm whether it is still needed.
        D = model_bias_violations.body_dict.copy()
        print(model_bias_violations.body_dict)

{'version': '1.0', 'violations': [{'facet': 'HighBP', 'facet_value': '(100, 232]', 'metric_name': 'AD', 'constraint_check_type': 'bias_drift_check', 'description': "Metric value -0.0371077571077571 doesn't meet the baseline constraint requirement 0.033979879858145656"}, {'facet': 'HighBP', 'facet_value': '(100, 232]', 'metric_name': 'DAR', 'constraint_check_type': 'bias_drift_check', 'description': "Metric value -0.14871794871794874 doesn't meet the baseline constraint requirement 0.0"}, {'facet': 'HighBP', 'facet_value': '(100, 232]', 'metric_name': 'DCA', 'constraint_check_type': 'bias_drift_check', 'description': "Metric value 3.83173076923077 doesn't meet the baseline constraint requirement -0.4137931034482758"}, {'facet': 'HighBP', 'facet_value': '(100, 232]', 'metric_name': 'DI', 'constraint_check_type': 'bias_drift_check', 'description': "Metric value 1.355632215007215 doesn't meet the baseline constraint requirement 0.9732937685459941"}, {'facet': 'HighBP', 'facet_value': '(100

### Cleanup

In [32]:
# Ask both background workers to stop. terminate() only sets their events, so each
# thread exits the next time it checks its event rather than immediately.
invoke_endpoint_thread.terminate()
ground_truth_thread.terminate()

In [33]:
from sagemaker.predictor import Predictor

# Handle to the deployed endpoint, used to list every monitor attached to it.
predictor = Predictor(endpoint_name, sagemaker_session=sagemaker_session)
model_monitors = predictor.list_monitors()
for model_monitor in model_monitors:
    # Stop the schedule, wait for its latest execution to reach a terminal status,
    # and only then delete the schedule.
    model_monitor.stop_monitoring_schedule()
    wait_for_execution_to_finish(model_monitor)
    model_monitor.delete_monitoring_schedule()

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.0.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Stopping Monitoring Schedule with name: monitoring-schedule-2024-06-17-01-28-51-052
INFO:sagemaker:Deleting Monitoring Schedule with name: monitoring-schedule-2024-06-17-01-28-51-052


Waiting for execution to finish
Done! Execution has finished


INFO:sagemaker.model_monitor.clarify_model_monitoring:Deleting Model Bias Job Definition with name: model-bias-job-definition-2024-06-17-01-28-51-052


In [None]:
# Final cleanup: remove the endpoint first, then the model behind it.
predictor.delete_endpoint()
predictor.delete_model()