## Part IX.2 - Configure Model Quality Monitor for Deep AR

University of San Diego - MS Applied AI

AAI-540 Team 5

October 21, 2024

In [1]:
# setup environment
%run 0-Environment_Setup.ipynb

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Stored 's3_datalake_path_csv' (str)
Stored 'local_data_path_csv' (str)
Stored 's3_datalake_path_parquet' (str)


In [2]:
from sagemaker import image_uris
from sagemaker.model import Model
from sagemaker.model_monitor import DataCaptureConfig
from sagemaker.model_monitor import DefaultModelMonitor
from sagemaker.model_monitor import ModelQualityMonitor
from sagemaker.model_monitor import EndpointInput
from sagemaker.model_monitor.dataset_format import DatasetFormat
from datetime import datetime, timedelta, timezone
from sagemaker.base_serializers import JSONSerializer, IdentitySerializer
from sagemaker.base_deserializers import JSONDeserializer

In [3]:
# Setup S3 bucket
bucket = sess.default_bucket()
print("Demo Bucket:", bucket)
prefix = "sagemaker/deepAR-sales-forecasting-ModelQualityMonitor"

##S3 prefixes
data_capture_prefix = f"{prefix}/datacapture"
s3_capture_upload_path = f"s3://{bucket}/{data_capture_prefix}"

ground_truth_upload_path = (
    f"s3://{bucket}/{prefix}/ground_truth_data/{datetime.now():%Y-%m-%d-%H-%M-%S}"
)

reports_prefix = f"{prefix}/reports"
s3_report_path = f"s3://{bucket}/{reports_prefix}"

##Get the model monitor image
monitor_image_uri = image_uris.retrieve(framework="model-monitor", region=region)

print("Image URI:", monitor_image_uri)
print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")
print(f"Report path: {s3_report_path}")

Demo Bucket: sagemaker-us-east-1-343218227212
Image URI: 156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer
Capture path: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/datacapture
Ground truth path: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/ground_truth_data/2024-10-13-20-30-01
Report path: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/reports


### Setup dedicated endpoint for our monitoring using our best model

In [4]:
model_name = "deepar-hyperparamete-241007-2220-007-best"
image_uri = sagemaker.image_uris.retrieve("forecasting-deepar", region)
model_url = "s3://sagemaker-us-east-1-343218227212/store-sales-forecasting/deepar/gold-dataset/output/deepar-hyperparamete-241007-2220-007-b18b6b1e/output/model.tar.gz"
endpoint_name = 'deepar-store-sales-forecasting-model-quality-monitor'

In [5]:
# Create Endpoint Configuration
model = Model(image_uri=image_uri, model_data=model_url, role=role, sagemaker_session=sess)

In [None]:
# Deploy our best model to a monitoring endpoint
print("EndpointName =", endpoint_name)

data_capture_config = DataCaptureConfig(
    enable_capture=True, sampling_percentage=100, destination_s3_uri=s3_capture_upload_path
)

model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config,
)

In [7]:
# create predictor for running predictions through endpoint
json_serializer = JSONSerializer()
predictor = sagemaker.predictor.RealTimePredictor(
    endpoint_name, 
    sagemaker_session=sess, 
    serializer=IdentitySerializer(content_type="application/json"),
    deserializer=JSONDeserializer(),
    content_type="application/json")

The class RealTimePredictor has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
content_type is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


### Helper Functions for Model Jobs

In [8]:
# helper function to transform test dataset to ground truth data frame
def create_ground_truths_from_test(test_data):
    ground_truths = []
    start_date = pd.to_datetime(test_data[0]['start'])
    for sample in test_data:
        if(pd.to_datetime(sample['start']) > start_date):
            for idx, daily_ground_truth in enumerate(sample['target']):
                current_date = pd.to_datetime(sample['start']) + timedelta(days=idx)
                ground_truths.append([current_date, sample['cat'][0] + 1, daily_ground_truth, 0])
    
    ground_truth_df = pd.DataFrame(columns=['date', 'store_nbr', 'label', 'prediction'], data=ground_truths)
    ground_truth_df['date'] = pd.to_datetime(ground_truth_df['date'])
    return ground_truth_df

# run predictions through the end point
def run_predictions_on_dataset(test_data_input, test_ground_truth_df):
    for idx, model_input in enumerate(test_data_input):
        start_date = model_input['start']
        result = predictor.predict(create_deepar_input_from_test(model_input))
        model_predictions = result['predictions'][0]['mean']
        for day_idx, prediction in enumerate(model_predictions):
            test_ground_truth_df.loc[((idx * 7) + day_idx), 'prediction'] = round(prediction, 2)
    return test_ground_truth_df

### Run job to generate baseline for model quality

In [213]:
# helper function to transform test dataset to ground truth data frame
def create_ground_truths_from_test(test_data):
    ground_truths = []
    start_date = pd.to_datetime(test_data[0]['start'])
    for sample in test_data:
        if(pd.to_datetime(sample['start']) > start_date):
            for idx, daily_ground_truth in enumerate(sample['target']):
                current_date = pd.to_datetime(sample['start']) + timedelta(days=idx)
                ground_truths.append([current_date, sample['cat'][0] + 1, daily_ground_truth, 0])
    
    ground_truth_df = pd.DataFrame(columns=['date', 'store_nbr', 'label', 'prediction'], data=ground_truths)
    ground_truth_df['date'] = pd.to_datetime(ground_truth_df['date'])
    return ground_truth_df

In [9]:
# setup variables
input_data_path = "{}/test/test.json".format(s3_deepar_gold_dataset_path)
inference_filename = "test.json"
input_local_filename = f"test-data/{inference_filename}"

baseline_prefix = prefix + "/baselining"
baseline_data_prefix = baseline_prefix + "/data"
baseline_results_prefix = baseline_prefix + "/results"

baseline_data_uri = f"s3://{bucket}/{baseline_data_prefix}"
baseline_results_uri = f"s3://{bucket}/{baseline_results_prefix}"
print(f"Baseline data uri: {baseline_data_uri}")
print(f"Baseline results uri: {baseline_results_uri}")

Baseline data uri: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/data
Baseline results uri: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/results


In [10]:
# copy validation dataset local
!aws s3 cp $input_data_path $input_local_filename

download: s3://sagemaker-us-east-1-343218227212/store-sales-forecasting/deepar/gold-dataset/test/test.json to test-data/test.json


In [11]:
# load test data
test_input = load_json_by_line(input_local_filename)

In [12]:
# create the ground truth dataframe
test_ground_truth_df = create_ground_truths_from_test(test_input)
test_ground_truth_df

Unnamed: 0,date,store_nbr,label,prediction
0,2017-03-06,1,10819.11,0
1,2017-03-07,1,14300.59,0
2,2017-03-08,1,13247.73,0
3,2017-03-09,1,10605.34,0
4,2017-03-10,1,12329.87,0
...,...,...,...,...
7933,2017-07-26,54,6572.49,0
7934,2017-07-27,54,6911.35,0
7935,2017-07-28,54,9531.09,0
7936,2017-07-29,54,10878.75,0


In [13]:
# run predictions to generate ground truth
ground_truth_df = run_predictions_on_dataset(test_input, test_ground_truth_df)

  test_ground_truth_df.loc[((idx * 7) + day_idx), 'prediction'] = round(prediction, 2)


In [14]:
# store the copy to CSV
test_ground_truth_df_sub = ground_truth_df[['prediction', 'label']]
test_ground_truth_df_sub = test_ground_truth_df_sub[:200]
test_ground_truth_df_sub.to_csv("test-data/test_with_predictions.csv", index=False)

In [15]:
!head test-data/test_with_predictions.csv

prediction,label
8658.6,10819.11
8072.99,14300.59
11284.56,13247.73
7785.69,10605.34
8636.93,12329.87
7246.98,11997.51
6906.86,5897.18
16384.1,13852.91
16360.5,13113.42


In [16]:
# upload to S3
baseline_dataset_uri = "{}/test_with_predictions.csv".format(baseline_data_uri)
copy_to_s3(f"test-data/test_with_predictions.csv", baseline_dataset_uri, True)
baseline_dataset_uri

Overwriting existing file
Uploading file to s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/data/test_with_predictions.csv


's3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/data/test_with_predictions.csv'

### Create Baselining Job

In [23]:
# Create the model quality monitoring object
deepar_model_quality_monitor = ModelQualityMonitor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.2xlarge",
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
    sagemaker_session=sess,
)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: .
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [24]:
# Name of the model quality baseline job
baseline_job_name = f"deepar-store-sales-predictions-baseline-job-{datetime.utcnow():%Y-%m-%d-%H%M}"

In [25]:
# Execute the baseline suggestion job.
job = deepar_model_quality_monitor.suggest_baseline(
    job_name=baseline_job_name,
    baseline_dataset=baseline_dataset_uri,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_uri,
    problem_type="Regression",
    inference_attribute="prediction",
    ground_truth_attribute="label",
)
job.wait(logs=False)

INFO:sagemaker:Creating processing-job with name deepar-store-sales-predictions-baseline-job-2024-10-13-2041


............................................................!

### Inspect results and thresholds from baseline job

In [26]:
baseline_job = deepar_model_quality_monitor.latest_baselining_job

In [27]:
#baseline_job.baseline_statistics().body_dict
regression_metrics = baseline_job.baseline_statistics().body_dict["regression_metrics"]
pd.json_normalize(regression_metrics).T

Unnamed: 0,0
mae.value,2620.179
mae.standard_deviation,55.21575
mse.value,15567140.0
mse.standard_deviation,873282.6
rmse.value,3945.522
rmse.standard_deviation,111.9801
r2.value,0.7555614
r2.standard_deviation,0.01284822


In [28]:
pd.DataFrame(baseline_job.suggested_constraints().body_dict["regression_constraints"]).T

Unnamed: 0,threshold,comparison_operator
mae,2620.1789,GreaterThanThreshold
mse,15567140.995764,GreaterThanThreshold
rmse,3945.521638,GreaterThanThreshold
r2,0.755561,LessThanThreshold


### Generate some traffic and observe

In [29]:
from sagemaker.s3 import S3Downloader, S3Uploader
from time import sleep

In [190]:
# run predictions on all test samples
ground_truth_df = run_predictions_on_dataset(test_input, test_ground_truth_df)

In [199]:
# wait for capture files
print("Waiting for captures to show up", end="")
for _ in range(120):
    capture_files = sorted(S3Downloader.list(f"{s3_capture_upload_path}/{endpoint_name}"))
    if capture_files:
        capture_file = S3Downloader.read_file(capture_files[-1]).split("\n")
        capture_record = json.loads(capture_file[0])
        if "inferenceId" in capture_record["eventMetadata"]:
            break
    print(".", end="", flush=True)
    sleep(1)
print()
print("Found Capture Files:")
print("\n ".join(capture_files[-3:]))

Waiting for captures to show up........................................................................................................................
Found Capture Files:
s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/datacapture/deepar-store-sales-forecasting-model-quality-monitor/AllTraffic/2024/10/13/23/13-03-807-a1488047-86ee-4af6-bf7a-dfa70b8bd691.jsonl
 s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/datacapture/deepar-store-sales-forecasting-model-quality-monitor/AllTraffic/2024/10/14/00/50-51-369-a6156a76-8a40-4560-bdca-2d5e7f3066a3.jsonl
 s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/datacapture/deepar-store-sales-forecasting-model-quality-monitor/AllTraffic/2024/10/14/01/21-04-526-4a6f5d4e-3efc-44cb-8df7-878eac82f557.jsonl


In [200]:
print("\n".join(capture_file[-3:-1]))

{"captureData":{"endpointInput":{"observedContentType":"application/json","mode":"INPUT","data":"{\"instances\": [{\"start\": \"2017-07-24 00:00:00\", \"target\": [11461.16, 8396.32, 12397.49, 7726.76, 11183.51, 14961.54, 17141.29], \"cat\": [52], \"dynamic_feat\": [[46.21, 47.77, 48.58, 49.05, 49.72, 49.72, 49.72, 50.21, 49.19, 49.6, 49.03, 49.57, 49.57, 49.57], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [72, 65, 171, 48, 0, 57, 55, 58, 46, 181, 31, 50, 37, 36]]}], \"configuration\": {\"output_types\": [\"mean\"]}}","encoding":"JSON"},"endpointOutput":{"observedContentType":"application/json","mode":"OUTPUT","data":"{\"predictions\":[{\"mean\":[10779.47265625,11509.4111328125,13697.2021484375,10186.529296875,11533.44140625,11876.40625,11456.9658203125]}]}","encoding":"JSON"}},"eventMetadata":{"eventId":"9f89d496-2501-4f1a-bcc3-9bc3c4fb796c","inferenceTime":"2024-10-14T01:21:37Z"},"eventVersion":"0"}
{"captureData":{"endpointInput":{"observedContentType":"application/json","mode":"INP

In [201]:
print(json.dumps(capture_record, indent=2))

{
  "captureData": {
    "endpointInput": {
      "observedContentType": "application/json",
      "mode": "INPUT",
      "data": "{\"instances\": [{\"start\": \"2017-02-27 00:00:00\", \"target\": [2951.04, 5553.76, 16819.46, 12781.88, 13219.13, 12086.53, 5156.63], \"cat\": [0], \"dynamic_feat\": [[54.04, 54.0, 53.82, 52.63, 53.33, 53.33, 53.33, 53.19, 52.68, 49.83, 48.75, 48.05, 48.05, 48.05], [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [52, 53, 216, 52, 56, 55, 41, 39, 41, 223, 28, 53, 31, 23]]}], \"configuration\": {\"output_types\": [\"mean\"]}}",
      "encoding": "JSON"
    },
    "endpointOutput": {
      "observedContentType": "application/json",
      "mode": "OUTPUT",
      "data": "{\"predictions\":[{\"mean\":[7529.17578125,8080.9780273438,10841.1162109375,7999.193359375,9391.1884765625,8056.025390625,7046.2055664062]}]}",
      "encoding": "JSON"
    }
  },
  "eventMetadata": {
    "eventId": "9c75dbbc-f27c-4491-875e-5a7462ec9fe6",
    "inferenceTime": "2024-10-14T01:21:04Z

### Generate Ground Truth

In [203]:
#!mkdir code

In [204]:
# %%writefile code/preprocess.py
# import json
# def preprocess_handler(inference_record, logger):
#     logger.info("[Preprocessor Script Starting]")
#     logger.info(f"[Preprocessor Record]: {inference_record}")
#     output_data = json.loads(inference_record.endpoint_output.data)
#     logger.info(f"[Preprocessor Output Chunk 1]: {output_data}")
#     output_data = output_data['predictions'][0]['mean']
#     logger.info(f"[Preprocessor Output Chunk 2]: {output_data}")
#     logger.info(f"[Preprocessor Output Chunk 2]: {str(output_data[0])}")
#     return json.dumps({ "0" : str(output_data[0]) })

In [205]:
# helper function to generate ground truth data from our model capture
def generate_ground_truth_from_capture(model_inputs, capture_outputs):
    capture_idx = 0
    ground_truths = []
    start_date = pd.to_datetime(model_inputs[0]['start'])
    for model_input in model_inputs:
        #print(str(model_input['target']))
        if(pd.to_datetime(model_input['start']) > start_date):
            capture = capture_outputs[capture_idx]
            event_id = capture["eventMetadata"]["eventId"]
            ground_truth = {
                "groundTruthData": {
                    "data": str(model_input['target'][0]),
                    "encoding": "CSV",
                },
                "eventMetadata": {
                    "eventId": str(event_id),
                },
                "eventVersion": "0",
            }
            ground_truths.append(ground_truth)
            capture_idx += 1
            if(capture_idx > 200):
                break

    return ground_truths

# helper function to simplify capture data to a single prediction
def transofrm_capture_data(capture_data, multiplier):
    capture_idx = 0
    transformed_captures = []
    for idx, capture in enumerate(capture_data):
        capture_data = json.loads(capture['captureData']['endpointOutput']['data'])
        capture['captureData']['endpointOutput']['data'] = str(float(capture_data['predictions'][0]['mean'][0]) * multiplier)
        capture['captureData']['endpointOutput']['encoding'] = "CSV"
        capture['captureData']['endpointOutput']['observedContentType'] = "text/csv; charset-utf-8"        
        transformed_captures.append(capture)
        capture_idx += 1
        if(capture_idx > 200):
            break
    return transformed_captures

# helper function to upload ground truth data
def upload_ground_truth(records, upload_time):
    fake_records = [json.dumps(r) for r in records]
    data_to_upload = "\n".join(fake_records)
    target_s3_uri = f"{ground_truth_upload_path}/{upload_time:%Y/%m/%d/%H/%M%S}.jsonl"
    print(f"Uploading {len(fake_records)} records to", target_s3_uri)
    S3Uploader.upload_string_as_file_body(data_to_upload, target_s3_uri)

# helper function to upload ground truth data
def upload_transformed_capture(records, capture_file_path):
    fake_records = [json.dumps(r) for r in records]
    data_to_upload = "\n".join(fake_records)
    target_s3_uri = capture_file_path
    print(f"Uploading {len(fake_records)} records to", target_s3_uri)
    S3Uploader.upload_string_as_file_body(data_to_upload, target_s3_uri)

In [206]:
# get latest capture file
latest_capture_file = capture_files[-1]
capture_local_filename = f"test-data/latest-capture.jsonl"


In [207]:
# copy capture file to local storage for processing
!aws s3 cp $latest_capture_file $capture_local_filename

download: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/datacapture/deepar-store-sales-forecasting-model-quality-monitor/AllTraffic/2024/10/14/01/21-04-526-4a6f5d4e-3efc-44cb-8df7-878eac82f557.jsonl to test-data/latest-capture.jsonl


In [208]:
# load capture file into JSON
capture_data = load_json_by_line(capture_local_filename)

In [209]:
# generate ground truth data
capture_ground_truth = generate_ground_truth_from_capture(test_input, capture_data)

In [210]:
# convert capture file to CSV with only one prediction
capture_data_transformed = transofrm_capture_data(capture_data, 1)

In [211]:
# upload ground truth
upload_ground_truth(capture_ground_truth, datetime.utcnow())

Uploading 201 records to s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/ground_truth_data/2024-10-13-20-30-01/2024/10/14/01/3118.jsonl


In [212]:
# upload transformed capture
upload_transformed_capture(capture_data_transformed, latest_capture_file)

Uploading 201 records to s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/datacapture/deepar-store-sales-forecasting-model-quality-monitor/AllTraffic/2024/10/14/01/21-04-526-4a6f5d4e-3efc-44cb-8df7-878eac82f557.jsonl


### Create Monitoring Schedule and Inspect Execution Results

In [143]:
# monitoring schedule name
deepar_monitor_schedule_name = (
    f"deepar-sales-forecasting-model-monitoring-schedule-12"
)

In [145]:
# Create an enpointInput
endpoint_input = EndpointInput(
    endpoint_name=endpoint_name, 
    inference_attribute="0", 
    destination="/opt/ml/processing/input_data",
)

In [53]:
# upload preprocessing script
copy_to_s3("code/preprocess.py", f"s3://{bucket}/{prefix}" + "/preprocess.py", True)
preprocessing_script_location = f"s3://{bucket}/{prefix}" + "/preprocess.py"

Overwriting existing file
Uploading file to s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/preprocess.py


In [146]:
from sagemaker.model_monitor import CronExpressionGenerator

In [147]:
# Create the monitoring schedule to execute every hour.
response = deepar_model_quality_monitor.create_monitoring_schedule(
    monitor_schedule_name=deepar_monitor_schedule_name,
    endpoint_input=endpoint_input,
    output_s3_uri=baseline_results_uri,
    problem_type="Regression",
    #record_preprocessor_script=preprocessing_script_location,
    ground_truth_input=ground_truth_upload_path,
    constraints=baseline_job.suggested_constraints(),
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    enable_cloudwatch_metrics=True
)

INFO:sagemaker.model_monitor.model_monitoring:Creating Monitoring Schedule with name: deepar-sales-forecasting-model-monitoring-schedule-12


In [170]:
# inspect
deepar_model_quality_monitor.describe_schedule()

{'MonitoringScheduleArn': 'arn:aws:sagemaker:us-east-1:343218227212:monitoring-schedule/deepar-sales-forecasting-model-monitoring-schedule-12',
 'MonitoringScheduleName': 'deepar-sales-forecasting-model-monitoring-schedule-12',
 'MonitoringScheduleStatus': 'Scheduled',
 'MonitoringType': 'ModelQuality',
 'CreationTime': datetime.datetime(2024, 10, 13, 21, 24, 10, 556000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 10, 13, 23, 3, 15, 383000, tzinfo=tzlocal()),
 'MonitoringScheduleConfig': {'ScheduleConfig': {'ScheduleExpression': 'cron(0 * ? * * *)'},
  'MonitoringJobDefinitionName': 'model-quality-job-definition-2024-10-13-21-24-09-864',
  'MonitoringType': 'ModelQuality'},
 'EndpointName': 'deepar-store-sales-forecasting-model-quality-monitor',
 'LastMonitoringExecutionSummary': {'MonitoringScheduleName': 'deepar-sales-forecasting-model-monitoring-schedule-12',
  'ScheduledTime': datetime.datetime(2024, 10, 13, 23, 0, tzinfo=tzlocal()),
  'CreationTime': datetime.d

In [171]:
# Wait for the first execution of the monitoring_schedule
print("Waiting for first execution", end="")
while True:
    execution = deepar_model_quality_monitor.describe_schedule().get(
        "LastMonitoringExecutionSummary"
    )
    if execution:
        break
    print(".", end="", flush=True)
    sleep(10)
print()
print("Execution found!")

Waiting for first execution
Execution found!


In [172]:
# get the model monitor executions
executions = deepar_model_quality_monitor.list_executions()
executions

[<sagemaker.model_monitor.model_monitoring.MonitoringExecution at 0x7f82a07cd410>]

In [173]:
# get latest executions
while not executions:
    executions = deepar_model_quality_monitor.list_executions()
    print(".", end="", flush=True)
    sleep(10)
latest_execution = executions[-1]
latest_execution.describe()

{'ProcessingInputs': [{'InputName': 'constraints',
   'AppManaged': False,
   'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/results/constraints.json',
    'LocalPath': '/opt/ml/processing/baseline/constraints',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributionType': 'FullyReplicated'}},
  {'InputName': 'endpoint_input_1',
   'AppManaged': False,
   'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/results/merge/deepar-store-sales-forecasting-model-quality-monitor/AllTraffic/2024/10/13/21',
    'LocalPath': '/opt/ml/processing/input_data/deepar-store-sales-forecasting-model-quality-monitor/AllTraffic/2024/10/13/21',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S3DataDistributionType': 'FullyReplicated',
    'S3CompressionType': 'None'}}],
 'ProcessingOutputConfig': {'Outputs': [{'Output

In [None]:
# wait for execution to finish 
status = execution["MonitoringExecutionStatus"]

while status in ["Pending", "InProgress"]:
    print("Waiting for execution to finish", end="")
    latest_execution.wait(logs=False)
    latest_job = latest_execution.describe()
    print()
    print(f"{latest_job['ProcessingJobName']} job status:", latest_job["ProcessingJobStatus"])
    print(
        f"{latest_job['ProcessingJobName']} job exit message, if any:",
        latest_job.get("ExitMessage"),
    )
    print(
        f"{latest_job['ProcessingJobName']} job failure reason, if any:",
        latest_job.get("FailureReason"),
    )
    sleep(
        30
    )  # model quality executions consist of two Processing jobs, wait for second job to start
    latest_execution = deepar_model_quality_monitor.list_executions()[-1]
    execution = deepar_model_quality_monitor.describe_schedule()["LastMonitoringExecutionSummary"]
    status = execution["MonitoringExecutionStatus"]

print("Execution status is:", status)

if status != "Completed":
    print(execution)
    print(
        "====STOP==== \n No completed executions to inspect further. Please wait till an execution completes or investigate previously reported failures."
    )

In [175]:
# get link to report
latest_execution = deepar_model_quality_monitor.list_executions()[-1]
report_uri = latest_execution.describe()["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
    "S3Uri"
]
print("Report Uri:", report_uri)

Report Uri: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/results/deepar-store-sales-forecasting-model-quality-monitor/deepar-sales-forecasting-model-monitoring-schedule-12/2024/10/13/22


In [None]:
# cleanup - delete monitor if we don't need anymore
DO_CLEANUP = False
if(DO_CLEANUP):
    deepar_model_quality_monitor.delete_monitoring_schedule()