## Part IX.2 - Configure Model Quality Monitor for Deep AR

University of San Diego - MS Applied AI

AAI-540 Team 5

October 21, 2024

In [1]:
# setup environment
%run 0-Environment_Setup.ipynb

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Stored 's3_datalake_path_csv' (str)
Stored 'local_data_path_csv' (str)
Stored 's3_datalake_path_parquet' (str)


In [55]:
from sagemaker import image_uris
from sagemaker.model import Model
from sagemaker.model_monitor import DataCaptureConfig
from sagemaker.model_monitor import DefaultModelMonitor
from sagemaker.model_monitor import ModelQualityMonitor
from sagemaker.model_monitor import EndpointInput
from sagemaker.model_monitor.dataset_format import DatasetFormat
from datetime import datetime, timedelta, timezone
from sagemaker.base_serializers import JSONSerializer, IdentitySerializer
from sagemaker.base_deserializers import JSONDeserializer

In [5]:
# Setup S3 bucket
bucket = sess.default_bucket()
print("Demo Bucket:", bucket)
prefix = "sagemaker/deepAR-sales-forecasting-ModelQualityMonitor"

##S3 prefixes
data_capture_prefix = f"{prefix}/datacapture"
s3_capture_upload_path = f"s3://{bucket}/{data_capture_prefix}"

ground_truth_upload_path = (
    f"s3://{bucket}/{prefix}/ground_truth_data/{datetime.now():%Y-%m-%d-%H-%M-%S}"
)

reports_prefix = f"{prefix}/reports"
s3_report_path = f"s3://{bucket}/{reports_prefix}"

##Get the model monitor image
monitor_image_uri = image_uris.retrieve(framework="model-monitor", region=region)

print("Image URI:", monitor_image_uri)
print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")
print(f"Report path: {s3_report_path}")

Demo Bucket: sagemaker-us-east-1-343218227212
Image URI: 156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer
Capture path: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/datacapture
Ground truth path: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/ground_truth_data/2024-10-11-03-03-21
Report path: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/reports


### Setup dedicated endpoint for our monitoring using our best model

In [85]:
model_name = "deepar-hyperparamete-241007-2220-007-best"
image_uri = sagemaker.image_uris.retrieve("forecasting-deepar", region)
model_url = "s3://sagemaker-us-east-1-343218227212/store-sales-forecasting/deepar/gold-dataset/output/deepar-hyperparamete-241007-2220-007-b18b6b1e/output/model.tar.gz"
endpoint_name = 'deepar-store-sales-forecasting-model-quality-monitor'

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [87]:
# Create Endpoint Configuration
model = Model(image_uri=image_uri, model_data=model_url, role=role, sagemaker_session=sess)

In [88]:
# Deploy our best model to a monitoring endpoint
print("EndpointName =", endpoint_name)

data_capture_config = DataCaptureConfig(
    enable_capture=True, sampling_percentage=100, destination_s3_uri=s3_capture_upload_path
)

model.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config,
)

INFO:sagemaker:Creating model with name: forecasting-deepar-2024-10-11-05-27-43-759


EndpointName = deepar-store-sales-forecasting-model-quality-monitor


INFO:sagemaker:Creating endpoint-config with name deepar-store-sales-forecasting-model-quality-monitor
INFO:sagemaker:Creating endpoint with name deepar-store-sales-forecasting-model-quality-monitor


--------------!

In [89]:
# create predictor for running predictions through endpoint
json_serializer = JSONSerializer()
predictor = sagemaker.predictor.RealTimePredictor(
    endpoint_name, 
    sagemaker_session=sess, 
    serializer=IdentitySerializer(content_type="application/json"),
    deserializer=JSONDeserializer(),
    content_type="application/json")

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


### Run job to generate baseline for model quality

In [90]:
# setup variables
input_data_path = "{}/test/test.json".format(s3_deepar_gold_dataset_path)
inference_filename = "test.json"
input_local_filename = f"test-data/{inference_filename}"

baseline_prefix = prefix + "/baselining"
baseline_data_prefix = baseline_prefix + "/data"
baseline_results_prefix = baseline_prefix + "/results"

baseline_data_uri = f"s3://{bucket}/{baseline_data_prefix}"
baseline_results_uri = f"s3://{bucket}/{baseline_results_prefix}"
print(f"Baseline data uri: {baseline_data_uri}")
print(f"Baseline results uri: {baseline_results_uri}")

Baseline data uri: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/data
Baseline results uri: s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/results


In [91]:
# copy validation dataset local
!aws s3 cp $input_data_path $input_local_filename

download: s3://sagemaker-us-east-1-343218227212/store-sales-forecasting/deepar/gold-dataset/test/test.json to test-data/test.json


In [92]:
# load test data
test_input = load_json_by_line(input_local_filename)

In [104]:
# helper function to transform test dataset to ground truth data frame
def create_ground_truths_from_test(test_data):
    ground_truths = []
    start_date = pd.to_datetime(test_data[0]['start'])
    for sample in test_data:
        if(pd.to_datetime(sample['start']) > start_date):
            for idx, daily_ground_truth in enumerate(sample['target']):
                current_date = pd.to_datetime(sample['start']) + timedelta(days=idx)
                ground_truths.append([current_date, sample['cat'][0] + 1, daily_ground_truth, 0])
    
    ground_truth_df = pd.DataFrame(columns=['date', 'store_nbr', 'label', 'prediction'], data=ground_truths)
    ground_truth_df['date'] = pd.to_datetime(ground_truth_df['date'])
    return ground_truth_df

In [105]:
# create the ground truth dataframe
test_ground_truth_df = create_ground_truths_from_test(test_input)
test_ground_truth_df

Unnamed: 0,date,store_nbr,label,prediction
0,2017-03-06,1,10819.11,0
1,2017-03-07,1,14300.59,0
2,2017-03-08,1,13247.73,0
3,2017-03-09,1,10605.34,0
4,2017-03-10,1,12329.87,0
...,...,...,...,...
7933,2017-07-26,54,6572.49,0
7934,2017-07-27,54,6911.35,0
7935,2017-07-28,54,9531.09,0
7936,2017-07-29,54,10878.75,0


In [128]:
# run predictions through the end point
def run_predictions_on_dataset(test_data_input):
    for idx, model_input in enumerate(test_data_input):
        start_date = model_input['start']
        result = predictor.predict(create_deepar_input_from_test(model_input))
        model_predictions = result['predictions'][0]['mean']
        for day_idx, prediction in enumerate(model_predictions):
            test_ground_truth_df.loc[((idx * 7) + day_idx), 'prediction'] = round(prediction, 2)
    return test_ground_truth_df

In [129]:
# run predictions to generate ground truth
ground_truth_df = run_predictions_on_dataset(test_input)

In [110]:
# store the copy to CSV
test_ground_truth_df_sub = ground_truth_df[['prediction', 'label']]
test_ground_truth_df_sub = test_ground_truth_df_sub[:200]
test_ground_truth_df_sub.to_csv("test-data/test_with_predictions.csv", index=False)

In [111]:
!head test-data/test_with_predictions.csv

prediction,label
7577.77,10819.11
7283.98,14300.59
11458.74,13247.73
8445.14,10605.34
10105.5,12329.87
8620.21,11997.51
6029.9,5897.18
16140.39,13852.91
15678.04,13113.42


In [113]:
# upload to S3
baseline_dataset_uri = "{}/test_with_predictions.csv".format(baseline_data_uri)
copy_to_s3(f"test-data/test_with_predictions.csv", baseline_dataset_uri)
baseline_dataset_uri

Uploading file to s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/data/test_with_predictions.csv


's3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/baselining/data/test_with_predictions.csv'

### Create Baselining Job

In [117]:
# Create the model quality monitoring object
deepar_model_quality_monitor = ModelQualityMonitor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
    sagemaker_session=sess,
)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: .
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [118]:
# Name of the model quality baseline job
baseline_job_name = f"deepar-store-sales-predictions-baseline-job-{datetime.utcnow():%Y-%m-%d-%H%M}"

In [119]:
# Execute the baseline suggestion job.
job = deepar_model_quality_monitor.suggest_baseline(
    job_name=baseline_job_name,
    baseline_dataset=baseline_dataset_uri,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_uri,
    problem_type="Regression",
    inference_attribute="prediction",
    ground_truth_attribute="label",
)
job.wait(logs=False)

INFO:sagemaker:Creating processing-job with name deepar-store-sales-predictions-baseline-job-2024-10-11-0551


............................................................!

### Inspect results and thresholds from baseline job

In [120]:
baseline_job = deepar_model_quality_monitor.latest_baselining_job

In [125]:
#baseline_job.baseline_statistics().body_dict
regression_metrics = baseline_job.baseline_statistics().body_dict["regression_metrics"]
pd.json_normalize(regression_metrics).T

Unnamed: 0,0
mae.value,2546.747
mae.standard_deviation,129.377
mse.value,15044400.0
mse.standard_deviation,2018684.0
rmse.value,3878.711
rmse.standard_deviation,274.1669
r2.value,0.7637696
r2.standard_deviation,0.01008255


In [126]:
pd.DataFrame(baseline_job.suggested_constraints().body_dict["regression_constraints"]).T

Unnamed: 0,threshold,comparison_operator
mae,2546.74675,GreaterThanThreshold
mse,15044400.53715,GreaterThanThreshold
rmse,3878.711195,GreaterThanThreshold
r2,0.76377,LessThanThreshold


### Generate some traffic and observe

In [134]:
from sagemaker.s3 import S3Downloader, S3Uploader
from time import sleep

In [130]:
# run predictions on all test samples
ground_truth_df = run_predictions_on_dataset(test_input)

In [135]:
print("Waiting for captures to show up", end="")
for _ in range(120):
    capture_files = sorted(S3Downloader.list(f"{s3_capture_upload_path}/{endpoint_name}"))
    if capture_files:
        capture_file = S3Downloader.read_file(capture_files[-1]).split("\n")
        capture_record = json.loads(capture_file[0])
        if "inferenceId" in capture_record["eventMetadata"]:
            break
    print(".", end="", flush=True)
    sleep(1)
print()
print("Found Capture Files:")
print("\n ".join(capture_files[-3:]))

Waiting for captures to show up.

.......................................................................................................................
Found Capture Files:
s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/datacapture/deepar-store-sales-forecasting-model-quality-monitor/AllTraffic/2024/10/11/05/46-19-223-2c918764-eef0-4f10-85c3-bcf135e30ad6.jsonl
 s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/datacapture/deepar-store-sales-forecasting-model-quality-monitor/AllTraffic/2024/10/11/06/02-06-889-c619a3d2-f326-49f5-8407-75c820bda222.jsonl
 s3://sagemaker-us-east-1-343218227212/sagemaker/deepAR-sales-forecasting-ModelQualityMonitor/datacapture/deepar-store-sales-forecasting-model-quality-monitor/AllTraffic/2024/10/11/06/04-18-529-11a6c492-10b2-43c5-8425-4593b63e8127.jsonl


In [136]:
print("\n".join(capture_file[-3:-1]))

{"captureData":{"endpointInput":{"observedContentType":"application/json","mode":"INPUT","data":"{\"instances\": [{\"start\": \"2017-07-24 00:00:00\", \"target\": [11461.16, 8396.32, 12397.49, 7726.76, 11183.51, 14961.54, 17141.29], \"cat\": [52], \"dynamic_feat\": [[46.21, 47.77, 48.58, 49.05, 49.72, 49.72, 49.72, 50.21, 49.19, 49.6, 49.03, 49.57, 49.57, 49.57], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [72, 65, 171, 48, 0, 57, 55, 58, 46, 181, 31, 50, 37, 36]]}], \"configuration\": {\"output_types\": [\"mean\"]}}","encoding":"JSON"},"endpointOutput":{"observedContentType":"application/json","mode":"OUTPUT","data":"{\"predictions\":[{\"mean\":[11264.3486328125,12250.8837890625,13282.298828125,9759.71484375,11912.3515625,12823.462890625,12982.76171875]}]}","encoding":"JSON"}},"eventMetadata":{"eventId":"3789e70a-6a8d-4dfd-8ab0-80c0b703bb6f","inferenceTime":"2024-10-11T06:04:51Z"},"eventVersion":"0"}
{"captureData":{"endpointInput":{"observedContentType":"application/json","mode":"INP

In [137]:
print(json.dumps(capture_record, indent=2))

{
  "captureData": {
    "endpointInput": {
      "observedContentType": "application/json",
      "mode": "INPUT",
      "data": "{\"instances\": [{\"start\": \"2017-02-27 00:00:00\", \"target\": [2951.04, 5553.76, 16819.46, 12781.88, 13219.13, 12086.53, 5156.63], \"cat\": [0], \"dynamic_feat\": [[54.04, 54.0, 53.82, 52.63, 53.33, 53.33, 53.33, 53.19, 52.68, 49.83, 48.75, 48.05, 48.05, 48.05], [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [52, 53, 216, 52, 56, 55, 41, 39, 41, 223, 28, 53, 31, 23]]}], \"configuration\": {\"output_types\": [\"mean\"]}}",
      "encoding": "JSON"
    },
    "endpointOutput": {
      "observedContentType": "application/json",
      "mode": "OUTPUT",
      "data": "{\"predictions\":[{\"mean\":[7871.1533203125,8301.6552734375,11504.18359375,8579.9873046875,9544.8759765625,8357.306640625,6357.92578125]}]}",
      "encoding": "JSON"
    }
  },
  "eventMetadata": {
    "eventId": "9081ea61-7b8d-44ed-a426-c1e7e1ffeacf",
    "inferenceTime": "2024-10-11T06:04:18Z"

### Generate Ground Truth