In [2]:
# Setup environment: run the shared setup notebook inside this kernel's namespace.
# NOTE(review): later cells use boto3, sagemaker, np, pd and os without importing them,
# so this notebook presumably provides those names — confirm.
%run 0-Environment_Setup.ipynb

[0msagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
Stored 's3_datalake_path_csv' (str)
Stored 'local_data_path_csv' (str)
Stored 's3_datalake_path_parquet' (str)


In [3]:
# Set session variables.
# The session is created first because the region and the default bucket are both
# derived from it; the SageMaker client can only be built once `region` is known.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_session.region_name
role = sagemaker.get_execution_role()
sm_client = boto3.client('sagemaker', region_name=region)
# Use the session created above: `sess` is not assigned in any visible cell of this notebook.
bucket = sagemaker_session.default_bucket()

# S3 key prefixes (relative to `bucket`) for each stage of the workflow.
train_prefix = "store-sales-forecasting/train"
test_prefix = "store-sales-forecasting/test"
val_prefix = "store-sales-forecasting/val"
transform_input_prefix = "store-sales-forecasting/transform-input"
transform_output_prefix = "store-sales-forecasting/transform-output"
transform_results_prefix = "store-sales-forecasting/transform-results"
baseline_prefix = "store-sales-forecasting/baseline"
ground_truth_prefix = "store-sales-forecasting/ground-truth"


In [51]:
# Stage the validation records under the batch-transform input prefix in S3.
sagemaker_session.upload_data(
    path="tmp/validation_data.ndjson",
    bucket=bucket,
    key_prefix=transform_input_prefix,
)

's3://sagemaker-us-east-1-342408968837/store-sales-forecasting/transform-input/validation_data.ndjson'

In [4]:
# S3 locations used by the batch transform job and the model monitor.
# NOTE(review): the output path below ends in "transform-outputs", which is not the same
# key as the `transform_output_prefix` constant ("store-sales-forecasting/transform-output").
prefix = "store-sales-forecasting"

data_capture_prefix = f"{prefix}/datacapture"
reports_prefix = f"{prefix}/reports"

s3_capture_upload_path = f"s3://{bucket}/{data_capture_prefix}"
s3_report_path = f"s3://{bucket}/{reports_prefix}"
transform_output_path = f"s3://{bucket}/{prefix}/transform-outputs"

print(f"Transform Output path: {transform_output_path}")
print(f"Capture path: {s3_capture_upload_path}")
print(f"Report path: {s3_report_path}")

Transform Output path: s3://sagemaker-us-east-1-342408968837/store-sales-forecasting/transform-outputs
Capture path: s3://sagemaker-us-east-1-342408968837/store-sales-forecasting/datacapture
Report path: s3://sagemaker-us-east-1-342408968837/store-sales-forecasting/reports


In [5]:
# Get most recent model info.
# Request newest-first ordering explicitly instead of relying on the API's default sort,
# and fail with a clear message when the account/region has no models at all.
models = sm_client.list_models(
    SortBy="CreationTime",
    SortOrder="Descending",
    MaxResults=1,
)["Models"]
if not models:
    raise RuntimeError("No SageMaker models found; create a model before running batch transform.")

model_name = models[0]["ModelName"]
model_info = sm_client.describe_model(ModelName=model_name)
# Image and artifact location of the model's primary container.
model_container = {
    "Image": model_info['PrimaryContainer']['Image'],
    "ModelDataUrl": model_info['PrimaryContainer']['ModelDataUrl']
}

print(model_name)
print(model_info)

store-sales-forecasting-custom-model-2024-10-15-05-53-46
{'ModelName': 'store-sales-forecasting-custom-model-2024-10-15-05-53-46', 'PrimaryContainer': {'Image': '763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference:2.6-cpu', 'Mode': 'SingleModel', 'ModelDataUrl': 's3://sagemaker-us-east-1-342408968837/store-sales-forecasting/training-output/tensorflow-training-2024-10-15-05-41-38-410/output/model.tar.gz', 'ModelDataSource': {'S3DataSource': {'S3Uri': 's3://sagemaker-us-east-1-342408968837/store-sales-forecasting/training-output/tensorflow-training-2024-10-15-05-41-38-410/output/model.tar.gz', 'S3DataType': 'S3Object', 'CompressionType': 'Gzip'}}, 'Environment': {'SAGEMAKER_TFS_NGINX_LOGLEVEL': 'info'}}, 'ExecutionRoleArn': 'arn:aws:iam::342408968837:role/LabRole', 'CreationTime': datetime.datetime(2024, 10, 15, 5, 53, 47, 276000, tzinfo=tzlocal()), 'ModelArn': 'arn:aws:sagemaker:us-east-1:342408968837:model/store-sales-forecasting-custom-model-2024-10-15-05-53-46', 'Enable

In [8]:
from sagemaker.transformer import Transformer
from sagemaker.inputs import BatchDataCaptureConfig

In [9]:
# Batch transform job for the selected model: one ml.m5.xlarge instance,
# JSON Lines in and out, using the MultiRecord batching strategy.
transformer = Transformer(
    model_name=model_name,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    output_path=transform_output_path,
    accept="application/jsonlines",
    assemble_with="Line",
    strategy="MultiRecord",
)

# Capture the job's data to S3; inference ids are generated because
# model quality monitoring needs them.
capture_config = BatchDataCaptureConfig(
    destination_s3_uri=s3_capture_upload_path,
    generate_inference_id=True,
)

# Run the job over everything under the transform-input prefix, then block until it is done.
transformer.transform(
    data=f"s3://{bucket}/{transform_input_prefix}",
    split_type="Line",
    content_type="application/jsonlines",
    batch_data_capture_config=capture_config,
)

transformer.wait()


INFO:sagemaker:Creating transform job with name: tensorflow-inference-2024-10-16-17-23-53-228


................................[34mINFO:__main__:PYTHON SERVICE: False[0m
[34mINFO:__main__:starting services[0m
[34mINFO:__main__:using default model name: model[0m
[34mINFO:__main__:tensorflow serving model config: [0m
[34mmodel_config_list: {
  config: {
    name: 'model'
    base_path: '/opt/ml/model'
    model_platform: 'tensorflow'
    model_version_policy: {
      specific: {
        versions: 1
      }
    }
  }[0m
[34m}[0m
[34mINFO:__main__:tensorflow version info:[0m
[34m2024-10-16 17:29:15.808730: W external/org_tensorflow/tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.[0m
[34m2024-10-16 17:29:15.808837: W external/org_tensorflow/tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.[0m
[34mTensorFlow ModelServer: 2.6.0-rc2+dev.sha.dca3641[0m
[34mTensorFlow Library: 2.6.0[0m

In [140]:
# Display the S3 location that was passed to the transformer as its output path.
transform_output_path

's3://sagemaker-us-east-1-342408968837/store-sales-forecasting/transform-outputs'

In [141]:
# List every object under the input/ folder of the data-capture S3 location.
!aws s3 ls {s3_capture_upload_path}/input/ --recursive

2024-10-16 01:53:05        118 store-sales-forecasting/datacapture/input/2024/10/16/01/4389b70a-87b7-4649-99eb-29a5278a083e.json
2024-10-16 04:23:40        118 store-sales-forecasting/datacapture/input/2024/10/16/04/5d4eb61e-075a-482c-addd-b4d00c32c660.json
2024-10-16 05:39:07        118 store-sales-forecasting/datacapture/input/2024/10/16/05/c503cfd7-91e9-4460-b13a-7797fc27d28b.json
2024-10-16 06:33:55        118 store-sales-forecasting/datacapture/input/2024/10/16/06/9b3fc865-1c0a-4700-834f-fc8648c33f17.json


In [142]:
# List every object under the output/ folder of the data-capture S3 location.
!aws s3 ls {s3_capture_upload_path}/output/ --recursive

2024-10-16 01:53:05        124 store-sales-forecasting/datacapture/output/2024/10/16/01/c756b496-6ae5-4900-b4c9-322ad22ca01b.json
2024-10-16 04:23:40        124 store-sales-forecasting/datacapture/output/2024/10/16/04/05b1f1f3-54e0-4b8e-83c8-7969328d2ad6.json
2024-10-16 05:39:07        124 store-sales-forecasting/datacapture/output/2024/10/16/05/c3a0beac-94f2-4cae-a16d-01b34798b78b.json
2024-10-16 06:33:55        124 store-sales-forecasting/datacapture/output/2024/10/16/06/2072fd4b-3ed4-47e3-afb0-f7cd60f1763e.json


In [10]:
# Download the objects stored under the transform-outputs prefix into the local tmp/ directory.
sagemaker_session.download_data(
    key_prefix="store-sales-forecasting/transform-outputs",
    bucket=bucket,
    path="tmp/",
)

['tmp/validation_data.ndjson.out']

In [12]:
# Load the validation targets array saved under tmp/.
val_targets_path = os.path.join("tmp", 'val_targets.npy')
val_targets = np.load(val_targets_path)

In [13]:
import json

# Each line of the downloaded transform output is one JSON document; keep only its
# inference id and its predictions.
with open("tmp/validation_data.ndjson.out", "r") as out_file:
    parsed_lines = [json.loads(raw_line.strip()) for raw_line in out_file]

transform_outputs = [
    {
        'SageMakerInferenceId': record['SageMakerInferenceId'],
        'predictions': record['predictions'],
    }
    for record in parsed_lines
]

# The first two output lines are turned into arrays so their shapes can be inspected.
predictions_array1, predictions_array2 = (
    np.array(transform_outputs[idx]['predictions']) for idx in (0, 1)
)
print(predictions_array1.shape)
print(predictions_array2.shape)

(86, 54, 1)
(76, 54, 1)


In [14]:
# Split val targets so that each slice lines up with one transform output batch.
# The second batch holds the records that follow the first batch, so its slice must start
# where the first one ends; slicing from row 0 again would pair the second batch's
# predictions with the first batch's targets.
# NOTE(review): assumes val_targets rows are in the same order as the records in
# validation_data.ndjson and that output lines keep input order — confirm.
first_batch_size = predictions_array1.shape[0]
second_batch_size = predictions_array2.shape[0]

val_targets1 = val_targets[:first_batch_size]
val_targets2 = val_targets[first_batch_size:first_batch_size + second_batch_size]

# Guard against val_targets being shorter than the two batches combined.
assert val_targets1.shape[0] == first_batch_size, "not enough target rows for the first batch"
assert val_targets2.shape[0] == second_batch_size, "not enough target rows for the second batch"

print(val_targets1.shape)
print(val_targets2.shape)

(86, 54, 1)
(76, 54, 1)


In [15]:
# Write one ground-truth file per transform output batch, named after the batch's
# inference id so the monitor can match labels to captured predictions.
ground_truth_dir = "./tmp/ground-truth"
os.makedirs(ground_truth_dir, exist_ok=True)

# Clear ground-truth files left by earlier runs: the whole folder is uploaded afterwards,
# so stale files would otherwise be re-uploaded alongside the new ones.
for stale_name in os.listdir(ground_truth_dir):
    if stale_name.endswith(".jsonl"):
        os.remove(os.path.join(ground_truth_dir, stale_name))

for targets, capture in zip([val_targets1, val_targets2], transform_outputs):
    inference_id = capture['SageMakerInferenceId']
    print(inference_id)
    print(targets.flatten().shape)
    flattened_data = ",".join(map(str, targets.flatten()))

    ground_truth_record = {
        "groundTruthData": {
            # note that the data has to be a comma delimited string
            "data": flattened_data,
            "encoding": "CSV",
        },
        "eventMetadata": {
            "eventId": str(inference_id),
        },
        "eventVersion": "0",
    }
    # The context manager closes (and therefore flushes) the file before the upload step reads it.
    with open(f"./tmp/ground-truth/{inference_id}.jsonl", "w") as ground_truth_file:
        json.dump(ground_truth_record, ground_truth_file)

9eeb46d4-ad00-4612-b83f-f18bf3f74c6b
(4644,)
44c45205-62c6-46fc-b43a-d453c7752554
(4104,)


In [146]:
from datetime import datetime, timezone

# Ground truth is uploaded under an hour-partitioned key (YYYY/MM/DD/HH) based on the current UTC time.
# datetime.now(timezone.utc) replaces the deprecated datetime.utcnow(); the formatted key is the same.
upload_time = datetime.now(timezone.utc)
ground_truth_key_prefix = f"{ground_truth_prefix}/{upload_time:%Y/%m/%d/%H}"
ground_truth_output_path = f"s3://{bucket}/{ground_truth_key_prefix}"
print(ground_truth_output_path)

# Upload every file in the local ground-truth folder under that key prefix.
sagemaker_session.upload_data("tmp/ground-truth/", bucket=bucket, key_prefix=ground_truth_key_prefix)

s3://sagemaker-us-east-1-342408968837/store-sales-forecasting/ground-truth/2024/10/16/06


's3://sagemaker-us-east-1-342408968837/store-sales-forecasting/ground-truth/2024/10/16/06'

In [14]:
from sagemaker.model_monitor import ModelQualityMonitor, MonitoringDatasetFormat, DatasetFormat, BatchTransformInput
from sagemaker.model_monitor import CronExpressionGenerator

In [104]:
# Model quality monitor: jobs run on a single ml.m5.xlarge instance and are
# limited to 1800 seconds of runtime.
model_quality_monitor = ModelQualityMonitor(
    sagemaker_session=sagemaker_session,
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    max_runtime_in_seconds=1800,
)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: .
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [16]:
# Baselining job: reads baseline.csv (with a header row) and treats it as a regression
# problem, comparing the predicted_sales column against actual_sales. Results are written
# under the baseline prefix's results/ folder.
baseline_job = model_quality_monitor.suggest_baseline(
    problem_type="Regression",
    baseline_dataset=f"s3://{bucket}/{baseline_prefix}/baseline.csv",
    dataset_format=DatasetFormat.csv(header=True),
    ground_truth_attribute="actual_sales",
    inference_attribute="predicted_sales",
    output_s3_uri=f"s3://{bucket}/{baseline_prefix}/results/",
)

# Block until the job is done, without streaming its logs.
baseline_job.wait(logs=False)

INFO:sagemaker:Creating processing-job with name baseline-suggestion-job-2024-10-15-23-04-07-046


...........................................................!

In [17]:
# Re-bind baseline_job to the monitor's most recent baselining job and display it.
# NOTE(review): the displayed value is only the object's repr; it does not by itself
# show whether the job finished — confirm completion separately if that matters.
baseline_job = model_quality_monitor.latest_baselining_job
baseline_job

<sagemaker.model_monitor.model_monitoring.BaseliningJob at 0x7fe942677700>

In [18]:
# Display the "regression_metrics" section of the baseline statistics.
baseline_job.baseline_statistics().body_dict["regression_metrics"]

{'mae': {'value': 0.36809090231994046,
  'standard_deviation': 0.00423036689125015},
 'mse': {'value': 0.4658149918758164,
  'standard_deviation': 0.019959703107999428},
 'rmse': {'value': 0.68250640427458,
  'standard_deviation': 0.014535927309843825},
 'r2': {'value': 0.6925847662229317,
  'standard_deviation': 0.006409018712575836}}

In [19]:
# Show the suggested regression constraints as a table with one row per metric.
regression_constraints = baseline_job.suggested_constraints().body_dict["regression_constraints"]
pd.DataFrame(regression_constraints).transpose()

Unnamed: 0,threshold,comparison_operator
mae,0.368091,GreaterThanThreshold
mse,0.465815,GreaterThanThreshold
rmse,0.682506,GreaterThanThreshold
r2,0.692585,LessThanThreshold


In [105]:
# Tell the monitor where the captured batch-transform data lives, that it is JSON,
# and that the "predictions" field holds the model output.
captured_transform_input = BatchTransformInput(
    data_captured_destination_s3_uri=s3_capture_upload_path,
    destination="/opt/ml/processing/input",
    dataset_format=MonitoringDatasetFormat.json(),
    inference_attribute="predictions",
)

# Hourly regression-quality schedule: compares captured predictions against the uploaded
# ground truth, checks them against the baseline constraints, and publishes CloudWatch metrics.
schedule = model_quality_monitor.create_monitoring_schedule(
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    problem_type="Regression",
    batch_transform_input=captured_transform_input,
    ground_truth_input=f"s3://{bucket}/{ground_truth_prefix}",
    constraints=f"s3://{bucket}/{baseline_prefix}/results/constraints.json",
    output_s3_uri=f"s3://{bucket}/{transform_results_prefix}",
    enable_cloudwatch_metrics=True,
)

INFO:sagemaker.model_monitor.model_monitoring:Creating Monitoring Schedule with name: monitoring-schedule-2024-10-16-04-17-45-516


In [59]:
# Display the monitor's current schedule description (name, status, cron expression, ...).
model_quality_monitor.describe_schedule()

{'MonitoringScheduleArn': 'arn:aws:sagemaker:us-east-1:342408968837:monitoring-schedule/monitoring-schedule-2024-10-16-01-55-09-247',
 'MonitoringScheduleName': 'monitoring-schedule-2024-10-16-01-55-09-247',
 'MonitoringScheduleStatus': 'Pending',
 'MonitoringType': 'ModelQuality',
 'CreationTime': datetime.datetime(2024, 10, 16, 1, 55, 9, 826000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 10, 16, 1, 55, 9, 888000, tzinfo=tzlocal()),
 'MonitoringScheduleConfig': {'ScheduleConfig': {'ScheduleExpression': 'cron(0 * ? * * *)'},
  'MonitoringJobDefinitionName': 'model-quality-job-definition-2024-10-16-01-55-09-247',
  'MonitoringType': 'ModelQuality'},
 'ResponseMetadata': {'RequestId': '6bd2be42-e6bb-4076-890c-d83b53528a2b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '6bd2be42-e6bb-4076-890c-d83b53528a2b',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '546',
   'date': 'Wed, 16 Oct 2024 01:55:13 GMT'},
  'RetryAttempts': 0}}

store-sales-forecasting-custom-model-2024-10-15-05-53-46
{'ModelName': 'store-sales-forecasting-custom-model-2024-10-15-05-53-46', 'PrimaryContainer': {'Image': '763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference:2.6-cpu', 'Mode': 'SingleModel', 'ModelDataUrl': 's3://sagemaker-us-east-1-342408968837/store-sales-forecasting/training-output/tensorflow-training-2024-10-15-05-41-38-410/output/model.tar.gz', 'ModelDataSource': {'S3DataSource': {'S3Uri': 's3://sagemaker-us-east-1-342408968837/store-sales-forecasting/training-output/tensorflow-training-2024-10-15-05-41-38-410/output/model.tar.gz', 'S3DataType': 'S3Object', 'CompressionType': 'Gzip'}}, 'Environment': {'SAGEMAKER_TFS_NGINX_LOGLEVEL': 'info'}}, 'ExecutionRoleArn': 'arn:aws:iam::342408968837:role/LabRole', 'CreationTime': datetime.datetime(2024, 10, 15, 5, 53, 47, 276000, tzinfo=tzlocal()), 'ModelArn': 'arn:aws:sagemaker:us-east-1:342408968837:model/store-sales-forecasting-custom-model-2024-10-15-05-53-46', 'Enable

In [23]:
from sagemaker.transformer import Transformer
from sagemaker.inputs import BatchDataCaptureConfig

In [34]:
# from datetime import datetime

# upload_time = datetime.utcnow()
# transform_input_path = f"s3://{bucket}/{transform_input_prefix}/{upload_time:%Y/%m/%d/%H}"
# transform_output_path = f"s3://{bucket}/{transform_output_prefix}/{upload_time:%Y/%m/%d/%H}"
# ground_truth_output_path = f"s3://{bucket}/{ground_truth_prefix}/{upload_time:%Y/%m/%d/%H}"
# print(transform_input_path)
# print(transform_output_path)
# print(ground_truth_output_path)

# sagemaker_session.upload_data("tmp/validation_data.ndjson", bucket=bucket, key_prefix=f"{transform_input_prefix}/{upload_time:%Y/%m/%d/%H}")
# sagemaker_session.upload_data(f"tmp/ground_truth.ndjson", bucket=bucket, key_prefix=f"{ground_truth_prefix}/{upload_time:%Y/%m/%d/%H}")

s3://sagemaker-us-east-1-342408968837/store-sales-forecasting/transform-input/2024/10/16/00
s3://sagemaker-us-east-1-342408968837/store-sales-forecasting/transform-output/2024/10/16/00
s3://sagemaker-us-east-1-342408968837/store-sales-forecasting/ground-truth/2024/10/16/00


's3://sagemaker-us-east-1-342408968837/store-sales-forecasting/ground-truth/2024/10/16/00/ground_truth.ndjson'

In [35]:
# # Configure a transformer for the model
# transformer = Transformer(
#     model_name=model_name,
#     instance_count=1,
#     instance_type="ml.m5.xlarge",
#     strategy="MultiRecord",
#     assemble_with="Line",
#     output_path=transform_output_path,
#     accept="application/jsonlines"
# )

In [None]:
# # Run a batch transform job
# transformer.transform(
#     data=transform_input_path,
#     content_type="application/jsonlines",
#     split_type="Line"
# )

# transformer.wait()

In [29]:
# Fetch the schedule description and display it.
# NOTE(review): despite its name, `execution` holds the whole schedule description here,
# not an execution summary; the polling cell further down rebinds the same name.
execution = model_quality_monitor.describe_schedule()
execution

{'MonitoringScheduleArn': 'arn:aws:sagemaker:us-east-1:342408968837:monitoring-schedule/monitoring-schedule-2024-10-15-23-09-19-671',
 'MonitoringScheduleName': 'monitoring-schedule-2024-10-15-23-09-19-671',
 'MonitoringScheduleStatus': 'Scheduled',
 'MonitoringType': 'ModelQuality',
 'CreationTime': datetime.datetime(2024, 10, 15, 23, 9, 20, 245000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 10, 15, 23, 9, 29, 241000, tzinfo=tzlocal()),
 'MonitoringScheduleConfig': {'ScheduleConfig': {'ScheduleExpression': 'cron(0 * ? * * *)'},
  'MonitoringJobDefinitionName': 'model-quality-job-definition-2024-10-15-23-09-19-671',
  'MonitoringType': 'ModelQuality'},
 'ResponseMetadata': {'RequestId': 'dfca7867-1d92-43af-905e-5b5787c2b42e',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'dfca7867-1d92-43af-905e-5b5787c2b42e',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '548',
   'date': 'Tue, 15 Oct 2024 23:29:24 GMT'},
  'RetryAttempts':

In [32]:
import time

In [42]:
# Wait for the first execution of the monitoring_schedule.
# Polling is bounded and stops early on a terminal schedule status, so this cell cannot
# spin forever when the schedule will never produce an execution.
MAX_WAIT_SECONDS = 2 * 60 * 60  # the schedule is hourly; allow generous slack for the first run
POLL_INTERVAL_SECONDS = 10
wait_deadline = time.monotonic() + MAX_WAIT_SECONDS

print("Waiting for first execution", end="")
while True:
    schedule_description = model_quality_monitor.describe_schedule()
    execution = schedule_description.get("LastMonitoringExecutionSummary")
    if execution:
        break

    # A failed or stopped schedule will not start an execution; surface that immediately.
    schedule_status = schedule_description.get("MonitoringScheduleStatus")
    if schedule_status in ("Failed", "Stopped"):
        print()
        raise RuntimeError(
            f"Monitoring schedule is in status {schedule_status!r}; no execution will start."
        )

    if time.monotonic() >= wait_deadline:
        print()
        raise TimeoutError(
            f"No monitoring execution appeared within {MAX_WAIT_SECONDS} seconds."
        )

    print(".", end="", flush=True)
    time.sleep(POLL_INTERVAL_SECONDS)
print()
print("Execution found!")

Waiting for first execution.................................
Execution found!
