In [2]:
import os
import sagemaker
import time
from sagemaker.model_monitor import CronExpressionGenerator

%load_ext dotenv
%dotenv

# Deployment-specific values come from environment variables; the endpoint
# name is fixed for this notebook.
bucket_name = os.environ["BUCKET"]
endpoint = "football-endpoint"
role_name = os.environ["ROLE_NAME"]
account_id = os.environ["ACCOUNT_ID"]

# IAM role ARN assembled from the account id and role name read above.
role_arn = f"arn:aws:iam::{account_id}:role/{role_name}"

# S3 layout: everything lives under the "football" prefix of the bucket, with
# ground-truth labels and model-quality artifacts under "monitoring/".
S3_LOCATION = f"s3://{bucket_name}/football"
GROUND_TRUTH_LOCATION = f"{S3_LOCATION}/monitoring/groundtruth"
MODEL_QUALITY_LOCATION = f"{S3_LOCATION}/monitoring/model-quality"

# Session object shared by the SageMaker calls made later in the notebook.
sagemaker_session = sagemaker.session.Session()

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [None]:
from sagemaker.serializers import CSVSerializer
from sagemaker.predictor import Predictor

# Client bound to the deployed endpoint; request payloads are serialized as CSV.
csv_serializer = CSVSerializer()
predictor = Predictor(
    endpoint_name=endpoint,
    sagemaker_session=sagemaker_session,
    serializer=csv_serializer,
)

In [None]:
import pandas as pd

# Load the dataset whose rows are replayed against the endpoint as fake traffic.
df = pd.read_csv("football.csv")

def generate_fake_traffic(data):
    """Replay every row of ``data`` against the endpoint as a CSV request.

    Each row is flattened into a single comma-separated line and sent through
    the module-level ``predictor``. The row's index label is sent as the
    inference id.

    Args:
        data: pandas DataFrame; one request is issued per row, in row order.
    """
    for row_label, record in data.iterrows():
        csv_line = ",".join(map(str, record.to_list()))
        request_headers = {"ContentType": "text/csv", "Accept": "text/csv"}
        # The inference id is what later lets a ground-truth label be matched
        # to this specific prediction.
        predictor.predict(
            csv_line,
            initial_args=request_headers,
            inference_id=str(row_label),
        )

# Send one prediction request per row of the loaded dataset.
generate_fake_traffic(df)

In [None]:
import boto3
from pathlib import Path

# File name of the preprocessing script; it is read from the current working
# directory and uploaded under the same name.
DATA_QUALITY_PREPROCESSOR = "data_quality_preprocessor.py"

bucket = boto3.Session().resource("s3").Bucket(bucket_name)

prefix = Path("football/monitoring")

# S3 object keys always use forward slashes, so build the key with as_posix()
# instead of relying on the platform's path separator.
preprocessor_key = (prefix / DATA_QUALITY_PREPROCESSOR).as_posix()

# DATA_QUALITY_PREPROCESSOR is a plain string (not a Path), so it is passed to
# upload_file() directly as the local file name.
bucket.Object(preprocessor_key).upload_file(DATA_QUALITY_PREPROCESSOR)

# Full S3 URI of the uploaded script, displayed as the cell output.
data_quality_preprocessor = f"s3://{bucket.name}/{preprocessor_key}"
data_quality_preprocessor

In [None]:
from sagemaker.model_monitor import DefaultModelMonitor

# Monitor configuration: a single ml.m5.xlarge instance with a 20 GB volume
# and a 1800-second runtime limit, running under `role_arn`.
data_monitor = DefaultModelMonitor(
    role=role_arn,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
)

In [None]:
# Create an hourly monitoring schedule for the endpoint. Results are written
# under MODEL_QUALITY_LOCATION and CloudWatch metrics are enabled.
# NOTE(review): `problem_type` and `ground_truth_input` look like arguments of
# `ModelQualityMonitor.create_monitoring_schedule`, but `data_monitor` is
# built as a `DefaultModelMonitor` in this notebook — confirm the monitor
# class accepts them; otherwise this call fails with a TypeError.
data_monitor.create_monitoring_schedule(
    monitor_schedule_name="football-model-monitoring-schedule",
    endpoint_input=endpoint,
    problem_type="BinaryClassification",
    ground_truth_input=GROUND_TRUTH_LOCATION,
    constraints=f"{MODEL_QUALITY_LOCATION}/constraints.json",
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    output_s3_uri=MODEL_QUALITY_LOCATION,
    enable_cloudwatch_metrics=True,
)

# Pause for ten seconds before explicitly starting the schedule.
# NOTE(review): presumably this gives the service time to register the new
# schedule — confirm the delay (and the explicit start) are actually needed.
time.sleep(10)
data_monitor.start_monitoring_schedule()

In [None]:
from sagemaker.s3 import S3Downloader


def check_execution(monitoring_schedule):
    """Check the execution of the Monitoring Job.

    Looks at the last entry returned by ``list_executions()`` and prints its
    processing job status. If that job completed, it also prints the full
    execution description and the constraint violations report.

    Args:
        monitoring_schedule: Monitor object exposing ``list_executions()`` and
            ``latest_monitoring_constraint_violations()``.

    Returns:
        None. All results are printed. Nothing is printed when there are no
        executions yet.
    """
    # Imported locally so this function works on a fresh kernel: the
    # notebook's only other `import json` lives in a later cell.
    import json

    executions = monitoring_schedule.list_executions()
    if not executions:
        return

    execution = executions[-1].describe()
    status = execution["ProcessingJobStatus"]
    print(f"Processing Job Status: {status}")

    if status != "Completed":
        return

    print(
        f"Last Modified Time: {execution['LastModifiedTime']}",
        end="\n\n",
    )
    print("Execution:")
    # default=str makes non-JSON values (e.g. datetimes) printable.
    print(json.dumps(execution, default=str, indent=2), end="\n\n")

    latest_monitoring_violations = (
        monitoring_schedule.latest_monitoring_constraint_violations()
    )

    print(f'latest_monitoring_violations: {latest_monitoring_violations}')

    # Guard against a missing report instead of failing with an
    # AttributeError on `.file_s3_uri`.
    if latest_monitoring_violations is None:
        print("Violations: no constraint violations report is available.")
        return

    response = json.loads(
        S3Downloader.read_file(latest_monitoring_violations.file_s3_uri),
    )
    print("Violations:")
    print(json.dumps(response, indent=2))
            
# Report the status (and any violations) of the schedule's last listed execution.
check_execution(data_monitor)

In [None]:
# Best-effort cleanup: if deleting the schedule fails, show the error and
# carry on rather than aborting the notebook.
try:
    data_monitor.delete_monitoring_schedule()
except Exception as error:
    print(error)

In [None]:
import json
import random
from datetime import datetime
from sagemaker.s3 import S3Uploader
import pandas as pd

# Ground-truth records are matched to captured predictions through
# `eventId`, which must equal the `inference_id` sent with each request.
# Those ids are the row indices of "football.csv" (the file used to generate
# the traffic), so the same file is read here to produce one record per row.
df = pd.read_csv("football.csv")

records = []
for inference_id in range(len(df)):
    # A private generator seeded with the id keeps each fake label
    # reproducible without reseeding the global `random` module.
    label_rng = random.Random(inference_id)

    records.append(
        json.dumps(
            {
                "groundTruthData": {
                    # Randomly chosen placeholder label, not a real outcome.
                    "data": label_rng.choice(["home_win", "home_not_win"]),
                    "encoding": "CSV",
                },
                "eventMetadata": {
                    "eventId": str(inference_id),
                },
                "eventVersion": "0",
            },
        ),
    )

# JSON Lines payload: one ground-truth record per line.
groundtruth_payload = "\n".join(records)

# The object is stored under a year/month/day/hour prefix derived from the
# current UTC time; the file name is the minute and second.
upload_time = datetime.utcnow()
uri = f"{GROUND_TRUTH_LOCATION}/{upload_time:%Y/%m/%d/%H/%M%S}.jsonl"
S3Uploader.upload_string_as_file_body(groundtruth_payload, uri)