In [15]:
from sagemaker.predictor import Predictor
import sagemaker
import json
import numpy as np
import pandas as pd
import os

%load_ext dotenv
%dotenv

# Deployment settings are read from the environment (presumably populated by
# the %dotenv magic above); a missing BUCKET or ROLE raises KeyError here.
BUCKET = os.environ["BUCKET"]
role = os.environ["ROLE"]
# Name of the endpoint targeted by every Predictor built in this notebook.
ENDPOINT = "football-endpoint"
# All monitoring artifacts share one S3 prefix; the three locations below are
# sub-prefixes of it.
S3_LOCATION = f"s3://{BUCKET}/football"
GROUND_TRUTH_LOCATION = f"{S3_LOCATION}/monitoring/groundtruth"
DATA_QUALITY_LOCATION = f"{S3_LOCATION}/monitoring/data-quality"
MODEL_QUALITY_LOCATION = f"{S3_LOCATION}/monitoring/model-quality"

In [16]:
# Three header-less CSV rows of feature values, sent as the request body by the
# CSV smoke-test cell below.
# NOTE(review): the literal starts with a newline and ends with a blank line;
# confirm the endpoint's CSV parsing tolerates empty lines.
payload = """
86,86,86,86,86,86,83,83,83,88,74,76,76,76,76,76,76,79,68,70,81,79,1.72,2.24,1.7,0.76,49,80,3.2707983786677315,1.3128173828363914,84.27,75.73,22.92,40.63,7.048,5.128,47.796,52.362,82.83,75.5,76.0,86.0,84.27272727272727,75.72727272727273,7,0,11.9816,31.614503816793892,24.044585987261147,7.569917829532745
79,79,79,79,79,79,81,86,88,85,82,76,76,76,76,76,76,74,75,72,76,75,1.52,0.82,0.92,1.92,68,17,4.144031675155845,3.887158709814261,81.45,75.27,50.07,3.55,7.592,4.772,43.5,52.919,83.5,74.67,76.0,79.0,81.45454545454545,75.27272727272727,3,0,6.98464,34.50413223140496,43.41279069767442,-8.908658466269458
77,77,77,77,77,77,76,75,75,79,77,80,80,80,80,80,80,78,80,86,73,88,2.15,2.24,1.63,1.67,46,65,2.6942883775913384,1.008610065655301,76.73,80.45,42.34,27.71,5.379,7.424,50.032,45.678,76.5,80.83,80.0,77.0,76.72727272727273,80.45454545454545,0,2,8.767769999999999,25.081967213114755,25.74203821656051,-0.6600710034457542

"""

In [17]:
# predictor = Predictor(endpoint_name=ENDPOINT)
# 
# response = predictor.predict(payload, initial_args={"ContentType": "text/csv"})
# scores = response.decode("utf-8").strip().split('\n')
# scores = [float(score) for score in scores]
# 
# print(json.dumps(scores, indent=2))

In [18]:
from sagemaker.serializers import CSVSerializer

# Session object reused by every Predictor and by the model monitor further down.
sagemaker_session = sagemaker.session.Session()

# CSV predictor built without a deserializer argument; the code below decodes
# the response itself.
predictor = Predictor(
    endpoint_name=ENDPOINT,
    serializer=CSVSerializer(),
    sagemaker_session=sagemaker_session,
)

print(f"Payload:\n{payload}")

try:

    # The response is decoded as UTF-8 and parsed as JSON before pretty-printing.
    response = predictor.predict(payload, initial_args={"ContentType": "text/csv"})
    response = json.loads(response.decode("utf-8"))
    print(json.dumps(response, indent=2))
except Exception as e:
    # Best effort: report the failure and let the notebook keep running.
    print(e)

In [37]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer

# Feature values for one fixture: the 22 per-player ratings first, then the
# engineered team-level features.
_match_features = {
    "player_rating_home_player_1": 89,
    "player_rating_home_player_2": 79,
    "player_rating_home_player_3": 59,
    "player_rating_home_player_4": 69,
    "player_rating_home_player_5": 69,
    "player_rating_home_player_6": 79,
    "player_rating_home_player_7": 69,
    "player_rating_home_player_8": 79,
    "player_rating_home_player_9": 69,
    "player_rating_home_player_10": 89,
    "player_rating_home_player_11": 89,
    "player_rating_away_player_1": 79,
    "player_rating_away_player_2": 79,
    "player_rating_away_player_3": 79,
    "player_rating_away_player_4": 79,
    "player_rating_away_player_5": 79,
    "player_rating_away_player_6": 79,
    "player_rating_away_player_7": 80,
    "player_rating_away_player_8": 80,
    "player_rating_away_player_9": 71,
    "player_rating_away_player_10": 83,
    "player_rating_away_player_11": 80,
    "ewm_home_team_goals": 5.54,
    "ewm_away_team_goals": 0.61,
    "ewm_home_team_goals_conceded": 0.26,
    "ewm_away_team_goals_conceded": 4.76,
    "points_home": 30,
    "points_away": 15,
    "home_weighted_wins": 5.377149515625,
    "away_weighted_wins": 2.5561203576634663,
    "avg_home_team_rating": 84.18,
    "avg_away_team_rating": 70.91,
    "home_streak_wins": 11.75,
    "away_streak_wins": 5.58,
    "ewm_shoton_home": 3.55,
    "ewm_shoton_away": 1.805,
    "ewm_possession_home": 53.639,
    "ewm_possession_away": 20.03,
    "avg_home_rating_attack": 71.33,
    "avg_away_rating_attack": 78.83,
    "avg_away_rating_defence": 79.0,
    "avg_home_rating_defence": 71.0,
    "average_rating_home": 89.18181818181819,
    "average_rating_away": 78.9090909090909,
    "num_top_players_home": 0,
    "num_top_players_away": 0,
    "ewm_home_team_goals_conceded_x_ewm_shoton_home": 4.473,
    "attacking_strength_home": 80.233606557377048,
    "attacking_strength_away": 31.40637450199203,
    "attacking_strength_diff": -2.172767944614982,
}

# The request body holds two identical records, each as its own dict object.
sample = [dict(_match_features) for _ in range(2)]

# Rebinds the notebook-global `predictor` to a JSON-in / JSON-out client for
# the same endpoint.
predictor = Predictor(
    endpoint_name=ENDPOINT,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
    sagemaker_session=sagemaker_session,
)

try:
    # `sample` is serialized by JSONSerializer; the response is printed as
    # returned by JSONDeserializer.
    response = predictor.predict(sample)
    print(response)
except Exception as e:
    # Best effort: report the failure and let the notebook keep running.
    print(e)

In [2]:
from sagemaker.base_serializers import CSVSerializer
from sagemaker.base_deserializers import CSVDeserializer, JSONDeserializer
from sagemaker import Predictor
import pandas as pd

# Local CSV locations come from the environment; a missing variable raises
# KeyError. Presumably X holds the features and Y the labels - confirm.
df_local_path = str(os.environ['DATA_FILEPATH_X'])
y_local_path = str(os.environ['DATA_FILEPATH_Y'])

def read_last_few_lines(csv_path, n_rows=10, chunk_size=2000):
    """Return the last ``n_rows`` data rows of a CSV file, reading it in chunks.

    The file is streamed ``chunk_size`` rows at a time so it never has to be
    held in memory as a whole. A rolling tail is carried across chunks, so the
    result still has ``n_rows`` rows when the final chunk happens to be shorter
    than that (previously only the final chunk was considered).

    Args:
        csv_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.
        n_rows: Number of trailing rows to return. Defaults to 10.
        chunk_size: Number of rows read per chunk. Defaults to 2000.

    Returns:
        A DataFrame with at most ``n_rows`` rows that keeps the row labels
        assigned by ``pd.read_csv``. An empty DataFrame is returned when the
        reader yields no chunk at all.
    """
    tail = None

    for chunk in pd.read_csv(csv_path, chunksize=chunk_size):
        if tail is None or len(chunk) >= n_rows:
            # The chunk alone is enough (or there is nothing older to keep).
            tail = chunk.tail(n_rows)
        else:
            # Short chunk: top it up with rows remembered from earlier chunks.
            tail = pd.concat([tail, chunk]).tail(n_rows)

    if tail is None:
        return pd.DataFrame()
    return tail

# `df` is replayed against the endpoint further down; in the cells visible
# here `y` is only displayed.
df = read_last_few_lines(df_local_path)
y = read_last_few_lines(y_local_path)

In [3]:
# Display the rows that generate_fake_traffic sends to the endpoint.
df

In [4]:
# Display the rows read from the DATA_FILEPATH_Y file.
y

In [None]:
# Rebinds the notebook-global `predictor` to a CSV-in / CSV-out client.
# generate_fake_traffic and the monitoring schedule below both read this global.
predictor = Predictor(
    endpoint_name=ENDPOINT,
    serializer=CSVSerializer(),
    deserializer=CSVDeserializer(),
    sagemaker_session=sagemaker_session,
)

def generate_fake_traffic(data):
    """Send each row of ``data`` to the endpoint as a single CSV request.

    Uses the notebook-global ``predictor``. Every request is tagged with the
    row's index label so it can be paired with a ground-truth record later.
    """
    for row_label, features in data.copy().iterrows():
        print(f'index: {row_label}')
        csv_line = ",".join(map(str, features.to_list()))
        request_args = {"ContentType": "text/csv", "Accept": "text/csv"}
        # The inference id is what a ground-truth label is matched against.
        predictor.predict(
            csv_line,
            initial_args=request_args,
            inference_id=str(row_label),
        )

# Replay the rows loaded above; each request carries its row label as inference id.
generate_fake_traffic(df)

In [None]:
from sagemaker.s3 import S3Downloader

try:
    response = json.loads(
        S3Downloader.read_file(f"{DATA_QUALITY_LOCATION}/statistics.json"),
    )
    # Only the entry at position 49 of the "features" list is shown.
    print(json.dumps(response["features"][49], indent=2))
except Exception as e:
    # Best-effort preview: keep the notebook running, but say why nothing was
    # shown (missing file, bad JSON, fewer than 50 features, ...).
    print(f"Could not display data-quality statistics: {e!r}")

In [None]:
try:
    response = json.loads(
        S3Downloader.read_file(f"{DATA_QUALITY_LOCATION}/constraints.json"),
    )
    print(json.dumps(response, indent=2))
except Exception as e:
    # Best-effort preview: keep the notebook running, but say why nothing was shown.
    print(f"Could not display data-quality constraints: {e!r}")

In [None]:
try:
    response = json.loads(
        S3Downloader.read_file(f"{MODEL_QUALITY_LOCATION}/constraints.json"),
    )
    print(json.dumps(response, indent=2))
except Exception as e:
    # Best-effort preview: keep the notebook running, but say why nothing was shown.
    print(f"Could not display model-quality constraints: {e!r}")

In [None]:
import random
from datetime import datetime, timezone
from sagemaker.s3 import S3Uploader

# Build one ground-truth record per request sent by generate_fake_traffic.
# That function tags each request with `str(index)` taken from `df`'s own
# index, so the event ids must be drawn from `df.index` as well; counting
# 0..len(df)-1 only matches when the frame happens to be labelled that way.
# NOTE(review): the labels are random placeholders, not values taken from `y`.
records = []
for inference_id in df.index:
    # Seeding per id keeps the fake label for a given id stable across re-runs.
    random.seed(inference_id)

    records.append(
        json.dumps(
            {
                "groundTruthData": {
                    "data": random.choice(["home_win", "home_not_win"]),
                    "encoding": "CSV",
                },
                "eventMetadata": {
                    "eventId": str(inference_id),
                },
                "eventVersion": "0",
            },
        ),
    )

groundtruth_payload = "\n".join(records)
# Timezone-aware replacement for the deprecated datetime.utcnow(); the
# formatted path is the same.
upload_time = datetime.now(timezone.utc)
uri = f"{GROUND_TRUTH_LOCATION}/{upload_time:%Y/%m/%d/%H/%M%S}.jsonl"
S3Uploader.upload_string_as_file_body(groundtruth_payload, uri)

In [None]:
from sagemaker.model_monitor import ModelQualityMonitor

# Model-quality monitor used by the schedule, check and delete cells below.
# Each monitoring job gets one ml.c5.4xlarge instance, a 20 GB volume and a
# 1800-second runtime cap, and runs under the role read from the environment.
model_monitor = ModelQualityMonitor(
    instance_type="ml.c5.4xlarge",
    instance_count=1,
    max_runtime_in_seconds=1800,
    volume_size_in_gb=20,
    role=role,
    sagemaker_session=sagemaker_session
)

In [None]:
import time
from sagemaker.model_monitor import CronExpressionGenerator, EndpointInput

# Create an hourly model-quality schedule for the endpoint behind `predictor`,
# reading labels from GROUND_TRUTH_LOCATION and writing reports to
# MODEL_QUALITY_LOCATION.
# NOTE(review): problem_type is "MulticlassClassification" while the
# ground-truth cell only emits two label values ("home_win" / "home_not_win");
# confirm this is intended.
# NOTE(review): inference_attribute="0" presumably selects the first field of
# the captured prediction - confirm against the endpoint's output format.
model_monitor.create_monitoring_schedule(
    monitor_schedule_name="football-model-monitoring-schedule",
    endpoint_input=EndpointInput(
        endpoint_name=predictor.endpoint_name,
        inference_attribute="0",
        destination="/opt/ml/processing/input_data",
    ),
    problem_type="MulticlassClassification",
    ground_truth_input=GROUND_TRUTH_LOCATION,
    constraints=f"{MODEL_QUALITY_LOCATION}/constraints.json",
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    output_s3_uri=MODEL_QUALITY_LOCATION,
    enable_cloudwatch_metrics=True,
)

# Fixed pause before the explicit start call; presumably gives the schedule
# time to finish being created - confirm whether it is still needed.
time.sleep(10)
model_monitor.start_monitoring_schedule()

In [None]:
def check_execution(monitoring_schedule):
    """Print the status of the latest monitoring execution and its violations.

    The last entry returned by ``list_executions()`` is described and its
    ``ProcessingJobStatus`` printed. When that status is ``"Completed"`` the
    full execution description is printed, followed by the contents of the
    constraint-violations file if one is available.

    Args:
        monitoring_schedule: Object exposing ``list_executions()`` and
            ``latest_monitoring_constraint_violations()`` (the notebook passes
            its ``model_monitor``).

    Returns:
        None. Everything is reported through ``print``.
    """
    executions = monitoring_schedule.list_executions()
    if not executions:
        # Say so explicitly; a silent cell is indistinguishable from a hang.
        print("No monitoring executions found yet.")
        return

    execution = executions[-1].describe()
    status = execution["ProcessingJobStatus"]
    print(f"Processing Job Status: {status}")
    if status != "Completed":
        return

    # print(f"Exit Message: \"{execution['ExitMessage']}\"")
    print(
        f"Last Modified Time: {execution['LastModifiedTime']}",
        end="\n\n",
    )
    print("Execution:")
    print(json.dumps(execution, default=str, indent=2), end="\n\n")

    latest_monitoring_violations = (
        monitoring_schedule.latest_monitoring_constraint_violations()
    )
    print(f'latest_monitoring_violations: {latest_monitoring_violations}')

    if latest_monitoring_violations is None:
        # No violations object to read from: avoid dereferencing None below.
        print("No constraint violations report is available for this execution.")
        return

    response = json.loads(
        S3Downloader.read_file(latest_monitoring_violations.file_s3_uri),
    )
    print("Violations:")
    print(json.dumps(response, indent=2))


In [None]:
# Re-run this cell to poll the most recent monitoring execution.
check_execution(model_monitor)

In [None]:
# Clean-up: remove the monitoring schedule attached to `model_monitor`.
try:
    model_monitor.delete_monitoring_schedule()
except Exception as e:
    # Best effort: report the failure and let the notebook keep running.
    print(e)
    
# # Define the monitoring schedule name
# monitoring_schedule_name = "football-model-monitoring-schedule"
# 
# # Delete the monitoring schedule
# sagemaker_session.delete_monitoring_schedule(monitoring_schedule_name)