# Create events

In [7]:
import json
import random
import time
from datetime import datetime, timedelta
from threading import Thread
from uuid import uuid4

import pandas as pd
import sagemaker

### Useful objects and variables

In [3]:
session = sagemaker.Session()
sm_client = session.sagemaker_client
role = sagemaker.get_execution_role()
bucket = session.default_bucket()
region = session.boto_region_name

In [14]:
prefix = "nlp-model-monitor-demo"
model_package_group_name = prefix

In [15]:
model_monitor_uri = f"s3://{bucket}/{prefix}/model_monitor"

In [4]:
%store -r endpoint_name

## Predictor for the endpoint

In [5]:
predictor = sagemaker.predictor.Predictor(
    endpoint_name=endpoint_name,
    serializer=sagemaker.serializers.JSONSerializer(),
    deserializer=sagemaker.deserializers.JSONDeserializer(),
)

In [6]:
predictor.predict({"inputs": "another short sentence"}, inference_id="test_id")

{'output': 3}

## Create invocations

In [8]:
df_test = pd.read_csv(
    "ag_news_csv/test.csv",
    header=None,
    names=["label", "title", "text"],
)

In [22]:
def gen_event(skewed: bool = False):
    line = df_test.sample(n=1)
    data = dict(inputs=line.text.values)
    inference_id = uuid4().hex
    gt = line.label.values[0] - 1
    if skewed:
        gt = random.randint(0, 3)
    gt_payload = {
        "groundTruthData": {
            "data": f"{gt}",
            "encoding": "CSV",
        },
        "eventMetadata": {
            "eventId": f"{inference_id}",
        },
        "eventVersion": "0",
    }

    return {"data": data, "inference_id": inference_id}, gt_payload


def upload_gt(base_path_uri, gt_payload, delay_mins: int = 10):
    gt_time = datetime.utcnow() + timedelta(minutes=delay_mins)
    target_s3_uri = f"{base_path_uri}/{gt_time:%Y/%m/%d/%H/%M%S}.jsonl"
    return sagemaker.s3.S3Uploader.upload_string_as_file_body(json.dumps(gt_payload), target_s3_uri)


def simulate_event(skewed: bool = False):
    endpoint_payload, gt_payload = gen_event(skewed)
    pred = predictor.predict(**endpoint_payload)
    base_path_uri = f"{model_monitor_uri}/ground_truth"
    gt_uri = upload_gt(base_path_uri, gt_payload)
    return pred, gt_uri


def generate_fake_events(testing=False):
    while not stop_flag:
        simulate_event(skew)
        time.sleep(sleeping_time)
        if testing:
            break

In [17]:
# simulate_event()
gen_event(skewed=True)

({'data': {'inputs': array(['LONDON, November 30 (newratings.com) - Pinault-Printemps Redoute SA (PPX.FSE) plans to sell its controlling stake in the electrical parts distributor, Rexel (RXL), to a group of private firms for 1.92 billion (\\$2.55 billion).'],
         dtype=object)},
  'inference_id': '8e97b4e9aef04fe2add59ed38434d6f5'},
 {'groundTruthData': {'data': '1', 'encoding': 'CSV'},
  'eventMetadata': {'eventId': '8e97b4e9aef04fe2add59ed38434d6f5'},
  'eventVersion': '0'})

In [18]:
stop_flag = False
skew = False
sleeping_time = 0.1

In [23]:
threads = [Thread(target=generate_fake_events) for _ in range(4)]
[thread.start() for thread in threads]

[None, None, None, None]

In [24]:
[thread.is_alive() for thread in threads]

[True, True, True, True]

In [37]:
stop_flag = True