In [None]:
import sagemaker
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import CSVDeserializer
from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.s3 import S3Downloader
import json
import jsonlines
import utils
from importlib import reload

# Re-execute the local `utils` module so that edits made to it after it was
# first imported in this kernel take effect without restarting the kernel.
reload(utils)

In [None]:
# --- Resource identifiers used throughout the notebook ---------------------
endpoint_name = "index-predictor-endpoint"
feature_group_name = "index-predictor-feature-group-v7"
bucket_name = "team1-index-predictor-bucket"

# Value matched against the `version` column when querying the feature group.
data_version = "2024-06-26-09-33"

# --- Data-capture location inside the bucket --------------------------------
# The prefix is passed to utils.get_file_list further down; the full S3 URL is
# derived from the bucket and prefix above.
data_capture_prefix = "data-capture"
data_capture_s3_url = f"s3://{bucket_name}/{data_capture_prefix}"

# --- Shared SageMaker session -----------------------------------------------
# Created with default settings and handed to the SageMaker SDK objects below.
sm_session = sagemaker.Session()

In [None]:
# Client-side handle for the endpoint named by `endpoint_name`; request
# payloads are serialized as CSV and responses are parsed as CSV.
# The notebook's shared session is passed explicitly so this object uses the
# same session as the FeatureGroup below instead of implicitly creating its
# own default one.
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sm_session,
    serializer=CSVSerializer(),
    deserializer=CSVDeserializer(),
)

### Downloading data from the Feature Store

In [None]:
feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=sm_session)

query = feature_group.athena_query()

# Query the Athena table that backs the feature group's offline store. That
# table has its own name, exposed as `query.table_name`; it is not the feature
# group name. The feature group name used here also contains hyphens, which
# are not valid in an unquoted Athena identifier, so the table name is quoted.
query_string = (
    f'SELECT * FROM "{query.table_name}" '
    f"WHERE version = '{data_version}'"
)

query.run(
    query_string=query_string,
    output_location=f"s3://{bucket_name}/model_monitor/data/",
)

# Block until the Athena query has finished before reading its result.
query.wait()

df = query.as_dataframe()

# Split the result by the `type` column; copies keep the splits independent of
# `df` when columns are dropped later.
train_df = df[df["type"] == "train"].copy()
validation_df = df[df["type"] == "validation"].copy()
test_df = df[df["type"] == "test"].copy()

# Fail early with an actionable message instead of an opaque sampling error
# when the query returned no test rows (e.g. wrong `data_version`).
if test_df.empty:
    raise ValueError(
        f"No rows with type == 'test' found for version '{data_version}' "
        f"in feature group '{feature_group_name}'."
    )

# Reproducible sample of up to 100 test rows. `sample` already returns a new
# DataFrame, so no extra copy is needed; the size is capped so that a test
# split with fewer than 100 rows does not raise.
selected_test_df = test_df.sample(n=min(100, len(test_df)), random_state=1)

In [None]:
# Columns that are not model inputs: the `type` / `version` labels used for
# filtering and splitting above, plus `write_time`, `api_invocation_time` and
# `is_deleted` (presumably Feature Store bookkeeping columns; confirm against
# the feature group definition).
columns_to_drop = ["type", "version", "write_time", "api_invocation_time", "is_deleted"]

# Strip those columns from every frame in one pass. Each name is rebound to a
# new DataFrame rather than mutated in place, and errors="ignore" skips columns
# that are already absent, so re-running this cell is a no-op instead of
# raising KeyError.
df, train_df, validation_df, test_df, selected_test_df = (
    frame.drop(columns=columns_to_drop, errors="ignore")
    for frame in (df, train_df, validation_df, test_df, selected_test_df)
)

In [None]:
# Hand the predictor and the sampled test rows to the local traffic helper.
# NOTE(review): presumably this invokes the endpoint once per row (or batch)
# so that data capture has requests to record; confirm in
# utils.generate_endpoint_traffic.
utils.generate_endpoint_traffic(predictor, selected_test_df)

In [None]:
# Collect the entries found under the data-capture prefix of the bucket.
# NOTE(review): the next cell treats each entry as an S3 object key and takes
# the last one; the return type and ordering of utils.get_file_list are not
# visible here, so confirm both.
capture_files = utils.get_file_list(bucket_name, data_capture_prefix)

In [None]:
# The listing can be empty if no capture file has been written to S3 yet;
# raise a clear error rather than a bare IndexError from `capture_files[-1]`.
if not capture_files:
    raise RuntimeError(
        f"No capture files found under s3://{bucket_name}/{data_capture_prefix}. "
        "Captured traffic may not have been written to S3 yet; "
        "wait a moment and re-run the listing cell."
    )

# NOTE(review): assumes utils.get_file_list returns keys ordered so that the
# last entry is the most recent capture file; confirm.
file_key = capture_files[-1]

# Download the object into a local scratch directory, using the notebook's
# shared session like the other SageMaker calls.
local_dir = "./tmp"
S3Downloader.download(
    f"s3://{bucket_name}/{file_key}",
    local_dir,
    sagemaker_session=sm_session,
)

# The downloaded file keeps the object's base name (last path segment of the key).
local_path = f"{local_dir}/{file_key.rsplit('/', 1)[-1]}"

print("Content of the capture file:")
with jsonlines.open(local_path) as reader:
    # Only the first record of the JSON Lines file is read and pretty-printed.
    print(json.dumps(reader.read(), indent=2))