# Enable the endpoint monitor
* model_monitor가 capture하여 저장하는 형식이 jsonl이고, baseline 계산에서도 csv, json 형식만 지원하는 등 use case가 제한적임

In [None]:
from sagemaker.predictor import RealTimePredictor
from sagemaker.predictor import json_serializer, json_deserializer

# Name of the SageMaker endpoint to monitor (placeholder - replace with your own).
endpoint = 'your-endpoint-name'

# Predictor bound to that endpoint; requests and responses are exchanged as JSON.
predictor = RealTimePredictor(
    endpoint=endpoint,
    content_type='application/json',
    serializer=json_serializer,
    deserializer=json_deserializer,
)

In [None]:
from sagemaker.model_monitor import DataCaptureConfig

# S3 destination for the captured endpoint traffic (placeholders - replace with your own).
bucket = 'your-bucket'
data_capture_prefix = 'your-prefix'
s3_capture_upload_path = 's3://{}/{}'.format(bucket, data_capture_prefix)

# Capture configuration: enabled, sampling 100 percent, written to the S3 path above.
data_capture_conf = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,
    destination_s3_uri=s3_capture_upload_path,
)

# Apply the capture configuration to the endpoint behind the predictor.
predictor.update_data_capture_config(data_capture_conf)

In [None]:
import numpy as np
# Load the comma-separated sample file into a NumPy array.
# NOTE(review): the cell below slices column 0 as the label and the remaining
# columns as features - confirm this matches the layout of test_sample.csv.
sample_data = np.loadtxt('../00_Basics/test_sample.csv', delimiter=',')

In [None]:
# Take rows 31..49 of the sample once, then split them into label and features.
sample_rows = sample_data[31:50]
test_label = sample_rows[:, 0]   # first column
test_data = sample_rows[:, 1:]   # every remaining column

# Send the selected rows to the endpoint.
predictions = predictor.predict(test_data)

## To see the capture files

In [None]:
import boto3
s3_client = boto3.Session().client('s3')

# Capture files are looked up under "<data_capture_prefix>/<endpoint>".
current_endpoint_capture_prefix = '{}/{}'.format(data_capture_prefix, endpoint)
result = s3_client.list_objects(Bucket=bucket, Prefix=current_endpoint_capture_prefix)

# The 'Contents' key is absent from the response when no object matches the
# prefix, so fall back to an empty list instead of iterating over None
# (which raised TypeError).
# NOTE: a single list_objects call returns at most 1000 keys.
capture_files = [capture_file.get("Key") for capture_file in result.get('Contents', [])]
print("Found Capture Files:")
print("\n ".join(capture_files))
if not capture_files:
    # Nothing listed yet: tell the user where we looked instead of failing later.
    print("No capture files found under s3://{}/{} yet. "
          "Send some requests to the endpoint, wait a few minutes and re-run this cell."
          .format(bucket, current_endpoint_capture_prefix))

In [None]:
def get_obj_body(obj_key):
    """Return the content of the S3 object ``obj_key`` decoded as UTF-8 text.

    The object is fetched from the notebook-level ``bucket`` using the
    notebook-level ``s3_client``.
    """
    response = s3_client.get_object(Bucket=bucket, Key=obj_key)
    raw_bytes = response.get('Body').read()
    return raw_bytes.decode("utf-8")

# Read the last key in the listing and preview its first 2000 characters.
latest_capture_key = capture_files[-1]
capture_file = get_obj_body(latest_capture_key)
print(capture_file[:2000])

In [None]:
import json
# Pretty-print the first record of the capture file (the text before the first newline).
first_record = capture_file.partition('\n')[0]
print(json.dumps(json.loads(first_record), indent=2))

# Data baseline
* Don't use the code below for now: `model_monitor.suggest_baseline` seems to have a limit on the number of data columns. (This is not confirmed yet, so I'm going to check it by filing a trouble ticket (TT).)

In [None]:
%%time
from sagemaker.model_monitor import DefaultModelMonitor
from sagemaker.model_monitor.dataset_format import DatasetFormat
from sagemaker import get_execution_role

# Bucket used for the baseline results (placeholder - replace with your own).
bucket = 'your-bucket'
# Execution role handed to the monitor.
role = get_execution_role()

# S3 location where the baselining job writes its output.
baseline_results_prefix = 'your-baseline-prefix'
baseline_results_uri = 's3://{}/{}'.format(bucket, baseline_results_prefix)

# Monitor that runs the baselining job on one ml.r5.16xlarge instance.
my_default_monitor = DefaultModelMonitor(role=role, instance_count=1, instance_type='ml.r5.16xlarge')

# The baseline dataset is a header-less CSV with the output column(s) at the start.
baseline_dataset_format = DatasetFormat.csv(header=False, output_columns_position='START')

# Start the baselining job and block until it finishes, streaming its logs.
my_default_monitor.suggest_baseline(
    baseline_dataset='s3://your-bucket/your-prefix/raw_data.csv',
    dataset_format=baseline_dataset_format,
    output_s3_uri=baseline_results_uri,
    wait=True,
    logs=True,
)

In [None]:
import pandas as pd

# Most recent baselining job started by the monitor.
baseline_job = my_default_monitor.latest_baselining_job

# Flatten the per-feature statistics into a DataFrame.
# pd.io.json.json_normalize has been deprecated since pandas 1.0; the supported
# entry point is the top-level pd.json_normalize.
schema_df = pd.json_normalize(baseline_job.baseline_statistics().body_dict["features"])
schema_df

In [None]:
# Flatten the per-feature suggested constraints into a DataFrame.
# pd.io.json.json_normalize has been deprecated since pandas 1.0; the supported
# entry point is the top-level pd.json_normalize.
constraints_df = pd.json_normalize(baseline_job.suggested_constraints().body_dict["features"])
constraints_df

In [None]:
# Download the Model Monitor visualization helper module; wget saves it as
# utils.py in the current working directory, where the next cell imports it.
!wget https://raw.githubusercontent.com/awslabs/amazon-sagemaker-examples/master/sagemaker_model_monitor/visualization/utils.py

In [None]:
import utils as mu
import json
from sagemaker.s3 import S3Downloader

# S3 URI of the statistics file (placeholder - replace with your own).
baseline_stats_path = 's3://your-bucket/your-baseline-prefix/statistics.json'

# Fetch the file content from S3, then parse it as JSON.
baseline_stats_text = S3Downloader.read_file(baseline_stats_path)
baseline_stats = json.loads(baseline_stats_text)

In [None]:
# Both helpers come from the downloaded utils module (imported as `mu`).
# NOTE(review): presumably get_features extracts the per-feature entries from
# the baseline statistics and show_distributions plots them - confirm against utils.py.
feature_baselines = mu.get_features(baseline_stats)
mu.show_distributions(feature_baselines)