# リアルタイム推論による RRCF の学習と推論

In [3]:
from typing import Final
import os
import sagemaker
from sagemaker.sklearn import SKLearnModel
import boto3
import json
from time import sleep
import glob
import pandas as pd
import io
from matplotlib import pyplot as plt

smr_client:Final = boto3.client('sagemaker-runtime')
sm_client:Final = boto3.client('sagemaker')
s3_client:Final = boto3.client('s3')
endpoint_inservice_waiter:Final = sm_client.get_waiter('endpoint_in_service')
role: Final[str] = sagemaker.get_execution_role()
region: Final[str] = sagemaker.Session().boto_region_name
sess = sagemaker.session.Session()
bucket = sess.default_bucket()

## データ準備
* 5_xxx.ipynb のベンチマークデータを使う
* ファイルリストを取得する

In [4]:
file_name_list = sorted(glob.glob('./bench_data/**/*.csv'))
print(*file_name_list)

./bench_data/00000/data.csv ./bench_data/00001/data.csv ./bench_data/00002/data.csv ./bench_data/00003/data.csv ./bench_data/00004/data.csv ./bench_data/00005/data.csv ./bench_data/00006/data.csv ./bench_data/00007/data.csv ./bench_data/00008/data.csv ./bench_data/00009/data.csv ./bench_data/00010/data.csv ./bench_data/00011/data.csv ./bench_data/00012/data.csv ./bench_data/00013/data.csv ./bench_data/00014/data.csv ./bench_data/00015/data.csv ./bench_data/00016/data.csv ./bench_data/00017/data.csv ./bench_data/00018/data.csv ./bench_data/00019/data.csv ./bench_data/00020/data.csv ./bench_data/00021/data.csv ./bench_data/00022/data.csv ./bench_data/00023/data.csv ./bench_data/00024/data.csv ./bench_data/00025/data.csv ./bench_data/00026/data.csv ./bench_data/00027/data.csv ./bench_data/00028/data.csv ./bench_data/00029/data.csv ./bench_data/00030/data.csv ./bench_data/00031/data.csv ./bench_data/00032/data.csv ./bench_data/00033/data.csv ./bench_data/00034/data.csv ./bench_data/00035/d

## 推論コード作成

In [16]:
source_dir = 'rt_bench_src/'
!rm -rf {source_dir}
!mkdir {source_dir}

In [17]:
%%writefile rt_bench_src/requirements.txt
rrcf==0.4.3
dill==0.3.4
matplotlib==3.5.3

Writing rt_bench_src/requirements.txt


In [18]:
%%writefile rt_bench_src/inference.py
import pandas as pd
import io
import rrcf
import numpy as np
from scipy import stats
import logging
import sys
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(sys.stdout))

hps={
    'num_trees':128,
    'shingle_size':10,
    'tree_size':1024
}


def calc_score(df):
    # RCF 準備
    logger.debug('preparing RCF...')
    data = df[0].astype(float).values
    points = rrcf.shingle(data, size=hps['shingle_size'])
    points = np.vstack([point for point in points])
    n = points.shape[0]
    sample_size_range = (n // hps['tree_size'], hps['tree_size'])
    logger.debug('prepared RCF')
    
    # RCF を生成
    logger.debug('generating RCF...')
    forest = []
    while len(forest) < hps['num_trees']:
        ixs = np.random.choice(n, size=sample_size_range,
                               replace=False)
        trees = [rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs]
        forest.extend(trees)
    logger.debug('generated RCF')
    
    # 異常スコア算出
    logger.debug('calculating score...')
    avg_codisp = pd.Series(0.0, index=np.arange(n))
    index = np.zeros(n)
    for tree in forest:
        codisp = pd.Series({leaf : tree.codisp(leaf) for leaf in tree.leaves})
        avg_codisp[codisp.index] += codisp
        np.add.at(index, codisp.index.values, 1)
    avg_codisp /= index
    logger.debug('calculated score')
    
    # result の整理
    logger.debug('organizing score...')
    columns = [i for i in range(points.shape[1])]
    result_df = pd.DataFrame(points, columns=columns, dtype='float')
    result_df['score'] = pd.Series(avg_codisp)
    result_df['scaled_score'] = result_df['score']/result_df['score'].max()
    
    logger.debug('organized score')
    return result_df, forest

def calc_threshold(df):
    logger.debug('calculating calc_threshold...')
    df['zscore'] = stats.zscore(df['score'])
    df['anomaly'] = df['zscore'].apply(lambda x: True if x>3 else False)
    return df

def model_fn(model_dir):
    return None
def input_fn(input_data, content_type):
    if content_type=='text/csv':
        df = pd.read_csv(io.StringIO(input_data), header=None)
    else:
        raise TypeError('allowed only text/csv')
    return df
def predict_fn(transformed_data, model):
    result_df, forest = calc_score(transformed_data)
    result_df = calc_threshold(result_df)
    return result_df
def output_fn(df, accept_type):
    buffer = io.StringIO()
    df.to_csv(buffer, index=False)
    return buffer.getvalue()

Writing rt_bench_src/inference.py


## 推論コードアップロード

In [19]:
%cd {source_dir}
!tar zcvf sourcedir.tar.gz ./*
%cd ..

/home/ec2-user/SageMaker/rrcf_train_inference/rt_bench_src
./inference.py
./requirements.txt
/home/ec2-user/SageMaker/rrcf_train_inference


In [20]:
source_s3_uri:Final[str] = sagemaker.session.Session().upload_data(
    f'./{source_dir}/sourcedir.tar.gz',
    key_prefix = 'rt_bench'
)
print(source_s3_uri)

s3://sagemaker-us-east-1-290000338583/rt_bench/sourcedir.tar.gz


## リアルタイム推論エンドポイント作成

In [5]:
# 名前の設定
model_name: Final[str] = 'rrcf-realtime'
endpoint_config_name: Final[str] = model_name + 'EndpointConfig'
endpoint_name: Final[str] = model_name + 'Endpoint'
role: Final[str] = sagemaker.get_execution_role()

In [22]:
# コンテナイメージの URI を取得
container_image_uri: Final[str] = sagemaker.image_uris.retrieve(
    "sklearn",  # SKLearn のマネージドコンテナを利用
    sagemaker.session.Session().boto_region_name, # ECR のリージョンを指定
    version='1.0-1', # SKLearn のバージョンを指定
    instance_type = 'ml.c5.xlarge', # インスタンスタイプを指定
    image_scope = 'inference' # 推論コンテナを指定
)
print(container_image_uri)

683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:1.0-1-cpu-py3


## c5.xlarge

In [37]:
# Model 作成
response = sm_client.create_model(
    ModelName=model_name,
    PrimaryContainer={
        'Image': container_image_uri,
        'Environment': {
            'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
            'SAGEMAKER_PROGRAM': 'inference.py',
            'SAGEMAKER_REGION': region,
            'SAGEMAKER_SUBMIT_DIRECTORY': source_s3_uri}
    },
    ExecutionRoleArn=role,
)
# EndpointConfig 作成
response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[
        {
            'VariantName': 'AllTrafic',
            'ModelName': model_name,
            'InitialInstanceCount':1,
            'InstanceType': 'ml.c5.xlarge',
        },
    ]
)
# Endpoint 作成
response = sm_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
)
# Endpoint が有効化されるまで待つ
endpoint_inservice_waiter.wait(
    EndpointName=endpoint_name,
    WaiterConfig={'Delay': 5,}
)

ClientError: An error occurred (ValidationException) when calling the CreateModel operation: Cannot create already existing model "arn:aws:sagemaker:us-east-1:290000338583:model/rrcf-realtime".

## リアルタイム推論

### お行儀のいいAPI呼び出し

In [6]:
from multiprocessing import Pool

In [7]:
def invoke_rt_endpoint(file_name):
    with open(file_name,'rt') as f:
        csv = f.read()
    response = smr_client.invoke_endpoint(
        EndpointName=endpoint_name, 
        Body=csv,
        ContentType='text/csv',
        Accept='text/csv',
    )

In [39]:
%%time
with Pool(processes=4) as pool:
        pool.map(invoke_rt_endpoint, file_name_list)

CPU times: user 306 ms, sys: 102 ms, total: 409 ms
Wall time: 2h 12min 35s


## 過負荷

In [9]:
%%time
with Pool(processes=8) as pool:
        pool.map(invoke_rt_endpoint, file_name_list)

ClientError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (504) from primary with message "<html>
<head><title>504 Gateway Time-out</title></head>
<body bgcolor="white">
<center><h1>504 Gateway Time-out</h1></center>
<hr><center>nginx/1.14.0 (Ubuntu)</center>
</body>
</html>
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/rrcf-realtimeEndpoint in account 290000338583 for more information.

In [10]:
sm_client.delete_endpoint(EndpointName=endpoint_name)
sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
sm_client.delete_model(ModelName=model_name)

{'ResponseMetadata': {'RequestId': 'df0e0d6f-a4ac-4409-9247-84e65e57ca32',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'df0e0d6f-a4ac-4409-9247-84e65e57ca32',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Sat, 17 Sep 2022 14:31:15 GMT'},
  'RetryAttempts': 0}}