In [49]:


import yaml
import sagemaker
import boto3
import pandas as pd
import os
import json
from time import gmtime, strftime
from sagemaker.sklearn.model import SKLearnModel
import matplotlib.pyplot as plt
%matplotlib inline

In [50]:

import yaml
import sagemaker

SETTING_FILE_PATH = "../config/settings.yaml"

# Load the AWS resource settings.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform; without it, open() falls back to the locale's preferred encoding.
with open(SETTING_FILE_PATH, encoding="utf-8") as file:
    aws_info = yaml.safe_load(file)

sess = sagemaker.Session()

# Values taken from the "aws" -> "sagemaker" section of the settings file.
role = aws_info["aws"]["sagemaker"]["role"]
bucket = aws_info["aws"]["sagemaker"]["s3bucket"]
region = aws_info["aws"]["sagemaker"]["region"]


In [3]:

! echo "this is dummy model" > dummy_model.txt |tar -czf dummy_model.tar.gz dummy_model.txt

In [4]:

prefix = "endpoint_latency_experiment"
model_file = "dummy_model.tar.gz"

# S3 object keys are plain strings separated by "/". os.path.join would insert
# the operating system's separator (a backslash on Windows), producing a key
# that does not match an "s3://<bucket>/<prefix>/model/<file>" style URI.
model_key = f"{prefix}/model/{model_file}"

s3_resource_session = boto3.Session().resource("s3").Bucket(bucket)
s3_resource_session.Object(model_key).upload_file(model_file)

In [70]:
# S3 URI of the model archive: s3://<bucket>/<prefix>/model/<model_file>.
model_data = f"s3://{bucket}/{prefix}/model/{model_file}"

# scikit-learn serving model whose inference code is model/inference.py.
model = SKLearnModel(
    entry_point="inference.py",
    source_dir="model",
    framework_version="0.23-1",
    py_version="py3",
    model_data=model_data,
    role=role,
    sagemaker_session=sess,
)


In [71]:

model_name = "latency-experiment-model"

# Container definition derived from `model`, registered under `model_name`.
container_def = model.prepare_container_def()

# Kept as the cell's last expression so its return value is displayed.
sess.create_model(model_name, role, container_def)


'latency-experiment-model'

## インスタンス種類別のエンドポイント作成

In [72]:
def check_invoke_endopoint(endpoint_name: str):
    """Send one test request to an endpoint and print the decoded response.

    The request body is the string "0", sent and accepted as
    "application/json". The response body is decoded as UTF-8, parsed as JSON
    and printed after a "prediction: " label.

    Args:
        endpoint_name: Name of the endpoint to invoke.
    """
    client = boto3.Session().client("sagemaker-runtime")
    raw_body = client.invoke_endpoint(
        Body="0",
        EndpointName=endpoint_name,
        Accept="application/json",
        ContentType="application/json",
    )["Body"].read()
    predictions = json.loads(raw_body.decode("utf-8"))
    print("prediction: ", predictions)


In [73]:
def create_endpoint(instance_type: str):
    """Deploy `model` to a single instance of the given type and test it.

    The endpoint is named "latency-experiment-endpoint-<type>", where <type>
    is `instance_type` with its dots removed. After deployment one test
    request is sent through `check_invoke_endopoint`.

    Args:
        instance_type: Instance type to deploy on, e.g. "ml.t2.medium".
    """
    type_suffix = instance_type.replace(".", "")
    endpoint_name = "{}-{}".format("latency-experiment-endpoint", type_suffix)

    # The object returned by deploy() is not needed; the endpoint is addressed
    # by name from here on.
    model.deploy(
        endpoint_name=endpoint_name,
        instance_type=instance_type,
        initial_instance_count=1,
    )

    check_invoke_endopoint(endpoint_name)

In [93]:
# t2 family instance types used in the experiment.
t2_instance_type_list = [
    "ml.t2.medium",
    "ml.t2.large",
    "ml.t2.xlarge",
    "ml.t2.2xlarge",
]

# m5 family instance types used in the experiment.
m5_instance_type_list = [
    "ml.m5.large",
    "ml.m5.xlarge",
    "ml.m5.2xlarge",
    "ml.m5.4xlarge",
    "ml.m5.12xlarge",
    "ml.m5.24xlarge",
]

In [78]:
# One endpoint per t2 instance type; the type is printed first so the
# deployment progress output that follows can be attributed to it.
for t2_type in t2_instance_type_list:
    print(t2_type)
    create_endpoint(instance_type=t2_type)

ml.t2.xlarge
-----!prediction:  0
ml.t2.2xlarge
-----!prediction:  0


In [80]:
# One endpoint per m5 instance type; the type is printed first so the
# deployment progress output that follows can be attributed to it.
for m5_type in m5_instance_type_list:
    print(m5_type)
    create_endpoint(instance_type=m5_type)

ml.m5.large
-----!prediction:  0
ml.m5.xlarge
-----!prediction:  0
ml.m5.2xlarge
----!prediction:  0
ml.m5.4xlarge
----!prediction:  0
ml.m5.12xlarge
----!prediction:  0
ml.m5.24xlarge
-----!prediction:  0


## VPC内にエンドポイント作成

In [82]:
# Endpoint "latency-experiment-endpoint-default": `model` deployed as-is on
# two ml.t2.xlarge instances.
endpoint_name = "{}-{}".format("latency-experiment-endpoint", "default")

predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.t2.xlarge",
    initial_instance_count=2,
)


-----!

In [83]:
model_name = "latency-experiment-privatelink-model"

# Network resources for the VPC-attached model.
vpc = "vpc-0f5cec22138e455fe"  # not referenced by the create_model call below
security_group = "sg-07c022fcdbd50acbd"
subnet_a = "subnet-02d49088de4cb6ac2"
subnet_c = "subnet-0b59cdf939f464244"

# The model is attached to both subnets under a single security group.
privatelink_vpc_config = {
    "SecurityGroupIds": [security_group],
    "Subnets": [subnet_a, subnet_c],
}

# Kept as the cell's last expression so its return value is displayed.
sess.create_model(
    model_name,
    role,
    model.prepare_container_def(),
    vpc_config=privatelink_vpc_config,
)


Using already existing model: latency-experiment-privatelink-model


'latency-experiment-privatelink-model'

In [87]:
from sagemaker.session import production_variant
# Endpoint "latency-experiment-endpoint-privatelink".
endpoint_name = "{}-{}".format("latency-experiment-endpoint", "privatelink")

# Single production variant serving `model_name` on two ml.t2.xlarge instances.
model_variant = production_variant(
    variant_name="Variant",
    model_name=model_name,
    instance_type="ml.t2.xlarge",
    initial_instance_count=2,
    initial_weight=1,
)


In [89]:

sm = boto3.client("sagemaker")

# Register an endpoint configuration holding the single production variant.
endpoint_config_name = "{}".format("privatelink-endpoint-config")
endpoint_config = sm.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[model_variant],
)

# Create the endpoint from that configuration.
endpoint_response = sm.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
)

## 結果

In [90]:
import matplotlib.pyplot as plt
%matplotlib inline

In [126]:
# Columns selected from each result frame, in display order.
statictics_columns = [
    # Label column followed by the aggregate response-time figures.
    "type",
    "Average Response Time",
    "Min Response Time",
    "Max Response Time",
] + [
    # Response-time percentile columns.
    "50%",
    "66%",
    "75%",
    "80%",
    "90%",
    "95%",
    "98%",
    "99%",
    "99.9%",
    "99.99%",
    "100%",
]


In [127]:
# Gather the first row of every t2 instance type's stats CSV, labelled with the
# instance type. The rows are collected in a list and concatenated once:
# growing a DataFrame with pd.concat inside the loop re-copies all earlier rows
# on every iteration and starts from an empty frame.
t2_frames = []
for instance_type in t2_instance_type_list:
    # File names use the instance type without dots, e.g. "mlt2medium".
    prefix_instance_type = instance_type.replace(".", "")
    df = pd.read_csv(f"instance_results/{prefix_instance_type}_stats.csv")
    df["type"] = instance_type
    t2_frames.append(df[statictics_columns][:1])

# pd.concat raises on an empty list, so keep the empty-frame result for that case.
df_t2_family = pd.concat(t2_frames, axis=0) if t2_frames else pd.DataFrame()


In [128]:
df_t2_family

Unnamed: 0,type,Average Response Time,Min Response Time,Max Response Time,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%
0,ml.t2.medium,9.041108,8,483,9,9,9,9,10,10,11,12,22,65,480
0,ml.t2.large,7.364959,6,447,7,7,7,8,8,8,9,10,19,53,450
0,ml.t2.xlarge,7.268115,6,457,7,7,7,7,8,8,9,10,18,60,460
0,ml.t2.2xlarge,7.340908,6,476,7,7,7,7,8,9,9,10,19,54,480


In [129]:
# Gather the first row of every m5 instance type's stats CSV, labelled with the
# instance type. The rows are collected in a list and concatenated once:
# growing a DataFrame with pd.concat inside the loop re-copies all earlier rows
# on every iteration and starts from an empty frame.
m5_frames = []
for instance_type in m5_instance_type_list:
    # File names use the instance type without dots, e.g. "mlm5large".
    prefix_instance_type = instance_type.replace(".", "")
    df = pd.read_csv(f"instance_results/{prefix_instance_type}_stats.csv")
    df["type"] = instance_type
    m5_frames.append(df[statictics_columns][:1])

# pd.concat raises on an empty list, so keep the empty-frame result for that case.
df_m5_family = pd.concat(m5_frames, axis=0) if m5_frames else pd.DataFrame()


In [130]:
df_m5_family

Unnamed: 0,type,Average Response Time,Min Response Time,Max Response Time,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%
0,ml.m5.large,6.375926,5,457,5,8,8,8,8,9,9,11,19,47,460
0,ml.m5.xlarge,8.511622,8,78,8,9,9,9,9,9,10,14,20,61,78
0,ml.m5.2xlarge,6.825458,5,216,7,8,8,8,8,9,9,10,21,56,220
0,ml.m5.4xlarge,7.215649,5,66,8,9,9,9,9,9,10,11,20,55,66
0,ml.m5.12xlarge,8.995683,8,699,9,9,9,9,9,10,10,14,23,100,700
0,ml.m5.24xlarge,8.028491,6,475,8,8,8,8,9,9,10,12,22,72,480


In [132]:
# Gather the first row of each network setup's stats CSV, labelled with the
# setup name. The rows are collected in a list and concatenated once instead of
# growing a DataFrame with pd.concat inside the loop.
network_frames = []
for network_type in ["default", "PrivateLink"]:
    df = pd.read_csv(f"network_results/{network_type}_stats.csv")
    df["type"] = network_type
    network_frames.append(df[statictics_columns][:1])

df_network_family = pd.concat(network_frames, axis=0)

In [133]:
df_network_family

Unnamed: 0,type,Average Response Time,Min Response Time,Max Response Time,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%
0,default,7.415329,7,59,7,7,8,8,8,8,9,10,18,46,59
0,PrivateLink,6.959126,5,428,7,7,7,8,8,8,9,11,18,360,430
