In [1]:
%load_ext autoreload
%autoreload 2

from tqdm.auto import tqdm
import itertools

import os
import time
import traceback
from datetime import datetime
import pytz

# for processing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# my imports
from helpers import kube
from helpers import workload
from helpers import util
from helpers import request_funcs

# config
my_timezone = os.getenv('PY_TZ', 'America/Toronto')

# small funcs
def from_js_timestamp(x):
    """Convert a JavaScript-style epoch timestamp to an aware datetime.

    Args:
        x: milliseconds since the Unix epoch (int or float).

    Returns:
        A timezone-aware ``datetime`` expressed in the ``my_timezone`` zone.
    """
    # Build an *aware* UTC datetime first. utcfromtimestamp() returns a naive
    # value, and astimezone() on a naive datetime assumes system-local time,
    # which silently shifts the result on any host not running in UTC.
    utc_moment = datetime.fromtimestamp(x / 1000, tz=pytz.utc)
    return utc_moment.astimezone(pytz.timezone(my_timezone))
def get_time_with_tz():
    """Return the current time as an aware datetime in the `my_timezone` zone."""
    target_zone = pytz.timezone(my_timezone)
    local_now = datetime.now()
    return local_now.astimezone(target_zone)

fetching imagenet v2
resizing images


  0%|          | 0/100 [00:00<?, ?it/s]

converting to bentoml files


  0%|          | 0/100 [00:00<?, ?it/s]

extracting base64 files


  0%|          | 0/100 [00:00<?, ?it/s]

preprocessing for mobilenet


  0%|          | 0/100 [00:00<?, ?it/s]

In [2]:
# Experiment selection: leave exactly one `service_name` line uncommented.
# service_name = 'tfserving-resnetv2'
# service_name = 'bentoml-onnx-resnet50'
service_name = 'bentoml-iris'
# service_name = 'tfserving-mobilenetv1'

# Requests are sent to this local proxy route for the selected service.
override_url = 'http://localhost:3000/proxy/{}'.format(service_name)

# Resource values, later formatted as `cpu=<cpu_m>m,memory=<ram_mb>Mi`.
cpu_m, ram_mb = 1000, 1000

# One target requests-per-second value per experiment step (60 steps at 5 rps).
rps_list = [5 for _ in range(60)]

In [3]:
# Deployment specs to load; each entry names a file `deployments/<name>.json`.
config_names = [
    'bentoml-iris-250m-512mb',
    'bentoml-onnx-resnet50-250m-512mb',
    'tfserving-resnetv2-250m-512mb',
    'tfserving-mobilenetv1-250m-512mb',
]


def _load_workload_spec(exp_config_name):
    """Parse and return the JSON spec stored for one deployment config name."""
    return util.load_json_file(f"deployments/{exp_config_name}.json")


# Index the specs by their own 'name' field (not by file name); on duplicate
# names the spec loaded last wins.
_loaded_specs = [_load_workload_spec(name) for name in config_names]
workload_configs = {spec['name']: spec for spec in _loaded_specs}

In [4]:
# deploy the function
request_limit_override = f"'cpu={cpu_m}m,memory={ram_mb}Mi'"
print('Request Limit Overrid:', request_limit_override)

workload_spec = workload_configs[service_name]
# override request and limit values
workload_spec['opts']['--request'] = request_limit_override
workload_spec['opts']['--limit'] = request_limit_override
kn_command = kube.get_kn_command(**workload_spec)
print(kn_command)
!{kn_command}
print('waiting for settings to converge')
time.sleep(10)

Request Limit Overrid: 'cpu=1000m,memory=1000Mi'
kn service apply tfserving-resnetv2 --image ghcr.io/nimamahmoudi/tfserving-resnet:20210429213000 \
  --limit 'cpu=1000m,memory=1000Mi' \
  --request 'cpu=1000m,memory=1000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'tfserving-resnetv2'.
Service 'tfserving-resnetv2' with latest revision 'tfserving-resnetv2-00079' (unchanged) is available at URL:
http://tfserving-resnetv2.default.kn.nima-dev.com
waiting for settings to converge


In [5]:
# call the request function with proper arguments
def call_request_func():
    """Send one request for `service_name` and flatten the reply into a record.

    Looks up the request function registered for the selected service, calls
    it against `override_url`, and extracts the client-side response time plus
    the `X-Request-Id` / `X-SmartProxy-*` response headers.

    Returns:
        dict with one entry per extracted field; counters and durations are
        parsed as ints and the two timestamps go through `from_js_timestamp`.

    Raises:
        Whatever the underlying request function raises, and KeyError /
        ValueError if an expected header is missing or not an integer.
    """
    send_request = request_funcs.workload_funcs[service_name]
    reply = send_request(url=override_url)

    record = {'response_time_ms': reply['response_time_ms']}
    headers = reply['headers']
    record['request_id'] = headers['X-Request-Id']
    record['queue_position'] = int(headers['X-SmartProxy-queuePosition'])

    # epoch-millisecond headers -> aware datetimes
    timestamp_fields = (
        ('received_at', 'X-SmartProxy-receivedAt'),
        ('response_at', 'X-SmartProxy-responseAt'),
    )
    for field, header_name in timestamp_fields:
        record[field] = from_js_timestamp(int(headers[header_name]))

    # remaining headers are plain integers
    integer_fields = (
        ('upstream_response_time', 'X-SmartProxy-upstreamResponseTime'),
        ('upstream_request_count', 'X-SmartProxy-upstreamRequestCount'),
        ('response_time_ms_server', 'X-SmartProxy-responseTime'),
        ('queue_time_ms', 'X-SmartProxy-queueTime'),
    )
    for field, header_name in integer_fields:
        record[field] = int(headers[header_name])

    return record

# single trial request; the returned record is displayed as the cell output
call_request_func()

{'response_time_ms': 3727.897,
 'request_id': 'f57b5daf-5191-4c6c-bd1f-ea4cf6576c51',
 'queue_position': 0,
 'received_at': datetime.datetime(2021, 5, 20, 13, 12, 34, 621000, tzinfo=<DstTzInfo 'America/Toronto' EDT-1 day, 20:00:00 DST>),
 'response_at': datetime.datetime(2021, 5, 20, 13, 12, 38, 322000, tzinfo=<DstTzInfo 'America/Toronto' EDT-1 day, 20:00:00 DST>),
 'upstream_response_time': 2698,
 'upstream_request_count': 1,
 'response_time_ms_server': 3701,
 'queue_time_ms': 1003}

In [6]:
# Issue ten sequential requests, drop any None entries, and show them as a table.
_raw_samples = [call_request_func() for _ in range(10)]
sample_reqs = list(filter(lambda d: d is not None, _raw_samples))
pd.DataFrame(data=sample_reqs)

Unnamed: 0,response_time_ms,request_id,queue_position,received_at,response_at,upstream_response_time,upstream_request_count,response_time_ms_server,queue_time_ms
0,1196.154,4e5f6cc7-1b08-471d-97e8-3c7c0d287978,0,2021-05-20 13:12:38.726000-04:00,2021-05-20 13:12:39.915000-04:00,187,1,1189,1002
1,1200.356,09350232-b219-4492-aaf9-44af1769b94a,0,2021-05-20 13:12:39.925000-04:00,2021-05-20 13:12:41.121000-04:00,194,1,1196,1002
2,1198.542,1d13577f-f69e-4b7e-8ad4-9b5057301969,0,2021-05-20 13:12:41.128000-04:00,2021-05-20 13:12:42.322000-04:00,192,1,1194,1002
3,1198.774,d7716a16-7a3d-45f3-8378-3306f781f3d9,0,2021-05-20 13:12:42.329000-04:00,2021-05-20 13:12:43.523000-04:00,193,1,1194,1001
4,1202.769,6cbcd11f-c0ae-459a-a34c-4ad699650e41,0,2021-05-20 13:12:43.531000-04:00,2021-05-20 13:12:44.728000-04:00,196,1,1197,1001
5,1184.119,75946ecd-5f71-4c8f-8c7a-ef219dc681a7,0,2021-05-20 13:12:44.737000-04:00,2021-05-20 13:12:45.916000-04:00,178,1,1179,1001
6,1192.738,0e47c702-92df-4ac5-b4ee-0d2c0c9e674c,0,2021-05-20 13:12:45.922000-04:00,2021-05-20 13:12:47.110000-04:00,187,1,1188,1001
7,1208.973,68c6d598-5035-4bd4-b4db-ce6c7b4ccd30,0,2021-05-20 13:12:47.118000-04:00,2021-05-20 13:12:48.322000-04:00,202,1,1204,1002
8,1205.177,612c0791-8b22-4c43-bc5f-293131fc0dbf,0,2021-05-20 13:12:48.331000-04:00,2021-05-20 13:12:49.531000-04:00,199,1,1200,1001
9,1183.162,624441a5-3f0b-45dd-bc90-441b62043e67,0,2021-05-20 13:12:49.541000-04:00,2021-05-20 13:12:50.717000-04:00,174,1,1176,1002


In [7]:
# adding exception handling to create worker func
def worker_func():
    """Run `call_request_func` once without letting an exception escape.

    Returns:
        The record from `call_request_func`, or None if it raised; in that
        case a marker line and the traceback are printed.
    """
    outcome = None
    try:
        outcome = call_request_func()
    except Exception:
        print('exception occured:')
        traceback.print_exc()
    return outcome

In [8]:
# my library imports
from pacswg.timer import TimerClass
import pacswg

# how long each entry of rps_list is applied (one minute)
step_duration_s = 60

# start workload generator
wg = pacswg.WorkloadGenerator(worker_func=worker_func, rps=0, worker_thread_count=100)
wg.start_workers()
timer = TimerClass()

try:
    print("============ Experiment Started ============")
    print("Time Started:", get_time_with_tz())

    for rps in tqdm(rps_list):
        wg.set_rps(rps)
        timer.tic()
        # hold the current rate until the step duration has elapsed
        while timer.toc() < step_duration_s:
            wg.fire_wait()
finally:
    # Stop the workers even if the run is interrupted or a step raises, so
    # the generator is never left running after the cell exits.
    wg.stop_workers()

# get the results
all_res = wg.get_stats()
total_reqs = len(all_res)
# worker_func returns None for failed requests; keep only successful records
all_res = [d for d in all_res if d is not None]
success_reqs = len(all_res)

print("Total Requests Made:", total_reqs)
print("Successful Requests Made:", success_reqs)

Time Started: 2021-05-20 13:12:51.546278-04:00


  0%|          | 0/60 [00:00<?, ?it/s]

exception occured:exception occured:
exception occured:

exception occured:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "<ipython-input-7-e79221552b7f>", line 4, in worker_func
    return call_request_func()
  File "<ipython-input-5-63e6ec19e9e8>", line 4, in call_request_func
    result = request_func(url=override_url)
  File "/home/ubuntu/serverless-ml-serving/experiments/helpers/request_funcs.py", line 160, in request_tfserving_resnetv2
    response.raise_for_status()
  File "/home/ubuntu/miniconda/lib/python3.8/site-packages/requests/models.py", line 943, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 500 Server Error: Internal Server Error for url: http://localhost:3000/proxy/tfserving-resnetv2
Traceback (most recent call last):
  File "<ipython-input-7-e79221552b7f>", line 4, in worker_func
    return call_request_func()
  File "<ipython-input-7-e79221552b7f>

In [9]:
# collect the results
df_res = pd.DataFrame(data=all_res)
# save the results
now = get_time_with_tz()
res_name = now.strftime('res-%Y-%m-%d_%H-%M-%S')
res_folder = f'results/trace1/{service_name}'
# make the directory and file names
# (pure-Python equivalent of `mkdir -p`: portable and no shell interpolation)
os.makedirs(res_folder, exist_ok=True)
requests_results_filename = f'{res_name}_reqs.csv'
df_res.to_csv(os.path.join(res_folder, requests_results_filename))
print('Results Name:', res_name)

Results Name: res-2021-05-20_14-13-02
