In [1]:
%load_ext autoreload
%autoreload 2

from tqdm.auto import tqdm
import itertools

import os
import time
import traceback
from datetime import datetime
import pytz

# for processing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# my imports
from helpers import kube
from helpers import workload
from helpers import util
from helpers import request_funcs

# config
my_timezone = os.getenv('PY_TZ', 'America/Toronto')

# small funcs
from_js_timestamp = lambda x: datetime.utcfromtimestamp(x/1000).astimezone(pytz.timezone(my_timezone))
def get_time_with_tz():
    return datetime.now().astimezone(pytz.timezone(my_timezone))

fetching imagenet v2
resizing images


  0%|          | 0/100 [00:00<?, ?it/s]

converting to bentoml files


  0%|          | 0/100 [00:00<?, ?it/s]

extracting base64 files


  0%|          | 0/100 [00:00<?, ?it/s]

preprocessing for mobilenet


  0%|          | 0/100 [00:00<?, ?it/s]

In [2]:
# experiment that we want to perform
# service_name = 'tfserving-resnetv2'
# service_name = 'bentoml-onnx-resnet50'
# service_name = 'bentoml-iris'
# service_name = 'tfserving-mobilenetv1'
# service_name = 'bentoml-pytorch-fashion-mnist'
service_name = 'bentoml-keras-toxic-comments'

override_url = f'http://localhost:3000/proxy/{service_name}'

cpu_m = 1000
ram_mb = 1000

rps_list = [5] * 60

In [3]:
config_names = [
    'bentoml-iris-250m-512mb',
    'bentoml-onnx-resnet50-250m-512mb',
    'tfserving-resnetv2-250m-512mb',
    'tfserving-mobilenetv1-250m-512mb',
    'bentoml-pytorch-fashionmnist-250m-512mb',
    'bentoml-keras-toxic-comments-250m-512mb',
]

workload_configs = {}
for exp_config_name in config_names:
    exp_file = f"deployments/{exp_config_name}.json"
    workload_spec = util.load_json_file(exp_file)
    workload_configs[workload_spec['name']] = workload_spec

In [4]:
# deploy the function
request_limit_override = f"'cpu={cpu_m}m,memory={ram_mb}Mi'"
print('Request Limit Overrid:', request_limit_override)

workload_spec = workload_configs[service_name]
# override request and limit values
workload_spec['opts']['--request'] = request_limit_override
workload_spec['opts']['--limit'] = request_limit_override
kn_command = kube.get_kn_command(**workload_spec)
print(kn_command)
!{kn_command}
print('waiting for settings to converge')
time.sleep(10)

Request Limit Overrid: 'cpu=1000m,memory=1000Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1000Mi' \
  --request 'cpu=1000m,memory=1000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-keras-toxic-comments' in namespace 'default':

  0.066s The Configuration is still working to reflect the latest desired specification.
 11.321s Traffic is not yet migrated to the latest revision.
 11.355s Ingress has not yet been reconciled.
 11.445s Waiting for load balancer to be ready
 11.613s Ready to serve.

Service 'bentoml-keras-toxic-comments' applied to latest revision 'bentoml-keras-toxic-comments-00010' is available at URL:
http://bentoml-keras-toxic-comments.default.kn.nima-dev.com
waiting for settings to converge


In [5]:
# call the request function with proper arguments
def call_request_func():
    request_func = request_funcs.workload_funcs[service_name]
    result = request_func(url=override_url)

    return {
        'response_time_ms': result['response_time_ms'],
        'request_id': result['headers']['X-Request-Id'],
        'queue_position': int(result['headers']['X-SmartProxy-queuePosition']),
        'received_at': from_js_timestamp(int(result['headers']['X-SmartProxy-receivedAt'])),
        'response_at': from_js_timestamp(int(result['headers']['X-SmartProxy-responseAt'])),
        'upstream_response_time': int(result['headers']['X-SmartProxy-upstreamResponseTime']),
        'upstream_request_count': int(result['headers']['X-SmartProxy-upstreamRequestCount']),
        'response_time_ms_server': int(result['headers']['X-SmartProxy-responseTime']),
        'queue_time_ms': int(result['headers']['X-SmartProxy-queueTime']),
    }

call_request_func()

{'response_time_ms': 678.7990000000001,
 'request_id': '8d5da608-15b9-4796-b9f7-45e43cb4f65d',
 'queue_position': 0,
 'received_at': datetime.datetime(2021, 6, 23, 17, 59, 11, 237000, tzinfo=<DstTzInfo 'America/Toronto' EDT-1 day, 20:00:00 DST>),
 'response_at': datetime.datetime(2021, 6, 23, 17, 59, 11, 907000, tzinfo=<DstTzInfo 'America/Toronto' EDT-1 day, 20:00:00 DST>),
 'upstream_response_time': 267,
 'upstream_request_count': 1,
 'response_time_ms_server': 670,
 'queue_time_ms': 403}

In [6]:
sample_reqs = [call_request_func() for _ in range(10)]
sample_reqs = [d for d in sample_reqs if d is not None]
pd.DataFrame(data=sample_reqs)

Unnamed: 0,response_time_ms,request_id,queue_position,received_at,response_at,upstream_response_time,upstream_request_count,response_time_ms_server,queue_time_ms
0,475.628,d999a3c9-b159-418a-83ac-6700d674a18c,0,2021-06-23 17:59:12.375000-04:00,2021-06-23 17:59:12.845000-04:00,69,1,470,401
1,469.366,93964d9c-872d-41c6-b609-125a563be7fc,0,2021-06-23 17:59:12.852000-04:00,2021-06-23 17:59:13.317000-04:00,64,1,465,401
2,466.501,1e4950ad-c66b-4175-9898-7ab95ea22013,0,2021-06-23 17:59:13.325000-04:00,2021-06-23 17:59:13.787000-04:00,60,1,462,402
3,466.426,68bfffbc-1385-42c7-927e-e9cc571dc4e9,0,2021-06-23 17:59:13.793000-04:00,2021-06-23 17:59:14.255000-04:00,61,1,462,401
4,470.053,45abb526-42fa-4ddf-9a56-fe1ec7a2b889,0,2021-06-23 17:59:14.263000-04:00,2021-06-23 17:59:14.728000-04:00,63,1,465,402
5,467.705,3bac2420-6a0e-4777-9680-3588a29c6585,0,2021-06-23 17:59:14.734000-04:00,2021-06-23 17:59:15.198000-04:00,62,1,464,402
6,465.478,b68fa9b9-4291-4a5f-b32b-f215e7322a24,0,2021-06-23 17:59:15.203000-04:00,2021-06-23 17:59:15.665000-04:00,60,1,462,402
7,476.731,4af2a5dd-6190-43e5-9dfe-6f3bdb7465ea,0,2021-06-23 17:59:15.671000-04:00,2021-06-23 17:59:16.143000-04:00,71,1,472,401
8,469.913,b6855165-b93d-4022-90c1-f050a4e51455,0,2021-06-23 17:59:16.150000-04:00,2021-06-23 17:59:16.616000-04:00,65,1,466,401
9,465.67,685156a0-56f7-496b-87ff-c89c34e93e13,0,2021-06-23 17:59:16.622000-04:00,2021-06-23 17:59:17.084000-04:00,61,1,462,401


In [7]:
# adding exception handling to create worker func
def worker_func():
    try:
        return call_request_func()
    except Exception:
        print('exception occured:')
        traceback.print_exc()
        return None

In [8]:
# my library imports
from pacswg.timer import TimerClass
import pacswg

# start workload generator
wg = pacswg.WorkloadGenerator(worker_func=worker_func, rps=0, worker_thread_count=100)
wg.start_workers()
timer = TimerClass()

print("============ Experiment Started ============")
print("Time Started:", get_time_with_tz())

for rps in tqdm(rps_list):
    wg.set_rps(rps)
    timer.tic()
    # apply each for one minute
    while timer.toc() < 60:
        wg.fire_wait()

# get the results
wg.stop_workers()
all_res = wg.get_stats()
total_reqs = len(all_res)
all_res = [d for d in all_res if d is not None]
success_reqs = len(all_res)

print("Total Requests Made:", total_reqs)
print("Successful Requests Made:", success_reqs)

Time Started: 2021-06-23 13:59:18.056437-04:00


  0%|          | 0/60 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [9]:
# collect the results
df_res = pd.DataFrame(data=all_res)
# save the results
now = get_time_with_tz()
res_name = now.strftime('res-%Y-%m-%d_%H-%M-%S')
res_folder = f'results/trace2/{service_name}'
# make the directory and file names
! mkdir -p {res_folder}
requests_results_filename = f'{res_name}_reqs.csv'
df_res.to_csv(os.path.join(res_folder, requests_results_filename))
print('Results Name:', res_name)

Results Name: res-2021-05-21_11-31-34
