In [1]:
%load_ext autoreload
%autoreload 2

from tqdm.auto import tqdm
import itertools

import os
import time
import traceback
from datetime import datetime
import pytz

# for processing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# my imports
from helpers import kube
from helpers import workload
from helpers import util
from helpers import request_funcs

# config
# Timezone name used for every timestamp this notebook produces; the PY_TZ
# environment variable overrides the default.
my_timezone = os.getenv('PY_TZ', 'America/Toronto')

# small funcs
def from_js_timestamp(x):
    """Convert an epoch timestamp in milliseconds to an aware datetime in `my_timezone`.

    The value is first interpreted as an aware UTC instant and only then
    converted. Calling `.astimezone()` on a naive datetime would treat it as
    system-local time, which gives the wrong result on any machine whose
    local zone is not UTC.

    Args:
        x: Milliseconds since the Unix epoch (int or float).

    Returns:
        A timezone-aware `datetime` expressed in `my_timezone`.
    """
    utc_moment = datetime.fromtimestamp(x / 1000, tz=pytz.utc)
    return utc_moment.astimezone(pytz.timezone(my_timezone))


def get_time_with_tz():
    """Return the current time as an aware datetime in `my_timezone`."""
    # A naive now() is system-local time, which is what astimezone() assumes.
    return datetime.now().astimezone(pytz.timezone(my_timezone))

fetching imagenet v2
resizing images


  0%|          | 0/100 [00:00<?, ?it/s]

converting to bentoml files


  0%|          | 0/100 [00:00<?, ?it/s]

extracting base64 files


  0%|          | 0/100 [00:00<?, ?it/s]

preprocessing for mobilenet


  0%|          | 0/100 [00:00<?, ?it/s]

In [2]:
# Experiment to run: pick exactly one service.
# Other services used with this notebook:
#   'tfserving-resnetv2', 'bentoml-onnx-resnet50', 'tfserving-mobilenetv1'
service_name = 'bentoml-iris'

# URL handed to the request function instead of the service's own URL.
override_url = f'http://localhost:3000/proxy/{service_name}'

# Resource override for the deployment: CPU in millicores, memory in Mi.
cpu_m = ram_mb = 1000

# One target RPS value per step; the workload loop holds each step for 60 seconds.
rps_list = [5 for _ in range(60)]

In [3]:
# Deployment specs to load; each entry names a JSON file under deployments/.
config_names = [
    'bentoml-iris-250m-512mb',
    'bentoml-onnx-resnet50-250m-512mb',
    'tfserving-resnetv2-250m-512mb',
    'tfserving-mobilenetv1-250m-512mb',
]

# Index the loaded specs by the 'name' field stored inside each file
# (not by the file name), so they can be looked up with `service_name`.
workload_configs = dict()
for exp_config_name in config_names:
    exp_file = f"deployments/{exp_config_name}.json"
    workload_spec = util.load_json_file(exp_file)
    workload_configs.update({workload_spec['name']: workload_spec})

In [4]:
# deploy the function
request_limit_override = f"'cpu={cpu_m}m,memory={ram_mb}Mi'"
print('Request Limit Overrid:', request_limit_override)

workload_spec = workload_configs[service_name]
# override request and limit values
workload_spec['opts']['--request'] = request_limit_override
workload_spec['opts']['--limit'] = request_limit_override
kn_command = kube.get_kn_command(**workload_spec)
print(kn_command)
!{kn_command}
print('waiting for settings to converge')
time.sleep(10)

Request Limit Overrid: 'cpu=1000m,memory=1000Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1000Mi' \
  --request 'cpu=1000m,memory=1000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.067s The Configuration is still working to reflect the latest desired specification.
  9.255s Traffic is not yet migrated to the latest revision.
  9.506s Ingress has not yet been reconciled.
  9.582s Waiting for load balancer to be ready
  9.641s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00163' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge


In [5]:
# call the request function with proper arguments
def call_request_func():
    """Send one request for `service_name` to `override_url` and summarise the reply.

    Returns:
        dict: the `response_time_ms` reported by the request function, plus
        values read from the response headers: the request id and the
        `X-SmartProxy-*` fields. Numeric headers are parsed with `int`, and
        the two timestamp headers are converted with `from_js_timestamp`.
    """
    send_request = request_funcs.workload_funcs[service_name]
    reply = send_request(url=override_url)

    def header_int(key):
        # Header values arrive as text; every numeric field is parsed as int.
        return int(reply['headers'][key])

    def header_time(key):
        # Timestamp headers hold epoch milliseconds.
        return from_js_timestamp(header_int(key))

    return {
        'response_time_ms': reply['response_time_ms'],
        'request_id': reply['headers']['X-Request-Id'],
        'queue_position': header_int('X-SmartProxy-queuePosition'),
        'received_at': header_time('X-SmartProxy-receivedAt'),
        'response_at': header_time('X-SmartProxy-responseAt'),
        'upstream_response_time': header_int('X-SmartProxy-upstreamResponseTime'),
        'upstream_request_count': header_int('X-SmartProxy-upstreamRequestCount'),
        'response_time_ms_server': header_int('X-SmartProxy-responseTime'),
        'queue_time_ms': header_int('X-SmartProxy-queueTime'),
    }

call_request_func()

{'response_time_ms': 1134.137,
 'request_id': 'e41e3c1d-c613-4adb-8d37-65d294602c03',
 'queue_position': 0,
 'received_at': datetime.datetime(2021, 5, 21, 10, 31, 10, 779000, tzinfo=<DstTzInfo 'America/Toronto' EDT-1 day, 20:00:00 DST>),
 'response_at': datetime.datetime(2021, 5, 21, 10, 31, 11, 906000, tzinfo=<DstTzInfo 'America/Toronto' EDT-1 day, 20:00:00 DST>),
 'upstream_response_time': 125,
 'upstream_request_count': 1,
 'response_time_ms_server': 1127,
 'queue_time_ms': 1002}

In [6]:
# Fire ten sequential sample requests, keep the non-None results, and show them as a table.
sample_reqs = [
    record
    for record in (call_request_func() for _ in range(10))
    if record is not None
]
pd.DataFrame(sample_reqs)

Unnamed: 0,response_time_ms,request_id,queue_position,received_at,response_at,upstream_response_time,upstream_request_count,response_time_ms_server,queue_time_ms
0,1026.268,83a08fba-7a1b-440d-aee5-2bc38097a64b,0,2021-05-21 10:31:11.986000-04:00,2021-05-21 10:31:13.009000-04:00,22,1,1023,1001
1,1028.655,38d36c2b-1cd9-41ea-8ba6-3742b6e17b95,0,2021-05-21 10:31:13.017000-04:00,2021-05-21 10:31:14.040000-04:00,22,1,1023,1001
2,1026.297,dbed7215-4818-4784-8eb0-6ecb76f889d0,0,2021-05-21 10:31:14.045000-04:00,2021-05-21 10:31:15.067000-04:00,21,1,1022,1001
3,1030.257,c3b40a72-0a6e-499c-a6b8-e459867d4df0,0,2021-05-21 10:31:15.074000-04:00,2021-05-21 10:31:16.100000-04:00,25,1,1026,1001
4,1028.974,0e282ef0-d6e8-482b-903f-7da52199d67a,0,2021-05-21 10:31:16.106000-04:00,2021-05-21 10:31:17.130000-04:00,23,1,1024,1001
5,1034.088,3af81550-eaef-4896-90c2-67dedddb3aee,0,2021-05-21 10:31:17.138000-04:00,2021-05-21 10:31:18.167000-04:00,28,1,1029,1001
6,1028.206,0e03f44a-7d58-4d4b-b1e9-4de13631d72a,0,2021-05-21 10:31:18.174000-04:00,2021-05-21 10:31:19.198000-04:00,23,1,1024,1001
7,1030.396,0e623a59-5c07-4983-93b8-656c580fd9ff,0,2021-05-21 10:31:19.205000-04:00,2021-05-21 10:31:20.231000-04:00,25,1,1026,1001
8,1025.035,d1fb64f6-a962-4437-a66e-97d61a1dfb91,0,2021-05-21 10:31:20.237000-04:00,2021-05-21 10:31:21.258000-04:00,21,1,1021,1000
9,1035.226,4c6de7dd-da78-4997-8ffd-5040fdbbb954,0,2021-05-21 10:31:21.265000-04:00,2021-05-21 10:31:22.296000-04:00,30,1,1031,1001


In [7]:
# adding exception handling to create worker func
def worker_func():
    """Run `call_request_func` once without letting a failure propagate.

    Returns:
        The dict produced by `call_request_func`, or None when it raised;
        in that case a notice and the traceback are printed first.
    """
    try:
        outcome = call_request_func()
    except Exception:
        print('exception occured:')
        traceback.print_exc()
        outcome = None
    return outcome

In [8]:
# my library imports
from pacswg.timer import TimerClass
import pacswg

# start workload generator
wg = pacswg.WorkloadGenerator(worker_func=worker_func, rps=0, worker_thread_count=100)
wg.start_workers()
timer = TimerClass()

print("============ Experiment Started ============")
print("Time Started:", get_time_with_tz())

try:
    for rps in tqdm(rps_list):
        wg.set_rps(rps)
        timer.tic()
        # apply each for one minute
        while timer.toc() < 60:
            wg.fire_wait()
finally:
    # Stop the workers even if the run is interrupted or a step raises;
    # otherwise the started workers would be left running in the kernel.
    wg.stop_workers()

# get the results
all_res = wg.get_stats()
total_reqs = len(all_res)
# worker_func returns None for a failed request, so drop those entries.
all_res = [d for d in all_res if d is not None]
success_reqs = len(all_res)

print("Total Requests Made:", total_reqs)
print("Successful Requests Made:", success_reqs)

Time Started: 2021-05-21 10:31:22.445631-04:00


  0%|          | 0/60 [00:00<?, ?it/s]

Total Requests Made: 17966
Successful Requests Made: 17966


In [9]:
# collect the results
df_res = pd.DataFrame(data=all_res)
# save the results
# The run is named after the time of saving, expressed in the notebook's timezone.
now = get_time_with_tz()
res_name = now.strftime('res-%Y-%m-%d_%H-%M-%S')
res_folder = f'results/trace1/{service_name}'
# make the directory and file names
# Create the folder from Python rather than through a shell command, so no
# variable is interpolated into a shell line and the cell runs on any OS.
os.makedirs(res_folder, exist_ok=True)
requests_results_filename = f'{res_name}_reqs.csv'
df_res.to_csv(os.path.join(res_folder, requests_results_filename))
print('Results Name:', res_name)

Results Name: res-2021-05-21_11-31-34
