In [1]:
%load_ext autoreload
%autoreload 2

from tqdm.auto import tqdm
import itertools

import os
import time
import traceback
from datetime import datetime
import pytz

# for processing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# my imports
from helpers import kube
from helpers import workload
from helpers import util
from helpers import request_funcs

def from_js_timestamp(x):
    """Convert a JavaScript-style epoch timestamp to an aware Toronto datetime.

    Args:
        x: Epoch timestamp in milliseconds (it is divided by 1000 to get seconds).

    Returns:
        A timezone-aware ``datetime`` expressed in the 'America/Toronto' zone.
    """
    # Build an *aware* UTC datetime first. A naive datetime handed to
    # ``astimezone()`` is interpreted as system-local time, which silently
    # shifts the result on any machine whose local timezone is not UTC.
    utc_moment = datetime.fromtimestamp(x / 1000, tz=pytz.utc)
    return utc_moment.astimezone(pytz.timezone('America/Toronto'))

fetching imagenet v2
resizing images


  0%|          | 0/100 [00:00<?, ?it/s]

converting to bentoml files


  0%|          | 0/100 [00:00<?, ?it/s]

extracting base64 files


  0%|          | 0/100 [00:00<?, ?it/s]

preprocessing for mobilenet


  0%|          | 0/100 [00:00<?, ?it/s]

In [2]:
# Experiment selection: the service this run targets.
# Other deployments that can be selected instead:
#   'bentoml-onnx-resnet50'
#   'bentoml-iris'
#   'tfserving-mobilenetv1'
service_name = 'tfserving-resnetv2'

# URL handed to the request function in place of the service's default one.
override_url = 'http://localhost:3000/proxy/' + service_name

# Resource settings, later formatted as `cpu={cpu_m}m` and `memory={ram_mb}Mi`.
cpu_m, ram_mb = 1000, 1000

In [3]:
# Deployment specs to read; each one lives at deployments/<config name>.json.
config_names = [
    'bentoml-iris-250m-512mb',
    'bentoml-onnx-resnet50-250m-512mb',
    'tfserving-resnetv2-250m-512mb',
    'tfserving-mobilenetv1-250m-512mb',
]

# Index every loaded spec by its own 'name' field so it can be looked up
# by service name afterwards.
workload_configs = dict()
for exp_config_name in config_names:
    exp_file = "deployments/" + exp_config_name + ".json"
    workload_spec = util.load_json_file(exp_file)
    workload_configs.update({workload_spec['name']: workload_spec})

In [4]:
# deploy the function
request_limit_override = f"'cpu={cpu_m}m,memory={ram_mb}Mi'"
print('Request Limit Overrid:', request_limit_override)

workload_spec = workload_configs[service_name]
# override request and limit values
workload_spec['opts']['--request'] = request_limit_override
workload_spec['opts']['--limit'] = request_limit_override
kn_command = kube.get_kn_command(**workload_spec)
print(kn_command)
!{kn_command}
print('waiting for settings to converge')
time.sleep(10)

Request Limit Overrid: 'cpu=1000m,memory=1000Mi'
kn service apply tfserving-resnetv2 --image ghcr.io/nimamahmoudi/tfserving-resnet:20210429213000 \
  --limit 'cpu=1000m,memory=1000Mi' \
  --request 'cpu=1000m,memory=1000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'tfserving-resnetv2'.
Service 'tfserving-resnetv2' with latest revision 'tfserving-resnetv2-00079' (unchanged) is available at URL:
http://tfserving-resnetv2.default.kn.nima-dev.com
waiting for settings to converge


In [5]:
# Issue a single request for the selected service and flatten its metadata.
def call_request_func():
    """Send one request through ``override_url`` and return a flat record.

    The request function registered for ``service_name`` is called with
    ``url=override_url``. The returned dict combines the client-side
    ``response_time_ms`` with values read from the response headers: the
    request id is kept as-is, the two timestamp headers are converted with
    ``from_js_timestamp``, and the remaining ``X-SmartProxy-*`` headers are
    parsed as integers.

    Raises:
        KeyError: if the service has no registered request function, or an
            expected field / header is missing from the result.
        ValueError: if a numeric header cannot be parsed as an integer.
    """
    send_request = request_funcs.workload_funcs[service_name]
    reply = send_request(url=override_url)

    # Fields are read in the same order they appear in the record.
    record = {'response_time_ms': reply['response_time_ms']}
    hdrs = reply['headers']

    def header_int(header_name):
        # Header values are parsed with int() before use.
        return int(hdrs[header_name])

    record['request_id'] = hdrs['X-Request-Id']
    record['queue_position'] = header_int('X-SmartProxy-queuePosition')
    record['received_at'] = from_js_timestamp(header_int('X-SmartProxy-receivedAt'))
    record['response_at'] = from_js_timestamp(header_int('X-SmartProxy-responseAt'))
    record['upstream_response_time'] = header_int('X-SmartProxy-upstreamResponseTime')
    record['upstream_request_count'] = header_int('X-SmartProxy-upstreamRequestCount')
    record['response_time_ms_server'] = header_int('X-SmartProxy-responseTime')
    record['queue_time_ms'] = header_int('X-SmartProxy-queueTime')
    return record

# Fire a single request as a smoke test; the returned record is the cell output.
call_request_func()

{'response_time_ms': 7111.382,
 'request_id': '0d62a607-bac5-464b-af1a-6c17b4f306e4',
 'queue_position': 0,
 'received_at': datetime.datetime(2021, 5, 20, 11, 32, 56, 806000, tzinfo=<DstTzInfo 'America/Toronto' EDT-1 day, 20:00:00 DST>),
 'response_at': datetime.datetime(2021, 5, 20, 11, 33, 3, 907000, tzinfo=<DstTzInfo 'America/Toronto' EDT-1 day, 20:00:00 DST>),
 'upstream_response_time': 6099,
 'upstream_request_count': 1,
 'response_time_ms_server': 7101,
 'queue_time_ms': 1002}

In [10]:
# Make ten requests one after another and show the collected records as a
# table, one row per request.
sample_reqs = list(call_request_func() for _attempt in range(10))
pd.DataFrame(sample_reqs)

Unnamed: 0,response_time_ms,request_id,queue_position,received_at,response_at,upstream_response_time,upstream_request_count,response_time_ms_server,queue_time_ms
0,1535.741,8969cd64-02f2-4eaa-985b-ec91182baed1,0,2021-05-20 11:35:40.402000-04:00,2021-05-20 11:35:41.931000-04:00,526,1,1529,1003
1,1184.268,cdfe6171-4f1f-4c87-b66c-eea024fcd822,0,2021-05-20 11:35:41.940000-04:00,2021-05-20 11:35:43.119000-04:00,173,1,1179,1006
2,1193.677,e5823ce2-7e4a-4096-9740-b218e430706c,0,2021-05-20 11:35:43.128000-04:00,2021-05-20 11:35:44.314000-04:00,185,1,1186,1001
3,1201.6,5093bf3b-45c4-49fa-b827-486b37382c84,0,2021-05-20 11:35:44.325000-04:00,2021-05-20 11:35:45.521000-04:00,194,1,1196,1002
4,1187.659,00fd443e-4b4c-4f38-8edc-0afa690b5501,0,2021-05-20 11:35:45.530000-04:00,2021-05-20 11:35:46.713000-04:00,181,1,1183,1002
5,1227.646,05cec817-0d57-45f5-b946-84bd3860996e,0,2021-05-20 11:35:46.721000-04:00,2021-05-20 11:35:47.944000-04:00,220,1,1223,1003
6,1169.1,b8fbe94c-737d-48ff-aa25-4f2ab6564277,0,2021-05-20 11:35:47.953000-04:00,2021-05-20 11:35:49.116000-04:00,161,1,1163,1002
7,1191.37,694bdf00-743c-4feb-bd7f-5a8b756a414d,0,2021-05-20 11:35:49.124000-04:00,2021-05-20 11:35:50.310000-04:00,185,1,1186,1001
8,1200.601,80377443-c2c8-47c4-bab2-28323cb087d0,0,2021-05-20 11:35:50.319000-04:00,2021-05-20 11:35:51.515000-04:00,193,1,1196,1003
9,1193.152,9af581c3-da51-41f6-83a4-af5ce2e71ec8,0,2021-05-20 11:35:51.523000-04:00,2021-05-20 11:35:52.711000-04:00,185,1,1188,1003
