# Prepare Workloads

In this notebook, we prepare the workloads that will be applied to the
deployments. The goal is a set of functions that can be called whenever we want
to send traffic to a deployment and measure the system's performance.

In [1]:
%load_ext autoreload
%autoreload 2

from tqdm.auto import tqdm
import itertools

import os
import time
import traceback

# my imports
from helpers import kube
from helpers import workload
from helpers import util
from helpers import request_funcs

fetching imagenet v2
resizing images


  0%|          | 0/100 [00:00<?, ?it/s]

converting to bentoml files


  0%|          | 0/100 [00:00<?, ?it/s]

extracting base64 files


  0%|          | 0/100 [00:00<?, ?it/s]

preprocessing for mobilenet


  0%|          | 0/100 [00:00<?, ?it/s]

In [2]:
# Experiment specs to load; each entry names a JSON file under deployments/.
config_names = [
    'bentoml-iris-250m-512mb',
    'bentoml-onnx-resnet50-250m-512mb',
    'tfserving-resnetv2-250m-512mb',
    'tfserving-mobilenetv1-250m-512mb',
]

# Index every loaded spec by its own 'name' field so that later cells can
# look a deployment up by service name.
workload_configs = {}
for exp_config_name in config_names:
    exp_file = "deployments/{}.json".format(exp_config_name)
    workload_spec = util.load_json_file(exp_file)
    workload_configs.update({workload_spec['name']: workload_spec})



In [3]:
def experiment_batch(warmup_req_count, total_req_count, batch_size, service_name, cpu_m):
    # apply new cpu configuration to the deployment
    ram_mb = int(cpu_m * 1.5)
    request_limit_override = f"'cpu={cpu_m}m,memory={ram_mb}Mi'"
    print('Request Limit Overrid:', request_limit_override)

    workload_spec = workload_configs[service_name]
    # override request and limit values
    workload_spec['opts']['--request'] = request_limit_override
    workload_spec['opts']['--limit'] = request_limit_override
    kn_command = kube.get_kn_command(**workload_spec)
    print(kn_command)
    !{kn_command}
    print('waiting for settings to converge')
    time.sleep(10)

    # an array as the same length as 
    batch_results = {
        'response_times_ms': [],
    }
    reqs_failed = 0
    reqs_succeeded = 0

    print('warming up...')
    for _ in range(warmup_req_count):
        try:
            # discard the results
            request_funcs.workload_funcs[service_name](batch_size=batch_size)
        except Exception:
            print('exception occured:')
            traceback.print_exc()

    # running the main workload
    print(f'running {service_name} workload function, batch_size: {batch_size}')
    for _ in tqdm(range(total_req_count)):
        try:
            result = request_funcs.workload_funcs[service_name](batch_size=batch_size)
            batch_results['response_times_ms'].append(result['response_time_ms'])
            reqs_succeeded += 1
        except Exception:
            print('exception occured:')
            traceback.print_exc()
            reqs_failed += 1

    batch_results.update({
        'reqs_failed': reqs_failed,
        'reqs_succeeded': reqs_succeeded,
        'ram_mb': ram_mb,
    })

    return batch_results

def perform_experiment_batch(config):
    """Run experiment_batch once for every combination of list-valued settings.

    Entries of ``config`` whose value is a list are treated as swept
    parameters; all other entries are passed through unchanged. The cartesian
    product of the swept values is iterated in ``config`` key order, and each
    combination is merged with the fixed entries and passed to
    ``experiment_batch`` as keyword arguments.

    Returns:
        list of dicts, one per combination, each holding the fixed entries,
        the chosen swept values, and whatever ``experiment_batch`` returned.
    """
    # Single pass: route each entry to the swept or the fixed bucket.
    fixed, swept = {}, {}
    for key, value in config.items():
        bucket = swept if isinstance(value, list) else fixed
        bucket[key] = value

    configurables = list(swept)
    print(f'configurables: {configurables}')

    results = []
    for combo in itertools.product(*swept.values()):
        configurable = dict(zip(configurables, combo))

        print(configurable)
        run_record = {**fixed, **configurable}

        # The experiment receives only the settings; its measurements are
        # merged into the record afterwards.
        run_record.update(experiment_batch(**run_record))
        results.append(run_record)

    return results

# Small smoke-test sweep: few requests and two CPU levels, to check the
# pipeline end to end before the full run.
config = dict(
    warmup_req_count=20,
    total_req_count=10,
    service_name='bentoml-iris',
    batch_size=[1, 10, 50, 100],
    # full CPU sweep alternative: [250, 500, 1000, 1500, 2000]
    cpu_m=[250, 1000],
)
results = perform_experiment_batch(config)

configurables: ['batch_size', 'cpu_m']
{'batch_size': 1, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.313s The Configuration is still working to reflect the latest desired specification.
 11.042s Traffic is not yet migrated to the latest revision.
 11.101s Ingress has not yet been reconciled.
 11.211s Waiting for load balancer to be ready
 11.352s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00064' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 1


  0%|          | 0/10 [00:00<?, ?it/s]

{'batch_size': 1, 'cpu_m': 1000}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.081s The Configuration is still working to reflect the latest desired specification.
 12.267s Traffic is not yet migrated to the latest revision.
 12.365s Ingress has not yet been reconciled.
 12.438s Waiting for load balancer to be ready
 12.594s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00065' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 1


  0%|          | 0/10 [00:00<?, ?it/s]

{'batch_size': 10, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.058s The Configuration is still working to reflect the latest desired specification.
  7.659s Traffic is not yet migrated to the latest revision.
  7.726s Ingress has not yet been reconciled.
  7.822s Waiting for load balancer to be ready
  8.021s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00066' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 10


  0%|          | 0/10 [00:00<?, ?it/s]

{'batch_size': 10, 'cpu_m': 1000}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.097s The Configuration is still working to reflect the latest desired specification.
  4.035s Traffic is not yet migrated to the latest revision.
  4.118s Ingress has not yet been reconciled.
  4.183s Waiting for load balancer to be ready
  4.388s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00067' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 10


  0%|          | 0/10 [00:00<?, ?it/s]

{'batch_size': 50, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.077s The Configuration is still working to reflect the latest desired specification.
  7.543s Traffic is not yet migrated to the latest revision.
  7.673s Ingress has not yet been reconciled.
  7.738s Waiting for load balancer to be ready
  8.300s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00068' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 50


  0%|          | 0/10 [00:00<?, ?it/s]

{'batch_size': 50, 'cpu_m': 1000}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.116s The Configuration is still working to reflect the latest desired specification.
 11.640s Traffic is not yet migrated to the latest revision.
 11.693s Ingress has not yet been reconciled.
 11.789s Waiting for load balancer to be ready
 11.971s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00069' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 50


  0%|          | 0/10 [00:00<?, ?it/s]

{'batch_size': 100, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.059s The Configuration is still working to reflect the latest desired specification.
 11.741s Traffic is not yet migrated to the latest revision.
 11.852s Ingress has not yet been reconciled.
 11.929s Waiting for load balancer to be ready
 12.094s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00070' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 100


  0%|          | 0/10 [00:00<?, ?it/s]

{'batch_size': 100, 'cpu_m': 1000}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.052s The Configuration is still working to reflect the latest desired specification.
 11.040s Traffic is not yet migrated to the latest revision.
 11.136s Ingress has not yet been reconciled.
 11.253s Waiting for load balancer to be ready
 11.783s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00071' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 100


  0%|          | 0/10 [00:00<?, ?it/s]

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def post_process(df):
    """Add mean and percentile response-time columns to ``df`` in place.

    Each row's ``response_times_ms`` entry is reduced to ``resp_time_avg``
    and to ``resp_time_p50`` / ``p90`` / ``p95`` / ``p99``. The same frame
    object is returned for convenience.
    """
    def _per_row(reducer):
        # Wrap a reducer so it operates on the row's response-time samples.
        return lambda row: reducer(row['response_times_ms'])

    df['resp_time_avg'] = df.apply(_per_row(np.mean), axis=1)
    for q in (50, 90, 95, 99):
        df[f'resp_time_p{q}'] = df.apply(
            _per_row(lambda samples, q=q: np.percentile(samples, q)),
            axis=1,
        )
    return df


# One row per (batch_size, cpu_m) combination from the smoke-test sweep.
df = pd.DataFrame(results)
# Summary columns are optional here; enable with: df = post_process(df)
df

Unnamed: 0,warmup_req_count,total_req_count,service_name,batch_size,cpu_m,response_times_ms,reqs_failed,reqs_succeeded,ram_mb
0,20,10,bentoml-iris,1,250,"[64.156, 97.866, 25.590000000000003, 74.440999...",0,10,375
1,20,10,bentoml-iris,1,1000,"[18.651999999999997, 17.194000000000003, 17.40...",0,10,1500
2,20,10,bentoml-iris,10,250,"[21.248, 44.123000000000005, 19.63000000000000...",0,10,375
3,20,10,bentoml-iris,10,1000,"[27.779, 29.82, 29.949, 22.461, 25.397, 24.301...",0,10,1500
4,20,10,bentoml-iris,50,250,"[63.803, 39.885999999999996, 49.671, 36.89, 24...",0,10,375
5,20,10,bentoml-iris,50,1000,"[37.644999999999996, 20.049, 19.264, 17.805, 1...",0,10,1500
6,20,10,bentoml-iris,100,250,"[38.9, 247.013, 54.808, 38.952, 65.908, 86.03,...",0,10,375
7,20,10,bentoml-iris,100,1000,"[45.172, 47.158, 38.576, 29.602, 64.111, 46.86...",0,10,1500


In [5]:
# Full sweep definitions, one per deployed service. List-valued entries
# (batch_size, cpu_m) are expanded into every combination by
# perform_experiment_batch; scalar entries are passed through unchanged.
all_configs = [
    {
        'warmup_req_count': 200,
        'total_req_count': 1000,
        'service_name': 'bentoml-iris',
        'batch_size': [1,5,10,20,50,100],
        'cpu_m': [250,500,1000,1500,2000],
    },
    {
        'warmup_req_count': 20,
        'total_req_count': 1000,
        'service_name': 'tfserving-resnetv2',
        'batch_size': [1,2,3,5,10],
        'cpu_m': [250,500,1000,1500,2000],
    },
    {
        'warmup_req_count': 20,
        'total_req_count': 1000,
        'service_name': 'tfserving-mobilenetv1',
        'batch_size': [1,2,3,5],
        'cpu_m': [250,500,1000,1500,2000],
    },
    {
        'warmup_req_count': 20,
        'total_req_count': 1000,
        'service_name': 'bentoml-onnx-resnet50',
        'batch_size': [1,2,3,5,10],
        'cpu_m': [250,500,1000,1500,2000],
    },
]

# results_folder = './results/batch_experiments_default'
results_folder = './results/batch_experiments_var_cpu'

# Create the output folder from Python rather than via `!mkdir -p`: the shell
# form is IPython-only and breaks on paths that contain spaces. `os` is
# imported in the notebook's first code cell.
os.makedirs(results_folder, exist_ok=True)
for config in all_configs:
    service_name = config['service_name']

    results = perform_experiment_batch(config)
    df = pd.DataFrame(data=results)

    now = util.get_time_with_tz()
    # Format only the timestamp; keeping the service name out of the strftime
    # pattern means a '%' in the name can never be read as a format directive.
    res_name = f"{service_name}_{now.strftime('%Y-%m-%d_%H-%M-%S')}"
    print('res_name:', res_name)
    # Each service's results are written as soon as its sweep finishes.
    df.to_csv(f'{results_folder}/{res_name}.csv')

configurables: ['batch_size', 'cpu_m']
{'batch_size': 1, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.090s The Configuration is still working to reflect the latest desired specification.
  9.015s Traffic is not yet migrated to the latest revision.
  9.065s Ingress has not yet been reconciled.
  9.176s Waiting for load balancer to be ready
  9.343s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00072' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 1


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 1, 'cpu_m': 500}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.088s The Configuration is still working to reflect the latest desired specification.
  6.158s Traffic is not yet migrated to the latest revision.
  6.245s Ingress has not yet been reconciled.
  6.394s Waiting for load balancer to be ready
  6.945s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00073' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 1


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 1, 'cpu_m': 1000}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.079s The Configuration is still working to reflect the latest desired specification.
  6.811s Traffic is not yet migrated to the latest revision.
  6.943s Ingress has not yet been reconciled.
  7.017s Waiting for load balancer to be ready
  7.137s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00074' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 1


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 1, 'cpu_m': 1500}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.062s The Configuration is still working to reflect the latest desired specification.
 11.214s Traffic is not yet migrated to the latest revision.
 11.282s Ingress has not yet been reconciled.
 11.513s Waiting for load balancer to be ready
 11.661s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00075' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 1


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 1, 'cpu_m': 2000}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.073s The Configuration is still working to reflect the latest desired specification.
  3.642s Traffic is not yet migrated to the latest revision.
  3.709s Ingress has not yet been reconciled.
  3.908s Waiting for load balancer to be ready
  3.968s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00076' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 1


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 5, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.106s The Configuration is still working to reflect the latest desired specification.
 11.448s Traffic is not yet migrated to the latest revision.
 11.557s Ingress has not yet been reconciled.
 11.608s Waiting for load balancer to be ready
 11.951s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00077' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 5, 'cpu_m': 500}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.064s The Configuration is still working to reflect the latest desired specification.
  6.170s Traffic is not yet migrated to the latest revision.
  6.239s Ingress has not yet been reconciled.
  6.286s Waiting for load balancer to be ready
  6.475s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00078' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 5, 'cpu_m': 1000}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.087s The Configuration is still working to reflect the latest desired specification.
 12.191s Traffic is not yet migrated to the latest revision.
 12.249s Ingress has not yet been reconciled.
 12.376s Waiting for load balancer to be ready
 12.565s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00079' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 5, 'cpu_m': 1500}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.067s The Configuration is still working to reflect the latest desired specification.
 10.666s Traffic is not yet migrated to the latest revision.
 10.735s Ingress has not yet been reconciled.
 10.811s Waiting for load balancer to be ready
 10.962s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00080' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 5, 'cpu_m': 2000}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.062s The Configuration is still working to reflect the latest desired specification.
  2.653s Traffic is not yet migrated to the latest revision.
  2.718s Ingress has not yet been reconciled.
  2.818s Waiting for load balancer to be ready
  2.970s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00081' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 10, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.088s The Configuration is still working to reflect the latest desired specification.
  7.206s Traffic is not yet migrated to the latest revision.
  7.325s Ingress has not yet been reconciled.
  7.421s Waiting for load balancer to be ready
  7.534s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00082' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 10, 'cpu_m': 500}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.059s The Configuration is still working to reflect the latest desired specification.
  6.862s Traffic is not yet migrated to the latest revision.
  6.965s Ingress has not yet been reconciled.
  7.057s Waiting for load balancer to be ready
  7.156s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00083' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 10, 'cpu_m': 1000}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.071s The Configuration is still working to reflect the latest desired specification.
  5.790s Traffic is not yet migrated to the latest revision.
  5.853s Ingress has not yet been reconciled.
  5.925s Waiting for load balancer to be ready
  6.062s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00084' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 10, 'cpu_m': 1500}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.078s The Configuration is still working to reflect the latest desired specification.
  7.400s Traffic is not yet migrated to the latest revision.
  7.445s Ingress has not yet been reconciled.
  7.573s Waiting for load balancer to be ready
  8.072s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00085' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 10, 'cpu_m': 2000}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.058s The Configuration is still working to reflect the latest desired specification.
 11.966s Traffic is not yet migrated to the latest revision.
 12.019s Ingress has not yet been reconciled.
 12.075s Waiting for load balancer to be ready
 12.428s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00086' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 20, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.064s The Configuration is still working to reflect the latest desired specification.
  9.741s Traffic is not yet migrated to the latest revision.
  9.856s Ingress has not yet been reconciled.
  9.931s Waiting for load balancer to be ready
 10.401s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00087' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 20, 'cpu_m': 500}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.060s The Configuration is still working to reflect the latest desired specification.
 11.101s Traffic is not yet migrated to the latest revision.
 11.254s Ingress has not yet been reconciled.
 11.341s Waiting for load balancer to be ready
 11.482s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00088' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 20, 'cpu_m': 1000}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.061s The Configuration is still working to reflect the latest desired specification.
  4.381s Traffic is not yet migrated to the latest revision.
  4.456s Ingress has not yet been reconciled.
  4.592s Waiting for load balancer to be ready
  4.770s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00089' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 20, 'cpu_m': 1500}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.103s The Configuration is still working to reflect the latest desired specification.
 11.343s Traffic is not yet migrated to the latest revision.
 11.460s Ingress has not yet been reconciled.
 11.531s Waiting for load balancer to be ready
 11.695s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00090' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 20, 'cpu_m': 2000}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.090s The Configuration is still working to reflect the latest desired specification.
  6.512s Traffic is not yet migrated to the latest revision.
  6.621s Ingress has not yet been reconciled.
  6.683s Waiting for load balancer to be ready
  6.952s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00091' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 50, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.085s The Configuration is still working to reflect the latest desired specification.
  7.730s Traffic is not yet migrated to the latest revision.
  7.800s Ingress has not yet been reconciled.
  7.911s Waiting for load balancer to be ready
  8.252s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00092' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 50, 'cpu_m': 500}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.062s The Configuration is still working to reflect the latest desired specification.
  7.550s Traffic is not yet migrated to the latest revision.
  7.606s Ingress has not yet been reconciled.
  7.674s Waiting for load balancer to be ready
  7.863s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00093' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 50, 'cpu_m': 1000}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.064s The Configuration is still working to reflect the latest desired specification.
 11.880s Traffic is not yet migrated to the latest revision.
 12.078s Ingress has not yet been reconciled.
 12.198s Waiting for load balancer to be ready
 12.356s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00094' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 50, 'cpu_m': 1500}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.073s The Configuration is still working to reflect the latest desired specification.
  5.255s Traffic is not yet migrated to the latest revision.
  5.285s Ingress has not yet been reconciled.
  5.387s Waiting for load balancer to be ready
  5.937s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00095' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 50, 'cpu_m': 2000}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.066s The Configuration is still working to reflect the latest desired specification.
  3.508s Traffic is not yet migrated to the latest revision.
  3.584s Ingress has not yet been reconciled.
  3.626s Waiting for load balancer to be ready
  4.042s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00096' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 100, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.061s The Configuration is still working to reflect the latest desired specification.
 11.429s Traffic is not yet migrated to the latest revision.
 11.545s Ingress has not yet been reconciled.
 11.623s Waiting for load balancer to be ready
 11.726s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00097' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 100, 'cpu_m': 500}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.069s The Configuration is still working to reflect the latest desired specification.
  9.572s Traffic is not yet migrated to the latest revision.
  9.677s Ingress has not yet been reconciled.
  9.703s Waiting for load balancer to be ready
  9.921s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00098' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 100, 'cpu_m': 1000}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.156s The Configuration is still working to reflect the latest desired specification.
 12.503s Traffic is not yet migrated to the latest revision.
 12.535s Ingress has not yet been reconciled.
 12.555s Waiting for load balancer to be ready
 12.871s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00099' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 100, 'cpu_m': 1500}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.079s The Configuration is still working to reflect the latest desired specification.
  4.705s Traffic is not yet migrated to the latest revision.
  4.763s Ingress has not yet been reconciled.
  4.823s Waiting for load balancer to be ready
  5.052s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00100' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'batch_size': 100, 'cpu_m': 2000}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.065s The Configuration is still working to reflect the latest desired specification.
  4.777s Traffic is not yet migrated to the latest revision.
  4.840s Ingress has not yet been reconciled.
  4.930s Waiting for load balancer to be ready
  5.090s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00101' is available at URL:
http://bentoml-iris.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

res_name: bentoml-iris_2021-05-07_09-26-55
configurables: ['batch_size', 'cpu_m']
{'batch_size': 1, 'cpu_m': 250}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply tfserving-resnetv2 --image ghcr.io/nimamahmoudi/tfserving-resnet:20210429213000 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'tfserving-resnetv2'.
Service 'tfserving-resnetv2' with latest revision 'tfserving-resnetv2-00025' (unchanged) is available at URL:
http://tfserving-resnetv2.default.kn.nima-dev.com
waiting for settings to converge
warming up...
running tfserving-resnetv2 workload function, batch_size: 1


  0%|          | 0/1000 [00:00<?, ?it/s]

KeyboardInterrupt: 