# Prepare Workloads

In this notebook, we prepare the workloads that will be applied to the
deployments. Ideally, this results in functions that we can call whenever we
want to send traffic to a deployment and measure the system's performance.

In [1]:
%load_ext autoreload
%autoreload 2

from tqdm.auto import tqdm
import itertools

import os
import time
import traceback

# my imports
from helpers import kube
from helpers import workload
from helpers import util
from helpers import request_funcs

using domain 192-168-23-125.nip.io
fetching imagenet v2
resizing images


  0%|          | 0/100 [00:00<?, ?it/s]

converting to bentoml files


  0%|          | 0/100 [00:00<?, ?it/s]

extracting base64 files


  0%|          | 0/100 [00:00<?, ?it/s]

preprocessing for mobilenet


  0%|          | 0/100 [00:00<?, ?it/s]

In [2]:
# Deployment spec files to load; each entry resolves to deployments/<entry>.json.
config_names = [
    'bentoml-iris-250m-512mb',
    'bentoml-onnx-resnet50-250m-512mb',
    'tfserving-resnetv2-250m-512mb',
    'tfserving-mobilenetv1-250m-512mb',
    'bentoml-pytorch-fashionmnist-250m-512mb',
    'bentoml-keras-toxic-comments-250m-512mb',
]

# Index each loaded spec by its own 'name' field rather than by file name,
# since that is the key the experiment functions use for lookup.
workload_configs = dict()
for exp_config_name in config_names:
    exp_file = f"deployments/{exp_config_name}.json"
    workload_spec = util.load_json_file(exp_file)
    workload_configs.update({workload_spec['name']: workload_spec})


In [3]:
def experiment_batch(warmup_req_count, total_req_count, batch_size, service_name, cpu_m):
    # apply new cpu configuration to the deployment
    ram_mb = int(cpu_m * 1.5)
    request_limit_override = f"'cpu={cpu_m}m,memory={ram_mb}Mi'"
    print('Request Limit Overrid:', request_limit_override)

    workload_spec = workload_configs[service_name]
    # override request and limit values
    workload_spec['opts']['--request'] = request_limit_override
    workload_spec['opts']['--limit'] = request_limit_override
    kn_command = kube.get_kn_command(**workload_spec)
    print(kn_command)
    !{kn_command}
    print('waiting for settings to converge')
    time.sleep(10)

    # an array as the same length as 
    batch_results = {
        'response_times_ms': [],
    }
    reqs_failed = 0
    reqs_succeeded = 0

    print('warming up...')
    for _ in range(warmup_req_count):
        try:
            # discard the results
            request_funcs.workload_funcs[service_name](batch_size=batch_size)
        except Exception:
            print('exception occured:')
            traceback.print_exc()

    # running the main workload
    print(f'running {service_name} workload function, batch_size: {batch_size}')
    for _ in tqdm(range(total_req_count)):
        try:
            result = request_funcs.workload_funcs[service_name](batch_size=batch_size)
            batch_results['response_times_ms'].append(result['response_time_ms'])
            reqs_succeeded += 1
        except Exception:
            print('exception occured:')
            traceback.print_exc()
            reqs_failed += 1

    batch_results.update({
        'reqs_failed': reqs_failed,
        'reqs_succeeded': reqs_succeeded,
        'ram_mb': ram_mb,
    })

    return batch_results

def perform_experiment_batch(config):
    """Run ``experiment_batch`` once per combination of the list-valued settings.

    Every key in ``config`` whose value is a list is treated as a swept
    parameter; all other keys are passed through unchanged. The Cartesian
    product of the swept values is walked in key order, so the first swept
    key varies slowest.

    Returns:
        list of dicts, one per combination, each holding the effective
        settings merged with whatever ``experiment_batch`` returned.
    """
    # configurables = ['batch_size']
    configurables = [key for key, value in config.items() if isinstance(value, list)]
    print(f'configurables: {configurables}')

    # split the config into the fixed part and the swept part
    fixed_settings = {key: value for key, value in config.items() if key not in configurables}
    swept_settings = {key: config[key] for key in configurables}

    results = []
    for chosen_values in itertools.product(*swept_settings.values()):
        configurable = dict(zip(swept_settings.keys(), chosen_values))

        print(configurable)
        new_config = {**fixed_settings, **configurable}

        # merge the measurements into the settings that produced them
        new_config.update(experiment_batch(**new_config))
        results.append(new_config)

    return results

# Small sweep (10 measured requests per combination) against bentoml-iris.
config = dict(
    warmup_req_count=20,
    total_req_count=10,
    service_name='bentoml-iris',
    # cpu_m should always be the first to avoid changing deployment too much
    cpu_m=[250, 1000],
    batch_size=[1, 10, 50, 100],
)
results = perform_experiment_batch(config)

configurables: ['cpu_m', 'batch_size']
{'cpu_m': 250, 'batch_size': 1}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.037s The Configuration is still working to reflect the latest desired specification.
 18.799s Traffic is not yet migrated to the latest revision.
 18.830s Ingress has not yet been reconciled.
 18.894s Waiting for load balancer to be ready
 19.190s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00013' is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 1


  0%|          | 0/10 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 10}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00013' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 10


  0%|          | 0/10 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 50}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00013' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 50


  0%|          | 0/10 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 100}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00013' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 100


  0%|          | 0/10 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 1}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-iris' in namespace 'default':

  0.022s The Configuration is still working to reflect the latest desired specification.
 10.342s Traffic is not yet migrated to the latest revision.
 10.373s Ingress has not yet been reconciled.
 10.425s Waiting for load balancer to be ready
 10.592s Ready to serve.

Service 'bentoml-iris' applied to latest revision 'bentoml-iris-00014' is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 1


  0%|          | 0/10 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 10}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00014' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 10


  0%|          | 0/10 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 50}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00014' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 50


  0%|          | 0/10 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 100}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00014' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 100


  0%|          | 0/10 [00:00<?, ?it/s]

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def post_process(df):
    """Add mean and percentile response-time columns to a results frame.

    Reads the ``response_times_ms`` column, where each cell holds a sequence
    of response times, and adds ``resp_time_avg`` plus ``resp_time_p50``,
    ``resp_time_p90``, ``resp_time_p95`` and ``resp_time_p99``.

    A row with an empty sequence (no successful requests) gets NaN in every
    added column instead of triggering NumPy's empty-input warning or error.

    The frame is modified in place and also returned.
    """
    def _reduce(samples, reducer):
        # empty sample list -> NaN rather than a warning/exception from NumPy
        if len(samples) == 0:
            return np.nan
        return reducer(samples)

    samples_col = df['response_times_ms']
    df['resp_time_avg'] = samples_col.apply(lambda s: _reduce(s, np.mean))
    for percentile in [50, 90, 95, 99]:
        # bind the current percentile as a default so each lambda keeps its own value
        df[f'resp_time_p{percentile}'] = samples_col.apply(
            lambda s, q=percentile: _reduce(s, lambda v: np.percentile(v, q))
        )
    return df


# Tabulate the sweep results: one row per entry in `results`.
df = pd.DataFrame(results)
# df = post_process(df)
df

Unnamed: 0,warmup_req_count,total_req_count,service_name,cpu_m,batch_size,response_times_ms,reqs_failed,reqs_succeeded,ram_mb
0,20,10,bentoml-iris,250,1,"[57.021, 8.317, 7.798, 7.428, 71.199, 8.574, 8...",0,10,375
1,20,10,bentoml-iris,250,10,"[52.246, 8.584, 8.322, 7.9799999999999995, 72....",0,10,375
2,20,10,bentoml-iris,250,50,"[57.44, 10.484, 9.716000000000001, 10.277, 66....",0,10,375
3,20,10,bentoml-iris,250,100,"[10.359, 9.072000000000001, 52.922, 9.562, 9.4...",0,10,375
4,20,10,bentoml-iris,1000,1,"[8.882, 7.659, 7.805000000000001, 7.356, 7.567...",0,10,1500
5,20,10,bentoml-iris,1000,10,"[11.188, 9.354, 10.455, 9.514999999999999, 9.4...",0,10,1500
6,20,10,bentoml-iris,1000,50,"[9.048, 8.655, 8.167, 8.05, 8.126, 8.386, 8.36...",0,10,1500
7,20,10,bentoml-iris,1000,100,"[9.587, 8.302999999999999, 8.074, 7.9419999999...",0,10,1500


In [5]:


# One sweep definition per service. List-valued entries are expanded into
# their Cartesian product by perform_experiment_batch.
all_configs = [
    {
        'warmup_req_count': 200,
        'total_req_count': 10000,
        'service_name': 'bentoml-iris',
        # cpu_m should always be the first to avoid changing deployment too much
        'cpu_m': [1000],
        'batch_size': [1,5,10,20,50,100],
    },
    {
        'warmup_req_count': 20,
        'total_req_count': 1000,
        'service_name': 'tfserving-resnetv2',
        # cpu_m should always be the first to avoid changing deployment too much
        'cpu_m': [1000],
        'batch_size': [1,2,3,5,10],
    },
    {
        'warmup_req_count': 20,
        'total_req_count': 1000,
        'service_name': 'tfserving-mobilenetv1',
        # cpu_m should always be the first to avoid changing deployment too much
        'cpu_m': [1000],
        'batch_size': [1,2,3,5],
    },
    {
        'warmup_req_count': 20,
        'total_req_count': 1000,
        'service_name': 'bentoml-onnx-resnet50',
        # cpu_m should always be the first to avoid changing deployment too much
        'cpu_m': [1000],
        'batch_size': [1,2,3,5,10],
    },
    {
        'warmup_req_count': 20,
        'total_req_count': 10000,
        'service_name': 'bentoml-pytorch-fashion-mnist',
        # cpu_m should always be the first to avoid changing deployment too much
        'cpu_m': [1000],
        'batch_size': [1,5,10,20,50,100],
    },
    {
        'warmup_req_count': 20,
        'total_req_count': 10000,
        'service_name': 'bentoml-keras-toxic-comments',
        # cpu_m should always be the first to avoid changing deployment too much
        'cpu_m': [1000],
        'batch_size': [1,5,10,20,50,100],
    },
]
results_folder = './results/batch_experiments_default'


# Create the output directory in Python (instead of `!mkdir -p`) so that a
# bad path raises here, before the sweeps run, rather than at the first
# to_csv call. `os` comes from the notebook's import cell.
os.makedirs(results_folder, exist_ok=True)
for config in all_configs:
    service_name = config['service_name']

    results = perform_experiment_batch(config)
    df = pd.DataFrame(data=results)

    # one timestamped CSV per service, written as soon as that service's sweep ends
    now = util.get_time_with_tz()
    res_name = now.strftime(f'{service_name}_%Y-%m-%d_%H-%M-%S')
    print('res_name:', res_name)
    df.to_csv(f'{results_folder}/{res_name}.csv')

configurables: ['cpu_m', 'batch_size']
{'cpu_m': 1000, 'batch_size': 1}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00014' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 1


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 5}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00014' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 10}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00014' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 20}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00014' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 50}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00014' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 100}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-iris --image ghcr.io/nimamahmoudi/bentoml-iris-classifier:20210429201447 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-iris'.
Service 'bentoml-iris' with latest revision 'bentoml-iris-00014' (unchanged) is available at URL:
http://bentoml-iris.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-iris workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

res_name: bentoml-iris_2021-06-25_11-23-39
configurables: ['cpu_m', 'batch_size']
{'cpu_m': 1000, 'batch_size': 1}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply tfserving-resnetv2 --image ghcr.io/nimamahmoudi/tfserving-resnet:20210429213000 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'tfserving-resnetv2' in namespace 'default':

  0.020s The Configuration is still working to reflect the latest desired specification.
 21.182s Traffic is not yet migrated to the latest revision.
 21.210s Ingress has not yet been reconciled.
 21.265s Waiting for load balancer to be ready
 21.423s Ready to serve.

Service 'tfserving-resnetv2' applied to latest revision 'tfserving-resnetv2-00006' is available at URL:
http://tfserving-resnetv2.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
runnin

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 2}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply tfserving-resnetv2 --image ghcr.io/nimamahmoudi/tfserving-resnet:20210429213000 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'tfserving-resnetv2'.
Service 'tfserving-resnetv2' with latest revision 'tfserving-resnetv2-00006' (unchanged) is available at URL:
http://tfserving-resnetv2.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running tfserving-resnetv2 workload function, batch_size: 2


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 3}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply tfserving-resnetv2 --image ghcr.io/nimamahmoudi/tfserving-resnet:20210429213000 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'tfserving-resnetv2'.
Service 'tfserving-resnetv2' with latest revision 'tfserving-resnetv2-00006' (unchanged) is available at URL:
http://tfserving-resnetv2.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running tfserving-resnetv2 workload function, batch_size: 3


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 5}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply tfserving-resnetv2 --image ghcr.io/nimamahmoudi/tfserving-resnet:20210429213000 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'tfserving-resnetv2'.
Service 'tfserving-resnetv2' with latest revision 'tfserving-resnetv2-00006' (unchanged) is available at URL:
http://tfserving-resnetv2.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running tfserving-resnetv2 workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 10}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply tfserving-resnetv2 --image ghcr.io/nimamahmoudi/tfserving-resnet:20210429213000 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'tfserving-resnetv2'.
Service 'tfserving-resnetv2' with latest revision 'tfserving-resnetv2-00006' (unchanged) is available at URL:
http://tfserving-resnetv2.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running tfserving-resnetv2 workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

res_name: tfserving-resnetv2_2021-06-25_12-14-24
configurables: ['cpu_m', 'batch_size']
{'cpu_m': 1000, 'batch_size': 1}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply tfserving-mobilenetv1 --image ghcr.io/nimamahmoudi/tfserving-mobilenet:20210430005829 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'tfserving-mobilenetv1' in namespace 'default':

  0.025s The Configuration is still working to reflect the latest desired specification.
 10.160s Traffic is not yet migrated to the latest revision.
 10.203s Ingress has not yet been reconciled.
 10.237s Waiting for load balancer to be ready
 10.408s Ready to serve.

Service 'tfserving-mobilenetv1' applied to latest revision 'tfserving-mobilenetv1-00007' is available at URL:
http://tfserving-mobilenetv1.default.192-168-23-125.nip.io
waiting for settings to conve

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 2}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply tfserving-mobilenetv1 --image ghcr.io/nimamahmoudi/tfserving-mobilenet:20210430005829 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'tfserving-mobilenetv1'.
Service 'tfserving-mobilenetv1' with latest revision 'tfserving-mobilenetv1-00007' (unchanged) is available at URL:
http://tfserving-mobilenetv1.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running tfserving-mobilenetv1 workload function, batch_size: 2


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 3}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply tfserving-mobilenetv1 --image ghcr.io/nimamahmoudi/tfserving-mobilenet:20210430005829 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'tfserving-mobilenetv1'.
Service 'tfserving-mobilenetv1' with latest revision 'tfserving-mobilenetv1-00007' (unchanged) is available at URL:
http://tfserving-mobilenetv1.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running tfserving-mobilenetv1 workload function, batch_size: 3


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 5}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply tfserving-mobilenetv1 --image ghcr.io/nimamahmoudi/tfserving-mobilenet:20210430005829 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'tfserving-mobilenetv1'.
Service 'tfserving-mobilenetv1' with latest revision 'tfserving-mobilenetv1-00007' (unchanged) is available at URL:
http://tfserving-mobilenetv1.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running tfserving-mobilenetv1 workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

res_name: tfserving-mobilenetv1_2021-06-25_12-43-23
configurables: ['cpu_m', 'batch_size']
{'cpu_m': 1000, 'batch_size': 1}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-onnx-resnet50 --image ghcr.io/nimamahmoudi/bentoml-onnx-resnet50-b64:20210505124300 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-onnx-resnet50' in namespace 'default':

  0.021s The Configuration is still working to reflect the latest desired specification.
 14.368s Traffic is not yet migrated to the latest revision.
 14.390s Ingress has not yet been reconciled.
 14.446s Waiting for load balancer to be ready
 14.606s Ready to serve.

Service 'bentoml-onnx-resnet50' applied to latest revision 'bentoml-onnx-resnet50-00007' is available at URL:
http://bentoml-onnx-resnet50.default.192-168-23-125.nip.io
waiting for settings

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 2}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-onnx-resnet50 --image ghcr.io/nimamahmoudi/bentoml-onnx-resnet50-b64:20210505124300 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-onnx-resnet50'.
Service 'bentoml-onnx-resnet50' with latest revision 'bentoml-onnx-resnet50-00007' (unchanged) is available at URL:
http://bentoml-onnx-resnet50.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-onnx-resnet50 workload function, batch_size: 2


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 3}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-onnx-resnet50 --image ghcr.io/nimamahmoudi/bentoml-onnx-resnet50-b64:20210505124300 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-onnx-resnet50'.
Service 'bentoml-onnx-resnet50' with latest revision 'bentoml-onnx-resnet50-00007' (unchanged) is available at URL:
http://bentoml-onnx-resnet50.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-onnx-resnet50 workload function, batch_size: 3


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 5}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-onnx-resnet50 --image ghcr.io/nimamahmoudi/bentoml-onnx-resnet50-b64:20210505124300 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-onnx-resnet50'.
Service 'bentoml-onnx-resnet50' with latest revision 'bentoml-onnx-resnet50-00007' (unchanged) is available at URL:
http://bentoml-onnx-resnet50.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-onnx-resnet50 workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 10}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-onnx-resnet50 --image ghcr.io/nimamahmoudi/bentoml-onnx-resnet50-b64:20210505124300 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-onnx-resnet50'.
Service 'bentoml-onnx-resnet50' with latest revision 'bentoml-onnx-resnet50-00007' (unchanged) is available at URL:
http://bentoml-onnx-resnet50.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-onnx-resnet50 workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

res_name: bentoml-onnx-resnet50_2021-06-25_13-25-52
configurables: ['cpu_m', 'batch_size']
{'cpu_m': 1000, 'batch_size': 1}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-pytorch-fashion-mnist' in namespace 'default':

  0.024s The Configuration is still working to reflect the latest desired specification.
 76.095s Traffic is not yet migrated to the latest revision.
 76.122s Ingress has not yet been reconciled.
 76.163s Waiting for load balancer to be ready
 76.331s Ready to serve.

Service 'bentoml-pytorch-fashion-mnist' applied to latest revision 'bentoml-pytorch-fashion-mnist-00008' is available at URL:
http://bentoml-pytorch-fashion-mnist.default

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 5}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00008' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 10}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00008' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 20}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00008' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 50}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00008' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 100}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00008' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

res_name: bentoml-pytorch-fashion-mnist_2021-06-25_13-39-17
configurables: ['cpu_m', 'batch_size']
{'cpu_m': 1000, 'batch_size': 1}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-keras-toxic-comments' in namespace 'default':

  0.026s The Configuration is still working to reflect the latest desired specification.
150.730s Revision "bentoml-keras-toxic-comments-00008" failed with message: Initial scale was never achieved.
Error: RevisionFailed: Revision "bentoml-keras-toxic-comments-00008" failed with message: Initial scale was never achieved.
Run 'kn --help' for usage
waiting for settings to converge
warming up...
exception occured:


Traceback (most recent call last):
  File "<ipython-input-1-1941da4c2898>", line 28, in experiment_batch
    request_funcs.workload_funcs[service_name](batch_size=batch_size)
  File "/home/ubuntu/serverless-ml-serving/experiments/helpers/request_funcs.py", line 64, in request_keras_toxic_comments
    response.raise_for_status()
  File "/home/ubuntu/miniconda/lib/python3.8/site-packages/requests/models.py", line 943, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 408 Client Error: Request Timeout for url: http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io/predict


running bentoml-keras-toxic-comments workload function, batch_size: 1


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 5}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00007' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 10}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00007' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 20}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00007' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 50}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00007' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 100}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00007' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

res_name: bentoml-keras-toxic-comments_2021-06-25_13-54-14


In [6]:
# Sweep definitions, one row per service:
#   (service_name, warmup_req_count, total_req_count, cpu_m grid, batch_size grid)
_sweep_table = (
    ('bentoml-iris',                 200, 10000, (250, 500, 1000, 1500, 2000), (1, 5, 10, 20, 50, 100)),
    ('tfserving-resnetv2',            20,  1000, (500, 1000, 1500, 2000),      (1, 2, 3, 5, 10)),
    ('tfserving-mobilenetv1',         20,  1000, (250, 500, 1000, 1500, 2000), (1, 2, 3, 5)),
    ('bentoml-onnx-resnet50',         20,  1000, (250, 500, 1000, 1500, 2000), (1, 2, 3, 5, 10)),
    ('bentoml-pytorch-fashion-mnist', 20, 10000, (250, 500, 1000, 1500, 2000), (1, 5, 10, 20, 50, 100)),
    ('bentoml-keras-toxic-comments',  20, 10000, (250, 500, 1000, 1500, 2000), (1, 5, 10, 20, 50, 100)),
)

# Expand each row into the dict layout consumed by perform_experiment_batch.
# Key order is significant: cpu_m should always be listed before batch_size
# to avoid changing the deployment too much (presumably the first list-valued
# key is the slowest-varying one in the sweep -- confirm in
# perform_experiment_batch). Each config gets its own fresh list objects.
all_configs = [
    {
        'warmup_req_count': warmup_count,
        'total_req_count': total_count,
        'service_name': svc_name,
        'cpu_m': list(cpu_grid),
        'batch_size': list(batch_grid),
    }
    for svc_name, warmup_count, total_count, cpu_grid, batch_grid in _sweep_table
]

# Directory that receives one CSV per service run.
results_folder = './results/batch_experiments_var_cpu'


# Create the output directory in-process before any experiment starts.
# Unlike the shell escape `!mkdir -p`, a failure here raises immediately
# instead of printing a shell error and only surfacing when the first CSV is
# written, after a full (long-running) sweep has already completed.
os.makedirs(results_folder, exist_ok=True)

for config in all_configs:
    service_name = config['service_name']

    # Run the whole sweep described by this config and tabulate whatever
    # records perform_experiment_batch returns.
    results = perform_experiment_batch(config)
    df = pd.DataFrame(data=results)

    # Timestamped file name so repeated runs never overwrite each other.
    # service_name is kept out of the strftime format string so that a '%'
    # in a service name cannot be interpreted as a format directive.
    now = util.get_time_with_tz()
    res_name = f"{service_name}_{now.strftime('%Y-%m-%d_%H-%M-%S')}"
    print('res_name:', res_name)
    df.to_csv(os.path.join(results_folder, f'{res_name}.csv'))

configurables: ['cpu_m', 'batch_size']
{'cpu_m': 250, 'batch_size': 1}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-pytorch-fashion-mnist' in namespace 'default':

  0.026s The Configuration is still working to reflect the latest desired specification.
 77.761s Traffic is not yet migrated to the latest revision.
 77.787s Ingress has not yet been reconciled.
 77.837s Waiting for load balancer to be ready
 78.355s Ready to serve.

Service 'bentoml-pytorch-fashion-mnist' applied to latest revision 'bentoml-pytorch-fashion-mnist-00009' is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
wa

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 5}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00009' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 10}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00009' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 20}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00009' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 50}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00009' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 100}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00009' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 1}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-pytorch-fashion-mnist' in namespace 'default':

  0.020s The Configuration is still working to reflect the latest desired specification.
 11.986s Traffic is not yet migrated to the latest revision.
 12.024s Ingress has not yet been reconciled.
 12.041s Waiting for load balancer to be ready
 12.229s Ready to serve.

Service 'bentoml-pytorch-fashion-mnist' applied to latest revision 'bentoml-pytorch-fashion-mnist-00010' is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fas

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 5}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00010' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 10}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00010' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 20}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00010' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 50}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00010' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 100}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00010' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 1}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-pytorch-fashion-mnist' in namespace 'default':

  0.022s The Configuration is still working to reflect the latest desired specification.
 15.648s Traffic is not yet migrated to the latest revision.
 15.670s Ingress has not yet been reconciled.
 15.722s Waiting for load balancer to be ready
 15.883s Ready to serve.

Service 'bentoml-pytorch-fashion-mnist' applied to latest revision 'bentoml-pytorch-fashion-mnist-00011' is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pyto

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 5}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00011' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 10}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00011' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 20}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00011' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 50}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00011' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 100}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00011' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 1}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-pytorch-fashion-mnist' in namespace 'default':

  0.019s The Configuration is still working to reflect the latest desired specification.
  8.400s Traffic is not yet migrated to the latest revision.
  8.421s Ingress has not yet been reconciled.
  8.480s Waiting for load balancer to be ready
  8.638s Ready to serve.

Service 'bentoml-pytorch-fashion-mnist' applied to latest revision 'bentoml-pytorch-fashion-mnist-00012' is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pyto

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 5}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00012' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 10}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00012' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 20}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00012' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 50}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00012' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 100}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00012' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 1}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-pytorch-fashion-mnist' in namespace 'default':

  0.023s The Configuration is still working to reflect the latest desired specification.
 13.791s Traffic is not yet migrated to the latest revision.
 13.810s Ingress has not yet been reconciled.
 13.849s Waiting for load balancer to be ready
 14.022s Ready to serve.

Service 'bentoml-pytorch-fashion-mnist' applied to latest revision 'bentoml-pytorch-fashion-mnist-00013' is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pyto

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 5}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00013' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 10}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00013' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 20}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00013' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 50}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00013' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 100}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-pytorch-fashion-mnist --image ghcr.io/nimamahmoudi/bentoml-pytorch-fashion-mnist:20210614122950 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-pytorch-fashion-mnist'.
Service 'bentoml-pytorch-fashion-mnist' with latest revision 'bentoml-pytorch-fashion-mnist-00013' (unchanged) is available at URL:
http://bentoml-pytorch-fashion-mnist.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-pytorch-fashion-mnist workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

res_name: bentoml-pytorch-fashion-mnist_2021-06-25_15-36-36
configurables: ['cpu_m', 'batch_size']
{'cpu_m': 250, 'batch_size': 1}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-keras-toxic-comments' in namespace 'default':

  0.044s The Configuration is still working to reflect the latest desired specification.
150.744s Revision "bentoml-keras-toxic-comments-00009" failed with message: Initial scale was never achieved.
Error: RevisionFailed: Revision "bentoml-keras-toxic-comments-00009" failed with message: Initial scale was never achieved.
Run 'kn --help' for usage
waiting for settings to converge
warming up...
exception occured:


Traceback (most recent call last):
  File "<ipython-input-1-1941da4c2898>", line 28, in experiment_batch
    request_funcs.workload_funcs[service_name](batch_size=batch_size)
  File "/home/ubuntu/serverless-ml-serving/experiments/helpers/request_funcs.py", line 64, in request_keras_toxic_comments
    response.raise_for_status()
  File "/home/ubuntu/miniconda/lib/python3.8/site-packages/requests/models.py", line 943, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 408 Client Error: Request Timeout for url: http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io/predict


running bentoml-keras-toxic-comments workload function, batch_size: 1


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 5}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00007' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 10}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00007' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 20}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00007' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 50}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00007' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 250, 'batch_size': 100}
Request Limit Overrid: 'cpu=250m,memory=375Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=250m,memory=375Mi' \
  --request 'cpu=250m,memory=375Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00007' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 1}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-keras-toxic-comments' in namespace 'default':

  0.030s The Configuration is still working to reflect the latest desired specification.
  5.198s Traffic is not yet migrated to the latest revision.
  5.232s Ingress has not yet been reconciled.
  5.270s Waiting for load balancer to be ready
  5.436s Ready to serve.

Service 'bentoml-keras-toxic-comments' applied to latest revision 'bentoml-keras-toxic-comments-00010' is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-ker

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 5}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00010' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 10}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00010' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 20}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00010' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 50}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00010' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 500, 'batch_size': 100}
Request Limit Overrid: 'cpu=500m,memory=750Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=500m,memory=750Mi' \
  --request 'cpu=500m,memory=750Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00010' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 1}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-keras-toxic-comments' in namespace 'default':

  0.027s The Configuration is still working to reflect the latest desired specification.
 10.858s Traffic is not yet migrated to the latest revision.
 10.880s Ingress has not yet been reconciled.
 10.930s Waiting for load balancer to be ready
 11.099s Ready to serve.

Service 'bentoml-keras-toxic-comments' applied to latest revision 'bentoml-keras-toxic-comments-00011' is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bent

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 5}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00011' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 10}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00011' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 20}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00011' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 50}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00011' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1000, 'batch_size': 100}
Request Limit Overrid: 'cpu=1000m,memory=1500Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1000m,memory=1500Mi' \
  --request 'cpu=1000m,memory=1500Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00011' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 1}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-keras-toxic-comments' in namespace 'default':

  0.027s The Configuration is still working to reflect the latest desired specification.
  5.677s Traffic is not yet migrated to the latest revision.
  5.695s Ingress has not yet been reconciled.
  5.751s Waiting for load balancer to be ready
  5.967s Ready to serve.

Service 'bentoml-keras-toxic-comments' applied to latest revision 'bentoml-keras-toxic-comments-00012' is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bent

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 5}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00012' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 10}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00012' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 20}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00012' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 50}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00012' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 1500, 'batch_size': 100}
Request Limit Overrid: 'cpu=1500m,memory=2250Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=1500m,memory=2250Mi' \
  --request 'cpu=1500m,memory=2250Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00012' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 1}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
Applying service 'bentoml-keras-toxic-comments' in namespace 'default':

  0.026s The Configuration is still working to reflect the latest desired specification.
  6.187s Traffic is not yet migrated to the latest revision.
  6.224s Ingress has not yet been reconciled.
  6.268s Waiting for load balancer to be ready
  6.436s Ready to serve.

Service 'bentoml-keras-toxic-comments' applied to latest revision 'bentoml-keras-toxic-comments-00013' is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bent

  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 5}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00013' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 5


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 10}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00013' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 10


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 20}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00013' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 20


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 50}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00013' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 50


  0%|          | 0/1000 [00:00<?, ?it/s]

{'cpu_m': 2000, 'batch_size': 100}
Request Limit Overrid: 'cpu=2000m,memory=3000Mi'
kn service apply bentoml-keras-toxic-comments --image ghcr.io/nimamahmoudi/bentoml-keras-toxic-comment-classification:20210622155420 \
  --limit 'cpu=2000m,memory=3000Mi' \
  --request 'cpu=2000m,memory=3000Mi' \
  --port 5000 \
  -a autoscaling.knative.dev/target=1 \
  -a autoscaling.knative.dev/metric=concurrency
No changes to apply to service 'bentoml-keras-toxic-comments'.
Service 'bentoml-keras-toxic-comments' with latest revision 'bentoml-keras-toxic-comments-00013' (unchanged) is available at URL:
http://bentoml-keras-toxic-comments.default.192-168-23-125.nip.io
waiting for settings to converge
warming up...
running bentoml-keras-toxic-comments workload function, batch_size: 100


  0%|          | 0/1000 [00:00<?, ?it/s]

res_name: bentoml-keras-toxic-comments_2021-06-25_16-28-50
