merged master and resolved conflicts
harshbafna committed Jul 22, 2020
2 parents 058c6b9 + ebe55b0 commit c43c757
Showing 48 changed files with 841 additions and 893 deletions.
7 changes: 6 additions & 1 deletion CONTRIBUTING.md
@@ -7,7 +7,12 @@ If you are interested in contributing to TorchServe, your contributions will fall
2. You want to implement a feature or bug-fix for an outstanding issue.
- Search for your issue here: https://github.com/pytorch/serve/issues
- Pick an issue and comment on the task that you want to work on this feature.
- If you need more context on a particular issue, please ask.
- To ensure your changes don't break any of the existing features, run the sanity suite as follows from the serve directory:
```bash
./torchserve_sanity.sh
```
- For running individual test suites, refer to the [code_coverage](docs/code_coverage.md) documentation
- If you need more context on a particular issue, please raise a ticket on the [`TorchServe` GH repo](https://github.com/pytorch/serve/issues/new/choose) or connect to [PyTorch's Slack channel](https://pytorch.slack.com/)

Once you finish implementing a feature or bug-fix, please send a Pull Request to https://github.com/pytorch/serve. Use this [template](pull_request_template.md) when creating a Pull Request.

10 changes: 2 additions & 8 deletions README.md
@@ -116,16 +116,10 @@ Run the following script from the top of the source directory.

NOTE: This script uninstalls existing `torchserve` and `torch-model-archiver` installations

#### For Debian Based Systems
Verified on EC2 instances running Ubuntu DL AMI 28.x
#### For Debian Based Systems / macOS

```
./scripts/install_from_src_ubuntu
```
#### For macOS

```
./scripts/install_from_src_macos
./scripts/install_from_src
```

For information about the model archiver, see [detailed documentation](model-archiver/README.md).
215 changes: 145 additions & 70 deletions benchmarks/README.md

Large diffs are not rendered by default.

361 changes: 361 additions & 0 deletions benchmarks/benchmark-ab.py
@@ -0,0 +1,361 @@
import csv
import json
import os
import re
import shutil
import sys
import time
from subprocess import Popen, PIPE

import click
import click_config_file
import matplotlib.pyplot as plt
import pandas as pd
import requests

default_ab_params = {'url': "https://torchserve.s3.amazonaws.com/mar_files/resnet-18.mar",
'gpus': '',
'exec_env': 'local',
'batch_size': 1,
'batch_delay': 200,
'workers': 1,
'concurrency': 10,
'requests': 100,
'input': '../examples/image_classifier/kitten.jpg',
'content_type': 'application/jpg',
'image': '',
'docker_runtime': ''}
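# These defaults mirror the click options declared on benchmark() below;
# update_exec_params() overrides an entry only when the supplied value
# differs from the default.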

execution_params = default_ab_params.copy()
result_file = "/tmp/benchmark/result.txt"
metric_log = "/tmp/benchmark/logs/model_metrics.log"


def json_provider(file_path, cmd_name):
with open(file_path) as config_data:
return json.load(config_data)
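# The JSON file consumed above maps CLI option names to values. A minimal
# sketch (the --config flag name comes from click_config_file and is an
# assumption here):
# {
#     "url": "https://torchserve.s3.amazonaws.com/mar_files/vgg11.mar",
#     "concurrency": 100,
#     "requests": 10000
# }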


@click.command()
@click.argument('test_plan', default='custom')
@click.option('--url', '-u', default='https://torchserve.s3.amazonaws.com/mar_files/resnet-18.mar',
help='Input model url')
@click.option('--exec_env', '-e', type=click.Choice(['local', 'docker'], case_sensitive=False), default='local',
help='Execution environment')
@click.option('--gpus', '-g', default='',
help='Number of gpus to run docker container with. Leave empty to run CPU based docker container')
@click.option('--concurrency', '-c', default=10, help='Number of concurrent requests to run')
@click.option('--requests', '-r', default=100, help='Number of requests')
@click.option('--batch_size', '-bs', default=1, help='Batch size of model')
@click.option('--batch_delay', '-bd', default=200, help='Batch delay of model')
@click.option('--input', '-i', 'input_file', default='../examples/image_classifier/kitten.jpg',
type=click.Path(exists=True), help='The input file path for model')
@click.option('--content_type', '-ic', default='application/jpg', help='Input file content type')
@click.option('--workers', '-w', default=1, help='Number of model workers')
@click.option('--image', '-di', default='', help='Use custom docker image for benchmark')
@click.option('--docker_runtime', '-dr', default='', help='Specify required docker runtime')
@click_config_file.configuration_option(provider=json_provider, implicit=False,
help="Read configuration from a JSON file")
def benchmark(test_plan, url, gpus, exec_env, concurrency, requests, batch_size, batch_delay, input_file, workers,
content_type, image, docker_runtime):
input_params = {'url': url,
'gpus': gpus,
'exec_env': exec_env,
'batch_size': batch_size,
'batch_delay': batch_delay,
'workers': workers,
'concurrency': concurrency,
'requests': requests,
'input': input_file,
'content_type': content_type,
'image': image,
'docker_runtime': docker_runtime
}

# set ab params
update_plan_params[test_plan]()
update_exec_params(input_params)
click.secho("Starting AB benchmark suite...", fg='green')
click.secho(f"\n\nConfigured execution parameters are:", fg='green')
click.secho(f"{execution_params}", fg="blue")

# Setup execution env
if execution_params['exec_env'] == 'local':
click.secho("\n\nPreparing local execution...", fg='green')
        local_torchserve_start()
else:
click.secho("\n\nPreparing docker execution...", fg='green')
docker_torchserve_start()

check_torchserve_health()
run_benchmark()
generate_report()


def check_torchserve_health():
    attempts = 3
    retry = 0
    click.secho("*Testing system health...", fg='green')
    while retry < attempts:
        try:
            resp = requests.get("http://localhost:8080/ping")
            if resp.status_code == 200:
                click.secho(resp.text)
                return True
        except Exception:
            pass
        # count every failed attempt (connection error or non-200 response)
        # so a misbehaving server cannot loop forever
        retry += 1
        time.sleep(3)
    failure_exit("Could not connect to TorchServe instance at http://localhost:8080/.")


def run_benchmark():
register_model()

click.secho("\n\nExecuting Apache Bench tests ...", fg='green')
click.secho("*Executing inference performance test...", fg='green')
ab_cmd = f"ab -c {execution_params['concurrency']} -n {execution_params['requests']} -k -p /tmp/benchmark/input -T " \
f"{execution_params['content_type']} http://127.0.0.1:8080/predictions/benchmark > {result_file}"
execute(ab_cmd, wait=True)

unregister_model()
stop_torchserve()


def register_model():
click.secho("*Registering model...", fg='green')
url = "http://localhost:8081/models"
data = {'model_name': 'benchmark', 'url': execution_params['url'], 'batch_delay': execution_params['batch_delay'],
'batch_size': execution_params['batch_size'], 'initial_workers': execution_params['workers'],
'synchronous': 'true'}
resp = requests.post(url, params=data)
if not resp.status_code == 200:
failure_exit(f"Failed to register model.\n{resp.text}")
click.secho(resp.text)


def unregister_model():
click.secho("*Unregistering model ...", fg='green')
resp = requests.delete("http://localhost:8081/models/benchmark")
if not resp.status_code == 200:
failure_exit(f"Failed to unregister model. \n {resp.text}")
click.secho(resp.text)


def execute(command, wait=False, stdout=None, stderr=None, shell=True):
print(command)
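    # shell=True runs the command string through the shell, which is what lets
    # callers append redirections such as '> /tmp/benchmark/result.txt'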
cmd = Popen(command, shell=shell, close_fds=True, stdout=stdout, stderr=stderr, universal_newlines=True)
if wait:
cmd.wait()
return cmd


def execute_return_stdout(cmd):
proc = execute(cmd, stdout=PIPE)
return proc.communicate()[0].strip()


def local_torchserve_start():
click.secho("*Terminating any existing Torchserve instance ...", fg='green')
execute("torchserve --stop", wait=True)
click.secho("*Setting up model store...", fg='green')
prepare_local_dependency()
click.secho("*Starting local Torchserve instance...", fg='green')
execute("torchserve --start --model-store /tmp/model_store "
"--ts-config /tmp/benchmark/conf/config.properties > /tmp/benchmark/logs/model_metrics.log")
time.sleep(3)


def docker_torchserve_start():
prepare_docker_dependency()
enable_gpu = ''
if execution_params['image']:
docker_image = execution_params['image']
if execution_params['gpus']:
enable_gpu = f"--gpus {execution_params['gpus']}"
else:
if execution_params['gpus']:
docker_image = "pytorch/torchserve:latest-gpu"
enable_gpu = f"--gpus {execution_params['gpus']}"
else:
docker_image = "pytorch/torchserve:latest"
execute(f"docker pull {docker_image}", wait=True)

    # delete existing ts container instance
    click.secho("*Removing existing ts container instance...", fg='green')
execute('docker rm -f ts', wait=True)

click.secho(f"*Starting docker container of image {docker_image} ...", fg='green')
docker_run_cmd = f"docker run {execution_params['docker_runtime']} --name ts --user root -p 8080:8080 -p 8081:8081 " \
f"-v /tmp/benchmark:/tmp/benchmark {enable_gpu} -itd {docker_image} " \
f"\"torchserve --start --model-store /home/model-server/model-store " \
f"--ts-config /tmp/benchmark/conf/config.properties > /tmp/benchmark/logs/model_metrics.log\""
execute(docker_run_cmd, wait=True)
time.sleep(5)


def prepare_local_dependency():
shutil.rmtree('/tmp/model_store/', ignore_errors=True)
os.makedirs("/tmp/model_store/", exist_ok=True)
prepare_common_dependency()


def prepare_docker_dependency():
prepare_common_dependency()


def prepare_common_dependency():
    input_file = execution_params['input']
    shutil.rmtree("/tmp/benchmark", ignore_errors=True)
    os.makedirs("/tmp/benchmark/conf", exist_ok=True)
    os.makedirs("/tmp/benchmark/logs", exist_ok=True)
    shutil.copy('config.properties', '/tmp/benchmark/conf/')
    shutil.copyfile(input_file, '/tmp/benchmark/input')


def update_exec_params(input_param):
for k, v in input_param.items():
if default_ab_params[k] != input_param[k]:
execution_params[k] = input_param[k]


def generate_report():
click.secho("\n\nGenerating Reports...", fg='green')
extract_prediction_latency()
generate_csv_output()
generate_latency_graph()
click.secho("\nTest suite execution complete.", fg='green')


def extract_prediction_latency():
pattern = re.compile("PredictionTime")
all_lines = []
with open(metric_log) as f:
lines = f.readlines()
for line in lines:
if pattern.search(line):
all_lines.append(line.split("|")[0].split(':')[3].strip())

with open('/tmp/benchmark/predict.txt', 'w') as outf:
all_lines = map(lambda x: x + '\n', all_lines)
outf.writelines(all_lines)


def generate_csv_output():
click.secho("*Generating CSV output...", fg='green')
batched_requests = execution_params['requests'] / execution_params['batch_size']
line50 = int(batched_requests / 2)
line90 = int(batched_requests * 9 / 10)
line99 = int(batched_requests * 99 / 100)
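    # the script assumes one PredictionTime log line per batch, hence the
    # division by batch_size when locating the percentile rows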
artifacts = {}
with open('/tmp/benchmark/result.txt') as f:
data = f.readlines()
artifacts['Benchmark'] = "AB"
artifacts['Model'] = execution_params['url']
artifacts['Concurrency'] = execution_params['concurrency']
artifacts['Requests'] = execution_params['requests']
artifacts['TS failed requests'] = extract_entity(data, 'Failed requests:', -1)
artifacts['TS throughput'] = extract_entity(data, 'Requests per second:', -3)
artifacts['TS latency P50'] = extract_entity(data, '50%', -1)
artifacts['TS latency P90'] = extract_entity(data, '90%', -1)
artifacts['TS latency P99'] = extract_entity(data, '99%', -1)
    artifacts['TS latency mean'] = extract_entity(data, r'Time per request:.*mean\)', -3)
artifacts['TS error rate'] = int(artifacts['TS failed requests']) / execution_params['requests'] * 100

    with open('/tmp/benchmark/predict.txt') as f:
        lines = f.readlines()
        # sort latencies numerically so the precomputed line indices yield
        # actual P50/P90/P99 values rather than arrival-order samples
        lines.sort(key=float)
        artifacts['Model_p50'] = lines[line50].strip()
        artifacts['Model_p90'] = lines[line90].strip()
        artifacts['Model_p99'] = lines[line99].strip()

with open('/tmp/benchmark/ab_report.csv', 'w') as csv_file:
csvwriter = csv.writer(csv_file)
csvwriter.writerow(artifacts.keys())
csvwriter.writerow(artifacts.values())

return artifacts


def extract_entity(data, pattern, index, delim=" "):
pattern = re.compile(pattern)
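    # e.g. ab prints 'Requests per second:    34.12 [#/sec] (mean)'; splitting
    # on the delimiter keeps the empty strings produced by the padding, so a
    # negative index such as -3 lands on the numeric field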
for line in data:
if pattern.search(line):
return line.split(delim)[index].strip()


def generate_latency_graph():
click.secho("*Preparing graphs...", fg='green')
df = pd.read_csv('/tmp/benchmark/predict.txt', header=None, names=['latency'])
iteration = df.index
latency = df.latency
    plt.xlabel('Requests')
plt.ylabel('Prediction time')
plt.title('Prediction latency')
plt.bar(iteration, latency)
plt.savefig("/tmp/benchmark/predict_latency.png")


def stop_torchserve():
if execution_params['exec_env'] == 'local':
click.secho("*Terminating Torchserve instance...", fg='green')
execute("torchserve --stop", wait=True)
else:
click.secho("*Removing benchmark container 'ts'...", fg='green')
execute('docker rm -f ts', wait=True)
click.secho("Apache Bench Execution completed.", fg='green')


# Test plans (soak, vgg11_1000r_10c, vgg11_10000r_100c,...)
def soak():
execution_params['requests'] = 100000
execution_params['concurrency'] = 10


def vgg11_1000r_10c():
execution_params['url'] = 'https://torchserve.s3.amazonaws.com/mar_files/vgg11.mar'
execution_params['requests'] = 1000
execution_params['concurrency'] = 10


def vgg11_10000r_100c():
execution_params['url'] = 'https://torchserve.s3.amazonaws.com/mar_files/vgg11.mar'
execution_params['requests'] = 10000
execution_params['concurrency'] = 100


def resnet152_batch():
execution_params['url'] = 'https://torchserve.s3.amazonaws.com/mar_files/resnet-152-batch.mar'
execution_params['requests'] = 1000
execution_params['concurrency'] = 10
execution_params['batch_size'] = 4


def resnet152_batch_docker():
execution_params['url'] = 'https://torchserve.s3.amazonaws.com/mar_files/resnet-152-batch.mar'
execution_params['requests'] = 1000
execution_params['concurrency'] = 10
execution_params['batch_size'] = 4
execution_params['exec_env'] = 'docker'


def custom():
pass


update_plan_params = {
"soak": soak,
"vgg11_1000r_10c": vgg11_1000r_10c,
"vgg11_10000r_100c": vgg11_10000r_100c,
"resnet152_batch": resnet152_batch,
"resnet152_batch_docker": resnet152_batch_docker,
"custom": custom
}


def failure_exit(msg):
    click.secho(f"{msg}", fg='red')
    click.secho("Test suite terminated due to above failure", fg='red')
    sys.exit()


if __name__ == '__main__':
benchmark()
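A few illustrative invocations (flag names and test-plan names are taken from the click options and `update_plan_params` above; the exact values are examples only):

```bash
# default "custom" plan against the bundled resnet-18 mar
python benchmark-ab.py

# canned test plan: 10000 requests at concurrency 100 against vgg11
python benchmark-ab.py vgg11_10000r_100c

# custom run: batched resnet-152 inside a GPU docker container
python benchmark-ab.py --url https://torchserve.s3.amazonaws.com/mar_files/resnet-152-batch.mar \
    --requests 1000 --concurrency 10 --batch_size 4 --exec_env docker --gpus 1
```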
