diff --git a/.gitignore b/.gitignore
index b0efde0c43..550ce2ab59 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,8 @@ dist/
.github/.DS_Store
.DS_Store
frontend/server/src/main/java/org/pytorch/serve/grpc/
+*.pem
+*.backup
# Postman files
test/artifacts/
@@ -18,5 +20,10 @@ test/model_store/
test/ts_console.log
test/config.properties
+
.vscode
.scratch/
+
+# Custom benchmark artifacts
+instances.yaml
+instances.yaml.backup
diff --git a/docker/Dockerfile.neuron.dev b/docker/Dockerfile.neuron.dev
new file mode 100644
index 0000000000..ce31c434c0
--- /dev/null
+++ b/docker/Dockerfile.neuron.dev
@@ -0,0 +1,109 @@
+# syntax = docker/dockerfile:experimental
+#
+# Following comments have been shamelessly copied from https://github.com/pytorch/pytorch/blob/master/Dockerfile
+#
+# NOTE: To build this you will need a docker version > 18.06 with
+# experimental enabled and DOCKER_BUILDKIT=1
+#
+# If you do not use buildkit you are not going to have a good time
+#
+# For reference:
+# https://docs.docker.com/develop/develop-images/build_enhancements/
+
+ARG BASE_IMAGE=ubuntu:18.04
+ARG BUILD_TYPE=dev
+FROM ${BASE_IMAGE} AS compile-image
+
+ARG BASE_IMAGE
+ARG BRANCH_NAME=master
+ARG MACHINE_TYPE=cpu
+ARG CUDA_VERSION
+
+ENV PYTHONUNBUFFERED TRUE
+
+RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
+ apt-get update && \
+ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+ fakeroot \
+ ca-certificates \
+ dpkg-dev \
+ sudo \
+ g++ \
+ git \
+ python3-dev \
+ build-essential \
+ openjdk-11-jdk \
+ curl \
+ wget \
+ vim \
+ && rm -rf /var/lib/apt/lists/* \
+ && cd /tmp \
+ && curl -O https://bootstrap.pypa.io/get-pip.py \
+ && python3 get-pip.py
+
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 \
+ && update-alternatives --install /usr/local/bin/pip pip /usr/local/bin/pip3 1
+
+RUN pip install -U pip setuptools
+
+RUN echo "deb https://apt.repos.neuron.amazonaws.com bionic main" > /etc/apt/sources.list.d/neuron.list
+RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -
+
+RUN apt-get update \
+ && apt-get install -y \
+ aws-neuron-runtime \
+ aws-neuron-tools \
+ && rm -rf /var/lib/apt/lists/* \
+ && rm -rf /tmp/tmp* \
+ && apt-get clean
+
+# Build Dev Image
+FROM compile-image AS dev-image
+ARG MACHINE_TYPE=cpu
+ARG CUDA_VERSION
+RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \
+ && git clone https://github.com/pytorch/serve.git \
+ && cd serve \
+ && git checkout --track ${BRANCH_NAME} \
+ && if [ -z "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev; else python ts_scripts/install_dependencies.py --environment=dev --cuda $CUDA_VERSION; fi \
+ && python ts_scripts/install_from_src.py \
+ && useradd -m model-server \
+ && mkdir -p /home/model-server/tmp \
+ && cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh \
+ && chmod +x /usr/local/bin/dockerd-entrypoint.sh \
+ && chown -R model-server /home/model-server \
+ && cp docker/config.properties /home/model-server/config.properties \
+ && mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store \
+ && pip install torch-neuron 'neuron-cc[tensorflow]' --extra-index-url=https://pip.repos.neuron.amazonaws.com
+
+EXPOSE 8080 8081 8082 7070 7071
+USER model-server
+WORKDIR /home/model-server
+ENV TEMP=/home/model-server/tmp
+ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
+CMD ["serve"]
+
+# Build CodeBuild Image
+FROM compile-image AS codebuild-image
+ENV JAVA_VERSION=11 \
+ JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64" \
+ JDK_HOME="/usr/lib/jvm/java-11-openjdk-amd64" \
+ JRE_HOME="/usr/lib/jvm/java-11-openjdk-amd64" \
+ ANT_VERSION=1.10.3 \
+ MAVEN_HOME="/opt/maven" \
+ MAVEN_VERSION=3.5.4 \
+ MAVEN_CONFIG="/root/.m2" \
+ MAVEN_DOWNLOAD_SHA1="22cac91b3557586bb1eba326f2f7727543ff15e3"
+
+# Install Maven
+RUN set -ex \
+ && mkdir -p $MAVEN_HOME \
+ && curl -LSso /var/tmp/apache-maven-$MAVEN_VERSION-bin.tar.gz https://apache.org/dist/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.tar.gz \
+ && echo "$MAVEN_DOWNLOAD_SHA1 /var/tmp/apache-maven-$MAVEN_VERSION-bin.tar.gz" | sha1sum -c - \
+ && tar xzvf /var/tmp/apache-maven-$MAVEN_VERSION-bin.tar.gz -C $MAVEN_HOME --strip-components=1 \
+ && update-alternatives --install /usr/bin/mvn mvn /opt/maven/bin/mvn 10000 \
+ && mkdir -p $MAVEN_CONFIG
+
+FROM ${BUILD_TYPE}-image AS final-image
+ARG BUILD_TYPE
+RUN echo "${BUILD_TYPE} image creation completed"
diff --git a/docker/build_image.sh b/docker/build_image.sh
index 5ff4d38b21..48b80a1d15 100755
--- a/docker/build_image.sh
+++ b/docker/build_image.sh
@@ -89,5 +89,5 @@ if [ $BUILD_TYPE == "production" ]
then
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg CUDA_VERSION=$CUDA_VERSION -t $DOCKER_TAG .
else
- DOCKER_BUILDKIT=1 docker build --file Dockerfile.dev -t $DOCKER_TAG --build-arg BUILD_TYPE=$BUILD_TYPE --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BRANCH_NAME=$BRANCH_NAME --build-arg CUDA_VERSION=$CUDA_VERSION --build-arg MACHINE_TYPE=$MACHINE .
+ DOCKER_BUILDKIT=1 docker build --pull --file Dockerfile.dev -t $DOCKER_TAG --build-arg BUILD_TYPE=$BUILD_TYPE --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BRANCH_NAME=$BRANCH_NAME --build-arg CUDA_VERSION=$CUDA_VERSION --build-arg MACHINE_TYPE=$MACHINE .
fi
diff --git a/test/benchmark/README.md b/test/benchmark/README.md
index 4ac3b99e5a..7ef9aa7b75 100644
--- a/test/benchmark/README.md
+++ b/test/benchmark/README.md
@@ -21,8 +21,45 @@ If you'd like to use your own repo, edit the __init__.py under `serve/test/bench
* Ensure you have [docker](https://docs.docker.com/get-docker/) client set-up on your system - osx/ec2
* Adjust the following global variables to your preference in the file `serve/test/benchmark/tests/utils/__init__.py`
-- IAM_INSTANCE_PROFILE :this role is attached to all ec2 instances created as part of the benchmarking process. Create this as described [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#create-iam-role). Default role name is 'EC2Admin'.
+Use the following commands to create a new role if you don't already have one to use.
+1. Create the trust policy file `ec2-admin-trust-policy.json` and add the following content:
+```
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Principal": {
+ "Service": [
+ "ec2.amazonaws.com"
+ ]
+ },
+ "Action": "sts:AssumeRole"
+ }
+ ]
+}
+```
+2. Create the EC2 role as follows:
+```
+aws iam create-role --role-name EC2Admin --assume-role-policy-document file://ec2-admin-trust-policy.json
+```
+3. Add the permissions to the role as follows:
+```
+aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/IAMFullAccess --role-name EC2Admin
+aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonEC2FullAccess --role-name EC2Admin
+aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess --role-name EC2Admin
+aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess --role-name EC2Admin
+```
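+Note: EC2 instances are launched using an *instance profile* (the `IAM_INSTANCE_PROFILE` value above) rather than the role directly. If you created the role via the CLI as above, you will likely also need an instance profile of the same name wrapping the role; a minimal sketch:
+```
+aws iam create-instance-profile --instance-profile-name EC2Admin
+aws iam add-role-to-instance-profile --instance-profile-name EC2Admin --role-name EC2Admin
+```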
-- S3_BUCKET_BENCHMARK_ARTIFACTS :all temporary benchmarking artifacts including server logs will be stored in this bucket:
+Use the following command to create a new S3 bucket if you don't already have one to use.
+```
+aws s3api create-bucket --bucket <your-bucket-name> --region us-west-2 --create-bucket-configuration LocationConstraint=us-west-2
+```
-- DEFAULT_DOCKER_DEV_ECR_REPO :docker image used for benchmarking will be pushed to this repo
+Use the following command to create a new ECR repo if you don't already have one to use.
+```
+aws ecr create-repository --repository-name torchserve-benchmark --region us-west-2
+```
* If you're running this setup on an EC2 instance, please ensure that the instance's security group settings 'allow' inbound ssh port 22. Refer [docs](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/security-group-rules.html).
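+If needed, a rule allowing inbound SSH from your IP can be added with a command along these lines (the security group id and CIDR are placeholders):
+```
+aws ec2 authorize-security-group-ingress --group-id <sg-id> --protocol tcp --port 22 --cidr <your-ip>/32
+```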
*The following steps assume that the current working directory is serve/.*
@@ -32,6 +69,8 @@ If you'd like to use your own repo, edit the __init__.py under `serve/test/bench
sudo apt-get install python3-venv
python3 -m venv bvenv
source bvenv/bin/activate
+# Ensure you have the latest pip
+pip3 install -U pip
```
2. Install requirements for the benchmarking
```
@@ -57,7 +96,7 @@ python report.py
```
The final benchmark report will be available in markdown format as `report.md` in the `serve/` folder.
-**Example report for vgg16 model**
+**Example report for vgg11 model**
### Benchmark report
@@ -103,3 +142,37 @@ The final benchmark report will be available in markdown format as `report.md` i
| AB | vgg11 | 100 | 1000 | 0 | 3.47 | 28765 | 29849 | 30488 | 28781.227 | 0.0 | 1576.24 | 1758.28 | 1758.28 | 2249.52 | 2249.34 | 25210.43 | 46.77 |
+## Features of the automation:
+1. To save time during local testing by *not* creating new instances for every benchmark run, use the `--do-not-terminate` flag. This will automatically create a file called `instances.yaml` and write instance-related data into it so that the instances may be re-used next time.
+```
+python test/benchmark/run_benchmark.py --do-not-terminate
+```
+
+2. To re-use an instance already recorded in `instances.yaml`, use the `--use-instances` flag:
+```
+python test/benchmark/run_benchmark.py --use-instances /instances.yaml --do-not-terminate
+```
+`Note: Use the --do-not-terminate flag to keep re-using the instances; otherwise, they will be terminated`.
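+For reference, the layout the harness writes to `instances.yaml` is keyed by test name and instance type; the values below are placeholders, not real resources:
+```
+test_vgg11_benchmark:
+  c4.4xlarge:
+    instance_id: i-0123456789abcdef0
+    key_filename: /absolute/path/to/generated-key.pem
+```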
+
+3. To run a test containing a specific string, use the `--run-only` flag. Note that the argument is string-matched, i.e., if the test name contains the supplied argument as a substring, the test will run.
+```
+# To run mnist test
+python test/benchmark/run_benchmark.py --run-only mnist
+
+# To run fastrcnn test
+python test/benchmark/run_benchmark.py --run-only fastrcnn
+
+# To run bert_neuron and bert
+python test/benchmark/run_benchmark.py --run-only bert
+
+# To run vgg11 test
+python test/benchmark/run_benchmark.py --run-only vgg11
+
+# To run vgg16 test
+python test/benchmark/run_benchmark.py --run-only vgg16
+```
+
+4. You can benchmark a specific branch of the torchserve GitHub repo by specifying the `--use-torchserve-branch` flag, e.g.:
+```
+python test/benchmark/run_benchmark.py --use-torchserve-branch issue_1115
+```
\ No newline at end of file
diff --git a/test/benchmark/requirements.txt b/test/benchmark/requirements.txt
index 8fdd36f95c..fc06d1f3d6 100644
--- a/test/benchmark/requirements.txt
+++ b/test/benchmark/requirements.txt
@@ -11,4 +11,5 @@ gitpython
docker
pandas
matplotlib
-pyyaml
\ No newline at end of file
+pyyaml
+cryptography==3.4.7
\ No newline at end of file
diff --git a/test/benchmark/run_benchmark.py b/test/benchmark/run_benchmark.py
index ebee2cac5c..bdf4777eb4 100644
--- a/test/benchmark/run_benchmark.py
+++ b/test/benchmark/run_benchmark.py
@@ -1,3 +1,4 @@
+import argparse
import os
import random
import sys
@@ -5,18 +6,105 @@
import re
import uuid
+
import boto3
import pytest
from invoke import run
from invoke.context import Context
+
+from tests.utils.report import Report
+from tests.utils import (
+ S3_BUCKET_BENCHMARK_ARTIFACTS,
+ DEFAULT_REGION,
+ DEFAULT_DOCKER_DEV_ECR_REPO,
+ YamlHandler,
+ DockerImageHandler,
+)
+
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.DEBUG)
LOGGER.addHandler(logging.StreamHandler(sys.stdout))
+def build_docker_container(torchserve_branch="master"):
+ LOGGER.info(f"Setting up docker image to be used")
+
+ docker_dev_image_config_path = os.path.join(os.getcwd(), "test", "benchmark", "tests", "suite", "docker", "docker.yaml")
+
+ docker_config = YamlHandler.load_yaml(docker_dev_image_config_path)
+ YamlHandler.validate_docker_yaml(docker_config)
+
+ account_id = run("aws sts get-caller-identity --query Account --output text").stdout.strip()
+
+ for processor, config in docker_config.items():
+ docker_tag = None
+ cuda_version = None
+ for config_key, config_value in config.items():
+ if processor == "gpu" and config_key == "cuda_version":
+ cuda_version = config_value
+ if config_key == "docker_tag":
+ docker_tag = config_value
+ dockerImageHandler = DockerImageHandler(docker_tag, cuda_version, torchserve_branch)
+ dockerImageHandler.build_image()
+ dockerImageHandler.push_docker_image_to_ecr(
+ account_id, DEFAULT_REGION, f"{DEFAULT_DOCKER_DEV_ECR_REPO}:{docker_tag}"
+ )
+
+
def main():
+
+ LOGGER.info(f"sys.path: {sys.path}")
+
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument(
+ "--use-instances",
+ action="store",
+ help="Supply a .yaml file with test_name, instance_id, and key_filename to re-use already-running instances",
+ )
+ parser.add_argument(
+ "--do-not-terminate",
+ action="store_true",
+ default=False,
+ help="Use with caution: does not terminate instances, instead saves the list to a file in order to re-use",
+ )
+
+ parser.add_argument(
+ "--run-only", default=None, help="Runs the tests that contain the supplied keyword as a substring"
+ )
+
+ parser.add_argument(
+ "--use-torchserve-branch",
+ default="master",
+ help="Specify a specific torchserve branch to benchmark on, else uses 'master' by default"
+ )
+
+ parser.add_argument(
+ "--skip-docker-build",
+ action="store_true",
+ default=False,
+ help="Use if you already have a docker image built and available locally and have specified it in docker.yaml"
+ )
+
+ arguments = parser.parse_args()
+ do_not_terminate_string = "" if not arguments.do_not_terminate else "--do-not-terminate"
+ use_instances_arg_list = ["--use-instances", f"{arguments.use_instances}"] if arguments.use_instances else []
+ run_only_test = arguments.run_only
+
+ if run_only_test:
+ run_only_string = f"-k {run_only_test}"
+ LOGGER.info(f"Note: running only the tests that have the name '{run_only_test}'.")
+ else:
+ run_only_string = ""
+
+ torchserve_branch = arguments.use_torchserve_branch
+
+ # Build docker containers as specified in docker.yaml
+ if not arguments.skip_docker_build:
+ build_docker_container(torchserve_branch=torchserve_branch)
+
# Run this script from the root directory 'serve', it changes directory below as required
os.chdir(os.path.join(os.getcwd(), "test", "benchmark"))
@@ -25,12 +113,30 @@ def main():
test_path = os.path.join(os.getcwd(), "tests")
LOGGER.info(f"Running tests from directory: {test_path}")
- pytest_args = ["-s", "-rA", test_path, "-n=4", "--disable-warnings", "-v", "--execution-id", execution_id]
+ pytest_args = [
+ "-s",
+ run_only_string,
+ "-rA",
+ test_path,
+ "-n=4",
+ "--disable-warnings",
+ "-v",
+ "--execution-id",
+ execution_id,
+ do_not_terminate_string,
+ ] + use_instances_arg_list
LOGGER.info(f"Running pytest")
pytest.main(pytest_args)
+ # Generate report
+ s3_results_uri = f"{S3_BUCKET_BENCHMARK_ARTIFACTS}/{execution_id}"
+
+ report = Report()
+ report.download_benchmark_results_from_s3(s3_results_uri)
+ report.generate_comprehensive_report()
+
if __name__ == "__main__":
main()
diff --git a/test/benchmark/tests/__init__.py b/test/benchmark/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/benchmark/tests/conftest.py b/test/benchmark/tests/conftest.py
index 9cf11acacf..ebb49d250f 100644
--- a/test/benchmark/tests/conftest.py
+++ b/test/benchmark/tests/conftest.py
@@ -4,6 +4,7 @@
import random
import re
import sys
+import yaml
import boto3
import pytest
@@ -37,28 +38,19 @@ def pytest_addoption(parser):
help="execution id that is used to keep all artifacts together",
)
+ parser.addoption(
+ "--use-instances",
+ default=False,
+ action="store",
+ help="Supply a .yaml file with test_name, instance_id, and key_filename to re-use already-running instances",
+ )
-@pytest.fixture(scope="session", autouse=True)
-def build_docker_container(request, docker_dev_image_config_path):
- LOGGER.info(f"Setting up docker image to be used")
- docker_config = YamlHandler.load_yaml(docker_dev_image_config_path)
- YamlHandler.validate_docker_yaml(docker_config)
-
- account_id = run("aws sts get-caller-identity --query Account --output text").stdout.strip()
-
- for processor, config in docker_config.items():
- docker_tag = None
- cuda_version = None
- for config_key, config_value in config.items():
- if processor == "gpu" and config_key == "cuda_version":
- cuda_version = config_value
- if config_key == "docker_tag":
- docker_tag = config_value
- dockerImageHandler = DockerImageHandler(docker_tag, cuda_version)
- dockerImageHandler.build_image()
- dockerImageHandler.push_docker_image_to_ecr(
- account_id, DEFAULT_REGION, f"{DEFAULT_DOCKER_DEV_ECR_REPO}:{docker_tag}"
- )
+ parser.addoption(
+ "--do-not-terminate",
+ action="store_true",
+ default=False,
+ help="Use with caution: does not terminate instances, instead saves the list to a file in order to re-use",
+ )
@pytest.fixture(scope="session")
@@ -75,6 +67,10 @@ def benchmark_execution_id(request):
return execution_id
+@pytest.fixture(scope="function")
+def bert_neuron_config_file_path(request):
+ return os.path.join(os.getcwd(), "tests", "suite", "bert_neuron.yaml")
+
@pytest.fixture(scope="function")
def vgg11_config_file_path(request):
return os.path.join(os.getcwd(), "tests", "suite", "vgg11.yaml")
@@ -162,12 +158,29 @@ def ec2_instance(
ec2_instance_ami,
region,
):
- key_filename = ec2_utils.generate_ssh_keypair(ec2_client, ec2_key_name)
- def delete_ssh_keypair():
- ec2_utils.destroy_ssh_keypair(ec2_client, key_filename)
+ use_instances_flag = request.config.getoption("--use-instances") if request.config.getoption("--use-instances") else None
- request.addfinalizer(delete_ssh_keypair)
+ if use_instances_flag:
+ instances_file = request.config.getoption("--use-instances")
+ run(f"touch {instances_file}", warn=True)
+ instances_dict = YamlHandler.load_yaml(instances_file)
+ LOGGER.info(f"instances_dict: {instances_dict}")
+ instances = instances_dict.get(request.node.name.split("[")[0], "")
+ LOGGER.info(f"instances: {instances}")
+ assert instances != "", f"Could not find instance details corresponding to test: {request.node.name.split('[')[0]}"
+ instance_details = instances.get(ec2_instance_type, "")
+ assert instance_details != "", f"Could not obtain details for instance type: {ec2_instance_type}"
+ instance_id = instance_details.get("instance_id", "")
+ assert instance_id != "", f"Missing instance_id"
+ key_filename = instance_details.get("key_filename", "")
+ assert key_filename != "", f"Missing key_filename"
+
+ LOGGER.info(f"For test: {request.node.name}; Using instance_id: {instance_id} and key_filename: {key_filename}")
+
+ return instance_id, key_filename
+
+ key_filename = ec2_utils.generate_ssh_keypair(ec2_client, ec2_key_name)
params = {
"KeyName": ec2_key_name,
@@ -179,7 +192,7 @@ def delete_ssh_keypair():
],
"MaxCount": 1,
"MinCount": 1,
- "BlockDeviceMappings": [{"DeviceName": "/dev/sda1", "Ebs": {"VolumeSize": 120}}],
+ "BlockDeviceMappings": [{"DeviceName": "/dev/sda1", "Ebs": {"VolumeSize": 220}}],
}
try:
@@ -196,10 +209,34 @@ def delete_ssh_keypair():
def terminate_ec2_instance():
ec2_client.terminate_instances(InstanceIds=[instance_id])
- request.addfinalizer(terminate_ec2_instance)
+ def delete_ssh_keypair():
+ ec2_utils.destroy_ssh_keypair(ec2_client, key_filename)
+
+ do_not_terminate_flag = request.config.getoption("--do-not-terminate")
+
+ LOGGER.info(f"do_not_terminate_flag: {do_not_terminate_flag}")
+
+ instances_file = os.path.join(os.getcwd(), "instances.yaml")
+ run(f"touch {instances_file}", warn=True)
+
+ if not do_not_terminate_flag:
+ request.addfinalizer(terminate_ec2_instance)
+ request.addfinalizer(delete_ssh_keypair)
+
+ if do_not_terminate_flag and not use_instances_flag:
+ instances_dict = YamlHandler.load_yaml(instances_file)
+ if not instances_dict:
+ instances_dict = {}
+
+ update_dictionary = {request.node.name.split("[")[0]: {ec2_instance_type: {"instance_id": instance_id, "key_filename": key_filename}}}
+
+ instances_dict.update(update_dictionary)
+
+ YamlHandler.write_yaml(instances_file, instances_dict)
ec2_utils.check_instance_state(instance_id, state="running", region=region)
ec2_utils.check_system_state(instance_id, system_status="ok", instance_status="ok", region=region)
+
return instance_id, key_filename
@@ -232,6 +269,4 @@ def delete_s3_artifact_copy():
request.addfinalizer(delete_s3_artifact_copy)
-
return conn
-
diff --git a/test/benchmark/tests/resources/neuron-bert/compile_bert.py b/test/benchmark/tests/resources/neuron-bert/compile_bert.py
new file mode 100644
index 0000000000..8f0e30968a
--- /dev/null
+++ b/test/benchmark/tests/resources/neuron-bert/compile_bert.py
@@ -0,0 +1,65 @@
+import tensorflow # to work around a protobuf version conflict issue
+import torch
+import torch.neuron
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import logging
+import argparse
+
+## Enable logging so we can see any important warnings
+logger = logging.getLogger('Neuron')
+logger.setLevel(logging.INFO)
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--batch-size",
+    action="store",
+    help="Batch size to use when tracing the BERT model with torch.neuron",
+)
+
+arguments = parser.parse_args()
+
+batch_size = int(arguments.batch_size)
+
+# Build tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc", return_dict=False)
+model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased-finetuned-mrpc", return_dict=False)
+
+# Setup some example inputs
+sequence_0 = "The company HuggingFace is based in New York City"
+sequence_1 = "Apples are especially bad for your health"
+sequence_2 = "HuggingFace's headquarters are situated in Manhattan"
+paraphrase = tokenizer.encode_plus(sequence_0, sequence_2, max_length=128, pad_to_max_length=True, return_tensors="pt")
+not_paraphrase = tokenizer.encode_plus(sequence_0, sequence_1, max_length=128, pad_to_max_length=True, return_tensors="pt")
+
+# Run the original PyTorch model on both example inputs
+paraphrase_classification_logits = model(**paraphrase)[0]
+not_paraphrase_classification_logits = model(**not_paraphrase)[0]
+
+max_length=128
+# Convert example inputs to a format that is compatible with TorchScript tracing
+input_ids = paraphrase['input_ids'] # type:torch.Tensor
+token_type_ids = paraphrase['token_type_ids'] # type:torch.Tensor
+attention_mask = paraphrase['attention_mask'] # type:torch.Tensor
+input_ids = input_ids.expand(batch_size, max_length)
+token_type_ids = token_type_ids.expand(batch_size, max_length)
+attention_mask = attention_mask.expand(batch_size, max_length)
+example_inputs_paraphrase = input_ids, attention_mask, token_type_ids
+
+input_ids = not_paraphrase['input_ids'] # type:torch.Tensor
+token_type_ids = not_paraphrase['token_type_ids'] # type:torch.Tensor
+attention_mask = not_paraphrase['attention_mask'] # type:torch.Tensor
+input_ids = input_ids.expand(batch_size, max_length)
+token_type_ids = token_type_ids.expand(batch_size, max_length)
+attention_mask = attention_mask.expand(batch_size, max_length)
+example_inputs_not_paraphrase = input_ids, attention_mask, token_type_ids
+
+# Run torch.neuron.trace to generate a TorchScript that is optimized by AWS Neuron, using optimization level -O2
+model_neuron = torch.neuron.trace(model, example_inputs_paraphrase, compiler_args=['-O2'])
+
+# Verify the TorchScript works on both example inputs
+paraphrase_classification_logits_neuron = model_neuron(*example_inputs_paraphrase)
+not_paraphrase_classification_logits_neuron = model_neuron(*example_inputs_not_paraphrase)
+
+# Save the TorchScript for later use
+model_neuron.save(f"bert_neuron_{batch_size}.pt")
\ No newline at end of file
diff --git a/test/benchmark/tests/resources/neuron-bert/config.py b/test/benchmark/tests/resources/neuron-bert/config.py
new file mode 100644
index 0000000000..e21697aadd
--- /dev/null
+++ b/test/benchmark/tests/resources/neuron-bert/config.py
@@ -0,0 +1,3 @@
+model_name='bert-base-cased-finetuned-mrpc'
+max_length=128
+batch_size=1
\ No newline at end of file
diff --git a/test/benchmark/tests/resources/neuron-bert/handler_bert.py b/test/benchmark/tests/resources/neuron-bert/handler_bert.py
new file mode 100644
index 0000000000..9166fdd505
--- /dev/null
+++ b/test/benchmark/tests/resources/neuron-bert/handler_bert.py
@@ -0,0 +1,110 @@
+import os
+import json
+import sys
+import logging
+
+import torch, torch_neuron
+from transformers import AutoTokenizer
+from abc import ABC
+from ts.torch_handler.base_handler import BaseHandler
+
+# one core per worker
+os.environ['NEURONCORE_GROUP_SIZES'] = '1'
+
+logger = logging.getLogger(__name__)
+
+class BertEmbeddingHandler(BaseHandler, ABC):
+ """
+ Handler class for Bert Embedding computations.
+ """
+ def __init__(self):
+ super(BertEmbeddingHandler, self).__init__()
+ self.initialized = False
+
+ def initialize(self, ctx):
+ self.manifest = ctx.manifest
+ properties = ctx.system_properties
+ self.device = 'cpu'
+ model_dir = properties.get('model_dir')
+ serialized_file = self.manifest['model']['serializedFile']
+ model_pt_path = os.path.join(model_dir, serialized_file)
+
+ # point sys.path to our config file
+ sys.path.append(model_dir)
+ import config
+ self.max_length = config.max_length
+ self.batch_size = config.batch_size
+ self.classes = ['not paraphrase', 'paraphrase']
+
+ self.model = torch.jit.load(model_pt_path)
+ logger.debug(f'Model loaded from {model_dir}')
+ self.model.to(self.device)
+ self.model.eval()
+
+ self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
+ self.initialized = True
+
+ def preprocess(self, input_data):
+ """
+ Tokenization pre-processing
+ """
+
+ input_ids = []
+ attention_masks = []
+ token_type_ids = []
+
+ for row in input_data:
+ #seq_0 = row['body']['seq_0'].decode('utf-8')
+ #seq_1 = row['body']['seq_1'].decode('utf-8')
+
+ json_data = json.loads(row['body'].decode('utf-8'))
+
+ seq_0 = json_data['seq_0']
+ seq_1 = json_data['seq_1']
+ logger.debug(f'Received text: "{seq_0}", "{seq_1}"')
+
+ inputs = self.tokenizer.encode_plus(
+ seq_0,
+ seq_1,
+ max_length=self.max_length,
+ padding='max_length',
+ truncation=True,
+ return_tensors='pt'
+ )
+
+ input_ids.append(inputs['input_ids'])
+ attention_masks.append(inputs['attention_mask'])
+ token_type_ids.append(inputs['token_type_ids'])
+
+ batch = (torch.cat(input_ids, 0),
+ torch.cat(attention_masks, 0),
+ torch.cat(token_type_ids, 0))
+
+ return batch
+
+ def inference(self, inputs):
+ """
+ Predict the class of a text using a trained transformer model.
+ """
+
+ # sanity check dimensions
+ assert(len(inputs) == 3)
+ num_inferences = len(inputs[0])
+ assert(num_inferences <= self.batch_size)
+
+ # insert padding if we received a partial batch
+ padding = self.batch_size - num_inferences
+ if padding > 0:
+ pad = torch.nn.ConstantPad1d((0, 0, 0, padding), value=0)
+ inputs = [pad(x) for x in inputs]
+
+ outputs = self.model(*inputs)[0]
+ predictions = []
+ for i in range(num_inferences):
+ prediction = self.classes[outputs[i].argmax().item()]
+ predictions.append([prediction])
+ logger.debug("Model predicted: '%s'", prediction)
+ return predictions
+
+ def postprocess(self, inference_output):
+ return inference_output
\ No newline at end of file
diff --git a/test/benchmark/tests/resources/neuron-bert/input b/test/benchmark/tests/resources/neuron-bert/input
new file mode 100644
index 0000000000..e8a5324c7a
--- /dev/null
+++ b/test/benchmark/tests/resources/neuron-bert/input
@@ -0,0 +1 @@
+{"seq_0": "HuggingFace's headquarters are situated in Manhattan", "seq_1": "This is total nonsense."}
\ No newline at end of file
diff --git a/test/benchmark/tests/suite/bert.yaml b/test/benchmark/tests/suite/bert.yaml
index 40a447aa75..7173ac1bf3 100644
--- a/test/benchmark/tests/suite/bert.yaml
+++ b/test/benchmark/tests/suite/bert.yaml
@@ -2,7 +2,7 @@
bert:
scripted_mode:
benchmark_engine: "ab"
- url: "https://torchserve.s3.amazonaws.com/mar_files/BERTSeqClassification_Torchscript_batch.mar"
+ url: "https://s3.us-west-2.amazonaws.com/ts0.4.1-marfiles/BERTSeqClassification_torchscript.mar" #for CPU: https://torchserve.s3.amazonaws.com/mar_files/BERTSeqClassification_Torchscript_batch.mar
workers: 4
batch_delay: 100
batch_size:
@@ -10,10 +10,10 @@ bert:
- 2
- 4
- 8
- input: "https://raw.githubusercontent.com/pytorch/serve/master/examples/image_classifier/kitten.jpg"
+ input: "https://raw.githubusercontent.com/pytorch/serve/master/examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt"
requests: 10000
concurrency: 100
- backend_profiling: True
+ backend_profiling: False
exec_env: "docker"
processors:
- "cpu"
diff --git a/test/benchmark/tests/suite/bert_neuron.yaml b/test/benchmark/tests/suite/bert_neuron.yaml
new file mode 100644
index 0000000000..a80ad24e5c
--- /dev/null
+++ b/test/benchmark/tests/suite/bert_neuron.yaml
@@ -0,0 +1,19 @@
+---
+bert_inf1:
+ scripted_mode:
+ benchmark_engine: "ab"
+ compile_per_batch_size: True
+ workers: 4
+ batch_delay: 100
+ batch_size:
+ - 1
+ - 2
+ - 4
+ - 8
+ input: "/home/ubuntu/serve/test/benchmark/tests/resources/neuron-bert/input"
+ requests: 10000
+ concurrency: 100
+ backend_profiling: False
+ exec_env: "local"
+ processors:
+ - "inferentia"
\ No newline at end of file
diff --git a/test/benchmark/tests/suite/fastrcnn.yaml b/test/benchmark/tests/suite/fastrcnn.yaml
index 647e2725ae..52f19c0947 100644
--- a/test/benchmark/tests/suite/fastrcnn.yaml
+++ b/test/benchmark/tests/suite/fastrcnn.yaml
@@ -10,10 +10,10 @@ fastrcnn:
- 2
- 4
- 8
- input: "https://raw.githubusercontent.com/pytorch/serve/master/examples/object_detector/persons.jpg"
- requests: 10000
+ input: "https://raw.githubusercontent.com/pytorch/serve/master/examples/image_classifier/kitten.jpg"
+ requests: 1000
concurrency: 100
- backend_profiling: True
+ backend_profiling: False
exec_env: "docker"
processors:
- "cpu"
diff --git a/test/benchmark/tests/suite/mnist.yaml b/test/benchmark/tests/suite/mnist.yaml
index bfe42b6123..b360a724d8 100644
--- a/test/benchmark/tests/suite/mnist.yaml
+++ b/test/benchmark/tests/suite/mnist.yaml
@@ -10,10 +10,10 @@ mnist:
- 2
- 4
- 8
- requests: 10000
+ requests: 1000
concurrency: 10
input: "https://raw.githubusercontent.com/pytorch/serve/master/examples/image_classifier/mnist/test_data/0.png"
- backend_profiling: True
+ backend_profiling: False
exec_env: "docker"
processors:
- "cpu"
diff --git a/test/benchmark/tests/suite/vgg11.yaml b/test/benchmark/tests/suite/vgg11.yaml
index 7305e7de2e..f378ab18de 100644
--- a/test/benchmark/tests/suite/vgg11.yaml
+++ b/test/benchmark/tests/suite/vgg11.yaml
@@ -13,7 +13,7 @@ vgg11:
input: "https://raw.githubusercontent.com/pytorch/serve/master/examples/image_classifier/kitten.jpg"
requests: 1000
concurrency: 100
- backend_profiling: True
+ backend_profiling: False
exec_env: "docker"
processors:
- "cpu"
@@ -31,7 +31,7 @@ vgg11:
input: "https://raw.githubusercontent.com/pytorch/serve/master/examples/image_classifier/kitten.jpg"
requests: 1000
concurrency: 100
- backend_profiling: True
+ backend_profiling: False
exec_env: "docker"
dockerhub_image: "pytorch/torchserve:latest"
processors:
diff --git a/test/benchmark/tests/suite/vgg16.yaml b/test/benchmark/tests/suite/vgg16.yaml
index 021925aa58..1700e9362e 100644
--- a/test/benchmark/tests/suite/vgg16.yaml
+++ b/test/benchmark/tests/suite/vgg16.yaml
@@ -13,7 +13,7 @@ vgg16:
input: "https://raw.githubusercontent.com/pytorch/serve/master/examples/image_classifier/kitten.jpg"
requests: 1000
concurrency: 100
- backend_profiling: True
+ backend_profiling: False
exec_env: "docker"
processors:
- "cpu"
@@ -31,7 +31,7 @@ vgg16:
input: "https://raw.githubusercontent.com/pytorch/serve/master/examples/image_classifier/kitten.jpg"
requests: 1000
concurrency: 100
- backend_profiling: True
+ backend_profiling: False
exec_env: "docker"
dockerhub_image: "pytorch/torchserve:latest"
processors:
diff --git a/test/benchmark/tests/test_bert.py b/test/benchmark/tests/test_bert.py
index 5ccd5346f6..e866fed0ca 100644
--- a/test/benchmark/tests/test_bert.py
+++ b/test/benchmark/tests/test_bert.py
@@ -6,10 +6,10 @@
from invoke import run
from invoke.context import Context
-import utils.ec2 as ec2_utils
-import utils.s3 as s3_utils
-import utils.ts as ts_utils
-import utils.apache_bench as ab_utils
+import tests.utils.ec2 as ec2_utils
+import tests.utils.s3 as s3_utils
+import tests.utils.ts as ts_utils
+import tests.utils.apache_bench as ab_utils
from tests.utils import (
@@ -22,10 +22,10 @@
S3_BUCKET_BENCHMARK_ARTIFACTS,
)
-INSTANCE_TYPES_TO_TEST = ["p3.8xlarge"]
+# Add/remove from the following list to benchmark on the instance of your choice
+INSTANCE_TYPES_TO_TEST = ["c4.4xlarge"]
-@pytest.mark.skip()
@pytest.mark.parametrize("ec2_instance_type", INSTANCE_TYPES_TO_TEST, indirect=True)
def test_bert_benchmark(
ec2_connection, ec2_instance_type, bert_config_file_path, docker_dev_image_config_path, benchmark_execution_id
@@ -71,7 +71,7 @@ def test_bert_benchmark(
account_id, DEFAULT_REGION, docker_repo_tag, connection=ec2_connection
)
docker_repo_tag_for_current_instance = docker_repo_tag
- cuda_version_for_instance = cuda_version
+ cuda_version_for_instance = None
break
mode_list = []
@@ -131,7 +131,7 @@ def test_bert_benchmark(
torchserveHandler.unregister_model()
# Stop torchserve
- torchserveHandler.stop_torchserve()
+ torchserveHandler.stop_torchserve(exec_env="docker")
# Generate report (note: needs to happen after torchserve has stopped)
apacheBenchHandler.generate_report(requests=requests, concurrency=concurrency, connection=ec2_connection)
diff --git a/test/benchmark/tests/test_bert_neuron.py b/test/benchmark/tests/test_bert_neuron.py
new file mode 100644
index 0000000000..efb25d018b
--- /dev/null
+++ b/test/benchmark/tests/test_bert_neuron.py
@@ -0,0 +1,171 @@
+import os
+import pprint
+
+import pytest
+import time
+from invoke import run
+from invoke.context import Context
+
+import tests.utils.ec2 as ec2_utils
+import tests.utils.s3 as s3_utils
+import tests.utils.ts as ts_utils
+import tests.utils.apache_bench as ab_utils
+import tests.utils.neuron as neuron_utils
+
+from tests.utils import (
+ DEFAULT_DOCKER_DEV_ECR_REPO,
+ DEFAULT_REGION,
+ GPU_INSTANCES,
+ LOGGER,
+ DockerImageHandler,
+ YamlHandler,
+ S3_BUCKET_BENCHMARK_ARTIFACTS,
+)
+
+# Add/remove from the following list to benchmark on the instance of your choice
+INSTANCE_TYPES_TO_TEST = ["inf1.6xlarge"]
+
+@pytest.mark.skip(reason="Skipping neuron test, manually unskip if you need to benchmark")
+@pytest.mark.parametrize("ec2_instance_type", INSTANCE_TYPES_TO_TEST, indirect=True)
+def test_neuron_benchmark(
+ ec2_connection, ec2_instance_type, bert_neuron_config_file_path, docker_dev_image_config_path, benchmark_execution_id
+):
+
+ test_config = YamlHandler.load_yaml(bert_neuron_config_file_path)
+
+ model_name = bert_neuron_config_file_path.split("/")[-1].split(".")[0]
+
+ LOGGER.info("Validating yaml contents")
+
+ LOGGER.info(YamlHandler.validate_benchmark_yaml(test_config))
+
+ docker_config = YamlHandler.load_yaml(docker_dev_image_config_path)
+
+ docker_repo_tag_for_current_instance = ""
+ cuda_version_for_instance = ""
+ account_id = run("aws sts get-caller-identity --query Account --output text").stdout.strip()
+
+ for processor, config in docker_config.items():
+ docker_tag = None
+ cuda_version = None
+ for config_key, config_value in config.items():
+ if processor == "gpu" and config_key == "cuda_version":
+ cuda_version = config_value
+ if config_key == "docker_tag":
+ docker_tag = config_value
+ # TODO: Improve logic that selectively pulls CPU image on CPU instances and likewise for GPU.
+
+ docker_repo_tag = f"{DEFAULT_DOCKER_DEV_ECR_REPO}:{docker_tag}"
+
+ if ec2_instance_type[:2] in GPU_INSTANCES and ("gpu" in docker_tag or "neuron" in docker_tag):
+ dockerImageHandler = DockerImageHandler(docker_tag, cuda_version)
+ dockerImageHandler.pull_docker_image_from_ecr(
+ account_id, DEFAULT_REGION, docker_repo_tag, connection=ec2_connection
+ )
+ docker_repo_tag_for_current_instance = docker_repo_tag
+ cuda_version_for_instance = cuda_version
+ break
+ if ec2_instance_type[:2] not in GPU_INSTANCES and ("cpu" in docker_tag or "neuron" in docker_tag):
+ dockerImageHandler = DockerImageHandler(docker_tag, cuda_version)
+ dockerImageHandler.pull_docker_image_from_ecr(
+ account_id, DEFAULT_REGION, docker_repo_tag, connection=ec2_connection
+ )
+ docker_repo_tag_for_current_instance = docker_repo_tag
+ cuda_version_for_instance = None
+ break
+
+ mode_list = []
+ config_list = []
+ batch_size_list = []
+ processor_list = []
+
+ apacheBenchHandler = ab_utils.ApacheBenchHandler(model_name=model_name, connection=ec2_connection)
+
+ for model, config in test_config.items():
+ for mode, mode_config in config.items():
+ mode_list.append(mode)
+ benchmark_engine = mode_config.get("benchmark_engine")
+ workers = mode_config.get("workers")
+ batch_delay = mode_config.get("batch_delay")
+ batch_sizes = mode_config.get("batch_size")
+ input_file = mode_config.get("input")
+ requests = mode_config.get("requests")
+ concurrency = mode_config.get("concurrency")
+ backend_profiling = mode_config.get("backend_profiling")
+ exec_env = mode_config.get("exec_env")
+ processors = mode_config.get("processors")
+ gpus = None
+ if len(processors) == 2:
+ gpus = processors[1].get("gpus")
+ LOGGER.info(f"processors: {processors[1]}")
+ LOGGER.info(f"gpus: {gpus}")
+
+ LOGGER.info(
+ f"\n benchmark_engine: {benchmark_engine}\n workers: {workers}\n batch_delay: {batch_delay}\n batch_size:{batch_sizes}\n input_file: {input_file}\n requests: {requests}\n concurrency: {concurrency}\n backend_profiling: {backend_profiling}\n exec_env: {exec_env}\n processors: {processors}"
+ )
+
+ torchserveHandler = ts_utils.TorchServeHandler(
+ exec_env=exec_env,
+ cuda_version=cuda_version_for_instance,
+ gpus=gpus,
+ torchserve_docker_image=docker_repo_tag_for_current_instance,
+ backend_profiling=backend_profiling,
+ connection=ec2_connection,
+ )
+
+ # Note: Assumes a DLAMI (conda-based) is being used
+ torchserveHandler.setup_torchserve(virtual_env_name="aws_neuron_pytorch_p36")
+
+ for batch_size in batch_sizes:
+ url = f"benchmark_{batch_size}.mar"
+ LOGGER.info(f"Running benchmark for model archive: {url}")
+
+ # Stop torchserve
+ torchserveHandler.stop_torchserve(exec_env="local", virtual_env_name="aws_neuron_pytorch_p36")
+
+ # Generate bert inf model
+ neuron_utils.setup_neuron_mar_files(connection=ec2_connection, virtual_env_name="aws_neuron_pytorch_p36", batch_size=batch_size)
+
+ # Start torchserve
+ torchserveHandler.start_torchserve_local(virtual_env_name="aws_neuron_pytorch_p36", stop_torchserve=False)
+
+ # Register
+ torchserveHandler.register_model(
+ url=url, workers=workers, batch_delay=batch_delay, batch_size=batch_size
+ )
+
+ # Run benchmark
+ apacheBenchHandler.run_apache_bench(requests=requests, concurrency=concurrency, input_file=input_file)
+
+ # Unregister
+ torchserveHandler.unregister_model()
+
+ # Stop torchserve
+ torchserveHandler.stop_torchserve(exec_env="local", virtual_env_name="aws_neuron_pytorch_p36")
+
+ # Generate report (note: needs to happen after torchserve has stopped)
+ apacheBenchHandler.generate_report(
+ requests=requests, concurrency=concurrency, connection=ec2_connection
+ )
+
+ # Move artifacts into a common folder.
+ remote_artifact_folder = (
+ f"/home/ubuntu/{benchmark_execution_id}/{model_name}/{ec2_instance_type}/{mode}/{batch_size}"
+ )
+
+ ec2_connection.run(f"mkdir -p {remote_artifact_folder}")
+ ec2_connection.run(f"cp -R /home/ubuntu/benchmark/* {remote_artifact_folder}")
+
+ # Upload artifacts to s3 bucket
+ ec2_connection.run(
+ f"aws s3 cp --recursive /home/ubuntu/{benchmark_execution_id}/ {S3_BUCKET_BENCHMARK_ARTIFACTS}/{benchmark_execution_id}/"
+ )
+
+ time.sleep(3)
+
+ run(
+ f"aws s3 cp --recursive /tmp/{model_name}/ {S3_BUCKET_BENCHMARK_ARTIFACTS}/{benchmark_execution_id}/{model_name}/{ec2_instance_type}/{mode}/{batch_size}"
+ )
+
+ run(f"rm -rf /tmp/{model_name}")
+ apacheBenchHandler.clean_up()
diff --git a/test/benchmark/tests/test_fastrcnn.py b/test/benchmark/tests/test_fastrcnn.py
index 2fe2cba0c0..401f0c59d3 100644
--- a/test/benchmark/tests/test_fastrcnn.py
+++ b/test/benchmark/tests/test_fastrcnn.py
@@ -6,10 +6,10 @@
from invoke import run
from invoke.context import Context
-import utils.ec2 as ec2_utils
-import utils.s3 as s3_utils
-import utils.ts as ts_utils
-import utils.apache_bench as ab_utils
+import tests.utils.ec2 as ec2_utils
+import tests.utils.s3 as s3_utils
+import tests.utils.ts as ts_utils
+import tests.utils.apache_bench as ab_utils
from tests.utils import (
DEFAULT_DOCKER_DEV_ECR_REPO,
@@ -21,9 +21,9 @@
S3_BUCKET_BENCHMARK_ARTIFACTS,
)
-INSTANCE_TYPES_TO_TEST = ["p3.8xlarge"]
+# Add/remove from the following list to benchmark on the instance of your choice
+INSTANCE_TYPES_TO_TEST = ["c4.4xlarge", "p3.8xlarge"]
-@pytest.mark.skip()
@pytest.mark.parametrize("ec2_instance_type", INSTANCE_TYPES_TO_TEST, indirect=True)
def test_fastrcnn_benchmark(
ec2_connection, ec2_instance_type, fastrcnn_config_file_path, docker_dev_image_config_path, benchmark_execution_id
@@ -69,7 +69,7 @@ def test_fastrcnn_benchmark(
account_id, DEFAULT_REGION, docker_repo_tag, connection=ec2_connection
)
docker_repo_tag_for_current_instance = docker_repo_tag
- cuda_version_for_instance = cuda_version
+ cuda_version_for_instance = None
break
mode_list = []
diff --git a/test/benchmark/tests/test_mnist.py b/test/benchmark/tests/test_mnist.py
index b6024f8834..0a1a8e8ced 100644
--- a/test/benchmark/tests/test_mnist.py
+++ b/test/benchmark/tests/test_mnist.py
@@ -6,10 +6,10 @@
from invoke import run
from invoke.context import Context
-import utils.ec2 as ec2_utils
-import utils.s3 as s3_utils
-import utils.ts as ts_utils
-import utils.apache_bench as ab_utils
+import tests.utils.ec2 as ec2_utils
+import tests.utils.s3 as s3_utils
+import tests.utils.ts as ts_utils
+import tests.utils.apache_bench as ab_utils
from tests.utils import (
DEFAULT_DOCKER_DEV_ECR_REPO,
@@ -21,9 +21,9 @@
S3_BUCKET_BENCHMARK_ARTIFACTS,
)
+# Add/remove from the following list to benchmark on the instance of your choice
INSTANCE_TYPES_TO_TEST = ["p3.8xlarge"]
-@pytest.mark.skip()
@pytest.mark.parametrize("ec2_instance_type", INSTANCE_TYPES_TO_TEST, indirect=True)
def test_mnist_benchmark(
ec2_connection, ec2_instance_type, mnist_config_file_path, docker_dev_image_config_path, benchmark_execution_id
@@ -69,7 +69,7 @@ def test_mnist_benchmark(
account_id, DEFAULT_REGION, docker_repo_tag, connection=ec2_connection
)
docker_repo_tag_for_current_instance = docker_repo_tag
- cuda_version_for_instance = cuda_version
+ cuda_version_for_instance = None
break
mode_list = []
diff --git a/test/benchmark/tests/test_vgg11.py b/test/benchmark/tests/test_vgg11.py
index d2f41bbc38..9549ec7a50 100644
--- a/test/benchmark/tests/test_vgg11.py
+++ b/test/benchmark/tests/test_vgg11.py
@@ -6,10 +6,10 @@
from invoke import run
from invoke.context import Context
-import utils.ec2 as ec2_utils
-import utils.s3 as s3_utils
-import utils.ts as ts_utils
-import utils.apache_bench as ab_utils
+import tests.utils.ec2 as ec2_utils
+import tests.utils.s3 as s3_utils
+import tests.utils.ts as ts_utils
+import tests.utils.apache_bench as ab_utils
from tests.utils import (
DEFAULT_DOCKER_DEV_ECR_REPO,
@@ -21,9 +21,9 @@
S3_BUCKET_BENCHMARK_ARTIFACTS,
)
+# Add/remove from the following list to benchmark on the instance of your choice
INSTANCE_TYPES_TO_TEST = ["p3.8xlarge"]
-@pytest.mark.skip()
@pytest.mark.parametrize("ec2_instance_type", INSTANCE_TYPES_TO_TEST, indirect=True)
def test_vgg11_benchmark(
ec2_connection, ec2_instance_type, vgg11_config_file_path, docker_dev_image_config_path, benchmark_execution_id
@@ -69,7 +69,7 @@ def test_vgg11_benchmark(
account_id, DEFAULT_REGION, docker_repo_tag, connection=ec2_connection
)
docker_repo_tag_for_current_instance = docker_repo_tag
- cuda_version_for_instance = cuda_version
+ cuda_version_for_instance = None
break
mode_list = []
@@ -129,7 +129,7 @@ def test_vgg11_benchmark(
torchserveHandler.unregister_model()
# Stop torchserve
- torchserveHandler.stop_torchserve()
+ torchserveHandler.stop_torchserve(exec_env="docker")
# Generate report (note: needs to happen after torchserve has stopped)
apacheBenchHandler.generate_report(requests=requests, concurrency=concurrency, connection=ec2_connection)
diff --git a/test/benchmark/tests/test_vgg16.py b/test/benchmark/tests/test_vgg16.py
index cf9f8e7997..b7116eb701 100644
--- a/test/benchmark/tests/test_vgg16.py
+++ b/test/benchmark/tests/test_vgg16.py
@@ -6,10 +6,10 @@
from invoke import run
from invoke.context import Context
-import utils.ec2 as ec2_utils
-import utils.s3 as s3_utils
-import utils.ts as ts_utils
-import utils.apache_bench as ab_utils
+import tests.utils.ec2 as ec2_utils
+import tests.utils.s3 as s3_utils
+import tests.utils.ts as ts_utils
+import tests.utils.apache_bench as ab_utils
from tests.utils import (
DEFAULT_DOCKER_DEV_ECR_REPO,
@@ -21,7 +21,8 @@
S3_BUCKET_BENCHMARK_ARTIFACTS,
)
-INSTANCE_TYPES_TO_TEST = ["p3.8xlarge"]
+# Add/remove from the following list to benchmark on the instance of your choice
+INSTANCE_TYPES_TO_TEST = ["c4.4xlarge"]
@pytest.mark.parametrize("ec2_instance_type", INSTANCE_TYPES_TO_TEST, indirect=True)
def test_vgg16_benchmark(
@@ -54,7 +55,7 @@ def test_vgg16_benchmark(
docker_repo_tag = f"{DEFAULT_DOCKER_DEV_ECR_REPO}:{docker_tag}"
- if ec2_instance_type[:2] in GPU_INSTANCES and "gpu" in docker_tag:
+ if ec2_instance_type[:2] in GPU_INSTANCES and ("gpu" in docker_tag or "neuron" in docker_tag):
dockerImageHandler = DockerImageHandler(docker_tag, cuda_version)
dockerImageHandler.pull_docker_image_from_ecr(
account_id, DEFAULT_REGION, docker_repo_tag, connection=ec2_connection
@@ -62,13 +63,13 @@ def test_vgg16_benchmark(
docker_repo_tag_for_current_instance = docker_repo_tag
cuda_version_for_instance = cuda_version
break
- if ec2_instance_type[:2] not in GPU_INSTANCES and "cpu" in docker_tag:
+ if ec2_instance_type[:2] not in GPU_INSTANCES and ("cpu" in docker_tag or "neuron" in docker_tag):
dockerImageHandler = DockerImageHandler(docker_tag, cuda_version)
dockerImageHandler.pull_docker_image_from_ecr(
account_id, DEFAULT_REGION, docker_repo_tag, connection=ec2_connection
)
docker_repo_tag_for_current_instance = docker_repo_tag
- cuda_version_for_instance = cuda_version
+ cuda_version_for_instance = None
break
mode_list = []
@@ -104,7 +105,7 @@ def test_vgg16_benchmark(
torchserveHandler = ts_utils.TorchServeHandler(
exec_env=exec_env,
- cuda_version=cuda_version,
+ cuda_version=cuda_version_for_instance,
gpus=gpus,
torchserve_docker_image=docker_repo_tag_for_current_instance,
backend_profiling=backend_profiling,
@@ -128,7 +129,7 @@ def test_vgg16_benchmark(
torchserveHandler.unregister_model()
# Stop torchserve
- torchserveHandler.stop_torchserve()
+ torchserveHandler.stop_torchserve(exec_env="docker")
# Generate report (note: needs to happen after torchserve has stopped)
apacheBenchHandler.generate_report(requests=requests, concurrency=concurrency, connection=ec2_connection)
diff --git a/test/benchmark/tests/utils/__init__.py b/test/benchmark/tests/utils/__init__.py
index 5c45d6d8a6..9fd2571599 100644
--- a/test/benchmark/tests/utils/__init__.py
+++ b/test/benchmark/tests/utils/__init__.py
@@ -1,5 +1,8 @@
+from __future__ import absolute_import
+
import json
import logging
+import fcntl
import os
import re
import subprocess
@@ -27,8 +30,9 @@
GPU_INSTANCES = ["p2", "p3", "p4", "g2", "g3", "g4"]
# DLAMI with nVidia Driver ver. 450.119.03 (support upto CUDA 11.2), Ubuntu 18.04
-AMI_ID = "ami-0ff137c06803a8bb7"
-# AMI_ID = "ami-0198925303105158c", with apache2-utils installed
+# AMI_ID = "ami-064696901389beb84"
+# AMI_ID = "ami-0198925303105158c", Base DLAMI 37.0 with apache2-utils installed
+AMI_ID = "ami-00c5ebd9076702cbe"#, DLAMI 43.0 with apache2-utils installed
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.INFO)
@@ -50,10 +54,10 @@ def build_image(self):
os.chdir(torch_serve_docker_directory)
if self.cuda_version:
run_out = run(
- f"./build_image.sh -bt dev -g -cv {self.cuda_version} -t {DEFAULT_DOCKER_DEV_ECR_REPO}:{self.docker_tag}"
+ f"./build_image.sh -b {self.branch} -bt dev -g -cv {self.cuda_version} -t {DEFAULT_DOCKER_DEV_ECR_REPO}:{self.docker_tag}"
)
else:
- run_out = run(f"./build_image.sh -bt dev -t {DEFAULT_DOCKER_DEV_ECR_REPO}:{self.docker_tag}")
+ run_out = run(f"./build_image.sh -b {self.branch} -bt dev -t {DEFAULT_DOCKER_DEV_ECR_REPO}:{self.docker_tag}")
# Switch back to original directory
os.chdir(current_working_directory)
@@ -140,11 +144,10 @@ class YamlHandler(object):
"input",
"processors",
"requests",
- "url",
"workers",
]
- optional_config_keys = ["dockerhub_image", "docker_dev_image"]
+ optional_config_keys = ["url", "dockerhub_image", "docker_dev_image", "compile_per_batch_size"]
valid_config_keys = mandatory_config_keys + optional_config_keys
@@ -154,7 +157,7 @@ class YamlHandler(object):
valid_processors = ["cpu", "gpus"]
- valid_docker_processors = ["cpu", "gpu"]
+ valid_docker_processors = ["cpu", "gpu", "inferentia"]
mandatory_docker_config_keys = ["docker_tag"]
@@ -179,8 +182,10 @@ def write_yaml(file_path, dictionary_object):
:param dictionary_object: dictionary with content that needs to be written to a yaml file
:return None
"""
- with open(file_path) as f:
- yaml.dump(f, dictionary_object)
+ with open(file_path, "a") as f:
+ fcntl.flock(f, fcntl.LOCK_EX)
+ yaml.dump(dictionary_object, f)
+ fcntl.flock(f, fcntl.LOCK_UN)
@staticmethod
def validate_benchmark_yaml(yaml_content):
diff --git a/test/benchmark/tests/utils/apache_bench.py b/test/benchmark/tests/utils/apache_bench.py
index 27b9a2bc9a..2c0c2af47a 100644
--- a/test/benchmark/tests/utils/apache_bench.py
+++ b/test/benchmark/tests/utils/apache_bench.py
@@ -47,7 +47,7 @@ def install_dependencies(self):
"""
Installs apache2-utils, assuming it's an Ubuntu instance
"""
- run_out = self.connection.sudo(f"apt install -y apache2-utils")
+ run_out = self.connection.sudo(f"apt install -y apache2-utils", pty=True)
return run_out.return_code
def run_apache_bench(self, requests, concurrency, input_file):
@@ -58,14 +58,15 @@ def run_apache_bench(self, requests, concurrency, input_file):
"""
self.connection.run(f"mkdir -p {TMP_DIR}/benchmark")
- self.connection.run(f"wget {input_file}")
+ if input_file.startswith("https://") or input_file.startswith("http://"):
+ self.connection.run(f"wget {input_file}", warn=True)
+ file_name = self.connection.run(f"basename {input_file}").stdout.strip()
+ # Copy to the directory with other benchmark artifacts
+ self.connection.run(f"cp {file_name} {os.path.join(TMP_DIR, 'benchmark/input')}")
+ else:
+ self.connection.run(f"cp {input_file} {os.path.join(TMP_DIR, 'benchmark/input')}")
- file_name = self.connection.run(f"basename {input_file}").stdout.strip()
-
- # Copy to the directory with other benchmark artifacts
- self.connection.run(f"cp {file_name} {os.path.join(TMP_DIR, 'benchmark/input')}")
-
- apache_bench_command = f"ab -c {concurrency} -n {requests} -k -p {TMP_DIR}/benchmark/input -T application/png {self.inference_url}/predictions/benchmark > {self.result_file}"
+ apache_bench_command = f"ab -c {concurrency} -n {requests} -k -p {TMP_DIR}/benchmark/input -T application/jpg {self.inference_url}/predictions/benchmark > {self.result_file}"
# Run apache bench
run_out = self.connection.run(
diff --git a/test/benchmark/tests/utils/neuron.py b/test/benchmark/tests/utils/neuron.py
new file mode 100644
index 0000000000..96eb1763b9
--- /dev/null
+++ b/test/benchmark/tests/utils/neuron.py
@@ -0,0 +1,53 @@
+import subprocess
+import time
+import glob
+import os
+import requests
+import tempfile
+
+import invoke
+import pandas as pd
+
+from io import StringIO
+from urllib.parse import urlparse
+from invoke import run
+from invoke.context import Context
+
+from . import DEFAULT_REGION, IAM_INSTANCE_PROFILE, AMI_ID, LOGGER, S3_BUCKET_BENCHMARK_ARTIFACTS
+
+# Assumes the functions from this file execute on an Ubuntu ec2 instance
+ROOT_DIR = f"/home/ubuntu"
+TORCHSERVE_DIR = os.path.join(ROOT_DIR, "serve")
+MODEL_STORE = os.path.join(TORCHSERVE_DIR, "model_store")
+LOCAL_TMP_DIR = "/tmp"
+TMP_DIR = "/home/ubuntu"
+NEURON_RESOURCES_FOLDER = os.path.join(TORCHSERVE_DIR, "test", "benchmark", "tests", "resources", "neuron-bert")
+
+def setup_neuron_mar_files(connection=None, virtual_env_name=None, batch_size=1):
+ activation_command = ""
+
+ if virtual_env_name:
+ activation_command = f"cd /home/ubuntu/serve/test/benchmark/tests/resources/neuron-bert && source activate {virtual_env_name} && "
+
+ # Note: change version here to make sure the torch version compatible with neuron is being used.
+ connection.run(f"{activation_command}pip3 install -U --ignore-installed torch==1.7.1", warn=True)
+ connection.run(f"{activation_command}pip3 install -U --ignore-installed torch-neuron 'neuron-cc[tensorflow]' --extra-index-url=https://pip.repos.neuron.amazonaws.com", warn=True)
+
+ connection.run(f"{activation_command}python3 compile_bert.py --batch-size {batch_size}", warn=True)
+ time.sleep(5)
+ run_out_sed = connection.run(f"{activation_command}sed -i 's/batch_size=[[:digit:]]\+/batch_size={batch_size}/g' config.py", warn=True)
+ LOGGER.info(f"run_out_sed: {run_out_sed.stdout}, run_out_return: {run_out_sed.return_code}")
+ run_out_mkdir = connection.run(f"mkdir -p /home/ubuntu/benchmark/model_store")
+ LOGGER.info(f"run_out_mkdir: {run_out_mkdir.stdout}, run_out_return: {run_out_mkdir.return_code}")
+ run_out_archiver = connection.run(f"{activation_command}torch-model-archiver --model-name 'benchmark_{batch_size}' --version 1.0 --serialized-file ./bert_neuron_{batch_size}.pt --handler './handler_bert.py' --extra-files './config.py' -f", warn=True)
+ LOGGER.info(f"run_out_archiver: {run_out_archiver.stdout}, run_out_return: {run_out_archiver.return_code}")
+
+ LOGGER.info(f"Running copy command")
+ connection.run(f"cp /home/ubuntu/serve/test/benchmark/tests/resources/neuron-bert/benchmark_{batch_size}.mar /home/ubuntu/benchmark/model_store")
+ run_out = connection.run(f"test -e /home/ubuntu/benchmark/model_store/benchmark_{batch_size}.mar")
+ if run_out.return_code == 0:
+ LOGGER.info(f"mar file available at location /home/ubuntu/benchmark/model_store/benchmark_{batch_size}.mar")
+ else:
+ LOGGER.info(f"mar file NOT available at location /home/ubuntu/benchmark/model_store/benchmark_{batch_size}.mar")
+
+ time.sleep(5)
\ No newline at end of file
diff --git a/test/benchmark/tests/utils/report.py b/test/benchmark/tests/utils/report.py
index f09c01ce9e..ca5bb9ec2a 100644
--- a/test/benchmark/tests/utils/report.py
+++ b/test/benchmark/tests/utils/report.py
@@ -18,7 +18,7 @@
TMP_DIR = "/tmp"
-
+from . import LOGGER
class MarkdownTable:
def __init__(self):
@@ -81,10 +81,6 @@ def add_markdown_from_csv(self, file_path, delimiter):
md_string += item + " | "
md_string += "\n"
- # writing md_string to the output_file
- # file = open(output_file, "w", encoding="UTF-8")
- # file.write(md_string)
- # file.close()
self.markdown_content += md_string
print("The markdown file has been created!!!")
@@ -99,7 +95,7 @@ def add_code_block(self, content: str, newline=True):
newline_modifier = "\n" if newline else ""
backticks_modifier = "```" if newline else "`"
- self.markdown_content += str(f"{newline_modifier}{backticks_modifier}{newline_modifier}{content}\n{backticks_modifier}")
+ self.markdown_content += str(f"{newline_modifier}{backticks_modifier}\n{content}\n{backticks_modifier}{newline_modifier}")
def add_paragraph(self, content: str, bold=False, italics=False, newline=True):
"""
@@ -123,45 +119,56 @@ def add_newline(self):
def get_document(self):
return self.markdown_content
-def main(s3_bucket_uri):
- """
- Compile a markdown file with different csv files as input
- """
- # Download the s3 files
- run(f"mkdir -p /tmp/report")
- run(f"aws s3 cp --recursive {s3_bucket_uri} /tmp/report")
-
- csv_files = []
+class Report:
+ def __init__(self):
+ self.tmp_report_dir = os.path.join("/tmp", "report")
- for root, dirs, files in os.walk("/tmp/report/"):
- for name in files:
- csv_files.append(os.path.join(root, name)) if "ab_report" in name else None
-
- markdownDocument = MarkdownDocument("Benchmark report")
- markdownDocument.add_newline()
- # Assume model configuration starts from /tmp/report
- for report_path in csv_files:
- split_path = report_path.split("/")
- print(split_path)
- model = split_path[3]
- instance_type = split_path[4]
- mode = split_path[5]
- batch_size = split_path[6]
+ def download_benchmark_results_from_s3(self, s3_uri):
+ """
+ Download benchmark results of various runs from s3
+ """
+ # Cleanup any previous folder
+ run(f"rm -rf {self.tmp_report_dir}")
- config_header = f"{model} | {mode} | {instance_type} | batch size {batch_size}"
+ # Create a tmp folder
+ run(f"mkdir -p {self.tmp_report_dir}")
- markdownDocument.add_paragraph(config_header, bold=True, newline=True)
+ run(f"aws s3 cp --recursive {s3_uri} {self.tmp_report_dir}")
- print(f"Updating data from file: {report_path}")
- markdownDocument.add_markdown_from_csv(report_path, delimiter=" ")
-
- with open("report.md", "w") as f:
- f.write(markdownDocument.get_document())
- # Clean up
- run(f"rm -rf /tmp/report")
+ def generate_comprehensive_report(self):
+ """
+ Compile a markdown file with different csv files as input
+ """
+ csv_files = []
+ for root, dirs, files in os.walk(self.tmp_report_dir):
+ for name in files:
+ if "ab_report" in name:
+ csv_files.append(os.path.join(root, name))
+
+ csv_files = sorted(csv_files)
+
+ markdownDocument = MarkdownDocument("Benchmark report")
+ markdownDocument.add_newline()
+
+ # Assume model configuration starts from /tmp/report
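+ # e.g. /tmp/report/<model>/<instance_type>/<mode>/<batch_size>/ab_report.csv, so the
+ # split_path indices below map to model, instance type, mode and batch size.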
+ for report_path in csv_files:
+ split_path = report_path.split("/")
+ print(split_path)
+ model = split_path[3]
+ instance_type = split_path[4]
+ mode = split_path[5]
+ batch_size = split_path[6]
+
+ config_header = f"{model} | {mode} | {instance_type} | batch size {batch_size}"
+
+ markdownDocument.add_code_block(config_header, newline=True)
+
+ print(f"Updating data from file: {report_path}")
+ markdownDocument.add_markdown_from_csv(report_path, delimiter=" ")
+
+ with open("report.md", "w") as f:
+ f.write(markdownDocument.get_document())
-if __name__ == "__main__":
- generate_comprehensive_report("s3_bucket_uri")
\ No newline at end of file
+ LOGGER.info(f"Benchmark report generated at: {os.path.join(os.getcwd(), 'report.md')}")
\ No newline at end of file
diff --git a/test/benchmark/tests/utils/ts.py b/test/benchmark/tests/utils/ts.py
index 490755403e..605bb61a42 100644
--- a/test/benchmark/tests/utils/ts.py
+++ b/test/benchmark/tests/utils/ts.py
@@ -26,7 +26,7 @@
class TorchServeHandler(object):
def __init__(
self,
- exec_env="local",
+ exec_env="docker",
cuda_version="cu102",
gpus=None,
torchserve_docker_image=None,
@@ -50,18 +50,31 @@ def __init__(
# self.prepare_common_dependency()
# self.getAPIS()
- def setup_torchserve(self):
+ def setup_torchserve(self, virtual_env_name=None):
"""
Set up torchserve dependencies, and install torchserve
"""
- pass
+ activation_command = ""
+ self.connection.run(f"chmod +x -R /home/ubuntu/serve")
+ if virtual_env_name:
+ activation_command = f"cd /home/ubuntu/serve && source activate {virtual_env_name} && "
+
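+ # If torchserve already resolves in this (optionally activated) environment, skip the
+ # dependency install and the from-source build below.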
+ if self.connection.run(f"{activation_command}torchserve --version", warn=True).return_code == 0:
+ return
+
+ self.connection.run(f"{activation_command}python3 ./ts_scripts/install_dependencies.py --environment=dev", warn=True)
+ self.connection.run(f"{activation_command}pip3 install pygit2", warn=True)
+ self.connection.run(f"{activation_command}python3 ./ts_scripts/install_from_src.py", warn=True)
+ self.connection.run(f"{activation_command}torchserve --version")
+
def prepare_common_dependency(self):
- # Note: the following command cleans up any previous run logs
- self.connection.run(f"rm -rf {os.path.join(TMP_DIR, 'benchmark')}")
+ # Note: clean up artifacts from any previous run, but keep generated *.mar files so they do not need to be re-created
+ self.connection.run(f"find {os.path.join(TMP_DIR, 'benchmark')} ! -name '*.mar' -type f -exec rm -f {{}} +", warn=True)
# Recreate required folders
self.connection.run(f"mkdir -p {os.path.join(TMP_DIR, 'benchmark', 'conf')}")
self.connection.run(f"mkdir -p {os.path.join(TMP_DIR, 'benchmark', 'logs')}")
+ self.connection.run(f"mkdir -p {os.path.join(TMP_DIR, 'benchmark', 'model_store')}")
# Use config from benchmarks/ folder
self.connection.run(
@@ -86,10 +99,29 @@ def getAPIS(self):
self.management_port = urlparse(management_api).port
self.inference_api = urlparse(inference_api).port
- def start_torchserve_local(self):
- pass
+ def start_torchserve_local(self, virtual_env_name=None, stop_torchserve=True):
+
+ self.prepare_common_dependency()
+ self.getAPIS()
+
+ activation_command = ""
+ if virtual_env_name:
+ activation_command = f"cd /home/ubuntu/serve && source activate {virtual_env_name} && "
+ if self.backend_profiling:
+ activation_command = f"{activation_command} && export TS_BENCHMARK=True && "
+
+ if stop_torchserve:
+ LOGGER.info(f"Stop existing torchserve instance")
+ self.connection.run(f"{activation_command}torchserve --stop", warn=True)
+
+ self.connection.run(f"{activation_command}torchserve --start --model-store /home/ubuntu/benchmark/model_store/ --ts-config {TMP_DIR}/benchmark/conf/config.properties > {TMP_DIR}/benchmark/logs/model_metrics.log", warn=True)
+ LOGGER.info(f"Started torchserve using command")
+ LOGGER.info(f"{activation_command}torchserve --start --model-store /home/ubuntu/benchmark/model_store/ --ts-config {TMP_DIR}/benchmark/conf/config.properties > {TMP_DIR}/benchmark/logs/model_metrics.log")
+
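+ # Give the server a few seconds to come up and bind its ports before any models are registered.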
+ time.sleep(10)
- def start_torchserve_docker(self):
+
+ def start_torchserve_docker(self, stop_torchserve=True):
self.prepare_common_dependency()
self.getAPIS()
@@ -101,8 +133,9 @@ def start_torchserve_docker(self):
if self.backend_profiling:
backend_profiling = f"-e TS_BENCHMARK=True"
- LOGGER.info(f"Removing existing TS container instance...")
- self.connection.run("docker rm -f ts")
+ if stop_torchserve:
+ LOGGER.info(f"Removing existing TS container instance...")
+ self.connection.run("docker rm -f ts")
LOGGER.info(f"Starting docker container on the instance from image: {self.torchserve_docker_image}")
docker_run_cmd = (
@@ -127,16 +160,17 @@ def register_model(self, url, workers, batch_delay, batch_size, model_name="benc
:param batch_size: max number of requests allowed to be batched
"""
run_out = self.connection.run(
- f'curl -X POST "http://localhost:8081/models?url={url}&initial_workers={workers}&batch_delay={batch_delay}&batch_size={batch_size}&synchronous=true&model_name=benchmark"'
+ f'curl -X POST "http://localhost:8081/models?url={url}&initial_workers={workers}&batch_delay={batch_delay}&batch_size={batch_size}&synchronous=true&model_name=benchmark"', warn=True
)
LOGGER.info(
f'curl -X POST "http://localhost:8081/models?url={url}&initial_workers={workers}&batch_delay={batch_delay}&batch_size={batch_size}&synchronous=true&model_name=benchmark"'
)
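+ # Worker initialization for large (e.g. Neuron-compiled) models can take well over five seconds,
+ # hence the longer wait before the registration result is checked (assumed rationale).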
- time.sleep(5)
+ time.sleep(40)
- assert run_out.return_code == 0, f"Failed to register model {model_name} sourced from url: {url}"
+ if run_out.return_code != 0:
+ LOGGER.error(f"Failed to register model {model_name} sourced from url: {url}")
def unregister_model(self, model_name="benchmark"):
"""
@@ -148,18 +182,23 @@ def unregister_model(self, model_name="benchmark"):
LOGGER.info(f'curl -X DELETE "http://localhost:8081/models/{model_name}/1.0"')
LOGGER.info(f"stdout: {run_out.stdout}")
- time.sleep(5)
+ time.sleep(10)
if run_out.return_code == 0:
LOGGER.error(f"Failed to unregister model {model_name}")
- def stop_torchserve(self, exec_env="local"):
+ def stop_torchserve(self, exec_env="docker", virtual_env_name=None):
"""
Stops torchserve depending on the exec_env
:param exec_env: either 'local' or 'docker'
"""
if exec_env == "docker":
- self.connection.run(f"docker rm -f ts")
+ self.connection.run(f"docker rm -f ts", warn=True)
+ else:
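+ # In a local run torchserve may live inside a conda environment, so activate it (when a
+ # virtual env name is given) before issuing torchserve --stop.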
+ activation_command = ""
+ if virtual_env_name:
+ activation_command = f"cd /home/ubuntu/serve/test/benchmark/tests/resources/neuron-bert && source activate {virtual_env_name} && "
+ self.connection.run(f"{activation_command}torchserve --stop", warn=True)
time.sleep(5)