pytorch · lxning · Jul 12, 2021 · May 27, 2021 · Jun 4, 2021 · Jun 10, 2021
diff --git a/.gitignore b/.gitignore
@@ -10,6 +10,8 @@ dist/
 .github/.DS_Store
 .DS_Store
 frontend/server/src/main/java/org/pytorch/serve/grpc/
+*.pem
+*.backup
 
 # Postman files
 test/artifacts/
@@ -18,5 +20,10 @@ test/model_store/
 test/ts_console.log
 test/config.properties
 
+
 .vscode
 .scratch/
+
+# Custom benchmark artifacts
+instances.yaml
+instances.yaml.backup
diff --git a/docker/Dockerfile.neuron.dev b/docker/Dockerfile.neuron.dev
@@ -0,0 +1,109 @@
+# syntax = docker/dockerfile:experimental
+#
+# Following comments have been shamelessly copied from https://github.com/pytorch/pytorch/blob/master/Dockerfile
+# 
+# NOTE: To build this you will need a docker version > 18.06 with
+#       experimental enabled and DOCKER_BUILDKIT=1
+#
+#       If you do not use buildkit you are not going to have a good time
+#
+#       For reference: 
+#           https://docs.docker.com/develop/develop-images/build_enhancements/
+
+ARG BASE_IMAGE=ubuntu:18.04
+ARG BUILD_TYPE=dev
+FROM ${BASE_IMAGE} AS compile-image
+
+ARG BASE_IMAGE
+ARG BRANCH_NAME=master
+ARG MACHINE_TYPE=cpu
+ARG CUDA_VERSION
+
+ENV PYTHONUNBUFFERED TRUE
+
+RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
+    apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    fakeroot \
+    ca-certificates \
+    dpkg-dev \
+    sudo \
+    g++ \
+    git \
+    python3-dev \
+    build-essential \
+    openjdk-11-jdk \
+    curl \
+    wget \
+    vim \
+    && rm -rf /var/lib/apt/lists/* \
+    && cd /tmp \
+    && curl -O https://bootstrap.pypa.io/get-pip.py \
+    && python3 get-pip.py
+
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 \
+    && update-alternatives --install /usr/local/bin/pip pip /usr/local/bin/pip3 1
+
+RUN pip install -U pip setuptools
+
+RUN echo "deb https://apt.repos.neuron.amazonaws.com bionic main" > /etc/apt/sources.list.d/neuron.list
+RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -
+
+RUN apt-get update \
+    && apt-get install -y \
+    aws-neuron-runtime \
+    aws-neuron-tools \
+    && rm -rf /var/lib/apt/lists/* \
+    && rm -rf /tmp/tmp* \
+    && apt-get clean
+
+# Build Dev Image
+FROM compile-image AS dev-image
+ARG MACHINE_TYPE=cpu
+ARG CUDA_VERSION
+RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \
+    && git clone https://github.com/pytorch/serve.git \
+    && cd serve \
+    && git checkout --track ${BRANCH_NAME} \
+    && if [ -z "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev; else python ts_scripts/install_dependencies.py --environment=dev  --cuda $CUDA_VERSION; fi \
+    && python ts_scripts/install_from_src.py \
+    && useradd -m model-server \
+    && mkdir -p /home/model-server/tmp \
+    && cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh \
+    && chmod +x /usr/local/bin/dockerd-entrypoint.sh \
+    && chown -R model-server /home/model-server \
+    && cp docker/config.properties /home/model-server/config.properties \
+    && mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store \
+    && pip install torch-neuron 'neuron-cc[tensorflow]' --extra-index-url=https://pip.repos.neuron.amazonaws.com 
+
+EXPOSE 8080 8081 8082 7070 7071
+USER model-server
+WORKDIR /home/model-server
+ENV TEMP=/home/model-server/tmp
+ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
+CMD ["serve"]
+
+# Build CodeBuild Image
+FROM compile-image AS codebuild-image
+ENV JAVA_VERSION=11 \
+  JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64" \
+  JDK_HOME="/usr/lib/jvm/java-11-openjdk-amd64" \
+  JRE_HOME="/usr/lib/jvm/java-11-openjdk-amd64" \
+  ANT_VERSION=1.10.3 \
+  MAVEN_HOME="/opt/maven" \
+  MAVEN_VERSION=3.5.4 \
+  MAVEN_CONFIG="/root/.m2" \
+  MAVEN_DOWNLOAD_SHA1="22cac91b3557586bb1eba326f2f7727543ff15e3"
+
+# Install Maven
+RUN set -ex \
+  && mkdir -p $MAVEN_HOME \
+  && curl -LSso /var/tmp/apache-maven-$MAVEN_VERSION-bin.tar.gz https://apache.org/dist/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.tar.gz \
+  && echo "$MAVEN_DOWNLOAD_SHA1 /var/tmp/apache-maven-$MAVEN_VERSION-bin.tar.gz" | sha1sum -c - \
+  && tar xzvf /var/tmp/apache-maven-$MAVEN_VERSION-bin.tar.gz -C $MAVEN_HOME --strip-components=1 \
+  && update-alternatives --install /usr/bin/mvn mvn /opt/maven/bin/mvn 10000 \
+  && mkdir -p $MAVEN_CONFIG
+
+FROM ${BUILD_TYPE}-image AS final-image
+ARG BUILD_TYPE
+RUN echo "${BUILD_TYPE} image creation completed"
diff --git a/docker/build_image.sh b/docker/build_image.sh
@@ -89,5 +89,5 @@ if [ $BUILD_TYPE == "production" ]
 then
   DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg CUDA_VERSION=$CUDA_VERSION -t $DOCKER_TAG .
 else
-  DOCKER_BUILDKIT=1 docker build --file Dockerfile.dev -t $DOCKER_TAG --build-arg BUILD_TYPE=$BUILD_TYPE --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BRANCH_NAME=$BRANCH_NAME --build-arg CUDA_VERSION=$CUDA_VERSION --build-arg MACHINE_TYPE=$MACHINE .
+  DOCKER_BUILDKIT=1 docker build --pull --file Dockerfile.dev -t $DOCKER_TAG --build-arg BUILD_TYPE=$BUILD_TYPE --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BRANCH_NAME=$BRANCH_NAME --build-arg CUDA_VERSION=$CUDA_VERSION --build-arg MACHINE_TYPE=$MACHINE .
 fi
diff --git a/test/benchmark/README.md b/test/benchmark/README.md
@@ -21,8 +21,45 @@ If you'd like to use your own repo, edit the __init__.py under `serve/test/bench
 * Ensure you have [docker](https://docs.docker.com/get-docker/) client set-up on your system - osx/ec2
 * Adjust the following global variables to your preference in the file `serve/test/benchmark/tests/utils/__init__.py` <br>
 -- IAM_INSTANCE_PROFILE :this role is attached to all ec2 instances created as part of the benchmarking process. Create this as described [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html#create-iam-role). Default role name is 'EC2Admin'.<br>
+Use the following commands to create a new role if you don't have one you can use.
+1. Create the trust policy file `ec2-admin-trust-policy.json` and add the following content:
+```
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Principal": {
+        "Service": [
+          "ec2.amazonaws.com"
+        ]
+      },
+      "Action": "sts:AssumeRole"
+    }
+  ]
+}
+```
+2. Create the EC2 role as follows:
+```
+aws iam create-role --role-name EC2Admin --assume-role-policy-document file://ec2-admin-trust-policy.json
+```
+3. Add the permissions to the role as follows:
+```
+aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/IAMFullAccess --role-name EC2Admin
+aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonEC2FullAccess --role-name EC2Admin
+aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess --role-name EC2Admin
+aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess --role-name EC2Admin
+```
 -- S3_BUCKET_BENCHMARK_ARTIFACTS :all temporary benchmarking artifacts including server logs will be stored in this bucket: <br>
+Use the following command to create a new S3 bucket if you don't have one you can use.
+```
+aws s3api create-bucket --bucket <torchserve-benchmark> --region us-west-2 --create-bucket-configuration LocationConstraint=us-west-2
+```
 -- DEFAULT_DOCKER_DEV_ECR_REPO :docker image used for benchmarking will be pushed to this repo <br>
+Use the following command to create a new ECR repo if you don't have one you can use.
+```
+aws ecr create-repository --repository-name torchserve-benchmark --region us-west-2
+```
 * If you're running this setup on an EC2 instance, please ensure that the instance's security group settings 'allow' inbound ssh port 22. Refer [docs](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/security-group-rules.html).
 
 *The following steps assume that the current working directory is serve/.*
@@ -32,6 +69,8 @@ If you'd like to use your own repo, edit the __init__.py under `serve/test/bench
 sudo apt-get install python3-venv
 python3 -m venv bvenv
 source bvenv/bin/activate
+# Ensure you have the latest pip
+pip3 install -U pip
 ```
 2. Install requirements for the benchmarking 
 ```
@@ -57,7 +96,7 @@ python report.py
 ```
 The final benchmark report will be available in markdown format as `report.md` in the `serve/` folder. 
 
-**Example report for vgg16 model**
+**Example report for vgg11 model**
 
 
 ### Benchmark report
@@ -103,3 +142,37 @@ The final benchmark report will be available in markdown format as `report.md` i
  | AB | vgg11 | 100 | 1000 | 0 | 3.47 | 28765 | 29849 | 30488 | 28781.227 | 0.0 | 1576.24 | 1758.28 | 1758.28 | 2249.52 | 2249.34 | 25210.43 | 46.77 | 
 
 
+## Features of the automation:
+1. To save time by *not* creating new instances for every benchmark run for local testing, use the '--do-not-terminate' flag. This will automatically create a file called 'instances.yaml' and write instance-related data into the file so that it may be re-used next time.
+```
+python test/benchmark/run_benchmark.py --do-not-terminate
+```
+
+2. To re-use an instance already recorded in `instances.yaml`, use the '--use-instances' flag:
+```
+python test/benchmark/run_benchmark.py --use-instances <full_path_to>/instances.yaml --do-not-terminate
+```
+`Note: Use the --do-not-terminate flag to keep re-using the instances; otherwise they will be terminated after the run`.
+
+3. To run a test containing a specific string, use the `--run-only` flag. Note that the argument is 'string matched' i.e. if the test-name contains the supplied argument as a substring, the test will run. 
+```
+# To run mnist test
+python test/benchmark/run_benchmark.py --run-only mnist
+
+# To run fastrcnn test
+python test/benchmark/run_benchmark.py --run-only fastrcnn
+
+# To run bert_neuron and bert
+python test/benchmark/run_benchmark.py --run-only bert
+
+# To run vgg11 test
+python test/benchmark/run_benchmark.py --run-only vgg11
+
+# To run vgg16 test
+python test/benchmark/run_benchmark.py --run-only vgg16
+```
+
+4. You can benchmark a specific branch of the torchserve github repo by specifying the flag `--use-torchserve-branch`, e.g.,
+```
+python test/benchmark/run_benchmark.py --use-torchserve-branch issue_1115
+```
diff --git a/test/benchmark/requirements.txt b/test/benchmark/requirements.txt
@@ -11,4 +11,5 @@ gitpython
 docker
 pandas
 matplotlib
-pyyaml
+pyyaml
+cryptography==3.4.7
diff --git a/test/benchmark/run_benchmark.py b/test/benchmark/run_benchmark.py
@@ -1,22 +1,110 @@
+import argparse
 import os
 import random
 import sys
 import logging
 import re
 import uuid
 
+
 import boto3
 import pytest
 
 from invoke import run
 from invoke.context import Context
 
+
+from tests.utils.report import Report
+from tests.utils import (
+    S3_BUCKET_BENCHMARK_ARTIFACTS,
+    DEFAULT_REGION,
+    DEFAULT_DOCKER_DEV_ECR_REPO,
+    YamlHandler,
+    DockerImageHandler,
+)
+
 LOGGER = logging.getLogger(__name__)
 LOGGER.setLevel(logging.DEBUG)
 LOGGER.addHandler(logging.StreamHandler(sys.stdout))
 
 
+def build_docker_container(torchserve_branch="master"):
+    LOGGER.info(f"Setting up docker image to be used")
+
+    docker_dev_image_config_path = os.path.join(os.getcwd(), "test", "benchmark", "tests", "suite", "docker", "docker.yaml")
+
+    docker_config = YamlHandler.load_yaml(docker_dev_image_config_path)
+    YamlHandler.validate_docker_yaml(docker_config)
+
+    account_id = run("aws sts get-caller-identity --query Account --output text").stdout.strip()
+
+    for processor, config in docker_config.items():
+        docker_tag = None
+        cuda_version = None
+        for config_key, config_value in config.items():
+            if processor == "gpu" and config_key == "cuda_version":
+                cuda_version = config_value
+            if config_key == "docker_tag":
+                docker_tag = config_value
+        dockerImageHandler = DockerImageHandler(docker_tag, cuda_version, torchserve_branch)
+        dockerImageHandler.build_image()
+        dockerImageHandler.push_docker_image_to_ecr(
+            account_id, DEFAULT_REGION, f"{DEFAULT_DOCKER_DEV_ECR_REPO}:{docker_tag}"
+        )
+
+
 def main():
+
+    LOGGER.info(f"sys.path: {sys.path}")
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--use-instances",
+        action="store",
+        help="Supply a .yaml file with test_name, instance_id, and key_filename to re-use already-running instances",
+    )
+    parser.add_argument(
+        "--do-not-terminate",
+        action="store_true",
+        default=False,
+        help="Use with caution: does not terminate instances, instead saves the list to a file in order to re-use",
+    )
+
+    parser.add_argument(
+        "--run-only", default=None, help="Runs the tests that contain the supplied keyword as a substring"
+    )
+
+    parser.add_argument(
+        "--use-torchserve-branch",
+        default="master",
+        help="Specify a specific torchserve branch to benchmark on, else uses 'master' by default"
+    )
+
+    parser.add_argument(
+        "--skip-docker-build",
+        action="store_true",
+        default=False,
+        help="Use if you already have a docker image built and available locally and have specified it in docker.yaml"
+    )
+
+    arguments = parser.parse_args()
+    do_not_terminate_string = "" if not arguments.do_not_terminate else "--do-not-terminate"
+    use_instances_arg_list = ["--use-instances", f"{arguments.use_instances}"] if arguments.use_instances else []
+    run_only_test = arguments.run_only
+
+    if run_only_test:
+        run_only_string = f"-k {run_only_test}"
+        LOGGER.info(f"Note: running only the tests that have the name '{run_only_test}'.")
+    else:
+        run_only_string = ""
+
+    torchserve_branch = arguments.use_torchserve_branch
+
+    # Build docker containers as specified in docker.yaml
+    if not arguments.skip_docker_build:
+        build_docker_container(torchserve_branch=torchserve_branch)
+
     # Run this script from the root directory 'serve', it changes directory below as required
     os.chdir(os.path.join(os.getcwd(), "test", "benchmark"))
 
@@ -25,12 +113,30 @@ def main():
     test_path = os.path.join(os.getcwd(), "tests")
     LOGGER.info(f"Running tests from directory: {test_path}")
 
-    pytest_args = ["-s", "-rA", test_path, "-n=4", "--disable-warnings", "-v", "--execution-id", execution_id]
+    pytest_args = [
+        "-s",
+        run_only_string,
+        "-rA",
+        test_path,
+        "-n=4",
+        "--disable-warnings",
+        "-v",
+        "--execution-id",
+        execution_id,
+        do_not_terminate_string,
+    ] + use_instances_arg_list
 
     LOGGER.info(f"Running pytest")
 
     pytest.main(pytest_args)
 
+    # Generate report
+    s3_results_uri = f"{S3_BUCKET_BENCHMARK_ARTIFACTS}/{execution_id}"
+
+    report = Report()
+    report.download_benchmark_results_from_s3(s3_results_uri)
+    report.generate_comprehensive_report()
+
 
 if __name__ == "__main__":
     main()
diff --git a/test/benchmark/tests/__init__.py b/test/benchmark/tests/__init__.py
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,4 +11,5 @@ gitpython @@
     docker
     pandas
     matplotlib
-    pyyaml
+    pyyaml
+    cryptography==3.4.7