Merge branch 'master' into master
chauhang committed Aug 30, 2021
2 parents 6356da2 + 8903ca1 commit d87bc0c
Showing 30 changed files with 179 additions and 171 deletions.
4 changes: 2 additions & 2 deletions binaries/README.md
@@ -78,7 +78,7 @@
##### Windows:
Conda install is currently not supported. Please use pip install command instead.

# Uploading packages for staging
# Uploading packages for testing to a personal account
1. Export the following environment variables for TestPypi and anaconda.org authentication
```
export CONDA_TOKEN=<>
@@ -90,7 +90,7 @@
```
python3 binaries/conda/build_packages.py --install-conda-dependencies
exec bash
python3 binaries/build.py --staging
python3 binaries/build.py
cd binaries/
python3 upload.py --upload-pypi-packages --upload-conda-packages
```
35 changes: 2 additions & 33 deletions binaries/build.py
@@ -7,30 +7,14 @@
REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
sys.path.append(REPO_ROOT)

STAGING_SUFFIX = "-staging"

from pathlib import Path

from ts_scripts.utils import is_conda_env, is_conda_build_env
from binaries.conda.build_packages import conda_build, install_miniconda, install_conda_build


def build(is_staging=False):
def build():

ts_setup_file = glob.glob(os.path.join(REPO_ROOT, "setup.py"))[0]
ma_setup_file = glob.glob(os.path.join(REPO_ROOT, "model-archiver","setup.py"))[0]
wa_setup_file = glob.glob(os.path.join(REPO_ROOT, "workflow-archiver","setup.py"))[0]

if is_staging:
f_ts = Path(ts_setup_file)
f_ts.write_text(f_ts.read_text().replace(f"name='torchserve'", f"name='torchserve{STAGING_SUFFIX}'"))

f_ma = Path(ma_setup_file)
f_ma.write_text(f_ma.read_text().replace(f"name='torch-model-archiver'", f"name='torch-model-archiver{STAGING_SUFFIX}'"))

f_wa = Path(wa_setup_file)
f_wa.write_text(f_wa.read_text().replace(f"name='torch-workflow-archiver'", f"name='torch-workflow-archiver{STAGING_SUFFIX}'"))

print("## Started torchserve, model-archiver and workflow-archiver build")
create_wheel_cmd = "python setup.py bdist_wheel --release --universal"

@@ -69,17 +53,6 @@ def build(is_staging=False):

conda_build_exit_code = conda_build(ts_wheel_path, ma_wheel_path, wa_wheel_path)

# Revert setup.py changes
if is_staging:
f_ts = Path(ts_setup_file)
f_ts.write_text(f_ts.read_text().replace(f"name='torchserve{STAGING_SUFFIX}'", f"name='torchserve'"))

f_ma = Path(ma_setup_file)
f_ma.write_text(f_ma.read_text().replace(f"name='torch-model-archiver{STAGING_SUFFIX}'", f"name='torch-model-archiver'"))

f_wa = Path(wa_setup_file)
f_wa.write_text(f_wa.read_text().replace(f"name='torch-workflow-archiver{STAGING_SUFFIX}'", f"name='torch-workflow-archiver'"))

# If any one of the steps fail, exit with error
if ts_build_exit_code != 0:
sys.exit("## Torchserve Build Failed !")
@@ -92,9 +65,5 @@ def build(is_staging=False):


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="argument parser for build.py")
parser.add_argument("--staging", default=False, required=False, action="store_true", help="Specify if you want to build packages only for staging/testing")

args = parser.parse_args()

build(is_staging=args.staging)
build()
4 changes: 3 additions & 1 deletion binaries/conda/build_packages.py
@@ -51,7 +51,9 @@ def conda_build(ts_wheel_path, ma_wheel_path, wa_wheel_path):
os.environ["TORCH_MODEL_ARCHIVER_VERSION"] = ma_version
os.environ["TORCH_WORKFLOW_ARCHIVER_VERSION"] = wa_version

os.environ["TORCHSERVE_ROOT_DIR"] = REPO_ROOT
os.environ["TORCHSERVE_ROOT_DIR"] = REPO_ROOT.replace("\\", "/")

os.environ["PYTHON"] = "python"

python_versions = ["3.6", "3.7", "3.8", "3.9"]
packages = [
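The `REPO_ROOT.replace("\\", "/")` line above presumably normalizes a Windows checkout path before it is handed to the conda recipes, since backslash separators would otherwise be carried through verbatim. A minimal sketch of the effect, using a hypothetical Windows path:

```python
# Hypothetical Windows checkout path, shown only to illustrate the
# normalization applied to TORCHSERVE_ROOT_DIR above.
repo_root = "C:\\Users\\builder\\serve"
normalized = repo_root.replace("\\", "/")
print(normalized)  # C:/Users/builder/serve
```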
1 change: 1 addition & 0 deletions docs/README.md
@@ -16,6 +16,7 @@ TorchServe is a flexible and easy to use tool for serving PyTorch models.
* [Metrics](metrics.md) - How to configure metrics
* [Metrics API](metrics_api.md) - How to configure metrics API
* [Batch inference with TorchServe](batch_inference_with_ts.md) - How to create and serve a model with batch inference in TorchServe
* [Workflows](workflows.md) - How to create workflows to compose Pytorch models and Python functions in sequential and parallel pipelines
* [Model Zoo](model_zoo.md) - List of pre-trained model archives ready to be served for inference with TorchServe.
* [Examples](https://github.com/pytorch/serve/tree/master/examples) - Many examples of how to package and deploy models and workflows with TorchServe

4 changes: 2 additions & 2 deletions docs/workflow_inference_api.md
@@ -10,7 +10,7 @@ The TorchServe server supports the following APIs:

To get predictions from a workflow, make a REST call to `/wfpredict/{workflow_name}`:

* POST /wfpredict/{workflow_name}
`POST /wfpredict/{workflow_name}`

### curl Example

@@ -20,4 +20,4 @@ curl -O https://raw.githubusercontent.com/pytorch/serve/master/docs/images/kitte
curl http://localhost:8080/wfpredict/myworkflow -T kitten_small.jpg
```

The result is JSON object returning the response bytes from the leaf node of the workflow DAG
The result is JSON object returning the response bytes from the leaf node of the workflow DAG.
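For reference, a minimal Python sketch of the same prediction call, assuming TorchServe is running locally on port 8080 with a workflow named `myworkflow` registered and using the `requests` library instead of curl:

```python
import requests

# Send the image bytes to the workflow prediction endpoint.
with open("kitten_small.jpg", "rb") as f:
    response = requests.post(
        "http://localhost:8080/wfpredict/myworkflow", data=f.read()
    )

# The body contains the response bytes from the leaf node of the workflow DAG.
print(response.status_code, response.content[:200])
```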
4 changes: 2 additions & 2 deletions docs/workflow_management_api.md
@@ -15,7 +15,7 @@ The Workflow Management API listens on port 8081 and is only accessible from loc

* `url` - Workflow archive download url. Supports the following locations:
* a local workflow archive (.war); the file must be in the `workflow_store` folder (and not in a subfolder).
* a URI using the HTTP(s) protocol. TorchServe can download .war files from the Internet.
* a URI using the HTTP(s) protocol. TorchServe can download `.war` files from the Internet.
* `workflow_name` - the name of the workflow; this name will be used as {workflow_name} in other APIs as part of the path. If this parameter is not present, `modelName` in MANIFEST.json will be used.

```bash
@@ -32,7 +32,7 @@ The workflow registration API parses the workflow specification file (.yaml) sup

`GET /workflows/{workflow_name}`

Use the Describe Wofkflow API to get detail of a workflow:
Use the Describe Workflow API to get detail of a workflow:

```bash
curl http://localhost:8081/workflows/myworkflow
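A hedged Python sketch of the same management calls follows. The registration endpoint is assumed to be `POST /workflows` with the `url` and `workflow_name` parameters described above; the authoritative call is the collapsed curl example in this section.

```python
import requests

MGMT = "http://localhost:8081"  # Workflow Management API

# Register a workflow archive from the workflow_store folder
# (endpoint assumed to mirror the model management API).
resp = requests.post(
    f"{MGMT}/workflows",
    params={"url": "myworkflow.war", "workflow_name": "myworkflow"},
)
print(resp.json())

# Describe the registered workflow.
print(requests.get(f"{MGMT}/workflows/myworkflow").json())
```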
54 changes: 40 additions & 14 deletions docs/workflows.md
@@ -1,6 +1,6 @@
# TorchServe Workflows

TorchServe can be used for serving ensemble of models & functions (python) through Workflow APIs.
TorchServe can be used for serving ensemble of Pytorch models packaged as mar files and Python functions through Workflow APIs.

It utilizes [REST based APIs](rest_api.md) for workflow management and predictions.

@@ -10,6 +10,11 @@ A Workflow is served on TorchServe using a workflow-archive(.war) which comprise

A workflow specification is a YAML file which provides the details of the models to be executed and a DAG for defining data flow.

The YAML file is split into a few sections
1. `models` which include global model parameters
2. `m1,m2,m3` all the relevant model parameters which would override the global model params
3. `dag` which describes the structure of the workflow, which nodes feed into which other nodes
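As a rough illustration of how those three sections fit together, the spec can be pictured as the following mapping (written as a Python dict purely for readability; the names, values, and exact nesting are hypothetical, and the real syntax is the YAML example that follows):

```python
# Hypothetical workflow spec, expressed as a Python dict only to illustrate
# the three sections described above; the actual file is YAML.
workflow_spec = {
    "models": {
        # 1. global model parameters
        "min-workers": 1,
        "max-workers": 1,
        # 2. per-model parameters that override the globals
        "m1": {"url": "model1.mar"},
        "m2": {"url": "model2.mar"},
    },
    # 3. the DAG: which nodes feed into which other nodes
    "dag": {
        "pre_processing": ["m1", "m2"],
        "m1": ["aggregate_func"],
        "m2": ["aggregate_func"],
    },
}
```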

E.g.

```yaml
@@ -74,6 +79,8 @@ These properties can be defined as a global value for every model and can be ove

User can define the dataflow of a workflow using the `dag` section of the workflow specification. The `dag` consists of the model names defined in the `model` section and python function names which are implemented in the workflow-archive's handler file.

### Sequential DAG

Eg.
```
dag:
@@ -82,19 +89,38 @@ dag:
model2 : [function2]
```

In the above example the data will flow as follows:
Which maps to this data flow

```
input -> function1 -> model1 -> model2 -> function2 -> output
```

### Parallel DAG

E.g
```
dag:
pre_processing: [model1, model2]
model1: [aggregate_func]
model2: [aggregate_func]
```

Which maps to this data flow

```
model1
/ \
input -> preprocessing -> -> aggregate_func
\ /
model2
```

## Handler file

A handler file (python) is supplied in the workflow archive (.war) and consists of all the functions used in the workflow dag.

Eg.
```
```python
def preprocess(data, context):
pass

Expand All @@ -103,15 +129,15 @@ def postprocess(data, context):

```

# Related docs
* [workflow_inference_api.md](workflow_inference_api.md)
* [workflow_management_api.md](workflow_management_api.md)

# Known issues

* **Each workflow dag node (model/function) will receive input as bytes**
* **Workflow scale/updates is not supported through APIs. User will need to unregister the workflow and re-register with the required changes**
* **Each workflow dag node (model/function) will receive input as bytes**
* **Only following output types are supported by workflow models/functions : String, Int, List, Dict of String, int, Json serializable objects, byte array and Torch Tensors**
* **Specifying Input or output per workflow model is not supported**
* **Snapshots are not supported for workflows and related models are not captured in the workflow**
* **Workflow versioning is not supported**
* **Workflows registration having public model URL with mar file names which are already registered will fail.**
* **There is no validation in place to check if the function names provided in DAG are available in the handler supplied in the workflow archive.**
* **Workflow models can be currently accessed and modified through model management APIs**
* Each workflow dag node (model/function) will receive input as bytes
* Only following output types are supported by workflow models/functions : String, Int, List, Dict of String, int, Json serializable objects, byte array and Torch Tensors
* Workflow scale/updates is not supported through APIs. User will need to unregister the workflow and re-register with the required changes
* Snapshots are not supported for workflows and related models are not captured in the workflow
* Workflow versioning is not supported
* Workflows registration having public model URL with mar file names which are already registered will fail.
25 changes: 19 additions & 6 deletions examples/Workflows/README.md
@@ -1,12 +1,25 @@
# Workflow examples

The following links provide examples on how to implement workflows with different models. The workflow feature can be used for serving an ensemble of models and python functions through workflow APIs. A workflow is executed as a DAG where the nodes can be either models (MAR files) or functions specified in the workflow handler file. The DAG need not contain additional functions if not required. Typically, the function nodes are used for processing or augmenting intermediate data or aggregating data from multiple nodes. Preprocessing nodes are used when we want to apply some common transformation to an input payload which is going to be passed to multiple model nodes. An example use case for this would be a preprocessing node passing transformed data to two branches with model nodes (refer dog/breed classification example below). In other cases, having a preprocessing step in the model handler itself might suffice.
Workflows can be used to compose an ensemble of Pytorch models and Python functions and package them in a `war` file. A workflow is executed as a DAG where the nodes can be either Pytorch models packaged as `mar` files or function nodes specified in the workflow handler file. The DAG can be used to define both sequential or parallel pipelines.

It is also possible to use the same mar file in multiple workflows and register them at the same time. The model server will create separate instances of this model for the different workflows. This is demonstrated in the NMT Transformers example where the English-to-German model is used in both back translation and dual translation workflows.
As an example a sequential pipeline may look something like

The following examples show the current workflows supported in this release. This include sequential pipeline and parallel models where you can aggregate the results. This can be used for ensemble models that votes on a task and results are aggregated in the post-processing function in the handler. Further examples will be added soon.
```
input -> function1 -> model1 -> model2 -> function2 -> output
```

For a more detailed explanation of Workflows and what is currently supported please refer to the main [documentation](../../docs/workflows.md)
And a parallel pipeline may look something like

```
model1
/ \
input -> preprocessing -> -> aggregate_func
\ /
model2
```

* [Pipeline/Sequential workflow using nmt tranformers example](nmt_tranformers_pipeline/)
* [Pipeline/Sequential workflow using resnet for dog breed classification](dog_breed_classification/)
You can experiment with much more complicated workflows by configuring a `YAML` file. We've included 2 reference examples including a sequential pipeline and parallel pipeline.
* [Parallel workflow using nmt transformers example](nmt_transformers_pipeline/)
* [Sequential workflow using resnet for dog breed classification](dog_breed_classification/)

For a more detailed explanation of Workflows and what is currently supported please refer to the main [documentation](../../docs/workflows.md)
3 changes: 2 additions & 1 deletion examples/Workflows/dog_breed_classification/README.md
@@ -11,7 +11,8 @@ In the flow described below, the pre_processing node base64 encodes the image an

## Commands to create the models and the workflow
The notebooks for training the [dog-cat classification](cat_dog_classification.ipynb) model and the [dog breed classification](dog_breed_classification.ipynb) models are provided in this example. Once the models are trained you can download the corresponding .pth files and use them to generate the mar files for serving inference requests as below.
The [dog-cat classification](https://torchserve.pytorch.org/mar_files/cat_dog_classification.mar) and [dog breed classification](https://torchserve.pytorch.org/mar_files/dog_breed_classification.mar) mar files can also be dowloaded directly.

The [dog-cat classification](https://torchserve.pytorch.org/mar_files/cat_dog_classification.mar) and [dog breed classification](https://torchserve.pytorch.org/mar_files/dog_breed_classification.mar) mar files can also be downloaded directly.

```
$ cd $TORCH_SERVE_DIR/examples/Workflows/dog_breed_classification
@@ -1,21 +1,21 @@
# Workflow pipeline example using nmt transformer nlp model

This example uses the existing [nmt_transformers](../../nmt_transformer) standalone example to create a workflow. We use three models, in two examples to demonstrate stringing them together in a workflow.
To change the default batch size and batch delay the yaml file for the workflow can to be changed. This cannot currently be set via the REST API.
The default batch size and delay can only be changed via the YAML file. This cannot currently be set via the REST API.

_NOTE: This example currently works with Py36 only due to fairseq dependency on dataclasses [issue](https://github.com/huggingface/transformers/issues/8638#issuecomment-790772391). This example currently doesn't work on Windows_
_NOTE: This example currently works with Python 3.6 only due to fairseq dependency on dataclasses [issue](https://github.com/huggingface/transformers/issues/8638#issuecomment-790772391). This example currently doesn't work on Windows_

## Flow

### Example 1: Dual translation

In the case of dual translation we use a preprocessing node as a dummy node to pass the input to both the english to french and english to german translators. The output from both the translations are converted into a single output by the aggregate-output node and returned to the user.
In the case of dual translation we use a preprocessing node as a dummy node to pass the input to both the English to French and English to German translators. The output from both the translations are converted into a single output by the aggregate-output node and returned to the user.

![NMTDualTranslate](../../../docs/images/NMTDualTranslate.png)

### Example 2: Back-translation

In the case of back translation we pass the input to an english to german translation model. The output is cleaned up by the intermediate-input-processing node and converted into a format which is expected by the german to english translation model. The post-processing node takes the final output and converts the keys of the output dictionary to be relevant to the workflow.
In the case of back translation we pass the input to an English to German translation model. The output is cleaned up by the intermediate-input-processing node and converted into a format which is expected by the German to English translation model. The post-processing node takes the final output and converts the keys of the output dictionary to be relevant to the workflow.

![NMTBackTranslate](../../../docs/images/NMTBackTranslate.png)

@@ -26,7 +26,7 @@ $ ./create_mar.sh de2en_model
$ ./create_mar.sh en2de_model
$ ./create_mar.sh en2fr_model
$ cd $TORCH_SERVE_DIR/examples/Workflows/nmt_tranformers_pipeline/
$ cd $TORCH_SERVE_DIR/examples/Workflows/nmt_transformers_pipeline/
$ mkdir model_store wf_store
$ mv $TORCH_SERVE_DIR/examples/nmt_transformer/model_store/*.mar model_store/
$ torch-workflow-archiver -f --workflow-name nmt_wf_dual --spec-file nmt_workflow_dualtranslation.yaml --handler nmt_workflow_handler_dualtranslation.py --export-path wf_store/
@@ -45,10 +45,10 @@ public void handleRequest(
QueryStringDecoder decoder,
String[] segments)
throws ModelException, DownloadArchiveException, WorkflowException {
if (segments.length < 3) {
throw new ResourceNotFoundException();
}
if ("wfpredict".equalsIgnoreCase(segments[1])) {
if (segments.length < 3) {
throw new ResourceNotFoundException();
}
handlePredictions(ctx, req, segments);
} else {
chain.handleRequest(ctx, req, decoder, segments);
32 changes: 32 additions & 0 deletions kubernetes/kfserving/Dockerfile
@@ -0,0 +1,32 @@
# syntax = docker/dockerfile:experimental
#
# Following comments have been shamelessly copied from https://github.com/pytorch/pytorch/blob/master/Dockerfile
#
# NOTE: To build this you will need a docker version > 18.06 with
# experimental enabled and DOCKER_BUILDKIT=1
#
# If you do not use buildkit you are not going to have a good time
#
# For reference:
# https://docs.docker.com/develop/develop-images/build_enhancements

ARG BASE_IMAGE=pytorch/torchserve:latest
FROM ${BASE_IMAGE}

USER root

RUN pip install --upgrade pip

COPY requirements.txt requirements.txt

RUN pip install -r requirements.txt

COPY dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh
COPY kfserving_wrapper kfserving_wrapper
COPY config.properties config.properties

USER model-server

ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]

26 changes: 26 additions & 0 deletions kubernetes/kfserving/README.md
@@ -4,6 +4,32 @@ The documentation covers the steps to run Torchserve inside the KFServing enviro

Currently, KFServing supports the Inference API for all the existing models but text to speech synthesizer and it's explain API works for the eager models of MNIST,BERT and text classification only.

### Docker Image Building

* To create a CPU based image

```
./build_image.sh
```

* To create a CPU based image with custom tag

```
./build_image.sh -t <repository>/<image>:<tag>
```

* To create a GPU based image

```
./build_image.sh -g
```

* To create a GPU based image with custom tag

```
./build_image.sh -g -t <repository>/<image>:<tag>
```

Individual Readmes for KFServing :

* [BERT](https://github.com/pytorch/serve/blob/master/kubernetes/kfserving/Huggingface_readme.md)