
Commit

Merge branch 'master' into issues/update_docker_example
agunapal authored Feb 16, 2023
2 parents eb2e516 + 485ebf8 commit 94b787a
Showing 10 changed files with 156 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci_gpu.yml
@@ -38,7 +38,7 @@ jobs:
       uses: actions/checkout@v3
     - name: Install dependencies
       run: |
-        python ts_scripts/install_dependencies.py --environment=dev --cuda=cu102
+        python ts_scripts/install_dependencies.py --environment=dev --cuda=cu117
     - name: Torchserve Sanity
       uses: nick-fields/retry@v2
       with:
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -34,7 +34,7 @@ repos:
       - id: black
         additional_dependencies: ['click==8.0.4']
   - repo: https://github.com/PyCQA/isort
-    rev: 5.10.1
+    rev: 5.11.5
     hooks:
       - id: isort
         args: ["--profile", "black"]
4 changes: 2 additions & 2 deletions benchmarks/requirements-ab.txt
@@ -4,5 +4,5 @@ click-config-file
 matplotlib
 requests
 pyyaml
-mdutils
-ruamel.yaml
+mdutils==1.4.0
+ruamel.yaml
13 changes: 7 additions & 6 deletions docs/configuration.md
@@ -188,7 +188,7 @@ Configuration parameter `install_py_dep_per_model` controls if the model server
 install_py_dep_per_model=true
 ```
 
-User can also supply custom python packages in zip or tar.gz format using the `--extra-files` flag while creating the model-archive and make an entry of the file name in the `requirements` file.
+User can also supply custom python packages in zip or tar.gz format using the `--extra-files` flag while creating the model-archive and make an entry of the file name in the `requirements` file.
 
 ### Restrict backend worker to access environment variables
 
@@ -239,7 +239,7 @@ A model's parameters are defined in [model source code](https://github.com/pytor
 * `defaultVersion`: the default version of a model
 * `marName`: the mar file name of a model
 
-A model's configuration example
+A model's configuration example
 ```properties
 models={\
     "noop": {\
@@ -281,22 +281,23 @@ Most of the following properties are designed for performance tuning. Adjusting
 * `unregister_model_timeout`: Timeout, in seconds, used when handling an unregister model request when cleaning a process before it is deemed unresponsive and an error response is sent. Default: 120 seconds.
 * `decode_input_request`: Configuration to let backend workers to decode requests, when the content type is known.
 If this is set to "true", backend workers do "Bytearray to JSON object" conversion when the content type is "application/json" and
-the backend workers convert "Bytearray to utf-8 string" when the Content-Type of the request is set to "text*". Default: true
+the backend workers convert "Bytearray to utf-8 string" when the Content-Type of the request is set to "text*". Default: true
 * `initial_worker_port` : This is the initial port number for auto assigning port to worker process.
 * `model_store` : Path of model store directory.
-* `model_server_home` : Torchserve home directory.
+* `model_server_home` : Torchserve home directory.
 * `max_request_size` : The maximum allowable request size that the Torchserve accepts, in bytes. Default: 6553500
 * `max_response_size` : The maximum allowable response size that the Torchserve sends, in bytes. Default: 6553500
-* `limit_max_image_pixels` : Default value is true (Use default [PIL.Image.MAX_IMAGE_PIXELS](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS)). If this is set to "false", set PIL.Image.MAX_IMAGE_PIXELS = None in backend default vision handler for large image payload.
+* `limit_max_image_pixels` : Default value is true (Use default [PIL.Image.MAX_IMAGE_PIXELS](https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.MAX_IMAGE_PIXELS)). If this is set to "false", set PIL.Image.MAX_IMAGE_PIXELS = None in backend default vision handler for large image payload.
 * `allowed_urls` : Comma separated regex of allowed source URL(s) from where models can be registered. Default: "file://.*|http(s)?://.*" (all URLs and local file system)
 e.g. : To allow base URLs `https://s3.amazonaws.com/` and `https://torchserve.pytorch.org/` use the following regex string `allowed_urls=https://s3.amazonaws.com/.*,https://torchserve.pytorch.org/.*`
 * `workflow_store` : Path of workflow store directory. Defaults to model store directory.
+* `disable_system_metrics` : Disable collection of system metrics when set to "true". Default value is "false".
 
 **NOTE**
 
 All the above config properties can be set using environment variable as follows.
 - set `enable_envvars_config` to true in config.properties
-- export environment variable for property as`TS_<PROPERTY_NAME>`.
+- export environment variable for property as`TS_<PROPERTY_NAME>`.
 
 e.g.: to set inference_address property run cmd
 `export TS_INFERENCE_ADDRESS="http://127.0.0.1:8082"`.
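The NOTE above describes the `TS_<PROPERTY_NAME>` environment-variable mechanism. A minimal Python sketch of launching TorchServe with such an override (assumes `enable_envvars_config=true` is already set in `config.properties`; the model-store path is a placeholder, not from this commit):

```python
import os
import subprocess

# Minimal sketch (not part of this commit): override the inference_address
# property via its TS_-prefixed environment variable before launching.
env = dict(os.environ, TS_INFERENCE_ADDRESS="http://127.0.0.1:8082")
subprocess.run(
    ["torchserve", "--start", "--model-store", "model_store"],  # placeholder store path
    env=env,
)
```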
20 changes: 10 additions & 10 deletions examples/text_classification_with_scriptable_tokenizer/handler.py
@@ -1,6 +1,5 @@
 """
 Module for text classification with scriptable tokenizer
-DOES NOT SUPPORT BATCH!
 """
 import logging
 from abc import ABC
@@ -51,18 +50,19 @@ def preprocess(self, data):
 
         # Compat layer: normally the envelope should just return the data
         # directly, but older versions of Torchserve didn't have envelope.
-        # Processing only the first input, not handling batch inference
 
-        line = data[0]
-        text = line.get("data") or line.get("body")
-        # Decode text if not a str but bytes or bytearray
-        if isinstance(text, (bytes, bytearray)):
-            text = text.decode("utf-8")
+        text_batch = []
+        for line in data:
+            text = line.get("data") or line.get("body")
+            # Decode text if not a str but bytes or bytearray
+            if isinstance(text, (bytes, bytearray)):
+                text = text.decode("utf-8")
 
-        text = remove_html_tags(text)
-        text = text.lower()
+            text = remove_html_tags(text)
+            text = text.lower()
+            text_batch.append(text)
 
-        return text
+        return text_batch
 
     def inference(self, data, *args, **kwargs):
         """The Inference Request is made through this function and the user
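The batching change above is easy to exercise in isolation. A self-contained sketch of the new `preprocess` loop; `remove_html_tags` is approximated here, since the real helper lives in the example's handler module:

```python
import re


def remove_html_tags(text):
    # Rough stand-in for the example's helper of the same name.
    return re.sub(r"<[^>]+>", " ", text)


def preprocess(data):
    # Mirrors the updated handler: one normalized string per request,
    # so a batch of N requests yields a list of N strings.
    text_batch = []
    for line in data:
        text = line.get("data") or line.get("body")
        if isinstance(text, (bytes, bytearray)):
            text = text.decode("utf-8")
        text = remove_html_tags(text)
        text = text.lower()
        text_batch.append(text)
    return text_batch


# A batch of two requests, one raw bytes and one already-decoded string:
print(preprocess([{"data": b"A <b>great</b> movie!"}, {"body": "Dull plot."}]))
# -> ['a  great  movie!', 'dull plot.']
```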
@@ -76,7 +76,7 @@ def main(args):
     model = XLMR_BASE_ENCODER.get_model(head=classifier_head)
 
     # Load trained parameters and load them into the model
-    model.load_state_dict(torch.load(args.input_file))
+    model.load_state_dict(torch.load(args.input_file, map_location=torch.device("cpu")))
 
     # Chain the tokenizer, the adapter and the model
     combi_model = T.Sequential(
@@ -88,7 +88,7 @@ def main(args):
     combi_model.eval()
 
     # Make sure to move the model to CPU to avoid placement error during loading
-    combi_model.to("cpu")
+    combi_model.to(torch.device("cpu"))
 
     combi_model_jit = torch.jit.script(combi_model)
 
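Both hunks above pin the model to the CPU so the scripted artifact loads on CPU-only hosts. A minimal sketch of the failure mode being avoided (the checkpoint path is illustrative):

```python
import torch

# Without map_location, torch.load tries to restore each tensor to the
# device it was saved from, so a CUDA-trained checkpoint raises a
# RuntimeError on a CPU-only machine. map_location forces CPU placement.
state_dict = torch.load("model.pt", map_location=torch.device("cpu"))  # illustrative path
```

Moving the chained model to CPU before `torch.jit.script` serves the same purpose for the serialized TorchScript artifact.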
@@ -119,7 +119,10 @@ public void startAndWait()
         startGRPCServers();
 
         // Create and schedule metrics manager
-        MetricManager.scheduleMetrics(configManager);
+        if (!configManager.isSystemMetricsDisabled()) {
+            MetricManager.scheduleMetrics(configManager);
+        }
 
         System.out.println("Model server started."); // NOPMD
 
         channelFutures.get(0).sync();
@@ -66,6 +66,7 @@ public final class ConfigManager {
     private static final String TS_JOB_QUEUE_SIZE = "job_queue_size";
     private static final String TS_NUMBER_OF_GPU = "number_of_gpu";
     private static final String TS_METRICS_CONFIG = "metrics_config";
+    private static final String TS_DISABLE_SYSTEM_METRICS = "disable_system_metrics";
 
     // IPEX config option that can be set at config.properties
     private static final String TS_IPEX_ENABLE = "ipex_enable";
@@ -373,6 +374,10 @@ public String getMetricsConfigPath() {
         return path;
     }
 
+    public boolean isSystemMetricsDisabled() {
+        return Boolean.parseBoolean(getProperty(TS_DISABLE_SYSTEM_METRICS, "false"));
+    }
+
     public String getTsDefaultServiceHandler() {
         return getProperty(TS_DEFAULT_SERVICE_HANDLER, null);
     }
@@ -642,6 +647,8 @@ public String dumpConfigurations() {
                 + prop.getProperty(TS_METRICS_FORMAT, METRIC_FORMAT_PROMETHEUS)
                 + "\nEnable metrics API: "
                 + prop.getProperty(TS_ENABLE_METRICS_API, "true")
+                + "\nDisable system metrics: "
+                + isSystemMetricsDisabled()
                 + "\nWorkflow Store: "
                 + (getWorkflowStore() == null ? "N/A" : getWorkflowStore())
                 + "\nModel config: "
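Taken together, the two Java changes above let users turn off system-metrics collection. A minimal sketch of exercising the new property locally (assumes a local TorchServe install; the file and directory names are illustrative, not part of this commit):

```python
import subprocess

# Illustrative sketch: start TorchServe with system metrics disabled.
with open("config.properties", "w") as f:
    f.write("disable_system_metrics=true\n")

subprocess.run(
    ["torchserve", "--start", "--model-store", "model_store",  # placeholder store path
     "--ts-config", "config.properties"]
)
# With the property set, logs/ts_metrics.log should contain no
# CPUUtilization/MemoryUsed/... entries (see the new tests below).
```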
1 change: 1 addition & 0 deletions requirements/common.txt
@@ -9,4 +9,5 @@ numpy; sys_platform != 'win32'
 numpy==1.19.3; sys_platform == 'win32' #see https://tinyurl.com/y3dm3h86
 nvgpu; sys_platform != 'win32'
 nvgpu==0.8.0; sys_platform == 'win32'
+pynvml==11.4.1
 pyyaml
121 changes: 121 additions & 0 deletions test/pytest/test_metrics.py
@@ -1,6 +1,7 @@
 import glob
 import os
 import platform
+import re
 import shutil
 import time
 from os import path
@@ -9,6 +10,18 @@
 import test_utils
 
 NUM_STARTUP_CFG = 0
+SYSTEM_METRICS = [
+    "CPUUtilization",
+    "MemoryUsed",
+    "MemoryAvailable",
+    "MemoryUtilization",
+    "DiskUsage",
+    "DiskUtilization",
+    "DiskAvailable",
+    "GPUMemoryUtilization",
+    "GPUMemoryUsed",
+    "GPUUtilization",
+]
 
 
 def setup_module(module):
@@ -55,6 +68,33 @@ def run_log_location_var(custom_path=test_utils.ROOT_DIR, no_config_snapshots=Fa
     assert len(glob.glob(custom_path + "/ts_log.log")) == 1
 
 
+def register_densenet161_model_and_make_inference_request():
+    test_utils.register_model("densenet161.mar", "densenet161")
+    data_file = os.path.join(
+        test_utils.REPO_ROOT, "examples/image_classifier/kitten.jpg"
+    )
+    with open(data_file, "rb") as input_data:
+        requests.post(
+            url=f"http://localhost:8080/predictions/densenet161", data=input_data
+        )
+
+
+def validate_system_metrics(present=True):
+    assert len(glob.glob("logs/ts_metrics.log")) == 1
+    ts_metrics_path = glob.glob("logs/ts_metrics.log")[0]
+    assert os.path.getsize(ts_metrics_path) > 0
+
+    system_metrics_regex = re.compile("|".join(SYSTEM_METRICS), flags=re.IGNORECASE)
+    with open(ts_metrics_path, "rt") as ts_metrics_file:
+        ts_metrics = ts_metrics_file.read()
+        system_metrics = re.findall(system_metrics_regex, ts_metrics)
+
+    if present:
+        assert len(system_metrics) > 0
+    else:
+        assert len(system_metrics) == 0
+
+
 def test_logs_created():
     logs_created()
     global NUM_STARTUP_CFG
@@ -318,3 +358,84 @@ def test_metrics_location_var_snapshot_enabled_rdonly_dir():
         assert len(glob.glob(RDONLY_DIR + "/logs/ts_metrics.log")) == 0
     finally:
         del os.environ["METRICS_LOCATION"]
+
+
+def test_collect_system_metrics_when_not_disabled():
+    """
+    Validates that system metrics are collected when not disabled
+    """
+    # Torchserve cleanup
+    test_utils.torchserve_cleanup()
+    # Remove existing logs if any
+    for f in glob.glob("logs/*.log"):
+        os.remove(f)
+
+    try:
+        test_utils.start_torchserve(
+            model_store=test_utils.MODEL_STORE, no_config_snapshots=True, gen_mar=False
+        )
+        register_densenet161_model_and_make_inference_request()
+        validate_system_metrics(present=True)
+    finally:
+        test_utils.torchserve_cleanup()
+
+
+def test_disable_system_metrics_using_config_properties():
+    """
+    Validates that system metrics collection is disabled when "disable_system_metrics"
+    configuration option is set to "true"
+    """
+    # Torchserve cleanup
+    test_utils.torchserve_cleanup()
+    # Remove existing logs if any
+    for f in glob.glob("logs/*.log"):
+        os.remove(f)
+
+    config_file = test_utils.ROOT_DIR + "config.properties"
+    with open(config_file, "w") as f:
+        f.write("disable_system_metrics=true")
+
+    try:
+        test_utils.start_torchserve(
+            model_store=test_utils.MODEL_STORE,
+            snapshot_file=config_file,
+            no_config_snapshots=True,
+            gen_mar=False,
+        )
+        register_densenet161_model_and_make_inference_request()
+        validate_system_metrics(present=False)
+    finally:
+        test_utils.torchserve_cleanup()
+        os.remove(config_file)
+
+
+def test_disable_system_metrics_using_environment_variable():
+    """
+    Validates that system metrics collection is disabled when TS_DISABLE_SYSTEM_METRICS
+    environment variable is set to "true"
+    """
+    # Torchserve cleanup
+    test_utils.torchserve_cleanup()
+    # Remove existing logs if any
+    for f in glob.glob("logs/*.log"):
+        os.remove(f)
+
+    config_file = test_utils.ROOT_DIR + "config.properties"
+    with open(config_file, "w") as f:
+        f.write("enable_envvars_config=true")
+
+    os.environ["TS_DISABLE_SYSTEM_METRICS"] = "true"
+
+    try:
+        test_utils.start_torchserve(
+            model_store=test_utils.MODEL_STORE,
+            snapshot_file=config_file,
+            no_config_snapshots=True,
+            gen_mar=False,
+        )
+        register_densenet161_model_and_make_inference_request()
+        validate_system_metrics(present=False)
+    finally:
+        test_utils.torchserve_cleanup()
+        del os.environ["TS_DISABLE_SYSTEM_METRICS"]
+        os.remove(config_file)
