Merge branch 'master' into feature/cpp_baby_llama_rework

pytorch · Jan 24, 2024 · 3064301
2 parents 7157754 + 35b0b93
commit 3064301
Show file tree

Hide file tree

Showing 57 changed files with 1,086 additions and 285 deletions.
diff --git a/.github/workflows/benchmark_torch_compile_nightly.yml b/.github/workflows/benchmark_torch_compile_nightly.yml
@@ -20,10 +20,10 @@ jobs:
           cd ..
           pwd
           rm -rf _tool
-      - name: Setup Python 3.8
+      - name: Setup Python 3.10
         uses: actions/setup-python@v4
         with:
-          python-version: 3.8
+          python-version: "3.10"
           architecture: x64
       - name: Setup Java 17
         uses: actions/setup-java@v3
@@ -39,5 +39,9 @@ jobs:
           sudo apt-get update -y
           sudo apt-get install -y apache2-utils
           pip install -r benchmarks/requirements-ab.txt
+      - name: Install model-specific dependencies
+        run: |
+          chmod +x benchmarks/benchmark_model_dependencies.sh
+          source benchmarks/benchmark_model_dependencies.sh
       - name: Benchmark gpu nightly
         run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_torch_compile_gpu.yaml --skip false --nightly True
diff --git a/.github/workflows/ci-cpu-cpp.yml b/.github/workflows/ci-cpu-cpp.yml
@@ -3,13 +3,13 @@ name: CI CPU - CPP Build
 on:
   push:
     branches:
-      - cpp_backend
+      - master
   pull_request:
     branches:
-      - cpp_backend
+      - master
 
 concurrency:
-  group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/cpp_backend' && github.run_number || github.ref }}
+  group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }}
   cancel-in-progress: true
 
 jobs:

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -53,7 +53,7 @@ jobs:
 
       - name: Get changed files
         id: changed-files
-        uses: tj-actions/changed-files@v29.0.4
+        uses: tj-actions/changed-files@v41.0.0
         with:
 
           files: |
@@ -87,7 +87,7 @@ jobs:
 
       - name: Get changed files
         id: changed-files
-        uses: tj-actions/changed-files@v29.0.4
+        uses: tj-actions/changed-files@v41.0.0
         with:
           files: |
             **/*.md

diff --git a/benchmarks/benchmark-ab.py b/benchmarks/benchmark-ab.py
@@ -555,6 +555,19 @@ def generate_csv_output():
         artifacts["Model_p90"] = lines[line90].strip()
         artifacts["Model_p99"] = lines[line99].strip()
 
+    with open(
+        os.path.join(execution_params["tmp_dir"], "benchmark", "waiting_time.txt")
+    ) as f:
+        lines = f.readlines()
+        lines.sort(key=float)
+        num_requests = len(lines)
+        line50 = int(num_requests / 2)
+        line90 = int(num_requests * 9 / 10)
+        line99 = int(num_requests * 99 / 100)
+        artifacts["Queue time p50"] = lines[line50].strip()
+        artifacts["Queue time p90"] = lines[line90].strip()
+        artifacts["Queue time p99"] = lines[line99].strip()
+
     for m in metrics:
         df = pd.read_csv(
             os.path.join(*(execution_params["tmp_dir"], "benchmark", m)),

diff --git a/benchmarks/benchmark_config_torch_compile_gpu.yaml b/benchmarks/benchmark_config_torch_compile_gpu.yaml
@@ -11,7 +11,7 @@
 models:
     - "bert_torch_compile_gpu.yaml"
     - "resnet50_torch_compile_gpu.yaml"
-    - "vgg16_torch_compile_gpu.yaml"
+    - "sam_fast_torch_compile_gpu_best_latency.yaml"
 
 # benchmark on "cpu" or "gpu".
 # "cpu" is set if "hardware" is not specified
@@ -45,4 +45,3 @@ report_cmd:
     - "cmd": "aws s3 cp --recursive"
     - "source": '/tmp/ts_benchmark/'
     - "dest": ['s3://torchserve-benchmark/torch-compile-nightly', "today()", *hardware]
-
diff --git a/benchmarks/benchmark_model_dependencies.sh b/benchmarks/benchmark_model_dependencies.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# Install dependencies and set environment variables for SAM Fast
+
+# Install dependencies
+pip install chardet
+chmod +x examples/large_models/segment_anything_fast/install_segment_anything_fast.sh
+source examples/large_models/segment_anything_fast/install_segment_anything_fast.sh
+
+# Turn off the A100-specific optimization (custom Flash-4 kernel)
+export SEGMENT_ANYTHING_FAST_USE_FLASH_4=0
+
+echo "Installed dependencies and set environment variables for SAM Fast"
+
diff --git a/benchmarks/models_config/bert_torch_compile_gpu.yaml b/benchmarks/models_config/bert_torch_compile_gpu.yaml
@@ -18,7 +18,6 @@ bert:
         backend_profiling: False
         exec_env: "local"
         processors:
-            - "cpu"
             - "gpus": "all"
     torch_compile_default_mode:
         benchmark_engine: "ab"
@@ -38,5 +37,4 @@ bert:
         backend_profiling: False
         exec_env: "local"
         processors:
-            - "cpu"
             - "gpus": "all"
diff --git a/benchmarks/models_config/resnet50_torch_compile_gpu.yaml b/benchmarks/models_config/resnet50_torch_compile_gpu.yaml
@@ -18,7 +18,6 @@ resnet50:
         backend_profiling: False
         exec_env: "local"
         processors:
-            - "cpu"
             - "gpus": "all"
     torch_compile_default_mode:
         benchmark_engine: "ab"
@@ -38,5 +37,4 @@ resnet50:
         backend_profiling: False
         exec_env: "local"
         processors:
-            - "cpu"
             - "gpus": "all"
diff --git a/benchmarks/models_config/sam_fast_torch_compile_gpu_best_latency.yaml b/benchmarks/models_config/sam_fast_torch_compile_gpu_best_latency.yaml
@@ -0,0 +1,32 @@
+---
+sam_fast:
+    vit_b:
+        benchmark_engine: "ab"
+        url: https://torchserve.pytorch.org/mar_files/sam_fast_vit_b_01ec64.mar
+        workers:
+            - 4
+        batch_delay: 100
+        batch_size:
+            - 1
+        input: "./examples/large_models/segment_anything_fast/kitten.jpg"
+        requests: 1000
+        concurrency: 4
+        backend_profiling: False
+        exec_env: "local"
+        processors:
+            - "gpus": "all"
+    vit_h:
+        benchmark_engine: "ab"
+        url: https://torchserve.pytorch.org/mar_files/sam_fast_vit_h_4b8939.mar
+        workers:
+            - 4
+        batch_delay: 100
+        batch_size:
+            - 1
+        input: "./examples/large_models/segment_anything_fast/kitten.jpg"
+        requests: 1000
+        concurrency: 4
+        backend_profiling: False
+        exec_env: "local"
+        processors:
+            - "gpus": "all"
diff --git a/benchmarks/models_config/vgg16_torch_compile_gpu.yaml b/benchmarks/models_config/vgg16_torch_compile_gpu.yaml