From 9dee3ef919502b57bf41a2528f1533cdc31ac882 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Sat, 11 May 2024 04:16:34 +0000
Subject: [PATCH] Ingest gpt-fast benchmark results from S3 to Rockset
 (#125891)

A follow-up to https://github.com/pytorch/pytorch/pull/125450, this extends the `tools/stats/upload_dynamo_perf_stats.py` script to upload arbitrary benchmark results in CSV format.

* Upload gpt-fast benchmarks to a new Rockset collection `benchmarks/oss_ci_benchmark`. The benchmark CSV file has the following format:
```
$ cat test/test-reports/gpt_fast_benchmark.csv
name,mode,target,actual,percentage
Llama-2-7b-chat-hf,bfloat16,104,104.754128,100.73%
```
* The CSV output needs to be kept in the `test/test-reports` directory.
* Re-use the existing `.github/workflows/upload-test-stats.yml` workflow.

### Testing

Run the commands manually:

```
(py3.11) huydo@huydo-mbp pytorch % python3 -m tools.stats.upload_artifacts --workflow-run-id 9026179545 --workflow-run-attempt 1 --repo "pytorch/pytorch"
Using temporary directory: /var/folders/x4/2kd9r0fn5b9bf_sbcw16fxsc0000gn/T/tmp6eug3cdz
Downloading test-jsons-runattempt1-test-inductor-micro-benchmark-1-1-linux.gcp.a100_24803987212.zip
Upload /private/var/folders/x4/2kd9r0fn5b9bf_sbcw16fxsc0000gn/T/tmp6eug3cdz/test-jsons-runattempt1-test-inductor-micro-benchmark-1-1-linux.gcp.a100_24803987212.zip to s3://gha-artifacts/pytorch/pytorch/9026179545/1/artifact/test-jsons-test-inductor-micro-benchmark-1-1-linux.gcp.a100_24803987212.zip
Downloading test-reports-runattempt1-test-inductor-micro-benchmark-1-1-linux.gcp.a100_24803987212.zip
Upload /private/var/folders/x4/2kd9r0fn5b9bf_sbcw16fxsc0000gn/T/tmp6eug3cdz/test-reports-runattempt1-test-inductor-micro-benchmark-1-1-linux.gcp.a100_24803987212.zip to s3://gha-artifacts/pytorch/pytorch/9026179545/1/artifact/test-reports-test-inductor-micro-benchmark-1-1-linux.gcp.a100_24803987212.zip

(py3.11) huydo@huydo-mbp pytorch % python3 -m tools.stats.upload_dynamo_perf_stats --workflow-run-id 9026179545 --workflow-run-attempt 1 --repo "pytorch/pytorch" --head-branch "ciflow/inductor-micro-benchmark/125891" --rockset-collection oss_ci_benchmark --rockset-workspace benchmarks --match-filename "^gpt_fast_benchmark"
Using temporary directory: /var/folders/x4/2kd9r0fn5b9bf_sbcw16fxsc0000gn/T/tmp8xr4sdxk
Downloading test-reports-test-inductor-micro-benchmark-1-1-linux.gcp.a100_24803987212.zip
Extracting test-reports-test-inductor-micro-benchmark-1-1-linux.gcp.a100_24803987212.zip to unzipped-test-reports-test-inductor-micro-benchmark-1-1-linux.gcp.a100_24803987212
Processing gpt_fast_benchmark from test-reports-test-inductor-micro-benchmark-1-1-linux.gcp.a100_24803987212.zip
Writing 3 documents to Rockset
Done!
```

Also run a sanity check on ingesting inductor benchmark results:

```
(py3.11) huydo@huydo-mbp pytorch % python -m tools.stats.upload_dynamo_perf_stats --workflow-run-id 8997654356 --workflow-run-attempt 1 --repo pytorch/pytorch --head-branch main --rockset-collection torch_dynamo_perf_stats --rockset-workspace inductor --match-filename "^inductor_"
...
Writing 4904 documents to Rockset
Done!
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/125891
Approved by: https://github.com/yanboliang
---
 .ci/pytorch/test.sh                           |  4 +-
 .github/workflows/upload-test-stats.yml       | 16 +++++++-
 .../upload-torch-dynamo-perf-stats.yml        |  2 +-
 benchmarks/gpt_fast/benchmark.py              | 23 +++++++----
 tools/stats/upload_dynamo_perf_stats.py       | 41 ++++++++++++++-----
 5 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index a22bebc166792..19d28eeefd9a8 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -523,8 +523,8 @@ test_single_dynamo_benchmark() {
 }
 
 test_inductor_micro_benchmark() {
-  TEST_REPORTS_DIR=$(pwd)/test/test-micro-reports
-  python benchmarks/gpt_fast/benchmark.py
+  TEST_REPORTS_DIR=$(pwd)/test/test-reports
+  python benchmarks/gpt_fast/benchmark.py --output "${TEST_REPORTS_DIR}/gpt_fast_benchmark.csv"
 }
 
 test_dynamo_benchmark() {
diff --git a/.github/workflows/upload-test-stats.yml b/.github/workflows/upload-test-stats.yml
index bcd8c47dcecc3..f71d86eb5e59f 100644
--- a/.github/workflows/upload-test-stats.yml
+++ b/.github/workflows/upload-test-stats.yml
@@ -2,7 +2,7 @@ name: Upload test stats
 
 on:
   workflow_run:
-    workflows: [pull, trunk, periodic, inductor, unstable, slow, unstable-periodic, inductor-periodic, rocm]
+    workflows: [pull, trunk, periodic, inductor, unstable, slow, unstable-periodic, inductor-periodic, rocm, inductor-micro-benchmark]
     types:
       - completed
 
@@ -50,6 +50,7 @@ jobs:
           pip3 install requests==2.26 rockset==1.0.3 boto3==1.19.12
 
       - name: Upload test artifacts
+        id: upload-s3
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
@@ -94,6 +95,19 @@ jobs:
           # Analyze the results from disable tests rerun and upload them to S3
           python3 -m tools.stats.check_disabled_tests --workflow-run-id "${WORKFLOW_RUN_ID}" --workflow-run-attempt "${WORKFLOW_RUN_ATTEMPT}" --repo "${REPO_FULLNAME}"
 
+      - name: Upload gpt-fast benchmark results to Rockset
+        if: steps.upload-s3.outcome && steps.upload-s3.outcome == 'success' && github.event.workflow_run.name == 'inductor-micro-benchmark'
+        env:
+          ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
+          WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
+          REPO_FULLNAME: ${{ github.event.workflow_run.repository.full_name }}
+          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
+        run: |
+          python3 -m tools.stats.upload_dynamo_perf_stats --workflow-run-id "${WORKFLOW_RUN_ID}" --workflow-run-attempt "${WORKFLOW_RUN_ATTEMPT}" --repo "${REPO_FULLNAME}" --head-branch "${HEAD_BRANCH}" --rockset-collection oss_ci_benchmark --rockset-workspace benchmarks --match-filename "^gpt_fast_benchmark"
+
   check-api-rate:
     if: ${{ always() && github.repository_owner == 'pytorch' }}
     runs-on: ubuntu-latest
diff --git a/.github/workflows/upload-torch-dynamo-perf-stats.yml b/.github/workflows/upload-torch-dynamo-perf-stats.yml
index 2ad1065076a5b..546d4d945761b 100644
--- a/.github/workflows/upload-torch-dynamo-perf-stats.yml
+++ b/.github/workflows/upload-torch-dynamo-perf-stats.yml
@@ -68,4 +68,4 @@ jobs:
           REPO_FULLNAME: ${{ github.event.workflow_run.repository.full_name }}
           HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
         run: |
-          python3 -m tools.stats.upload_dynamo_perf_stats --workflow-run-id "${WORKFLOW_RUN_ID}" --workflow-run-attempt "${WORKFLOW_RUN_ATTEMPT}" --repo "${REPO_FULLNAME}" --head-branch "${HEAD_BRANCH}"
+          python3 -m tools.stats.upload_dynamo_perf_stats --workflow-run-id "${WORKFLOW_RUN_ID}" --workflow-run-attempt "${WORKFLOW_RUN_ATTEMPT}" --repo "${REPO_FULLNAME}" --head-branch "${HEAD_BRANCH}" --rockset-collection torch_dynamo_perf_stats --rockset-workspace inductor --match-filename "^inductor_"
diff --git a/benchmarks/gpt_fast/benchmark.py b/benchmarks/gpt_fast/benchmark.py
index 791ed28c449ff..1e964d0ae2b2f 100644
--- a/benchmarks/gpt_fast/benchmark.py
+++ b/benchmarks/gpt_fast/benchmark.py
@@ -46,7 +46,7 @@ class Experiment:
     ),
 }
 
-output_filename = "gpt_fast_benchmark.csv"
+DEFAULT_OUTPUT_FILE = "gpt_fast_benchmark.csv"
 
 
 def device_sync(device):
@@ -235,9 +235,9 @@ def run_experiment(
     return token_per_sec
 
 
-def output_csv(filename, headers, row):
-    if os.path.exists(filename):
-        with open(filename) as fd:
+def output_csv(output_file, headers, row):
+    if os.path.exists(output_file):
+        with open(output_file) as fd:
             lines = list(csv.reader(fd)) or [[]]
             if headers and len(headers) > len(lines[0]):
                 # if prior results failed the header might not be filled in yet
@@ -246,14 +246,16 @@ def output_csv(filename, headers, row):
                 headers = lines[0]
     else:
         lines = [headers]
+
+    os.makedirs(os.path.dirname(output_file), exist_ok=True)
     lines.append([(f"{x:.6f}" if isinstance(x, float) else x) for x in row])
-    with open(filename, "w") as fd:
+    with open(output_file, "w") as fd:
         writer = csv.writer(fd, lineterminator="\n")
         for line in lines:
             writer.writerow(list(line) + ["0"] * (len(headers) - len(line)))
 
 
-def main(experiments=None):
+def main(experiments=None, output_file=DEFAULT_OUTPUT_FILE):
     results = []
 
     if experiments is None:
@@ -270,7 +272,7 @@ def main(experiments=None):
     rows = [[x[0].name, x[0].mode, x[0].target, x[1], x[2]] for x in results]
 
     for row in rows:
-        output_csv(output_filename, headers, row)
+        output_csv(output_file, headers, row)
 
 
 if __name__ == "__main__":
@@ -281,6 +283,11 @@ def main(experiments=None):
         default=None,
         help="Experiment names to run (default: all)",
     )
+    parser.add_argument(
+        "--output",
+        default=DEFAULT_OUTPUT_FILE,
+        help="Set the output CSV file to save the benchmark results",
+    )
     args = parser.parse_args()
 
-    main(experiments=args.experiments)
+    main(experiments=args.experiments, output_file=args.output)
diff --git a/tools/stats/upload_dynamo_perf_stats.py b/tools/stats/upload_dynamo_perf_stats.py
index 2cbbcd25bdab5..c6c507863f440 100644
--- a/tools/stats/upload_dynamo_perf_stats.py
+++ b/tools/stats/upload_dynamo_perf_stats.py
@@ -13,7 +13,7 @@
     "test-reports",
 ]
 ARTIFACT_REGEX = re.compile(
-    r"test-reports-test-(?P<name>\w+)-\d+-\d+-(?P<runner>[\w\.]+)_(?P<job>\d+).zip"
+    r"test-reports-test-(?P<name>[\w\-]+)-\d+-\d+-(?P<runner>[\w\.]+)_(?P<job>\d+).zip"
 )
 
 
@@ -22,7 +22,9 @@ def upload_dynamo_perf_stats_to_rockset(
     workflow_run_id: int,
     workflow_run_attempt: int,
     head_branch: str,
+    match_filename: str,
 ) -> List[Dict[str, Any]]:
+    match_filename_regex = re.compile(match_filename)
     perf_stats = []
     with TemporaryDirectory() as temp_dir:
         print("Using temporary directory:", temp_dir)
@@ -49,17 +51,14 @@ def upload_dynamo_perf_stats_to_rockset(
 
                 for csv_file in Path(".").glob("**/*.csv"):
                     filename = os.path.splitext(os.path.basename(csv_file))[0]
+                    if not re.match(match_filename_regex, filename):
+                        continue
                     print(f"Processing {filename} from {path}")
 
                     with open(csv_file) as csvfile:
                         reader = csv.DictReader(csvfile, delimiter=",")
 
                         for row in reader:
-                            # If the row doesn't have a dev and a name column, it's not
-                            # a torch dynamo perf stats csv file
-                            if "dev" not in row or "name" not in row:
-                                break
-
                             row.update(
                                 {
                                     "workflow_id": workflow_run_id,  # type: ignore[dict-item]
@@ -105,14 +104,36 @@ def upload_dynamo_perf_stats_to_rockset(
         "--head-branch",
         type=str,
         required=True,
-        help="Head branch of the workflow",
+        help="head branch of the workflow",
+    )
+    parser.add_argument(
+        "--rockset-collection",
+        type=str,
+        required=True,
+        help="the name of the Rockset collection to store the stats",
+    )
+    parser.add_argument(
+        "--rockset-workspace",
+        type=str,
+        default="commons",
+        help="the name of the Rockset workspace to store the stats",
+    )
+    parser.add_argument(
+        "--match-filename",
+        type=str,
+        default="",
+        help="the regex to filter the list of CSV files containing the records to upload",
     )
     args = parser.parse_args()
     perf_stats = upload_dynamo_perf_stats_to_rockset(
-        args.repo, args.workflow_run_id, args.workflow_run_attempt, args.head_branch
+        args.repo,
+        args.workflow_run_id,
+        args.workflow_run_attempt,
+        args.head_branch,
+        args.match_filename,
     )
     upload_to_rockset(
-        collection="torch_dynamo_perf_stats",
+        collection=args.rockset_collection,
         docs=perf_stats,
-        workspace="inductor",
+        workspace=args.rockset_workspace,
     )