From 7a23446702a96738ca1fbd9b13af6859ed720e8a Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 21 Jun 2021 13:23:28 -0400 Subject: [PATCH 01/11] Adding torchbench v1 workflow. --- .github/scripts/run.sh | 17 ++++-- .../workflows/{main.yml => v0-nightly.yml} | 0 .github/workflows/v1-nightly.yml | 58 +++++++++++++++++++ 3 files changed, 70 insertions(+), 5 deletions(-) rename .github/workflows/{main.yml => v0-nightly.yml} (100%) create mode 100644 .github/workflows/v1-nightly.yml diff --git a/.github/scripts/run.sh b/.github/scripts/run.sh index 97723740ad..3cff4e8b80 100644 --- a/.github/scripts/run.sh +++ b/.github/scripts/run.sh @@ -50,11 +50,18 @@ for c in $(seq 1 $NUM_ITER); do taskset -c "${CORE_LIST}" pytest test_bench.py -k "${BENCHMARK_FILTER}" \ --benchmark-min-rounds "${NUM_ROUNDS}" \ --benchmark-json ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json - # Fill in circle_build_num and circle_project_reponame - jq --arg run_id "${GITHUB_RUN_ID}" --arg config_version "githubactions-benchmark-${CONFIG_VER}-metal-fullname" \ - '.machine_info.circle_project_name=$config_version | .machine_info.circle_build_num=$run_id' \ - ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json > ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json.tmp - mv ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json.tmp ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json + if [ "$CONFIG_VER" == "v0" ]; then + # Fill in circle_build_num and circle_project_reponame + jq --arg run_id "${GITHUB_RUN_ID}" --arg config_version "githubactions-benchmark-${CONFIG_VER}-metal-fullname" \ + '.machine_info.circle_project_name=$config_version | .machine_info.circle_build_num=$run_id' \ + ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json > ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json.tmp + mv ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json.tmp ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json + elif [ "$CONFIG_VER" == "v1" ]; then + jq --arg run_id "${GITHUB_RUN_ID}" --arg config_version "${CONFIG_VER}" \ + '.machine_info.torchbench_score_version=$config_version | .machine_info.circle_build_num=$run_id' \ + ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json > ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json.tmp + mv ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json.tmp ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json + fi done echo "Benchmark finished successfully. Output data dir is ${DATA_DIR}." diff --git a/.github/workflows/main.yml b/.github/workflows/v0-nightly.yml similarity index 100% rename from .github/workflows/main.yml rename to .github/workflows/v0-nightly.yml diff --git a/.github/workflows/v1-nightly.yml b/.github/workflows/v1-nightly.yml new file mode 100644 index 0000000000..116be8a07e --- /dev/null +++ b/.github/workflows/v1-nightly.yml @@ -0,0 +1,58 @@ +name: TorchBench nightly ci v1.0 +on: + workflow_dispatch: + schedule: + - cron: '0 13 * * *' # run at 1 PM UTC + +jobs: + run-benchmark: + env: + CONFIG_VER: "v1" + CONDA_ENV_NAME: "v1-nightly-ci" + OUTPUT_DIR: $(echo ${HOME})/.torchbench/v1-nighlty-ci + if: ${{ github.repository_owner == 'pytorch' }} + runs-on: [self-hosted, bm-runner] + env: + SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + ref: v1.0 + - name: Create conda env + run: | + conda create -y -q --name "${CONDA_ENV_NAME}" python=3.7 + - name: Install PyTorch nightly + run: | + # Check if nightly builds are available + NIGHTLIES=$(python torchbenchmark/util/torch_nightly.py --packages torch) + # If failed, the script will generate empty result + if [ -z $NIGHTLIES ]; then + echo "Torch nightly build failed. Cancel the workflow." + exit 1 + fi + # Install PyTorch nightly from pip + pip install --pre torch \ + -f https://download.pytorch.org/whl/nightly/${CUDA_VERSION}/torch_nightly.html + - name: Run benchmark + run: | + bash ./.github/scripts/run.sh "${OUTPUT_DIR}" + - name: Copy artifact + run: | + LATEST_RESULT=$(find ${OUTPUT_DIR}/gh${GITHUB_RUN_ID}/ -name "*.json" | sort -r | head -1) + TODAY=$(date "+%Y%m%d%H%M%S") + cp ${LATEST_RESULT} ./benchmark-result-v1-${TODAY}.json + - name: Upload artifact + uses: actions/upload-artifact@v2 + with: + name: Benchmark result + path: benchmark-result-v1-*.json + - name: Upload results to Amazon S3 + run: | + LAST_MODIFIED_DIR=$(find ${OUTPUT_DIR} -mindepth 1 -maxdepth 1 -type d -exec stat -t {} \; | sort -r -n -k 13,13 | head -1 | cut -d " " -f 1) + python ./scripts/upload_s3.py --torchbench-result-dir ${LAST_MODIFIED_DIR} --gen-index --upload-s3 + - name: Remove conda env + run: | + conda remove --name From 0e6c1710ba04147ed788984f0c94f70a0ff18fc7 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 21 Jun 2021 20:47:10 -0400 Subject: [PATCH 02/11] Adding the v1 workflow --- .github/scripts/run-sweep.sh | 0 .github/scripts/run.sh | 12 -- .github/workflows/{sweep.yml => v0-sweep.yml} | 0 .github/workflows/v1-nightly.yml | 12 +- .github/workflows/v1-sweep.yml | 17 +++ conftest.py | 4 +- scripts/upload_s3.py | 121 ++++++++++++++++++ 7 files changed, 148 insertions(+), 18 deletions(-) create mode 100644 .github/scripts/run-sweep.sh rename .github/workflows/{sweep.yml => v0-sweep.yml} (100%) create mode 100644 .github/workflows/v1-sweep.yml create mode 100644 scripts/upload_s3.py diff --git a/.github/scripts/run-sweep.sh b/.github/scripts/run-sweep.sh new file mode 100644 index 0000000000..e69de29bb2 diff --git a/.github/scripts/run.sh b/.github/scripts/run.sh index 3cff4e8b80..ea816a20bf 100644 --- a/.github/scripts/run.sh +++ b/.github/scripts/run.sh @@ -50,18 +50,6 @@ for c in $(seq 1 $NUM_ITER); do taskset -c "${CORE_LIST}" pytest test_bench.py -k "${BENCHMARK_FILTER}" \ --benchmark-min-rounds "${NUM_ROUNDS}" \ --benchmark-json ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json - if [ "$CONFIG_VER" == "v0" ]; then - # Fill in circle_build_num and circle_project_reponame - jq --arg run_id "${GITHUB_RUN_ID}" --arg config_version "githubactions-benchmark-${CONFIG_VER}-metal-fullname" \ - '.machine_info.circle_project_name=$config_version | .machine_info.circle_build_num=$run_id' \ - ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json > ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json.tmp - mv ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json.tmp ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json - elif [ "$CONFIG_VER" == "v1" ]; then - jq --arg run_id "${GITHUB_RUN_ID}" --arg config_version "${CONFIG_VER}" \ - '.machine_info.torchbench_score_version=$config_version | .machine_info.circle_build_num=$run_id' \ - ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json > ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json.tmp - mv ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json.tmp ${DATA_DIR}/${DATA_JSON_PREFIX}_${c}.json - fi done echo "Benchmark finished successfully. Output data dir is ${DATA_DIR}." diff --git a/.github/workflows/sweep.yml b/.github/workflows/v0-sweep.yml similarity index 100% rename from .github/workflows/sweep.yml rename to .github/workflows/v0-sweep.yml diff --git a/.github/workflows/v1-nightly.yml b/.github/workflows/v1-nightly.yml index 116be8a07e..eb8df9588b 100644 --- a/.github/workflows/v1-nightly.yml +++ b/.github/workflows/v1-nightly.yml @@ -7,12 +7,13 @@ on: jobs: run-benchmark: env: - CONFIG_VER: "v1" + TORCHBENCH_CONFIG_VER: "v1" CONDA_ENV_NAME: "v1-nightly-ci" OUTPUT_DIR: $(echo ${HOME})/.torchbench/v1-nighlty-ci if: ${{ github.repository_owner == 'pytorch' }} runs-on: [self-hosted, bm-runner] env: + CONDA_ENV_NAME: torchbench-v1-nightly-ci SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -26,6 +27,7 @@ jobs: conda create -y -q --name "${CONDA_ENV_NAME}" python=3.7 - name: Install PyTorch nightly run: | + . activate "$CONDA_ENV_NAME" # Check if nightly builds are available NIGHTLIES=$(python torchbenchmark/util/torch_nightly.py --packages torch) # If failed, the script will generate empty result @@ -38,7 +40,8 @@ jobs: -f https://download.pytorch.org/whl/nightly/${CUDA_VERSION}/torch_nightly.html - name: Run benchmark run: | - bash ./.github/scripts/run.sh "${OUTPUT_DIR}" + . activate "$CONDA_ENV_NAME" + CONFIG_VER="$TORCHBENCH_CONFIG_VER" bash ./.github/scripts/run.sh "${OUTPUT_DIR}" - name: Copy artifact run: | LATEST_RESULT=$(find ${OUTPUT_DIR}/gh${GITHUB_RUN_ID}/ -name "*.json" | sort -r | head -1) @@ -51,8 +54,9 @@ jobs: path: benchmark-result-v1-*.json - name: Upload results to Amazon S3 run: | + . activate "$CONDA_ENV_NAME" LAST_MODIFIED_DIR=$(find ${OUTPUT_DIR} -mindepth 1 -maxdepth 1 -type d -exec stat -t {} \; | sort -r -n -k 13,13 | head -1 | cut -d " " -f 1) - python ./scripts/upload_s3.py --torchbench-result-dir ${LAST_MODIFIED_DIR} --gen-index --upload-s3 + python ./scripts/upload_s3.py --torchbench-result-dir ${LAST_MODIFIED_DIR} --s3-result-dir torchbench-v1-nightly --gen-index --upload-s3 - name: Remove conda env run: | - conda remove --name + conda env remove --name "${CONDA_ENV_NAME}" diff --git a/.github/workflows/v1-sweep.yml b/.github/workflows/v1-sweep.yml new file mode 100644 index 0000000000..97934fb81f --- /dev/null +++ b/.github/workflows/v1-sweep.yml @@ -0,0 +1,17 @@ +name: TorchBench sweep v0.1 +on: + workflow_dispatch: + +jobs: + run-benchmark: + if: ${{ github.repository_owner == 'pytorch' }} + runs-on: [self-hosted, bm-runner] + timeout-minutes: 2880 # 48 hours + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + ref: v1.0 + - name: Run benchmark + run: | + SCORE_CONFIG=v1 bash ./.github/scripts/run-sweep.sh diff --git a/conftest.py b/conftest.py index e3f62e9bd4..139a52e460 100644 --- a/conftest.py +++ b/conftest.py @@ -73,8 +73,8 @@ def pytest_benchmark_update_machine_info(config, machine_info): except ImportError: machine_info['torchvision_version'] = '*not-installed*' - machine_info['circle_build_num'] = os.environ.get("CIRCLE_BUILD_NUM") - machine_info['circle_project_name'] = os.environ.get("CIRCLE_PROJECT_REPONAME") + machine_info['github_run_id'] = os.environ.get("GITHUB_RUN_ID") + machine_info['torchbench_score_version'] = os.environ.get("TORCHBENCH_CONFIG_VER") try: # if running on unexpected machine/os, get_machine_config _may_ not work diff --git a/scripts/upload_s3.py b/scripts/upload_s3.py new file mode 100644 index 0000000000..a77c05cd08 --- /dev/null +++ b/scripts/upload_s3.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python +""" +The script to upload TorchBench nightly CI result to Amazon S3. +It assumes the following hierarchy of the result directory: + +benchmark-results/ + |-result-directory-1 + |-result1.json + |-result-directory-2 + |-result2.json + +The command +`upload_s3.py --torchbench-result-dir benchmark-results/result-directory --gen-index --upload-s3` +will index all directories under `benchmark-results` and generate the `index.json` file. +Then it will upload the `result-directory` and `index.json` to Amazon S3 bucket. +""" + +import argparse +import json +import re +import os +import boto3 +from json import JSONEncoder +from pathlib import Path +from dataclasses import dataclass +from typing import Dict, Optional, Any + +@dataclass +class TorchBenchData: + torch_date: str + relpath: str + +class TBEncoder(JSONEncoder): + def default(self, o): + return o.__dict__ + +S3_BUCKET = "ossci-metrics" +COMMIT_HISTORY: Dict[str, TorchBenchData] = dict() + +def get_S3_object_from_bucket(bucket_name: str, object: str) -> Any: + s3 = boto3.resource('s3') + return s3.Object(bucket_name, object) + +def analyze_json_result(json_file: str, relpath: str) -> (str, Optional[TorchBenchData]): + with open(json_file, "r") as jfile: + data = json.load(jfile) + pytorch_version = data["machine_info"]["pytorch_version"] + pytorch_date_result = re.search("dev([0-9]{8})", pytorch_version) + if not pytorch_date_result: + print(f"Found invalid pytorch nightly version {pytorch_version} in {json_file}, skip!") + return ("", None) + else: + pytorch_date = pytorch_date_result.groups()[0] + data = TorchBenchData( + torch_date = pytorch_date, + relpath = relpath) + return (pytorch_date, data) + +def gen_commit_history(result_dir: str): + dirs = map(lambda x: os.path.join(result_dir, x), sorted(os.listdir(result_dir))) + dirs = filter(lambda x: os.path.isdir(x), dirs) + results = dict() + for d in dirs: + json_files = map(lambda x: os.path.join(d, x), sorted(os.listdir(d))) + # Don't analyze empty files + filtered_json_files = filter(lambda x: os.stat(x).st_size, json_files) + for json_file in filtered_json_files: + relpath = os.path.relpath(json_file, result_dir) + (torch_date, tb_data) = analyze_json_result(json_file, relpath) + if torch_date: + # Only store the latest data + results[torch_date] = tb_data + out = [] + for key in sorted(results.keys()): + test = dict() + test["id"] = key + test["result"] = results[key] + out.append(test) + return out + +def upload_s3_file(key, body_file): + with open(body_file, "r") as body: + print(f"Uploading file {body_file} to S3 key: {key}") + obj = get_S3_object_from_bucket(S3_BUCKET, key) + obj.put(Body=body.read()) + +def is_nonempty_json(json_path): + # Upload non-empty json files + return (json_path.endswith(".json") and os.path.exists(json_path) and os.stat(json_path).st_size) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--torchbench-result-dir", required=True, + help="Specify the benchmark result directory") + parser.add_argument("--s3-result-dir", required=True, + help="Specify the benchmark result directory") + parser.add_argument("--gen-index", action="store_true", + help="Regenerate the benchmark index json") + parser.add_argument("--upload-s3", action="store_true", + help="Upload the specified directory to Amazon S3") + args = parser.parse_args() + parent_dir = Path(args.torchbench_result_dir).parent.absolute() + index_path = os.path.join(parent_dir, "index.json") + # Generate the index.json file to the parent directory + if args.gen_index: + # Get parent directory + index_json = gen_commit_history(parent_dir) + with open(index_path, "w") as out_file: + out_file.write(json.dumps(index_json, indent=4, sort_keys=True, cls=TBEncoder)) + if args.upload_s3: + # Upload index file + index_key = f"{args.s3_result_dir}/index.json" + upload_s3_file(index_key, index_path) + # Upload the result directory + basedir = os.path.basename(args.torchbench_result_dir) + result_dir = f"{args.s3_result_dir}/{basedir}" + for result_file in os.listdir(args.torchbench_result_dir): + result_key = f"{result_dir}/{result_file}" + result_path = os.path.join(args.torchbench_result_dir, result_file) + if is_nonempty_json(result_path): + upload_s3_file(result_key, result_path) From a6ab8546782ccac9c6947106879882c7f5fa18ea Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 21 Jun 2021 21:36:44 -0400 Subject: [PATCH 03/11] Fixed upload to scribe. --- .github/workflows/v1-nightly.yml | 19 +++++++++++-------- .github/workflows/v1-sweep.yml | 17 ----------------- conftest.py | 2 +- scripts/upload_scribe.py | 23 +++++++++++++---------- 4 files changed, 25 insertions(+), 36 deletions(-) delete mode 100644 .github/workflows/v1-sweep.yml diff --git a/.github/workflows/v1-nightly.yml b/.github/workflows/v1-nightly.yml index eb8df9588b..fdc9fb2794 100644 --- a/.github/workflows/v1-nightly.yml +++ b/.github/workflows/v1-nightly.yml @@ -7,7 +7,8 @@ on: jobs: run-benchmark: env: - TORCHBENCH_CONFIG_VER: "v1" + TORCHBENCH_VER: "v1-alpha" + CONFIG_VER: "v1" CONDA_ENV_NAME: "v1-nightly-ci" OUTPUT_DIR: $(echo ${HOME})/.torchbench/v1-nighlty-ci if: ${{ github.repository_owner == 'pytorch' }} @@ -41,22 +42,24 @@ jobs: - name: Run benchmark run: | . activate "$CONDA_ENV_NAME" - CONFIG_VER="$TORCHBENCH_CONFIG_VER" bash ./.github/scripts/run.sh "${OUTPUT_DIR}" - - name: Copy artifact + bash ./.github/scripts/run.sh "${OUTPUT_DIR}" + - name: Copy artifact and upload to scribe run: | LATEST_RESULT=$(find ${OUTPUT_DIR}/gh${GITHUB_RUN_ID}/ -name "*.json" | sort -r | head -1) + echo "Benchmark result file: $LATEST_RESULT" TODAY=$(date "+%Y%m%d%H%M%S") + CONFIG_NORM_FILE=${CONFIG_DIR}/${CONFIG_FILE} + SCORE_FILE="./benchmark-result-v1-score-${TODAY}.json" + # Generate score + python compute_score.py --score_version v1 --benchmark_data_file $LATEST_RESULT > $SCORE_FILE + # Upload result to Scribe + python scripts/upload_scribe.py --pytest_bench_json $LATEST_RESULT --torchbench_score_file $SCORE_FILE cp ${LATEST_RESULT} ./benchmark-result-v1-${TODAY}.json - name: Upload artifact uses: actions/upload-artifact@v2 with: name: Benchmark result path: benchmark-result-v1-*.json - - name: Upload results to Amazon S3 - run: | - . activate "$CONDA_ENV_NAME" - LAST_MODIFIED_DIR=$(find ${OUTPUT_DIR} -mindepth 1 -maxdepth 1 -type d -exec stat -t {} \; | sort -r -n -k 13,13 | head -1 | cut -d " " -f 1) - python ./scripts/upload_s3.py --torchbench-result-dir ${LAST_MODIFIED_DIR} --s3-result-dir torchbench-v1-nightly --gen-index --upload-s3 - name: Remove conda env run: | conda env remove --name "${CONDA_ENV_NAME}" diff --git a/.github/workflows/v1-sweep.yml b/.github/workflows/v1-sweep.yml deleted file mode 100644 index 97934fb81f..0000000000 --- a/.github/workflows/v1-sweep.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: TorchBench sweep v0.1 -on: - workflow_dispatch: - -jobs: - run-benchmark: - if: ${{ github.repository_owner == 'pytorch' }} - runs-on: [self-hosted, bm-runner] - timeout-minutes: 2880 # 48 hours - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - ref: v1.0 - - name: Run benchmark - run: | - SCORE_CONFIG=v1 bash ./.github/scripts/run-sweep.sh diff --git a/conftest.py b/conftest.py index 139a52e460..5cd239f929 100644 --- a/conftest.py +++ b/conftest.py @@ -74,7 +74,7 @@ def pytest_benchmark_update_machine_info(config, machine_info): machine_info['torchvision_version'] = '*not-installed*' machine_info['github_run_id'] = os.environ.get("GITHUB_RUN_ID") - machine_info['torchbench_score_version'] = os.environ.get("TORCHBENCH_CONFIG_VER") + machine_info['torchbench_score_version'] = os.environ.get("TORCHBENCH_VER") try: # if running on unexpected machine/os, get_machine_config _may_ not work diff --git a/scripts/upload_scribe.py b/scripts/upload_scribe.py index f02a0ac81a..68629fd89e 100644 --- a/scripts/upload_scribe.py +++ b/scripts/upload_scribe.py @@ -113,8 +113,8 @@ def post_pytest_benchmarks(self, pytest_json, max_data_upload=100): "machine_kernel": machine_info['release'], "machine_processor": machine_info['processor'], "machine_hostname": machine_info['node'], - "circle_build_num": machine_info.get('circle_build_num', None), - "circle_project_reponame": machine_info.get('circle_project_name', None), + "github_run_id": machine_info.get('github_run_id', None), + "torchbench_score_version": machine_info.get('torchbench_score_version', None), } stats_msg = {"stddev": b['stats']['stddev'], @@ -139,7 +139,7 @@ def post_torchbench_score(self, pytest_json, score): machine_info = pytest_json['machine_info'] commit_info = pytest_json['commit_info'] upload_time = int(time.time()) - m = self.format_message({ + scribe_message = { "time": upload_time, "benchmark_time": pytest_json['datetime'], "git_repo": commit_info['project'], @@ -154,10 +154,12 @@ def post_torchbench_score(self, pytest_json, score): "machine_kernel": machine_info['release'], "machine_processor": machine_info['processor'], "machine_hostname": machine_info['node'], - "circle_build_num": machine_info.get('circle_build_num', None), - "circle_project_reponame": machine_info.get('circle_project_name', None), - "torchbench_score": score, - }) + "github_run_id": machine_info.get('github_run_id', None), + "torchbench_score_version": machine_info.get('torchbench_score_version', None), + } + for key in score: + scribe_message[key] = score[key] + m = self.format_message(scribe_message) self.upload([m]) @@ -166,13 +168,14 @@ def post_torchbench_score(self, pytest_json, score): parser.add_argument("--pytest_bench_json", required=True, type=argparse.FileType('r'), help='Upload json data formatted by pytest-benchmark module') - parser.add_argument("--torchbench_score", type=float, - help="optional torchbench score to include") + parser.add_argument("--torchbench_score_file", required=True, + help="torchbench score file to include") args = parser.parse_args() benchmark_uploader = PytorchBenchmarkUploader() json_data = json.load(args.pytest_bench_json) + json_score = json.load(args.torchbench_score_file) benchmark_uploader.post_pytest_benchmarks(json_data) if args.torchbench_score is not None: - benchmark_uploader.post_torchbench_score(json_data, args.torchbench_score) + benchmark_uploader.post_torchbench_score(json_data, json_score) From b8535c5cd78518171115fe92782940c5192e4b43 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 21 Jun 2021 21:39:57 -0400 Subject: [PATCH 04/11] Remove s3 upload for now. --- scripts/upload_s3.py | 121 ------------------------------------------- 1 file changed, 121 deletions(-) delete mode 100644 scripts/upload_s3.py diff --git a/scripts/upload_s3.py b/scripts/upload_s3.py deleted file mode 100644 index a77c05cd08..0000000000 --- a/scripts/upload_s3.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python -""" -The script to upload TorchBench nightly CI result to Amazon S3. -It assumes the following hierarchy of the result directory: - -benchmark-results/ - |-result-directory-1 - |-result1.json - |-result-directory-2 - |-result2.json - -The command -`upload_s3.py --torchbench-result-dir benchmark-results/result-directory --gen-index --upload-s3` -will index all directories under `benchmark-results` and generate the `index.json` file. -Then it will upload the `result-directory` and `index.json` to Amazon S3 bucket. -""" - -import argparse -import json -import re -import os -import boto3 -from json import JSONEncoder -from pathlib import Path -from dataclasses import dataclass -from typing import Dict, Optional, Any - -@dataclass -class TorchBenchData: - torch_date: str - relpath: str - -class TBEncoder(JSONEncoder): - def default(self, o): - return o.__dict__ - -S3_BUCKET = "ossci-metrics" -COMMIT_HISTORY: Dict[str, TorchBenchData] = dict() - -def get_S3_object_from_bucket(bucket_name: str, object: str) -> Any: - s3 = boto3.resource('s3') - return s3.Object(bucket_name, object) - -def analyze_json_result(json_file: str, relpath: str) -> (str, Optional[TorchBenchData]): - with open(json_file, "r") as jfile: - data = json.load(jfile) - pytorch_version = data["machine_info"]["pytorch_version"] - pytorch_date_result = re.search("dev([0-9]{8})", pytorch_version) - if not pytorch_date_result: - print(f"Found invalid pytorch nightly version {pytorch_version} in {json_file}, skip!") - return ("", None) - else: - pytorch_date = pytorch_date_result.groups()[0] - data = TorchBenchData( - torch_date = pytorch_date, - relpath = relpath) - return (pytorch_date, data) - -def gen_commit_history(result_dir: str): - dirs = map(lambda x: os.path.join(result_dir, x), sorted(os.listdir(result_dir))) - dirs = filter(lambda x: os.path.isdir(x), dirs) - results = dict() - for d in dirs: - json_files = map(lambda x: os.path.join(d, x), sorted(os.listdir(d))) - # Don't analyze empty files - filtered_json_files = filter(lambda x: os.stat(x).st_size, json_files) - for json_file in filtered_json_files: - relpath = os.path.relpath(json_file, result_dir) - (torch_date, tb_data) = analyze_json_result(json_file, relpath) - if torch_date: - # Only store the latest data - results[torch_date] = tb_data - out = [] - for key in sorted(results.keys()): - test = dict() - test["id"] = key - test["result"] = results[key] - out.append(test) - return out - -def upload_s3_file(key, body_file): - with open(body_file, "r") as body: - print(f"Uploading file {body_file} to S3 key: {key}") - obj = get_S3_object_from_bucket(S3_BUCKET, key) - obj.put(Body=body.read()) - -def is_nonempty_json(json_path): - # Upload non-empty json files - return (json_path.endswith(".json") and os.path.exists(json_path) and os.stat(json_path).st_size) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--torchbench-result-dir", required=True, - help="Specify the benchmark result directory") - parser.add_argument("--s3-result-dir", required=True, - help="Specify the benchmark result directory") - parser.add_argument("--gen-index", action="store_true", - help="Regenerate the benchmark index json") - parser.add_argument("--upload-s3", action="store_true", - help="Upload the specified directory to Amazon S3") - args = parser.parse_args() - parent_dir = Path(args.torchbench_result_dir).parent.absolute() - index_path = os.path.join(parent_dir, "index.json") - # Generate the index.json file to the parent directory - if args.gen_index: - # Get parent directory - index_json = gen_commit_history(parent_dir) - with open(index_path, "w") as out_file: - out_file.write(json.dumps(index_json, indent=4, sort_keys=True, cls=TBEncoder)) - if args.upload_s3: - # Upload index file - index_key = f"{args.s3_result_dir}/index.json" - upload_s3_file(index_key, index_path) - # Upload the result directory - basedir = os.path.basename(args.torchbench_result_dir) - result_dir = f"{args.s3_result_dir}/{basedir}" - for result_file in os.listdir(args.torchbench_result_dir): - result_key = f"{result_dir}/{result_file}" - result_path = os.path.join(args.torchbench_result_dir, result_file) - if is_nonempty_json(result_path): - upload_s3_file(result_key, result_path) From 003b3f896f91d1caecc54489eeee653c2ac0f950 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 21 Jun 2021 21:48:15 -0400 Subject: [PATCH 05/11] Add rename of score result. --- scripts/upload_scribe.py | 18 ++++++++++++++---- torchbenchmark/score/compute_score_v1.py | 4 ++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/scripts/upload_scribe.py b/scripts/upload_scribe.py index 68629fd89e..8d16a240d2 100644 --- a/scripts/upload_scribe.py +++ b/scripts/upload_scribe.py @@ -82,10 +82,16 @@ def __init__(self): 'pytorch_version', 'python_version', 'torchtext_version', 'torchvision_version', 'machine_kernel', 'machine_processor', 'machine_hostname', - 'circle_build_num', 'circle_project_reponame', + 'github_run_id', 'torchbench_score_version', ], 'float': [ - 'stddev', 'min', 'median', 'max', 'mean', 'runtime', 'torchbench_score', + 'stddev', 'min', 'median', 'max', 'mean', 'runtime', + 'torchbench_score', + 'torchbench_score_jit_speedup', + 'torchbench_subscore_cpu_train', + 'torchbench_subscore_cpu_infer', + 'torchbench_subscore_gpu_train', + 'torchbench_subscore_gpu_infer', ] } @@ -156,9 +162,13 @@ def post_torchbench_score(self, pytest_json, score): "machine_hostname": machine_info['node'], "github_run_id": machine_info.get('github_run_id', None), "torchbench_score_version": machine_info.get('torchbench_score_version', None), + "torchbench_score": score["score"]["total"], + "torchbench_score_jit_speedup": score["score"]["jit-speedup"], + "torchbench_subscore_cpu_train": score["score"]["subscore-cpu-train"], + "torchbench_subscore_cpu_infer": score["score"]["subscore-cpu-eval"], + "torchbench_subscore_gpu_train": score["score"]["subscore-gpu-train"], + "torchbench_subscore_gpu_infer": score["score"]["subscore-gpu-train"], } - for key in score: - scribe_message[key] = score[key] m = self.format_message(scribe_message) self.upload([m]) diff --git a/torchbenchmark/score/compute_score_v1.py b/torchbenchmark/score/compute_score_v1.py index e1dcad6796..cc6824b8c7 100755 --- a/torchbenchmark/score/compute_score_v1.py +++ b/torchbenchmark/score/compute_score_v1.py @@ -231,12 +231,12 @@ def compute_score(self, data): assert not diff_set, f"The request benchmark json doesn't have v1 test: {diff_set}" summary = {} - summary["subscore[jit]"] = self.compute_jit_speedup_score(data) + summary["jit-speedup"] = self.compute_jit_speedup_score(data) devices = ["cpu", "cuda"] tests = ["train", "eval"] filters = [(a, b) for a in devices for b in tests] for f in filters: - key = f"subscore[{f[0]}-{f[1]}]" + key = f"subscore-{f[0]}-{f[1]}" summary[key] = self._get_subscore(data_norm, self.norm, self.norm_weights, f) * self.target summary["total"] = self._get_score(data_norm, self.norm, self.norm_weights) * self.target return summary From 65e558fd98c1d8d20451fcfa3b8ae83b7ea17c70 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 21 Jun 2021 21:50:53 -0400 Subject: [PATCH 06/11] Add comment to v1 score. --- .github/workflows/v1-nightly.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/v1-nightly.yml b/.github/workflows/v1-nightly.yml index fdc9fb2794..241dc82f08 100644 --- a/.github/workflows/v1-nightly.yml +++ b/.github/workflows/v1-nightly.yml @@ -7,6 +7,7 @@ on: jobs: run-benchmark: env: + # Set to "v1-alpha" for testing. Will set it to "v1" in release. TORCHBENCH_VER: "v1-alpha" CONFIG_VER: "v1" CONDA_ENV_NAME: "v1-nightly-ci" From deb6e1861cdbbf9447cbeeccacffb80eee4e4903 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 21 Jun 2021 21:52:05 -0400 Subject: [PATCH 07/11] Remove the cron, add it in the formal release. --- .github/workflows/v1-nightly.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/v1-nightly.yml b/.github/workflows/v1-nightly.yml index 241dc82f08..c4509d6f3c 100644 --- a/.github/workflows/v1-nightly.yml +++ b/.github/workflows/v1-nightly.yml @@ -1,8 +1,6 @@ name: TorchBench nightly ci v1.0 on: workflow_dispatch: - schedule: - - cron: '0 13 * * *' # run at 1 PM UTC jobs: run-benchmark: From 59279b9bd2d9b81a84ac9b583ce25cce063bf6a1 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 22 Jun 2021 08:52:21 -0400 Subject: [PATCH 08/11] Add result sanity check in upload score. --- scripts/upload_scribe.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/upload_scribe.py b/scripts/upload_scribe.py index 8d16a240d2..44286e26b8 100644 --- a/scripts/upload_scribe.py +++ b/scripts/upload_scribe.py @@ -182,10 +182,15 @@ def post_torchbench_score(self, pytest_json, score): help="torchbench score file to include") args = parser.parse_args() + # Result sanity check + json_name = os.path.basename(args.pytest_bench_json) + json_score = json.load(args.torchbench_score_file) + score_data = None + for data in json_score: + if data["file"] == json_name: + score_data = data + assert score_data, f"Can't find {json_name} score in {args.torchbench_score_file}. Stop." benchmark_uploader = PytorchBenchmarkUploader() json_data = json.load(args.pytest_bench_json) - json_score = json.load(args.torchbench_score_file) benchmark_uploader.post_pytest_benchmarks(json_data) - - if args.torchbench_score is not None: - benchmark_uploader.post_torchbench_score(json_data, json_score) + benchmark_uploader.post_torchbench_score(json_data, score_data) From 650174c164559044a5a30433fabb02916c900731 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 22 Jun 2021 09:14:14 -0400 Subject: [PATCH 09/11] Fix upload scribe error. --- scripts/upload_scribe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/upload_scribe.py b/scripts/upload_scribe.py index 44286e26b8..b7e62ea9d3 100644 --- a/scripts/upload_scribe.py +++ b/scripts/upload_scribe.py @@ -179,11 +179,12 @@ def post_torchbench_score(self, pytest_json, score): type=argparse.FileType('r'), help='Upload json data formatted by pytest-benchmark module') parser.add_argument("--torchbench_score_file", required=True, + type=argparse.FileType('r'), help="torchbench score file to include") args = parser.parse_args() # Result sanity check - json_name = os.path.basename(args.pytest_bench_json) + json_name = os.path.basename(args.pytest_bench_json.name) json_score = json.load(args.torchbench_score_file) score_data = None for data in json_score: From 162f0177f9981e722eb439a17c7b146fd8a51205 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 22 Jun 2021 09:15:29 -0400 Subject: [PATCH 10/11] Fix another upload scribe error. --- scripts/upload_scribe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/upload_scribe.py b/scripts/upload_scribe.py index b7e62ea9d3..3baa3ecb6d 100644 --- a/scripts/upload_scribe.py +++ b/scripts/upload_scribe.py @@ -188,7 +188,7 @@ def post_torchbench_score(self, pytest_json, score): json_score = json.load(args.torchbench_score_file) score_data = None for data in json_score: - if data["file"] == json_name: + if os.path.basename(data["file"]) == json_name: score_data = data assert score_data, f"Can't find {json_name} score in {args.torchbench_score_file}. Stop." benchmark_uploader = PytorchBenchmarkUploader() From a35e3bb1dbef2e9ae2a31600e258877776b32680 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 22 Jun 2021 09:17:08 -0400 Subject: [PATCH 11/11] Fix field in upload scribe. --- scripts/upload_scribe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/upload_scribe.py b/scripts/upload_scribe.py index 3baa3ecb6d..803b65a18f 100644 --- a/scripts/upload_scribe.py +++ b/scripts/upload_scribe.py @@ -166,8 +166,8 @@ def post_torchbench_score(self, pytest_json, score): "torchbench_score_jit_speedup": score["score"]["jit-speedup"], "torchbench_subscore_cpu_train": score["score"]["subscore-cpu-train"], "torchbench_subscore_cpu_infer": score["score"]["subscore-cpu-eval"], - "torchbench_subscore_gpu_train": score["score"]["subscore-gpu-train"], - "torchbench_subscore_gpu_infer": score["score"]["subscore-gpu-train"], + "torchbench_subscore_gpu_train": score["score"]["subscore-cuda-train"], + "torchbench_subscore_gpu_infer": score["score"]["subscore-cuda-eval"], } m = self.format_message(scribe_message) self.upload([m])