diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh index b6cff5adb86bd..06fa8db5ff9a5 100755 --- a/.ci/pytorch/test.sh +++ b/.ci/pytorch/test.sh @@ -485,9 +485,13 @@ test_inductor_torchbench_smoketest_perf() { python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \ --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \ --output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" - # the reference speedup value is hardcoded in check_hf_bert_perf_csv.py - # this value needs to be actively maintained to make this check useful - python benchmarks/dynamo/check_hf_bert_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" + # The threshold value needs to be actively maintained to make this check useful + python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.15 + + python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \ + --export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" + # The threshold value needs to be actively maintained to make this check useful + python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 5.5 # Check memory compression ratio for a few models for test in hf_Albert timm_vision_transformer; do diff --git a/benchmarks/dynamo/check_hf_bert_perf_csv.py b/benchmarks/dynamo/check_perf_csv.py similarity index 64% rename from benchmarks/dynamo/check_hf_bert_perf_csv.py rename to benchmarks/dynamo/check_perf_csv.py index 43a362dd074d6..2a19f6c4a1426 100644 --- a/benchmarks/dynamo/check_hf_bert_perf_csv.py +++ b/benchmarks/dynamo/check_perf_csv.py @@ -5,7 +5,7 @@ import pandas as pd -def check_hf_bert_perf_csv(filename): +def check_perf_csv(filename, threshold): """ Basic performance checking. """ @@ -16,10 +16,7 @@ def check_hf_bert_perf_csv(filename): for _, row in df.iterrows(): model_name = row["name"] speedup = row["speedup"] - # Reduce from 1.165 to 1.160, see https://github.com/pytorch/pytorch/issues/96530 - # Reduce from 1.160 to 1.140 after a transformer version upgrade, see https://github.com/pytorch/benchmark/pull/1406 - # The speedup is not backed to 1.16 after the extra graph break issue is fixed in transformer upstream - if speedup < 1.150: + if speedup < threshold: failed.append(model_name) print(f"{model_name:34} {speedup}") @@ -39,5 +36,8 @@ def check_hf_bert_perf_csv(filename): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--file", "-f", type=str, help="csv file name") + parser.add_argument( + "--threshold", "-t", type=float, help="threshold speedup value to check against" + ) args = parser.parse_args() - check_hf_bert_perf_csv(args.file) + check_perf_csv(args.file, args.threshold)