diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index b6cff5adb86bd..06fa8db5ff9a5 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -485,9 +485,13 @@ test_inductor_torchbench_smoketest_perf() {
   python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
     --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
     --output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
-  # the reference speedup value is hardcoded in check_hf_bert_perf_csv.py
-  # this value needs to be actively maintained to make this check useful
-  python benchmarks/dynamo/check_hf_bert_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
+  # The -t threshold is the minimum acceptable speedup; it needs to be actively maintained to make this check useful
+  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.15
+
+  python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
+    --export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
+  # The -t threshold is the minimum acceptable speedup; it needs to be actively maintained to make this check useful
+  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 5.5
 
   # Check memory compression ratio for a few models
   for test in hf_Albert timm_vision_transformer; do
diff --git a/benchmarks/dynamo/check_hf_bert_perf_csv.py b/benchmarks/dynamo/check_perf_csv.py
similarity index 64%
rename from benchmarks/dynamo/check_hf_bert_perf_csv.py
rename to benchmarks/dynamo/check_perf_csv.py
index 43a362dd074d6..2a19f6c4a1426 100644
--- a/benchmarks/dynamo/check_hf_bert_perf_csv.py
+++ b/benchmarks/dynamo/check_perf_csv.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 
-def check_hf_bert_perf_csv(filename):
+def check_perf_csv(filename, threshold):
     """
     Basic performance checking.
     """
@@ -16,10 +16,7 @@ def check_hf_bert_perf_csv(filename):
     for _, row in df.iterrows():
         model_name = row["name"]
         speedup = row["speedup"]
-        # Reduce from 1.165 to 1.160, see https://github.com/pytorch/pytorch/issues/96530
-        # Reduce from 1.160 to 1.140 after a transformer version upgrade, see https://github.com/pytorch/benchmark/pull/1406
-        # The speedup is not backed to 1.16 after the extra graph break issue is fixed in transformer upstream
-        if speedup < 1.150:
+        if speedup < threshold:
             failed.append(model_name)
 
         print(f"{model_name:34} {speedup}")
@@ -39,5 +36,14 @@ def check_hf_bert_perf_csv(filename):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--file", "-f", type=str, help="csv file name")
+    # The threshold is mandatory: every speedup is compared against it, so a
+    # missing value (None) would fail at comparison time, not as a usage error.
+    parser.add_argument(
+        "--threshold",
+        "-t",
+        type=float,
+        required=True,
+        help="threshold speedup value to check against",
+    )
     args = parser.parse_args()
-    check_hf_bert_perf_csv(args.file)
+    check_perf_csv(args.file, args.threshold)