From 9a887e297aa2be76f514012c41f8edab59b75a30 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Fri, 21 Nov 2025 16:47:16 -0700
Subject: [PATCH 1/6] hidemetadata

---
 .../common/benchmark_time_series_api_model.py       | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py
index e825a87c68..06aa758ff7 100644
--- a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py
+++ b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py
@@ -60,7 +60,18 @@ def from_request(
             requests.exceptions.RequestException if network/timeout/HTTP error
             RuntimeError if the API returns an "error" field or malformed data
         """
-        resp = requests.post(url, json=query, timeout=timeout)
+
+        headers = {
+            # Looks like a real browser instead of python-requests
+            "User-Agent": (
+                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/120.0.0.0 Safari/537.36"
+            ),
+            "Accept": "application/json,text/html;q=0.9,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.9",
+        }
+        resp = requests.post(url, json=query, timeout=timeout, headers=headers)
         resp.raise_for_status()
         payload = resp.json()
 

From 46dfe98c4d9d38bced84d0e2513dc0d6b170fa44 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Fri, 21 Nov 2025 17:05:31 -0700
Subject: [PATCH 2/6] hidemetadata

---
 .../common/config.py                                | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/aws/lambda/benchmark_regression_summary_report/common/config.py b/aws/lambda/benchmark_regression_summary_report/common/config.py
index fd267360a1..e91e9c8374 100644
--- a/aws/lambda/benchmark_regression_summary_report/common/config.py
+++ b/aws/lambda/benchmark_regression_summary_report/common/config.py
@@ -89,12 +89,12 @@
     hud_info={
         "url": "https://hud.pytorch.org/benchmark/v3/dashboard/torchao_micro_api_benchmark",
     },
-    # set baseline from past 3-6 days, and compare with the lastest 4 day
+    # set baseline from past 4-8 days, and compare with the lastest 4 day
     policy=Policy(
         frequency=Frequency(value=1, unit="days"),
         range=RangeConfig(
-            baseline=DayRangeWindow(value=3),
-            comparison=DayRangeWindow(value=3),
+            baseline=DayRangeWindow(value=4),
+            comparison=DayRangeWindow(value=4),
         ),
         metrics={
             "bfloat16 fwd time (ms)": RegressionPolicy(
@@ -141,6 +141,7 @@
                     "branches": ["main"],
                     "repo": "pytorch/pytorch",
                     "device": "",
+                    "arch": "",
                     "benchmarkName": "PyTorch operator microbenchmark",
                     "startTime": "{{ startTime }}",
                     "stopTime": "{{ stopTime }}"
@@ -152,12 +153,12 @@
     hud_info={
         "url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_operator_microbenchmark",
     },
-    # set baseline from past 3-6 days, and compare with the lastest 4 day
+    # set baseline from past 4-8 days, and compare with the lastest 4 day
     policy=Policy(
         frequency=Frequency(value=1, unit="days"),
         range=RangeConfig(
-            baseline=DayRangeWindow(value=3),
-            comparison=DayRangeWindow(value=3),
+            baseline=DayRangeWindow(value=4),
+            comparison=DayRangeWindow(value=4),
         ),
         metrics={
             "latency": RegressionPolicy(

From 6d580233bf135c016846ec43db8479ce08519cae Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Fri, 21 Nov 2025 17:06:43 -0700
Subject: [PATCH 3/6] hidemetadata

---
 .../common/config.py                                 | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/aws/lambda/benchmark_regression_summary_report/common/config.py b/aws/lambda/benchmark_regression_summary_report/common/config.py
index e91e9c8374..5cb060245d 100644
--- a/aws/lambda/benchmark_regression_summary_report/common/config.py
+++ b/aws/lambda/benchmark_regression_summary_report/common/config.py
@@ -37,12 +37,12 @@
     hud_info={
         "url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_helion",
     },
-    # set baseline from past 4-8 days, and compare with the lastest 4 day
+    # set baseline from past 3-6 days, and compare with the lastest 3 day
     policy=Policy(
         frequency=Frequency(value=1, unit="days"),
         range=RangeConfig(
-            baseline=DayRangeWindow(value=4),
-            comparison=DayRangeWindow(value=4),
+            baseline=DayRangeWindow(value=3),
+            comparison=DayRangeWindow(value=3),
         ),
         metrics={
             "helion_speedup": RegressionPolicy(
@@ -153,12 +153,12 @@
     hud_info={
         "url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_operator_microbenchmark",
     },
-    # set baseline from past 4-8 days, and compare with the lastest 4 day
+    # set baseline from past 3-6 days, and compare with the latest 3 days
     policy=Policy(
         frequency=Frequency(value=1, unit="days"),
         range=RangeConfig(
-            baseline=DayRangeWindow(value=4),
-            comparison=DayRangeWindow(value=4),
+            baseline=DayRangeWindow(value=3),
+            comparison=DayRangeWindow(value=3),
         ),
         metrics={
             "latency": RegressionPolicy(

From 85201682bffcafc79057c01f82d6711845e6af0b Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Fri, 21 Nov 2025 17:08:36 -0700
Subject: [PATCH 4/6] hidemetadata

---
 .../common/config.py                                 | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/aws/lambda/benchmark_regression_summary_report/common/config.py b/aws/lambda/benchmark_regression_summary_report/common/config.py
index 5cb060245d..1068aa76dc 100644
--- a/aws/lambda/benchmark_regression_summary_report/common/config.py
+++ b/aws/lambda/benchmark_regression_summary_report/common/config.py
@@ -37,12 +37,12 @@
     hud_info={
         "url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_helion",
     },
-    # set baseline from past 3-6 days, and compare with the lastest 3 day
+    # set baseline from past 4-8 days, and compare with the latest 4 days
     policy=Policy(
         frequency=Frequency(value=1, unit="days"),
         range=RangeConfig(
-            baseline=DayRangeWindow(value=3),
-            comparison=DayRangeWindow(value=3),
+            baseline=DayRangeWindow(value=4),
+            comparison=DayRangeWindow(value=4),
         ),
         metrics={
             "helion_speedup": RegressionPolicy(
@@ -89,12 +89,12 @@
     hud_info={
         "url": "https://hud.pytorch.org/benchmark/v3/dashboard/torchao_micro_api_benchmark",
     },
-    # set baseline from past 4-8 days, and compare with the lastest 4 day
+    # set baseline from past 3-6 days, and compare with the latest 3 days
     policy=Policy(
         frequency=Frequency(value=1, unit="days"),
         range=RangeConfig(
-            baseline=DayRangeWindow(value=4),
-            comparison=DayRangeWindow(value=4),
+            baseline=DayRangeWindow(value=3),
+            comparison=DayRangeWindow(value=3),
         ),
         metrics={
             "bfloat16 fwd time (ms)": RegressionPolicy(

From acb9a93ee568da60137b5423d2d606aa5faa8b9b Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Fri, 21 Nov 2025 19:56:18 -0700
Subject: [PATCH 5/6] hidemetadata

---
 .../common/benchmark_time_series_api_model.py | 11 +-----
 .../lambda_function.py                        | 37 ++++++++++++++++---
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py
index 06aa758ff7..44442da0a5 100644
--- a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py
+++ b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py
@@ -45,7 +45,7 @@ class BenchmarkTimeSeriesApiResponse:
 
     @classmethod
     def from_request(
-        cls, url: str, query: dict, timeout: int = 180
+        cls, url: str, query: dict, access_token:str, timeout: int = 180
     ) -> "BenchmarkTimeSeriesApiResponse":
         """
         Send a POST request and parse into BenchmarkTimeSeriesApiResponse.
@@ -62,14 +62,7 @@ def from_request(
         """
 
         headers = {
-            # Looks like a real browser instead of python-requests
-            "User-Agent": (
-                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
-                "AppleWebKit/537.36 (KHTML, like Gecko) "
-                "Chrome/120.0.0.0 Safari/537.36"
-            ),
-            "Accept": "application/json,text/html;q=0.9,*/*;q=0.8",
-            "Accept-Language": "en-US,en;q=0.9",
+            "x-hud-internal-bot": access_token,
         }
         resp = requests.post(url, json=query, timeout=timeout, headers=headers)
         resp.raise_for_status()
diff --git a/aws/lambda/benchmark_regression_summary_report/lambda_function.py b/aws/lambda/benchmark_regression_summary_report/lambda_function.py
index e6a06ed698..0b0a56d5de 100644
--- a/aws/lambda/benchmark_regression_summary_report/lambda_function.py
+++ b/aws/lambda/benchmark_regression_summary_report/lambda_function.py
@@ -32,6 +32,7 @@
     "CLICKHOUSE_ENDPOINT": os.getenv("CLICKHOUSE_ENDPOINT", ""),
     "CLICKHOUSE_PASSWORD": os.getenv("CLICKHOUSE_PASSWORD", ""),
     "CLICKHOUSE_USERNAME": os.getenv("CLICKHOUSE_USERNAME", ""),
+    "HUD_INTERNAL_BOT_TOKEN": os.getenv("HUD_INTERNAL_BOT_TOKEN", ""),
 }
 
 
@@ -71,6 +72,7 @@ def __init__(
         self,
         config_id: str,
         end_time: int,
+        hud_access_token: str = "",
         is_dry_run: bool = False,
         is_pass_check: bool = False,
     ) -> None:
@@ -78,6 +80,7 @@ def __init__(
         self.is_pass_check = is_pass_check
         self.config_id = config_id
         self.end_time = end_time
+        self.hud_access_token = hud_access_token
 
     def log_info(self, msg: str):
         logger.info("[%s][%s] %s", self.end_time, self.config_id, msg)
@@ -136,13 +139,13 @@ def process(
                 f"with frequency {report_freq.get_text()}..."
             )
 
-        target, ls, le = self.get_target(config, self.end_time)
+        target, ls, le = self.get_target(config, self.end_time,self.hud_access_token)
         if not target.time_series:
             self.log_info(
                 f"no target data found for time range [{ls},{le}] with frequency {report_freq.get_text()}..."
             )
             return
-        baseline, bs, be = self.get_baseline(config, self.end_time)
+        baseline, bs, be = self.get_baseline(config, self.end_time, self.hud_access_token)
 
         if not baseline.time_series:
             self.log_info(
@@ -165,7 +168,7 @@ def process(
         reportManager.run(cc, ENVS["GITHUB_TOKEN"])
         return
 
-    def get_target(self, config: BenchmarkConfig, end_time: int):
+    def get_target(self, config: BenchmarkConfig, end_time: int, hud_access_token: str):
         data_range = config.policy.range
         target_s = end_time - data_range.comparison_timedelta_s()
         target_e = end_time
@@ -178,10 +181,16 @@ def get_target(self, config: BenchmarkConfig, end_time: int):
             start_time=target_s,
             end_time=target_e,
             source=config.source,
+            access_token=hud_access_token,
         )
         self.log_info(
             f"done. found {len(target_data.time_series)} # of data groups, with time range {target_data.time_range}",
         )
+
+        if len(target_data.time_series) > 0:
+            self.log_info(
+                f"peeking the first data: {target_data.time_series[0]}",
+            )
         if not target_data.time_range or not target_data.time_range.end:
             return None, target_s, target_e
 
@@ -190,7 +199,7 @@ def get_target(self, config: BenchmarkConfig, end_time: int):
             return None, target_s, target_e
         return target_data, target_s, target_e
 
-    def get_baseline(self, config: BenchmarkConfig, end_time: int):
+    def get_baseline(self, config: BenchmarkConfig, end_time: int, hud_access_token: str):
         data_range = config.policy.range
         baseline_s = end_time - data_range.total_timedelta_s()
         baseline_e = end_time - data_range.comparison_timedelta_s()
@@ -204,12 +213,18 @@ def get_baseline(self, config: BenchmarkConfig, end_time: int):
             start_time=baseline_s,
             end_time=baseline_e,
             source=config.source,
+            access_token=hud_access_token,
         )
 
         self.log_info(
             f"Done. found {len(raw_data.time_series)} # of data, with time range {raw_data.time_range}",
         )
 
+        if len(raw_data.time_series) > 0:
+            self.log_info(
+                f"peeking the first data: {raw_data.time_series[0]}",
+            )
+
         baseline_latest_ts = int(isoparse(raw_data.time_range.end).timestamp())
 
         if not self.should_use_data(baseline_latest_ts, baseline_e):
@@ -245,6 +260,7 @@ def _fetch_from_benchmark_ts_api(
         config_id: str,
         end_time: int,
         start_time: int,
+        access_token: str,
         source: BenchmarkApiSource,
     ):
         str_end_time = format_ts_with_t(end_time)
@@ -261,7 +277,7 @@ def _fetch_from_benchmark_ts_api(
         t0 = time.perf_counter()
         try:
             resp: BenchmarkTimeSeriesApiResponse = (
-                BenchmarkTimeSeriesApiResponse.from_request(url, query)
+                BenchmarkTimeSeriesApiResponse.from_request(url, query, access_token)
             )
 
             elapsed_ms = (time.perf_counter() - t0) * 1000.0
@@ -353,6 +369,7 @@ def _get_latest_record_ts(
 def main(
     config_id: str,
     github_access_token: str = "",
+    hud_access_token: str = "",
     args: Optional[argparse.Namespace] = None,
     *,
     is_dry_run: bool = False,
@@ -392,6 +409,7 @@ def main(
             end_time=end_time_ts,
             is_dry_run=is_dry_run,
             is_pass_check=is_forced,
+            hud_access_token=hud_access_token,
         )
         processor.process(args=args)
     except Exception as e:
@@ -411,6 +429,7 @@ def lambda_handler(event: Any, context: Any) -> None:
     main(
         config_id=config_id,
         github_access_token=ENVS["GITHUB_TOKEN"],
+        hud_access_token=ENVS["HUD_INTERNAL_BOT_TOKEN"],
     )
     return
 
@@ -473,11 +492,16 @@ def parse_args() -> argparse.Namespace:
         type=str,
         help="the end time to run, in format of YYYY-MM-DD HH:MM:SS",
     )
+    parser.add_argument(
+        "--hud-internal-bot-token",
+        type=str,
+        default=ENVS["HUD_INTERNAL_BOT_TOKEN"],
+        help="the hud internal bot token to access hud api",
+    )
     parser.set_defaults(dry_run=True)  # default is True
     args, _ = parser.parse_known_args()
     return args
 
-
 def local_run() -> None:
     """
     method to run in local test environment
@@ -487,6 +511,7 @@ def local_run() -> None:
     # update environment variables for input parameters
     main(
         config_id=args.config_id,
+        hud_access_token=args.hud_internal_bot_token,
         github_access_token=args.github_access_token,
         args=args,
         is_dry_run=args.dry_run,

From eb7e78b2d50b6a4d8699d905e2e8efa9771d7df8 Mon Sep 17 00:00:00 2001
From: Yang Wang <elainewy@meta.com>
Date: Mon, 24 Nov 2025 10:44:22 -0700
Subject: [PATCH 6/6] hidemetadata

---
 .../common/benchmark_time_series_api_model.py         |  2 +-
 .../lambda_function.py                                | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py
index 44442da0a5..da912df2bc 100644
--- a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py
+++ b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py
@@ -45,7 +45,7 @@ class BenchmarkTimeSeriesApiResponse:
 
     @classmethod
     def from_request(
-        cls, url: str, query: dict, access_token:str, timeout: int = 180
+        cls, url: str, query: dict, access_token: str, timeout: int = 180
     ) -> "BenchmarkTimeSeriesApiResponse":
         """
         Send a POST request and parse into BenchmarkTimeSeriesApiResponse.
diff --git a/aws/lambda/benchmark_regression_summary_report/lambda_function.py b/aws/lambda/benchmark_regression_summary_report/lambda_function.py
index 0b0a56d5de..8d125e913c 100644
--- a/aws/lambda/benchmark_regression_summary_report/lambda_function.py
+++ b/aws/lambda/benchmark_regression_summary_report/lambda_function.py
@@ -139,13 +139,15 @@ def process(
                 f"with frequency {report_freq.get_text()}..."
             )
 
-        target, ls, le = self.get_target(config, self.end_time,self.hud_access_token)
+        target, ls, le = self.get_target(config, self.end_time, self.hud_access_token)
         if not target.time_series:
             self.log_info(
                 f"no target data found for time range [{ls},{le}] with frequency {report_freq.get_text()}..."
             )
             return
-        baseline, bs, be = self.get_baseline(config, self.end_time, self.hud_access_token)
+        baseline, bs, be = self.get_baseline(
+            config, self.end_time, self.hud_access_token
+        )
 
         if not baseline.time_series:
             self.log_info(
@@ -199,7 +201,9 @@ def get_target(self, config: BenchmarkConfig, end_time: int, hud_access_token: s
             return None, target_s, target_e
         return target_data, target_s, target_e
 
-    def get_baseline(self, config: BenchmarkConfig, end_time: int, hud_access_token: str):
+    def get_baseline(
+        self, config: BenchmarkConfig, end_time: int, hud_access_token: str
+    ):
         data_range = config.policy.range
         baseline_s = end_time - data_range.total_timedelta_s()
         baseline_e = end_time - data_range.comparison_timedelta_s()
@@ -502,6 +506,7 @@ def parse_args() -> argparse.Namespace:
     args, _ = parser.parse_known_args()
     return args
 
+
 def local_run() -> None:
     """
     method to run in local test environment