From 9a887e297aa2be76f514012c41f8edab59b75a30 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 21 Nov 2025 16:47:16 -0700 Subject: [PATCH 1/6] hidemetadata --- .../common/benchmark_time_series_api_model.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py index e825a87c68..06aa758ff7 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py +++ b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py @@ -60,7 +60,18 @@ def from_request( requests.exceptions.RequestException if network/timeout/HTTP error RuntimeError if the API returns an "error" field or malformed data """ - resp = requests.post(url, json=query, timeout=timeout) + + headers = { + # Looks like a real browser instead of python-requests + "User-Agent": ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/120.0.0.0 Safari/537.36" + ), + "Accept": "application/json,text/html;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", + } + resp = requests.post(url, json=query, timeout=timeout, headers=headers) resp.raise_for_status() payload = resp.json() From 46dfe98c4d9d38bced84d0e2513dc0d6b170fa44 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 21 Nov 2025 17:05:31 -0700 Subject: [PATCH 2/6] hidemetadata --- .../common/config.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/aws/lambda/benchmark_regression_summary_report/common/config.py b/aws/lambda/benchmark_regression_summary_report/common/config.py index fd267360a1..e91e9c8374 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/config.py +++ b/aws/lambda/benchmark_regression_summary_report/common/config.py @@ -89,12 +89,12 @@ hud_info={ "url": "https://hud.pytorch.org/benchmark/v3/dashboard/torchao_micro_api_benchmark", }, - # set baseline from past 3-6 days, and compare with the lastest 4 day + # set baseline from past 4-8 days, and compare with the lastest 4 day policy=Policy( frequency=Frequency(value=1, unit="days"), range=RangeConfig( - baseline=DayRangeWindow(value=3), - comparison=DayRangeWindow(value=3), + baseline=DayRangeWindow(value=4), + comparison=DayRangeWindow(value=4), ), metrics={ "bfloat16 fwd time (ms)": RegressionPolicy( @@ -141,6 +141,7 @@ "branches": ["main"], "repo": "pytorch/pytorch", "device": "", + "arch": "", "benchmarkName": "PyTorch operator microbenchmark", "startTime": "{{ startTime }}", "stopTime": "{{ stopTime }}" @@ -152,12 +153,12 @@ hud_info={ "url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_operator_microbenchmark", }, - # set baseline from past 3-6 days, and compare with the lastest 4 day + # set baseline from past 4-8 days, and compare with the lastest 4 day policy=Policy( frequency=Frequency(value=1, unit="days"), range=RangeConfig( - baseline=DayRangeWindow(value=3), - comparison=DayRangeWindow(value=3), + baseline=DayRangeWindow(value=4), + comparison=DayRangeWindow(value=4), ), metrics={ "latency": RegressionPolicy( From 6d580233bf135c016846ec43db8479ce08519cae Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 21 Nov 2025 17:06:43 -0700 Subject: [PATCH 3/6] hidemetadata --- .../common/config.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/aws/lambda/benchmark_regression_summary_report/common/config.py b/aws/lambda/benchmark_regression_summary_report/common/config.py index e91e9c8374..5cb060245d 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/config.py +++ b/aws/lambda/benchmark_regression_summary_report/common/config.py @@ -37,12 +37,12 @@ hud_info={ "url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_helion", }, - # set baseline from past 4-8 days, and compare with the lastest 4 day + # set baseline from past 3-6 days, and compare with the lastest 3 day policy=Policy( frequency=Frequency(value=1, unit="days"), range=RangeConfig( - baseline=DayRangeWindow(value=4), - comparison=DayRangeWindow(value=4), + baseline=DayRangeWindow(value=3), + comparison=DayRangeWindow(value=3), ), metrics={ "helion_speedup": RegressionPolicy( @@ -153,12 +153,12 @@ hud_info={ "url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_operator_microbenchmark", }, - # set baseline from past 4-8 days, and compare with the lastest 4 day + # set baseline from past 3-6 days, and compare with the lastest 3 day policy=Policy( frequency=Frequency(value=1, unit="days"), range=RangeConfig( - baseline=DayRangeWindow(value=4), - comparison=DayRangeWindow(value=4), + baseline=DayRangeWindow(value=3), + comparison=DayRangeWindow(value=3), ), metrics={ "latency": RegressionPolicy( From 85201682bffcafc79057c01f82d6711845e6af0b Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 21 Nov 2025 17:08:36 -0700 Subject: [PATCH 4/6] hidemetadata --- .../common/config.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/aws/lambda/benchmark_regression_summary_report/common/config.py b/aws/lambda/benchmark_regression_summary_report/common/config.py index 5cb060245d..1068aa76dc 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/config.py +++ b/aws/lambda/benchmark_regression_summary_report/common/config.py @@ -37,12 +37,12 @@ hud_info={ "url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_helion", }, - # set baseline from past 3-6 days, and compare with the lastest 3 day + # set baseline from past 4-8 days, and compare with the lastest 4 day policy=Policy( frequency=Frequency(value=1, unit="days"), range=RangeConfig( - baseline=DayRangeWindow(value=3), - comparison=DayRangeWindow(value=3), + baseline=DayRangeWindow(value=4), + comparison=DayRangeWindow(value=4), ), metrics={ "helion_speedup": RegressionPolicy( @@ -89,12 +89,12 @@ hud_info={ "url": "https://hud.pytorch.org/benchmark/v3/dashboard/torchao_micro_api_benchmark", }, - # set baseline from past 4-8 days, and compare with the lastest 4 day + # set baseline from past 3-6 days, and compare with the lastest 3 days policy=Policy( frequency=Frequency(value=1, unit="days"), range=RangeConfig( - baseline=DayRangeWindow(value=4), - comparison=DayRangeWindow(value=4), + baseline=DayRangeWindow(value=3), + comparison=DayRangeWindow(value=3), ), metrics={ "bfloat16 fwd time (ms)": RegressionPolicy( From acb9a93ee568da60137b5423d2d606aa5faa8b9b Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 21 Nov 2025 19:56:18 -0700 Subject: [PATCH 5/6] hidemetadata --- .../common/benchmark_time_series_api_model.py | 11 +----- .../lambda_function.py | 37 ++++++++++++++++--- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py index 06aa758ff7..44442da0a5 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py +++ b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py @@ -45,7 +45,7 @@ class BenchmarkTimeSeriesApiResponse: @classmethod def from_request( - cls, url: str, query: dict, timeout: int = 180 + cls, url: str, query: dict, access_token:str, timeout: int = 180 ) -> "BenchmarkTimeSeriesApiResponse": """ Send a POST request and parse into BenchmarkTimeSeriesApiResponse. @@ -62,14 +62,7 @@ def from_request( """ headers = { - # Looks like a real browser instead of python-requests - "User-Agent": ( - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/120.0.0.0 Safari/537.36" - ), - "Accept": "application/json,text/html;q=0.9,*/*;q=0.8", - "Accept-Language": "en-US,en;q=0.9", + "x-hud-internal-bot": access_token, } resp = requests.post(url, json=query, timeout=timeout, headers=headers) resp.raise_for_status() diff --git a/aws/lambda/benchmark_regression_summary_report/lambda_function.py b/aws/lambda/benchmark_regression_summary_report/lambda_function.py index e6a06ed698..0b0a56d5de 100644 --- a/aws/lambda/benchmark_regression_summary_report/lambda_function.py +++ b/aws/lambda/benchmark_regression_summary_report/lambda_function.py @@ -32,6 +32,7 @@ "CLICKHOUSE_ENDPOINT": os.getenv("CLICKHOUSE_ENDPOINT", ""), "CLICKHOUSE_PASSWORD": os.getenv("CLICKHOUSE_PASSWORD", ""), "CLICKHOUSE_USERNAME": os.getenv("CLICKHOUSE_USERNAME", ""), + "HUD_INTERNAL_BOT_TOKEN": os.getenv("HUD_INTERNAL_BOT_TOKEN", ""), } @@ -71,6 +72,7 @@ def __init__( self, config_id: str, end_time: int, + hud_access_token: str = "", is_dry_run: bool = False, is_pass_check: bool = False, ) -> None: @@ -78,6 +80,7 @@ def __init__( self.is_pass_check = is_pass_check self.config_id = config_id self.end_time = end_time + self.hud_access_token = hud_access_token def log_info(self, msg: str): logger.info("[%s][%s] %s", self.end_time, self.config_id, msg) @@ -136,13 +139,13 @@ def process( f"with frequency {report_freq.get_text()}..." ) - target, ls, le = self.get_target(config, self.end_time) + target, ls, le = self.get_target(config, self.end_time,self.hud_access_token) if not target.time_series: self.log_info( f"no target data found for time range [{ls},{le}] with frequency {report_freq.get_text()}..." ) return - baseline, bs, be = self.get_baseline(config, self.end_time) + baseline, bs, be = self.get_baseline(config, self.end_time, self.hud_access_token) if not baseline.time_series: self.log_info( @@ -165,7 +168,7 @@ def process( reportManager.run(cc, ENVS["GITHUB_TOKEN"]) return - def get_target(self, config: BenchmarkConfig, end_time: int): + def get_target(self, config: BenchmarkConfig, end_time: int, hud_access_token: str): data_range = config.policy.range target_s = end_time - data_range.comparison_timedelta_s() target_e = end_time @@ -178,10 +181,16 @@ def get_target(self, config: BenchmarkConfig, end_time: int): start_time=target_s, end_time=target_e, source=config.source, + access_token=hud_access_token, ) self.log_info( f"done. found {len(target_data.time_series)} # of data groups, with time range {target_data.time_range}", ) + + if len(target_data.time_series) > 0: + self.log_info( + f"peeking the first data: {target_data.time_series[0]}", + ) if not target_data.time_range or not target_data.time_range.end: return None, target_s, target_e @@ -190,7 +199,7 @@ def get_target(self, config: BenchmarkConfig, end_time: int): return None, target_s, target_e return target_data, target_s, target_e - def get_baseline(self, config: BenchmarkConfig, end_time: int): + def get_baseline(self, config: BenchmarkConfig, end_time: int, hud_access_token: str): data_range = config.policy.range baseline_s = end_time - data_range.total_timedelta_s() baseline_e = end_time - data_range.comparison_timedelta_s() @@ -204,12 +213,18 @@ def get_baseline(self, config: BenchmarkConfig, end_time: int): start_time=baseline_s, end_time=baseline_e, source=config.source, + access_token=hud_access_token, ) self.log_info( f"Done. found {len(raw_data.time_series)} # of data, with time range {raw_data.time_range}", ) + if len(raw_data.time_series) > 0: + self.log_info( + f"peeking the first data: {raw_data.time_series[0]}", + ) + baseline_latest_ts = int(isoparse(raw_data.time_range.end).timestamp()) if not self.should_use_data(baseline_latest_ts, baseline_e): @@ -245,6 +260,7 @@ def _fetch_from_benchmark_ts_api( config_id: str, end_time: int, start_time: int, + access_token: str, source: BenchmarkApiSource, ): str_end_time = format_ts_with_t(end_time) @@ -261,7 +277,7 @@ def _fetch_from_benchmark_ts_api( t0 = time.perf_counter() try: resp: BenchmarkTimeSeriesApiResponse = ( - BenchmarkTimeSeriesApiResponse.from_request(url, query) + BenchmarkTimeSeriesApiResponse.from_request(url, query, access_token) ) elapsed_ms = (time.perf_counter() - t0) * 1000.0 @@ -353,6 +369,7 @@ def _get_latest_record_ts( def main( config_id: str, github_access_token: str = "", + hud_access_token: str = "", args: Optional[argparse.Namespace] = None, *, is_dry_run: bool = False, @@ -392,6 +409,7 @@ def main( end_time=end_time_ts, is_dry_run=is_dry_run, is_pass_check=is_forced, + hud_access_token=hud_access_token, ) processor.process(args=args) except Exception as e: @@ -411,6 +429,7 @@ def lambda_handler(event: Any, context: Any) -> None: main( config_id=config_id, github_access_token=ENVS["GITHUB_TOKEN"], + hud_access_token=ENVS["HUD_INTERNAL_BOT_TOKEN"], ) return @@ -473,11 +492,16 @@ def parse_args() -> argparse.Namespace: type=str, help="the end time to run, in format of YYYY-MM-DD HH:MM:SS", ) + parser.add_argument( + "--hud-internal-bot-token", + type=str, + default=ENVS["HUD_INTERNAL_BOT_TOKEN"], + help="the hud internal bot token to access hud api", + ) parser.set_defaults(dry_run=True) # default is True args, _ = parser.parse_known_args() return args - def local_run() -> None: """ method to run in local test environment @@ -487,6 +511,7 @@ def local_run() -> None: # update environment variables for input parameters main( config_id=args.config_id, + hud_access_token=args.hud_internal_bot_token, github_access_token=args.github_access_token, args=args, is_dry_run=args.dry_run, From eb7e78b2d50b6a4d8699d905e2e8efa9771d7df8 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Mon, 24 Nov 2025 10:44:22 -0700 Subject: [PATCH 6/6] hidemetadata --- .../common/benchmark_time_series_api_model.py | 2 +- .../lambda_function.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py index 44442da0a5..da912df2bc 100644 --- a/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py +++ b/aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py @@ -45,7 +45,7 @@ class BenchmarkTimeSeriesApiResponse: @classmethod def from_request( - cls, url: str, query: dict, access_token:str, timeout: int = 180 + cls, url: str, query: dict, access_token: str, timeout: int = 180 ) -> "BenchmarkTimeSeriesApiResponse": """ Send a POST request and parse into BenchmarkTimeSeriesApiResponse. diff --git a/aws/lambda/benchmark_regression_summary_report/lambda_function.py b/aws/lambda/benchmark_regression_summary_report/lambda_function.py index 0b0a56d5de..8d125e913c 100644 --- a/aws/lambda/benchmark_regression_summary_report/lambda_function.py +++ b/aws/lambda/benchmark_regression_summary_report/lambda_function.py @@ -139,13 +139,15 @@ def process( f"with frequency {report_freq.get_text()}..." ) - target, ls, le = self.get_target(config, self.end_time,self.hud_access_token) + target, ls, le = self.get_target(config, self.end_time, self.hud_access_token) if not target.time_series: self.log_info( f"no target data found for time range [{ls},{le}] with frequency {report_freq.get_text()}..." ) return - baseline, bs, be = self.get_baseline(config, self.end_time, self.hud_access_token) + baseline, bs, be = self.get_baseline( + config, self.end_time, self.hud_access_token + ) if not baseline.time_series: self.log_info( @@ -199,7 +201,9 @@ def get_target(self, config: BenchmarkConfig, end_time: int, hud_access_token: s return None, target_s, target_e return target_data, target_s, target_e - def get_baseline(self, config: BenchmarkConfig, end_time: int, hud_access_token: str): + def get_baseline( + self, config: BenchmarkConfig, end_time: int, hud_access_token: str + ): data_range = config.policy.range baseline_s = end_time - data_range.total_timedelta_s() baseline_e = end_time - data_range.comparison_timedelta_s() @@ -502,6 +506,7 @@ def parse_args() -> argparse.Namespace: args, _ = parser.parse_known_args() return args + def local_run() -> None: """ method to run in local test environment