Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 45 additions & 7 deletions superbench/benchmarks/micro_benchmarks/hipblaslt_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""Module of the hipBlasLt GEMM benchmark."""

import os
import re

from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkRegistry, Platform, ReturnCode
Expand Down Expand Up @@ -110,7 +111,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
lines = raw_output.splitlines()
index = None

# Find the line containing 'hipblaslt-Gflops'
# Find the header line containing 'hipblaslt-Gflops'
for i, line in enumerate(lines):
if 'hipblaslt-Gflops' in line:
index = i
Expand All @@ -119,16 +120,53 @@ def _process_raw_result(self, cmd_idx, raw_output):
if index is None:
raise ValueError('Line with "hipblaslt-Gflops" not found in the log.')

# Split the line into fields using a comma as the delimiter
# Parse the header and resolve every key column (batch_count/m/n/k/hipblaslt-Gflops)
# by name. This keeps the parser forward-compatible across known and future
# hipBLASLt output formats (v600: 23 columns; v1500: 34 columns with extra
# a_type/b_type/c_type/scaleA-D/amaxD/bias_type/aux_type/hipblaslt-GB/s),
# without relying on any fixed column position.
header_fields = lines[index].strip().split(',')
# Strip leading rank markers like '[0]' or '[0]:' from the first header field.
# Use a regex anchored at the start so a column name that legitimately contains
# ']' (unlikely, but defensive) is not truncated.
header_fields[0] = re.sub(r'^\s*\[\d+\]:?', '', header_fields[0])

# Build a name -> column-index map (first occurrence wins for any duplicates).
col_idx_by_name = {}
for col_idx, col_name in enumerate(header_fields):
col_idx_by_name.setdefault(col_name.strip(), col_idx)

required_columns = ['batch_count', 'm', 'n', 'k', 'hipblaslt-Gflops']
missing_columns = [c for c in required_columns if c not in col_idx_by_name]
if missing_columns:
raise ValueError(f'Required column(s) not found in header: {missing_columns}.')

# Ensure a data line follows the header (e.g., hipblaslt-bench may have
# crashed after printing the header).
if index + 1 >= len(lines):
raise ValueError('Data line missing after "hipblaslt-Gflops" header.')

# Split the data line into fields using a comma as the delimiter
fields = lines[index + 1].strip().split(',')

# Check the number of fields and the format of the first two fields
if len(fields) != 23:
raise ValueError('Invalid result')
# Validate that the data line has the same number of columns as the header
if len(fields) != len(header_fields):
raise ValueError(
f'Field count mismatch: header has {len(header_fields)} columns '
f'but data has {len(fields)} columns'
)

# Resolve every key value by header name and strip whitespace from each, so
# any padding around CSV values does not bleed into the metric key.
batch_count = fields[col_idx_by_name['batch_count']].strip()
m_val = fields[col_idx_by_name['m']].strip()
n_val = fields[col_idx_by_name['n']].strip()
k_val = fields[col_idx_by_name['k']].strip()
gflops_col = col_idx_by_name['hipblaslt-Gflops']

self._result.add_result(
f'{self._precision_in_commands[cmd_idx]}_{fields[3]}_{"_".join(fields[4:7])}_flops',
float(fields[-2]) / 1000
f'{self._precision_in_commands[cmd_idx]}_{batch_count}_{m_val}_{n_val}_{k_val}_flops',
float(fields[gflops_col]) / 1000
)
except BaseException as e:
self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
Expand Down
90 changes: 87 additions & 3 deletions tests/benchmarks/micro_benchmarks/test_hipblaslt_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def test_hipblaslt_gemm_result_parsing(self):
benchmark._args = SimpleNamespace(shapes=['896,896,896'], in_types=['fp16'], log_raw_data=False)
benchmark._result = BenchmarkResult(self.benchmark_name, BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1)

# Old format (hipBLASLt v600, 23 columns)
example_raw_output = """
hipBLASLt version: 600
hipBLASLt git version: 52776da
Expand All @@ -101,12 +102,95 @@ def test_hipblaslt_gemm_result_parsing(self):
[0]transA,transB,grouped_gemm,batch_count,m,n,k,alpha,lda,stride_a,beta,ldb,stride_b,ldc,stride_c,ldd,stride_d,d_type,compute_type,activation_type,bias_vector,hipblaslt-Gflops,us
N,N,0,1,896,896,896,1,896,802816,0,896,802816,896,802816,896,802816,fp16_r,f32_r,none,0, 58624.5, 24.54
"""
# Positive case - valid raw output
# Positive case - valid raw output (old format)
self.assertTrue(benchmark._process_raw_result(0, example_raw_output))
self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code)
Comment thread
polarG marked this conversation as resolved.

self.assertEqual(2, len(benchmark.result))
self.assertEqual(58.6245, benchmark.result['fp16_1_896_896_896_flops'][0])
self.assertIn('fp16_1_896_896_896_flops', benchmark.result)
self.assertAlmostEqual(58.6245, benchmark.result['fp16_1_896_896_896_flops'][0], places=4)

# Negative case - invalid raw output
self.assertFalse(benchmark._process_raw_result(1, 'HipBLAS API failed'))

def test_hipblaslt_gemm_result_parsing_new_format(self):
    """Check that the 34-column hipblaslt-bench output (hipBLASLt v1500) is parsed."""
    bench = self.get_benchmark()
    self.assertTrue(bench._preprocess())
    bench._args = SimpleNamespace(
        shapes=['4096,4096,4096'],
        in_types=['fp16'],
        log_raw_data=False,
    )
    bench._result = BenchmarkResult(
        self.benchmark_name,
        BenchmarkType.MICRO,
        ReturnCode.SUCCESS,
        run_count=1,
    )

    # Sample log in the v1500 layout. Its header has 34 columns; besides the
    # fields needed for the metric it also carries a_type/b_type/c_type,
    # scaleA-D, amaxD, bias_type, aux_type and hipblaslt-GB/s.
    raw_output_v1500 = """
hipBLASLt version: 1500
hipBLASLt git version: 8c69191d
Query device success: there are 1 devices. (Target device ID is 0)
Device ID 0 : gfx942:sramecc+:xnack-
with 205.6 GB memory, max. SCLK 2100 MHz, max. MCLK 1300 MHz, compute capability 9.4
maxGridDimX 2147483647, sharedMemPerBlock 65.5 KB, maxThreadsPerBlock 1024, warpSize 64

Is supported 1 / Total solutions: 1
[0]:transA,transB,grouped_gemm,batch_count,m,n,k,alpha,lda,stride_a,beta,ldb,stride_b,ldc,stride_c,ldd,stride_d,a_type,b_type,c_type,d_type,compute_type,scaleA,scaleB,scaleC,scaleD,amaxD,activation_type,bias_vector,bias_type,aux_type,hipblaslt-Gflops,hipblaslt-GB/s,us
N,N,0,1,4096,4096,4096,1,4096,16777216,0,4096,16777216,4096,16777216,4096,16777216,f16_r,f16_r,f16_r,f16_r,f32_r,0,0,0,0,0,none,0,f16_r,f16_r,678209,462.62,202.65
"""
    # Positive case: the parser accepts the log and leaves the return code untouched.
    parsed_ok = bench._process_raw_result(0, raw_output_v1500)
    self.assertTrue(parsed_ok)
    self.assertEqual(ReturnCode.SUCCESS, bench.return_code)

    # The hipblaslt-Gflops value 678209 is divided by 1000 before being stored.
    metric_name = 'fp16_1_4096_4096_4096_flops'
    self.assertIn(metric_name, bench.result)
    self.assertAlmostEqual(678.209, bench.result[metric_name][0], places=3)

def test_hipblaslt_gemm_result_parsing_future_format_with_inserted_column(self):
    """Check that parsing survives an unknown column inserted ahead of batch_count.

    The metric key has to come from the header-named columns rather than from
    fixed positions, so an extra column must not shift the values that end up
    in the key.
    """
    bench = self.get_benchmark()
    self.assertTrue(bench._preprocess())
    bench._args = SimpleNamespace(
        shapes=['4096,4096,4096'],
        in_types=['fp16'],
        log_raw_data=False,
    )
    bench._result = BenchmarkResult(
        self.benchmark_name,
        BenchmarkType.MICRO,
        ReturnCode.SUCCESS,
        run_count=1,
    )

    # Made-up layout: 'fake_new_col' sits in front of batch_count, and the
    # batch_count/m/n/k values are surrounded by spaces to confirm each value
    # is stripped before it is put into the metric key. The header is kept
    # minimal so the padded data line stays short.
    raw_output_future = """
hipBLASLt version: 9999
Is supported 1 / Total solutions: 1
[0]:transA,transB,fake_new_col,batch_count,m,n,k,hipblaslt-Gflops,us
N,N,FAKE, 1 , 4096 , 4096 , 4096 ,678209,202.65
"""
    parsed_ok = bench._process_raw_result(0, raw_output_future)
    self.assertTrue(parsed_ok)
    self.assertEqual(ReturnCode.SUCCESS, bench.return_code)

    # The key resolved through the header must exist and hold the scaled value.
    metric_name = 'fp16_1_4096_4096_4096_flops'
    self.assertIn(metric_name, bench.result)
    self.assertAlmostEqual(678.209, bench.result[metric_name][0], places=3)

    # Nothing from the inserted column may show up in any result key.
    for result_key in bench.result:
        for unexpected in ('FAKE', 'fake_new_col'):
            self.assertNotIn(unexpected, result_key)

def test_hipblaslt_gemm_result_parsing_missing_required_column(self):
    """Check that parsing reports a failure when the batch_count column is absent.

    An unrecognised layout should surface as a parsing failure rather than as
    a metric stored under a wrong key.
    """
    bench = self.get_benchmark()
    self.assertTrue(bench._preprocess())
    bench._args = SimpleNamespace(
        shapes=['896,896,896'],
        in_types=['fp16'],
        log_raw_data=False,
    )
    bench._result = BenchmarkResult(
        self.benchmark_name,
        BenchmarkType.MICRO,
        ReturnCode.SUCCESS,
        run_count=1,
    )

    # Both the header and the data row lack batch_count.
    raw_output_without_batch_count = """
hipBLASLt version: 600
Is supported 1 / Total solutions: 1
[0]transA,transB,grouped_gemm,m,n,k,alpha,lda,stride_a,beta,ldb,stride_b,ldc,stride_c,ldd,stride_d,d_type,compute_type,activation_type,bias_vector,hipblaslt-Gflops,us
N,N,0,896,896,896,1,896,802816,0,896,802816,896,802816,896,802816,fp16_r,f32_r,none,0, 58624.5, 24.54
"""
    parsed_ok = bench._process_raw_result(0, raw_output_without_batch_count)
    self.assertFalse(parsed_ok)
    self.assertEqual(
        ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE,
        bench.return_code,
    )
Loading