From 70cb5b60fad9d7068a6f9ac6cc0bd670d03765ea Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Wed, 24 Sep 2025 10:39:53 +0100 Subject: [PATCH 1/2] [mlir] Use MLIR op names when generating FileCheck variables in generate-test-checks.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Motivation ---------- Improve readability and stability of autogenerated CHECK lines by using operation-aware FileCheck variable names instead of generic VAL_N. What changes ------------ - When possible, variable names are derived from the MLIR op name, e.g. `vector.transfer_read` → `TRANSFER_READ_0`. - Unknown ops (e.g., from out-of-tree dialects) fall back to the prior `VAL_N` scheme. Before ------ ```mlir // CHECK: %[[VAL_4:.*]] = vector.transfer_read ... // CHECK: %[[VAL_5:.*]] = "val_use"(%[[VAL_4]]) : ... ``` After ----- ```mlir // CHECK: %[[TRANSFER_READ_0:.*]] = vector.transfer_read ... // CHECK: %[[VAL_1:.*]] = "val_use"(%[[TRANSFER_READ_0]]) : ... ``` Rationale --------- Using op-derived names (e.g., `TRANSFER_READ_0`) makes tests easier to read and audit, while remaining more stable across unrelated edits (e.g. there will always be fewer `TRANSFER_READ_#N` variables than `VAL_#N`). The fallback to `VAL_N` preserves compatibility for unknown ops. --- mlir/utils/generate-test-checks.py | 43 +++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/mlir/utils/generate-test-checks.py b/mlir/utils/generate-test-checks.py index f80a1811f418c..a6c4ab3b023ed 100755 --- a/mlir/utils/generate-test-checks.py +++ b/mlir/utils/generate-test-checks.py @@ -31,6 +31,7 @@ import os # Used to advertise this file's name ("autogenerated_note"). import re import sys +from collections import Counter ADVERT_BEGIN = "// NOTE: Assertions have been autogenerated by " ADVERT_END = """ @@ -45,6 +46,14 @@ SSA_RE_STR = "[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*" SSA_RE = re.compile(SSA_RE_STR) +# Regex matching `dialect.op_name`, where `dialect` is an upstream MLIR +# dialect (e.g. `vector.transfer_read`). +DIALECTS = "acc|affine|amdgpu|amx|arith|arm_neon|arm_sve|arm_sme|async|bufferization|cf|complex|dlti|emitc|\ + func|gpu|index|irdl|linalg|llvm|math|memref|ml_program|mpi|nvgpu|nvvm|omp|pdl_interp|pdl|ptr|quant|\ + rocdl|scf|shape|shard|smt|sparse_tensor|tensor|ub|vcix|vector|wasmssa|x86vector|xegpu|xevm|spirv|tosa|\ + transform" +SSA_OP_NAME_RE = re.compile(rf"\b(?:{DIALECTS})[.]([a-z_]+)\b") + # Regex matching the left-hand side of an assignment SSA_RESULTS_STR = r'\s*(%' + SSA_RE_STR + r')(\s*,\s*(%' + SSA_RE_STR + r'))*\s*=' SSA_RESULTS_RE = re.compile(SSA_RESULTS_STR) @@ -63,7 +72,12 @@ class VariableNamer: def __init__(self, variable_names): self.scopes = [] + # Counter for generic FileCHeck names, e.g. VAL_#N self.name_counter = 0 + # Counters for FileCheck names derived from Op names, e.g. + # TRANSFER_READ_#N (based on `vector.transfer_read`). Note, there's a + # dedicated counter for every Op type present in the input. + self.op_name_counter = Counter() # Number of variable names to still generate in parent scope self.generate_in_parent_scope_left = 0 @@ -77,17 +91,29 @@ def generate_in_parent_scope(self, n): self.generate_in_parent_scope_left = n # Generate a substitution name for the given ssa value name. - def generate_name(self, source_variable_name, use_ssa_name): + def generate_name(self, source_variable_name, use_ssa_name, op_name=""): # Compute variable name - variable_name = self.variable_names.pop(0) if len(self.variable_names) > 0 else '' - if variable_name == '': + variable_name = ( + self.variable_names.pop(0) if len(self.variable_names) > 0 else "" + ) + if variable_name == "": # If `use_ssa_name` is set, use the MLIR SSA value name to generate # a FileCHeck substation string. As FileCheck requires these # strings to start with a character, skip MLIR variables starting # with a digit (e.g. `%0`). + # + # The next fallback option is to use the op name, if the + # corresponding match succeeds. + # + # If neither worked, use a generic name: `VAL_#N`. if use_ssa_name and source_variable_name[0].isalpha(): variable_name = source_variable_name.upper() + elif op_name != "": + variable_name = ( + op_name.upper() + "_" + str(self.op_name_counter[op_name]) + ) + self.op_name_counter[op_name] += 1 else: variable_name = "VAL_" + str(self.name_counter) self.name_counter += 1 @@ -123,6 +149,7 @@ def num_scopes(self): def clear_names(self): self.name_counter = 0 self.used_variable_names = set() + self.op_name_counter.clear() class AttributeNamer: @@ -170,8 +197,12 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re # Process the rest that contained an SSA value name. for chunk in line_chunks: - m = SSA_RE.match(chunk) - ssa_name = m.group(0) if m is not None else '' + ssa = SSA_RE.match(chunk) + op_name_with_dialect = SSA_OP_NAME_RE.search(chunk) + ssa_name = ssa.group(0) if ssa is not None else "" + op_name = ( + op_name_with_dialect.group(1) if op_name_with_dialect is not None else "" + ) # Check if an existing variable exists for this name. variable = None @@ -185,7 +216,7 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re output_line += "%[[" + variable + "]]" else: # Otherwise, generate a new variable. - variable = variable_namer.generate_name(ssa_name, use_ssa_name) + variable = variable_namer.generate_name(ssa_name, use_ssa_name, op_name) if strict_name_re: # Use stricter regexp for the variable name, if requested. # Greedy matching may cause issues with the generic '.*' From 43ded5be699fcf478e7b79e42fcbedfc31792336 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Sun, 5 Oct 2025 17:27:02 +0100 Subject: [PATCH 2/2] Update ADVERT_END note and remove the list of dialects --- mlir/utils/generate-test-checks.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/mlir/utils/generate-test-checks.py b/mlir/utils/generate-test-checks.py index a6c4ab3b023ed..3712a6b9c963d 100755 --- a/mlir/utils/generate-test-checks.py +++ b/mlir/utils/generate-test-checks.py @@ -35,10 +35,12 @@ ADVERT_BEGIN = "// NOTE: Assertions have been autogenerated by " ADVERT_END = """ -// The script is designed to make adding checks to -// a test case fast, it is *not* designed to be authoritative -// about what constitutes a good test! The CHECK should be -// minimized and named to reflect the test intent. +// This script is intended to make adding checks to a test case quick and easy. +// It is *not* authoritative about what constitutes a good test. After using the +// script, be sure to review and refine the generated checks. For example, +// CHECK lines should be minimized and named to reflect the test’s intent. +// For comprehensive guidelines, see: +// * https://mlir.llvm.org/getting_started/TestingGuide/ """ @@ -46,13 +48,8 @@ SSA_RE_STR = "[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*" SSA_RE = re.compile(SSA_RE_STR) -# Regex matching `dialect.op_name`, where `dialect` is an upstream MLIR -# dialect (e.g. `vector.transfer_read`). -DIALECTS = "acc|affine|amdgpu|amx|arith|arm_neon|arm_sve|arm_sme|async|bufferization|cf|complex|dlti|emitc|\ - func|gpu|index|irdl|linalg|llvm|math|memref|ml_program|mpi|nvgpu|nvvm|omp|pdl_interp|pdl|ptr|quant|\ - rocdl|scf|shape|shard|smt|sparse_tensor|tensor|ub|vcix|vector|wasmssa|x86vector|xegpu|xevm|spirv|tosa|\ - transform" -SSA_OP_NAME_RE = re.compile(rf"\b(?:{DIALECTS})[.]([a-z_]+)\b") +# Regex matching `dialect.op_name` (e.g. `vector.transfer_read`). +SSA_OP_NAME_RE = re.compile(r"\b(?:\s=\s[a-z_]+)[.]([a-z_]+)\b") # Regex matching the left-hand side of an assignment SSA_RESULTS_STR = r'\s*(%' + SSA_RE_STR + r')(\s*,\s*(%' + SSA_RE_STR + r'))*\s*='