From 70cb5b60fad9d7068a6f9ac6cc0bd670d03765ea Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski@arm.com>
Date: Wed, 24 Sep 2025 10:39:53 +0100
Subject: [PATCH 1/2] [mlir] Use MLIR op names when generating FileCheck
 variables in generate-test-checks.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Motivation
----------
Improve readability and stability of autogenerated CHECK lines by using
operation-aware FileCheck variable names instead of generic VAL_N.

What changes
------------
- When possible, variable names are derived from the MLIR op name, e.g.
  `vector.transfer_read` → `TRANSFER_READ_0`.
- Unknown ops (e.g., from out-of-tree dialects) fall back to the prior
  `VAL_N` scheme.

Before
------
```mlir
  // CHECK: %[[VAL_4:.*]] = vector.transfer_read ...
  // CHECK: %[[VAL_5:.*]] = "val_use"(%[[VAL_4]]) : ...
```

After
-----
```mlir
  // CHECK: %[[TRANSFER_READ_0:.*]] = vector.transfer_read ...
  // CHECK: %[[VAL_1:.*]] = "val_use"(%[[TRANSFER_READ_0]]) : ...
```

Rationale
---------
Using op-derived names (e.g., `TRANSFER_READ_0`) makes tests easier to
read and audit. The names are also more stable across unrelated edits:
every op type gets its own counter, so adding or removing an op only
renumbers variables derived from that same op (e.g. there will always be
fewer `TRANSFER_READ_#N` variables than `VAL_#N`).
The fallback to `VAL_N` preserves compatibility for unknown ops.
---
 mlir/utils/generate-test-checks.py | 43 +++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 6 deletions(-)

diff --git a/mlir/utils/generate-test-checks.py b/mlir/utils/generate-test-checks.py
index f80a1811f418c..a6c4ab3b023ed 100755
--- a/mlir/utils/generate-test-checks.py
+++ b/mlir/utils/generate-test-checks.py
@@ -31,6 +31,7 @@
 import os  # Used to advertise this file's name ("autogenerated_note").
 import re
 import sys
+from collections import Counter
 
 ADVERT_BEGIN = "// NOTE: Assertions have been autogenerated by "
 ADVERT_END = """
@@ -45,6 +46,14 @@
 SSA_RE_STR = "[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*"
 SSA_RE = re.compile(SSA_RE_STR)
 
+# Regex matching `dialect.op_name`, where `dialect` is an upstream MLIR
+# dialect (e.g. `vector.transfer_read`).
+DIALECTS = "acc|affine|amdgpu|amx|arith|arm_neon|arm_sve|arm_sme|async|bufferization|cf|complex|dlti|emitc|\
+    func|gpu|index|irdl|linalg|llvm|math|memref|ml_program|mpi|nvgpu|nvvm|omp|pdl_interp|pdl|ptr|quant|\
+    rocdl|scf|shape|shard|smt|sparse_tensor|tensor|ub|vcix|vector|wasmssa|x86vector|xegpu|xevm|spirv|tosa|\
+    transform"
+SSA_OP_NAME_RE = re.compile(rf"\b(?:{DIALECTS})[.]([a-z_]+)\b")
+
 # Regex matching the left-hand side of an assignment
 SSA_RESULTS_STR = r'\s*(%' + SSA_RE_STR + r')(\s*,\s*(%' + SSA_RE_STR + r'))*\s*='
 SSA_RESULTS_RE = re.compile(SSA_RESULTS_STR)
@@ -63,7 +72,12 @@
 class VariableNamer:
     def __init__(self, variable_names):
         self.scopes = []
+        # Counter for generic FileCheck names, e.g. VAL_#N
         self.name_counter = 0
+        # Counters for FileCheck names derived from Op names, e.g.
+        # TRANSFER_READ_#N (based on `vector.transfer_read`). Note, there's a
+        # dedicated counter for every Op type present in the input.
+        self.op_name_counter = Counter()
 
         # Number of variable names to still generate in parent scope
         self.generate_in_parent_scope_left = 0
@@ -77,17 +91,29 @@ def generate_in_parent_scope(self, n):
         self.generate_in_parent_scope_left = n
 
     # Generate a substitution name for the given ssa value name.
-    def generate_name(self, source_variable_name, use_ssa_name):
+    def generate_name(self, source_variable_name, use_ssa_name, op_name=""):
 
         # Compute variable name
-        variable_name = self.variable_names.pop(0) if len(self.variable_names) > 0 else ''
-        if variable_name == '':
+        variable_name = (
+            self.variable_names.pop(0) if len(self.variable_names) > 0 else ""
+        )
+        if variable_name == "":
             # If `use_ssa_name` is set, use the MLIR SSA value name to generate
             # a FileCHeck substation string. As FileCheck requires these
             # strings to start with a character, skip MLIR variables starting
             # with a digit (e.g. `%0`).
+            #
+            # The next fallback option is to use the op name, if the
+            # corresponding match succeeds.
+            #
+            # If neither worked, use a generic name: `VAL_#N`.
             if use_ssa_name and source_variable_name[0].isalpha():
                 variable_name = source_variable_name.upper()
+            elif op_name != "":
+                variable_name = (
+                    op_name.upper() + "_" + str(self.op_name_counter[op_name])
+                )
+                self.op_name_counter[op_name] += 1
             else:
                 variable_name = "VAL_" + str(self.name_counter)
                 self.name_counter += 1
@@ -123,6 +149,7 @@ def num_scopes(self):
     def clear_names(self):
         self.name_counter = 0
         self.used_variable_names = set()
+        self.op_name_counter.clear()
 
 class AttributeNamer:
 
@@ -170,8 +197,12 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re
 
     # Process the rest that contained an SSA value name.
     for chunk in line_chunks:
-        m = SSA_RE.match(chunk)
-        ssa_name = m.group(0) if m is not None else ''
+        ssa = SSA_RE.match(chunk)
+        op_name_with_dialect = SSA_OP_NAME_RE.search(chunk)
+        ssa_name = ssa.group(0) if ssa is not None else ""
+        op_name = (
+            op_name_with_dialect.group(1) if op_name_with_dialect is not None else ""
+        )
 
         # Check if an existing variable exists for this name.
         variable = None
@@ -185,7 +216,7 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re
             output_line += "%[[" + variable + "]]"
         else:
             # Otherwise, generate a new variable.
-            variable = variable_namer.generate_name(ssa_name, use_ssa_name)
+            variable = variable_namer.generate_name(ssa_name, use_ssa_name, op_name)
             if strict_name_re:
                 # Use stricter regexp for the variable name, if requested.
                 # Greedy matching may cause issues with the generic '.*'

From 43ded5be699fcf478e7b79e42fcbedfc31792336 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski@arm.com>
Date: Sun, 5 Oct 2025 17:27:02 +0100
Subject: [PATCH 2/2] Update ADVERT_END note and remove the list of dialects

---
 mlir/utils/generate-test-checks.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/mlir/utils/generate-test-checks.py b/mlir/utils/generate-test-checks.py
index a6c4ab3b023ed..3712a6b9c963d 100755
--- a/mlir/utils/generate-test-checks.py
+++ b/mlir/utils/generate-test-checks.py
@@ -35,10 +35,12 @@
 
 ADVERT_BEGIN = "// NOTE: Assertions have been autogenerated by "
 ADVERT_END = """
-// The script is designed to make adding checks to
-// a test case fast, it is *not* designed to be authoritative
-// about what constitutes a good test! The CHECK should be
-// minimized and named to reflect the test intent.
+// This script is intended to make adding checks to a test case quick and easy.
+// It is *not* authoritative about what constitutes a good test. After using the
+// script, be sure to review and refine the generated checks. For example,
+// CHECK lines should be minimized and named to reflect the test’s intent.
+// For comprehensive guidelines, see:
+//   * https://mlir.llvm.org/getting_started/TestingGuide/
 """
 
 
@@ -46,13 +48,8 @@
 SSA_RE_STR = "[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*"
 SSA_RE = re.compile(SSA_RE_STR)
 
-# Regex matching `dialect.op_name`, where `dialect` is an upstream MLIR
-# dialect (e.g. `vector.transfer_read`).
-DIALECTS = "acc|affine|amdgpu|amx|arith|arm_neon|arm_sve|arm_sme|async|bufferization|cf|complex|dlti|emitc|\
-    func|gpu|index|irdl|linalg|llvm|math|memref|ml_program|mpi|nvgpu|nvvm|omp|pdl_interp|pdl|ptr|quant|\
-    rocdl|scf|shape|shard|smt|sparse_tensor|tensor|ub|vcix|vector|wasmssa|x86vector|xegpu|xevm|spirv|tosa|\
-    transform"
-SSA_OP_NAME_RE = re.compile(rf"\b(?:{DIALECTS})[.]([a-z_]+)\b")
+# Regex matching ` = dialect.op_name` and capturing `op_name` (e.g. `transfer_read`).
+SSA_OP_NAME_RE = re.compile(r"\b(?:\s=\s[a-z_]+)[.]([a-z_]+)\b")
 
 # Regex matching the left-hand side of an assignment
 SSA_RESULTS_STR = r'\s*(%' + SSA_RE_STR + r')(\s*,\s*(%' + SSA_RE_STR + r'))*\s*='