From 063c92a14f9ba0e2fb0aa320cbc32bb15ef28299 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Thu, 4 Sep 2025 09:02:16 +0000 Subject: [PATCH] [mlir][sme] Use signed comparison in ArmSMEToSCF This change switches from `ult` to `slt` in the comparison generated by `TileLoadOpWithMaskAndPadNonZeroConversion`. From the updated test: ``` %[[ROW_IS_ACTIVE:.*]] = arith.cmpi slt, %[[TILE_SLICE_INDEX]], %[[NUM_ROWS]] : index ``` Here: - `%[[TILE_SLICE_INDEX]]` is always non-negative. - `%[[NUM_ROWS]]` represents the number of remaining rows. `%[[NUM_ROWS]]` is computed as: ```mlir %c-4 = arith.constant -4 : index %c-4_vscale = arith.muli %c-4, %vscale_11 : index %num_rows_remaining = arith.addi %num_rows_init, %c-4_vscale : index ``` (inserted by the "arm-sme-vector-legalization" pass, see `VectorLegalization.cpp`). Because of the subtraction, `%num_rows_remaining` can be negative. Therefore, the comparison must be signed (`slt`) rather than unsigned (`ult`). Fixes: https://github.com/iree-org/iree/issues/21714 --- mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp | 2 +- mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp index c69ede988b3dc..bad53c0a4a97a 100644 --- a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp +++ b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp @@ -309,7 +309,7 @@ struct TileLoadOpWithMaskAndPadNonZeroConversion // Combine masks. auto rowIsActive = arith::CmpIOp::create( - rewriter, loc, arith::CmpIPredicate::ult, tileSliceIndex, numRows); + rewriter, loc, arith::CmpIPredicate::slt, tileSliceIndex, numRows); auto rowIsActiveI32 = arith::ExtSIOp::create( rewriter, loc, rewriter.getI32Type(), rowIsActive); auto mask = diff --git a/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir b/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir index 6f2766ddc6e6e..e7bbedc586012 100644 --- a/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir +++ b/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir @@ -81,7 +81,7 @@ func.func @arm_sme_tile_load_hor_with_mask_and_pad_zero(%src : memref) // CHECK-DAG: %[[VSCALE:.*]] = vector.vscale // CHECK-NEXT: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[C4]], %[[VSCALE]] : index // CHECK-NEXT: scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_TILE_SLICES]] step %[[C1]] iter_args(%[[CURRENT_TILE:.*]] = %[[TILE]]) -> (vector<[4]x[4]xi32>) { -// CHECK-NEXT: %[[ROW_IS_ACTIVE:.*]] = arith.cmpi ult, %[[TILE_SLICE_INDEX]], %[[NUM_ROWS]] : index +// CHECK-NEXT: %[[ROW_IS_ACTIVE:.*]] = arith.cmpi slt, %[[TILE_SLICE_INDEX]], %[[NUM_ROWS]] : index // CHECK-NEXT: %[[ROW_IS_ACTIVE_SEXT_I32:.*]] = arith.extsi %[[ROW_IS_ACTIVE]] : i1 to i32 // CHECK-NEXT: %[[MASK:.*]] = arith.andi %[[ROW_IS_ACTIVE_SEXT_I32]], %[[NUM_COLS_I32]] : i32 // CHECK-NEXT: %[[MASK_INDEX:.*]] = arith.index_cast %[[MASK]] : i32 to index