From ae7ef105469b953fe0441d7549ccbd52ad379fa8 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli
Date: Fri, 7 Nov 2025 10:42:45 +0530
Subject: [PATCH 1/2] Precommit test

---
 .../CodeGen/Hexagon/and_mask_cmp0_sink.ll     | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll

diff --git a/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
new file mode 100644
index 0000000000000..8eef8c73354aa
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; Test that the 'and' mask is sunk to the cmp use block only if it is masking a single bit
+; RUN: llc -march=hexagon --verify-machineinstrs < %s | FileCheck %s
+
+@A = global i32 zeroinitializer
+
+define i32 @and_sink1(i32 %a) {
+; CHECK-LABEL: and_sink1:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = and(r0,##2048)
+; CHECK-NEXT: r0 = ##A
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r1,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: // %bb0
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) jump:nt .LBB0_1
+; CHECK-NEXT: memw(r0+#0) = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %bb2
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %and = and i32 %a, 2048
+  br label %bb0
+bb0:
+  %cmp = icmp eq i32 %and, 0
+  store i32 0, i32* @A
+  br i1 %cmp, label %bb0, label %bb2
+bb2:
+  ret i32 0
+}
+
+define i32 @and_sink2(i32 %a) {
+; CHECK-LABEL: and_sink2:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = and(r0,##2049)
+; CHECK-NEXT: r0 = ##A
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r1,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB1_1: // %bb0
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) jump:nt .LBB1_1
+; CHECK-NEXT: memw(r0+#0) = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %bb2
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %and = and i32 %a, 2049
+  br label %bb0
+bb0:
+  %cmp = icmp eq i32 %and, 0
+  store i32 0, i32* @A
+  br i1 %cmp, label %bb0, label %bb2
+bb2:
+  ret i32 0
+}

From 5deaf1336b7301ede75e1cc3b162215823bc3aa2 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli
Date: Fri, 7 Nov 2025 10:49:40 +0530
Subject: [PATCH 2/2] isMaskAndCmp0FoldingBeneficial

---
 llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 10 ++++++++++
 llvm/lib/Target/Hexagon/HexagonISelLowering.h   |  2 ++
 llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll |  5 +----
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 526b4de975915..04a97606cb7f8 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3948,3 +3948,13 @@ HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
   return AtomicExpansionKind::LLSC;
 }
+
+bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial(
+    const Instruction &AndI) const {
+  // Only sink the 'and' mask to the cmp use block if it is masking a single
+  // bit, since this will fold the and/cmp/br into a single tstbit instruction.
+  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+  if (!Mask)
+    return false;
+  return Mask->getValue().isPowerOf2();
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8d04edbea5b43..4ac3e7671592a 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -160,6 +160,8 @@ class HexagonTargetLowering : public TargetLowering {
 
   bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
 
+  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
   /// Return true if an FMA operation is faster than a pair of mul and add
   /// instructions. fmuladd intrinsics will be expanded to FMAs when this
   /// method returns true (and FMAs are legal), otherwise fmuladd is
diff --git a/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
index 8eef8c73354aa..b5c3399ce6605 100644
--- a/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
+++ b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
@@ -9,12 +9,9 @@ define i32 @and_sink1(i32 %a) {
 ; CHECK: .cfi_startproc
 ; CHECK-NEXT: // %bb.0:
 ; CHECK-NEXT: {
-; CHECK-NEXT: r1 = and(r0,##2048)
+; CHECK-NEXT: p0 = !tstbit(r0,#11)
 ; CHECK-NEXT: r0 = ##A
 ; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: p0 = cmp.eq(r1,#0)
-; CHECK-NEXT: }
 ; CHECK-NEXT: .p2align 4
 ; CHECK-NEXT: .LBB0_1: // %bb0
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
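
Reviewer note (illustration only, not part of the patch series): CodeGenPrepare
consults this hook when deciding whether to duplicate an 'and' into the block of
its icmp-with-zero user, and the hook's predicate is simply "the mask has exactly
one bit set", checked via APInt::isPowerOf2() on the mask operand. Below is a
minimal standalone sketch of that predicate; isSingleBitMask is a hypothetical
helper named here for illustration. Its outputs mirror the two tests: 2048
(bit 11) qualifies and folds to p0 = !tstbit(r0,#11), while 2049 sets two bits,
so the separate and/cmp sequence is kept.

  #include <cstdint>
  #include <cstdio>

  // Same test APInt::isPowerOf2() performs: nonzero, and Mask shares no bit
  // with Mask-1, which holds exactly when a single bit is set.
  static bool isSingleBitMask(uint32_t Mask) {
    return Mask != 0 && (Mask & (Mask - 1)) == 0;
  }

  int main() {
    printf("2048 -> %d\n", isSingleBitMask(2048)); // 1: sink, folds to tstbit
    printf("2049 -> %d\n", isSingleBitMask(2049)); // 0: not beneficial to sink
    return 0;
  }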