diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 526b4de975915..04a97606cb7f8 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3948,3 +3948,13 @@ HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
   return AtomicExpansionKind::LLSC;
 }
+
+bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial(
+    const Instruction &AndI) const {
+  // Only sink the 'and' mask to the cmp use block if it is masking a single
+  // bit, since this folds the and/cmp/br into a single tstbit instruction.
+  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+  if (!Mask)
+    return false;
+  return Mask->getValue().isPowerOf2();
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8d04edbea5b43..4ac3e7671592a 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -160,6 +160,8 @@ class HexagonTargetLowering : public TargetLowering {
 
   bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
 
+  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
   /// Return true if an FMA operation is faster than a pair of mul and add
   /// instructions. fmuladd intrinsics will be expanded to FMAs when this
   /// method returns true (and FMAs are legal), otherwise fmuladd is
diff --git a/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
new file mode 100644
index 0000000000000..b5c3399ce6605
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; Test that the 'and' mask is sunk to the cmp use block only if it is masking a single bit.
+; RUN: llc -march=hexagon --verify-machineinstrs < %s | FileCheck %s
+
+@A = global i32 zeroinitializer
+
+define i32 @and_sink1(i32 %a) {
+; CHECK-LABEL: and_sink1:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = !tstbit(r0,#11)
+; CHECK-NEXT: r0 = ##A
+; CHECK-NEXT: }
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: // %bb0
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) jump:nt .LBB0_1
+; CHECK-NEXT: memw(r0+#0) = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %bb2
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+  %and = and i32 %a, 2048
+  br label %bb0
+bb0:
+  %cmp = icmp eq i32 %and, 0
+  store i32 0, i32* @A
+  br i1 %cmp, label %bb0, label %bb2
+bb2:
+  ret i32 0
+}
+
+define i32 @and_sink2(i32 %a) {
+; CHECK-LABEL: and_sink2:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = and(r0,##2049)
+; CHECK-NEXT: r0 = ##A
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = cmp.eq(r1,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB1_1: // %bb0
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) jump:nt .LBB1_1
+; CHECK-NEXT: memw(r0+#0) = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: // %bb.2: // %bb2
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
  %and = and i32 %a, 2049
  br label %bb0
bb0:
  %cmp = icmp eq i32 %and, 0
  store i32 0, i32* @A
  br i1 %cmp, label %bb0, label %bb2
bb2:
  ret i32 0
}