
[VPlan] Simplify (X && Y) || (X && !Y) -> X. #89386

Merged
merged 15 commits into llvm:main from fhahn:vplan-simplify-or-and on May 19, 2024

Conversation

fhahn
Contributor

@fhahn fhahn commented Apr 19, 2024

Simplify a common pattern generated for masks when folding the tail.

Simplify a common pattern generated for masks when folding the tail.
@llvmbot

llvmbot commented Apr 19, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

Changes

Simplify a common pattern generated for masks when folding the tail.
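To make the pattern concrete, here is a minimal scalar sketch in LLVM IR of the mask computation this targets (value names are hypothetical; under tail folding, %x plays the role of the active-lane mask and %y of a block condition):

%y.not  = xor i1 %y, true                    ; !Y
%m.then = select i1 %x, i1 %y, i1 false      ; X && Y (logical and)
%m.else = select i1 %x, i1 %y.not, i1 false  ; X && !Y
%m.out  = or i1 %m.then, %m.else             ; (X && Y) || (X && !Y)

Every use of %m.out can be replaced by %x directly, which is what the test updates below show: the masked stores end up using the active-lane mask instead of the reconstructed or.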


Patch is 27.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89386.diff

8 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h (+59)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+12)
  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll (+6-12)
  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+1-2)
  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll (+1-2)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+4-16)
  • (modified) llvm/test/Transforms/LoopVectorize/uniform-blend.ll (+1-4)
  • (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+5-14)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index ae7717eb7cc97f..76bd25d939523b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -185,6 +185,47 @@ using AllBinaryRecipe_match =
     BinaryRecipe_match<Op0_t, Op1_t, Opcode, VPWidenRecipe, VPReplicateRecipe,
                        VPWidenCastRecipe, VPInstruction>;
 
+template <typename Op0_t, typename Op1_t, unsigned Opcode, bool Commutable,
+          typename... RecipeTys>
+struct LogicalRecipe_match {
+  Op0_t LHS;
+  Op1_t RHS;
+
+  LogicalRecipe_match(Op0_t LHS, Op1_t RHS) : LHS(LHS), RHS(RHS) {}
+
+  bool match(const VPValue *V) {
+    auto *DefR = V->getDefiningRecipe();
+    return DefR && match(DefR);
+  }
+
+  bool match(const VPSingleDefRecipe *R) {
+    return match(static_cast<const VPRecipeBase *>(R));
+  }
+
+  bool match(const VPRecipeBase *R) {
+    if (!detail::MatchRecipeAndOpcode<Opcode, RecipeTys...>::match(R)) {
+      if (!detail::MatchRecipeAndOpcode<Instruction::Select,
+                                        RecipeTys...>::match(R))
+        return false;
+      if (Opcode == Instruction::And) {
+        if (!m_SpecificInt(0).match(R->getOperand(2)))
+          return false;
+      } else if (Opcode == Instruction::Or) {
+        if (!m_SpecificInt(1).match(R->getOperand(1)))
+          return false;
+      } else {
+        llvm_unreachable("unsupported opcode");
+      }
+    } else {
+      assert(R->getNumOperands() == 2 &&
+             "recipe with matched opcode does not have 2 operands");
+    }
+    return (LHS.match(R->getOperand(0)) && RHS.match(R->getOperand(1))) ||
+           (Commutable && LHS.match(R->getOperand(1)) &&
+            RHS.match(R->getOperand(0)));
+  }
+};
+
 template <unsigned Opcode, typename Op0_t>
 inline UnaryVPInstruction_match<Op0_t, Opcode>
 m_VPInstruction(const Op0_t &Op0) {
@@ -266,6 +307,24 @@ inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
 m_Or(const Op0_t &Op0, const Op1_t &Op1) {
   return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
 }
+
+template <typename Op0_t, typename Op1_t>
+inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::Or, true, VPWidenRecipe,
+                           VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>
+m_c_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
+  return LogicalRecipe_match<Op0_t, Op1_t, Instruction::Or, true, VPWidenRecipe,
+                             VPReplicateRecipe, VPWidenCastRecipe,
+                             VPInstruction>(Op0, Op1);
+}
+
+template <typename Op0_t, typename Op1_t>
+inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true, VPWidenRecipe,
+                           VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>
+m_c_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
+  return LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true,
+                             VPWidenRecipe, VPReplicateRecipe,
+                             VPWidenCastRecipe, VPInstruction>(Op0, Op1);
+}
 } // namespace VPlanPatternMatch
 } // namespace llvm
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 007dc3f89b3fb9..5453c47ecf7000 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -939,6 +939,18 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
 #endif
   }
 
+  VPValue *B;
+  VPValue *C;
+  VPValue *D;
+  // Simplify (X && Y) || (X && !Y) -> X.
+  if (match(&R,
+            m_c_LogicalOr(m_c_LogicalAnd(m_VPValue(A), m_VPValue(B)),
+                          m_c_LogicalAnd(m_VPValue(C), m_Not(m_VPValue(D))))) &&
+      A == C && B == D) {
+    R.getVPSingleValue()->replaceAllUsesWith(A);
+    return;
+  }
+
   if (match(&R, m_CombineOr(m_Mul(m_VPValue(A), m_SpecificInt(1)),
                             m_Mul(m_SpecificInt(1), m_VPValue(A)))))
     return R.getVPSingleValue()->replaceAllUsesWith(A);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index b91579106261a6..200c2adcf0e666 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -223,10 +223,9 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
 ; TFCOMMON-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP9]])
 ; TFCOMMON-NEXT:    [[TMP11:%.*]] = xor <vscale x 2 x i1> [[TMP8]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ; TFCOMMON-NEXT:    [[TMP12:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i1> zeroinitializer
-; TFCOMMON-NEXT:    [[TMP13:%.*]] = or <vscale x 2 x i1> [[TMP9]], [[TMP12]]
 ; TFCOMMON-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP10]]
 ; TFCOMMON-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
-; TFCOMMON-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP14]], i32 8, <vscale x 2 x i1> [[TMP13]])
+; TFCOMMON-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP14]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; TFCOMMON-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
 ; TFCOMMON-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
 ; TFCOMMON-NEXT:    [[TMP15:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
@@ -272,16 +271,14 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
 ; TFA_INTERLEAVE-NEXT:    [[TMP20:%.*]] = xor <vscale x 2 x i1> [[TMP14]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ; TFA_INTERLEAVE-NEXT:    [[TMP21:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP19]], <vscale x 2 x i1> zeroinitializer
 ; TFA_INTERLEAVE-NEXT:    [[TMP22:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP20]], <vscale x 2 x i1> zeroinitializer
-; TFA_INTERLEAVE-NEXT:    [[TMP23:%.*]] = or <vscale x 2 x i1> [[TMP15]], [[TMP21]]
-; TFA_INTERLEAVE-NEXT:    [[TMP24:%.*]] = or <vscale x 2 x i1> [[TMP16]], [[TMP22]]
 ; TFA_INTERLEAVE-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP21]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP17]]
 ; TFA_INTERLEAVE-NEXT:    [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP22]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP18]]
 ; TFA_INTERLEAVE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; TFA_INTERLEAVE-NEXT:    [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
 ; TFA_INTERLEAVE-NEXT:    [[TMP27:%.*]] = mul i64 [[TMP26]], 2
 ; TFA_INTERLEAVE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP25]], i64 [[TMP27]]
-; TFA_INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP25]], i32 8, <vscale x 2 x i1> [[TMP23]])
-; TFA_INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP28]], i32 8, <vscale x 2 x i1> [[TMP24]])
+; TFA_INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP25]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TFA_INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP28]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
 ; TFA_INTERLEAVE-NEXT:    [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
 ; TFA_INTERLEAVE-NEXT:    [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
 ; TFA_INTERLEAVE-NEXT:    [[TMP30:%.*]] = mul i64 [[TMP29]], 2
@@ -405,10 +402,9 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
 ; TFCOMMON-NEXT:    [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP10]])
 ; TFCOMMON-NEXT:    [[TMP12:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP8]], <vscale x 2 x i1> zeroinitializer
 ; TFCOMMON-NEXT:    [[TMP13:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP12]])
-; TFCOMMON-NEXT:    [[TMP14:%.*]] = or <vscale x 2 x i1> [[TMP10]], [[TMP12]]
 ; TFCOMMON-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP13]]
 ; TFCOMMON-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
-; TFCOMMON-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[TMP14]])
+; TFCOMMON-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; TFCOMMON-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
 ; TFCOMMON-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
 ; TFCOMMON-NEXT:    [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
@@ -456,16 +452,14 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
 ; TFA_INTERLEAVE-NEXT:    [[TMP22:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
 ; TFA_INTERLEAVE-NEXT:    [[TMP23:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP21]])
 ; TFA_INTERLEAVE-NEXT:    [[TMP24:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[TMP22]])
-; TFA_INTERLEAVE-NEXT:    [[TMP25:%.*]] = or <vscale x 2 x i1> [[TMP17]], [[TMP21]]
-; TFA_INTERLEAVE-NEXT:    [[TMP26:%.*]] = or <vscale x 2 x i1> [[TMP18]], [[TMP22]]
 ; TFA_INTERLEAVE-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP17]], <vscale x 2 x i64> [[TMP19]], <vscale x 2 x i64> [[TMP23]]
 ; TFA_INTERLEAVE-NEXT:    [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP18]], <vscale x 2 x i64> [[TMP20]], <vscale x 2 x i64> [[TMP24]]
 ; TFA_INTERLEAVE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; TFA_INTERLEAVE-NEXT:    [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
 ; TFA_INTERLEAVE-NEXT:    [[TMP29:%.*]] = mul i64 [[TMP28]], 2
 ; TFA_INTERLEAVE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i64 [[TMP29]]
-; TFA_INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP27]], i32 8, <vscale x 2 x i1> [[TMP25]])
-; TFA_INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP30]], i32 8, <vscale x 2 x i1> [[TMP26]])
+; TFA_INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP27]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; TFA_INTERLEAVE-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr [[TMP30]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
 ; TFA_INTERLEAVE-NEXT:    [[INDEX_NEXT:%.*]] = add i64 [[INDEX]], [[TMP6]]
 ; TFA_INTERLEAVE-NEXT:    [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
 ; TFA_INTERLEAVE-NEXT:    [[TMP32:%.*]] = mul i64 [[TMP31]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
index ad6e8534f31800..e22f70e436f330 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -1241,9 +1241,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-ORDERED-TF-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x float> poison)
 ; CHECK-ORDERED-TF-NEXT:    [[TMP17:%.*]] = xor <vscale x 4 x i1> [[TMP13]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF-NEXT:    [[TMP18:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i1> zeroinitializer
-; CHECK-ORDERED-TF-NEXT:    [[TMP19:%.*]] = or <vscale x 4 x i1> [[TMP15]], [[TMP18]]
 ; CHECK-ORDERED-TF-NEXT:    [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP18]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> [[WIDE_MASKED_LOAD1]]
-; CHECK-ORDERED-TF-NEXT:    [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED-TF-NEXT:    [[TMP20:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED-TF-NEXT:    [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP20]])
 ; CHECK-ORDERED-TF-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP23]]
 ; CHECK-ORDERED-TF-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
index 2b2742ca7ccbc3..63ad98b2d8ab2a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -480,11 +480,10 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr
 ; CHECK-NEXT:    [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> poison)
 ; CHECK-NEXT:    [[TMP16:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = or <vscale x 4 x i1> [[TMP15]], [[TMP16]]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
-; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI]], ptr [[TMP19]], i32 4, <vscale x 4 x i1> [[TMP18]])
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI]], ptr [[TMP19]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP21]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
 ; CHECK-NEXT:    [[TMP22:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 1ce4cb928e8085..ee70f4aa35850f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -462,13 +462,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-SCALABLE-NEXT:    [[TMP12:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 10, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; TF-SCALABLE-NEXT:    [[TMP13:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer
 ; TF-SCALABLE-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i64> poison)
-; TF-SCALABLE-NEXT:    [[TMP14:%.*]] = xor <vscale x 2 x i1> [[TMP12]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
-; TF-SCALABLE-NEXT:    [[TMP15:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i1> zeroinitializer
-; TF-SCALABLE-NEXT:    [[TMP17:%.*]] = or <vscale x 2 x i1> [[TMP13]], [[TMP15]]
 ; TF-SCALABLE-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], <vscale x 2 x i64> zeroinitializer
 ; TF-SCALABLE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]]
 ; TF-SCALABLE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0
-; TF-SCALABLE-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[TMP17]])
+; TF-SCALABLE-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP18]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-SCALABLE-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]]
 ; TF-SCALABLE-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; TF-SCALABLE-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -510,13 +507,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-FIXEDLEN-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
 ; TF-FIXEDLEN-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison)
-; TF-FIXEDLEN-NEXT:    [[TMP3:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true>
-; TF-FIXEDLEN-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
-; TF-FIXEDLEN-NEXT:    [[TMP6:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]]
 ; TF-FIXEDLEN-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; TF-FIXEDLEN-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
-; TF-FIXEDLEN-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP7]], i32 8, <4 x i1> [[TMP6]])
+; TF-FIXEDLEN-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP7]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-FIXEDLEN-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; TF-FIXEDLEN-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
 ; TF-FIXEDLEN-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
...
[truncated]

@fhahn
Copy link
Contributor Author

fhahn commented Apr 23, 2024

Updated after landing #89384. Note that for the logical AND/OR matching, we need to actually check the provided bit width instead of asserting.
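A small IR sketch of why (hypothetical names): once the matchers look through selects, the constant operand being tested is no longer guaranteed to be a single bit, so m_False()/m_True() must reject wider constants rather than assert on them:

%s1 = select i1 %c, i1 %a, i1 false  ; logical and: last operand is an i1 zero, should match
%s2 = select i1 %c, i32 %a, i32 0    ; unrelated select: an i32 zero, must be rejected, not asserted on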

@fhahn
Copy link
Contributor Author

fhahn commented Apr 25, 2024

ping :)

Collaborator

@ayalz ayalz left a comment

Long-term, optimizing masks may be addressed when generated by a VPlan2VPlan Predicator, rather than by a separate subsequent redundancy eliminator.

}

bool match(const VPSingleDefRecipe *R) {
return match(static_cast<const VPRecipeBase *>(R));
Collaborator

Is this upcast and overloaded method needed?

Contributor Author

Removed, thanks!

Collaborator

Ah, overloaded match(const VPSingleDefRecipe*) is needed because, having both match(const VPValue*) and match(const VPRecipeBase *), it's unclear which one to upcast to? Perhaps worth a comment.

Contributor Author

It's not needed for now; I removed it for real this time. (From the comment I thought I had already removed it, but apparently not.)

RecipeTys...>::match(R))
return false;
if (Opcode == Instruction::And) {
if (!m_False().match(R->getOperand(2)))
Collaborator

This use of m_False applies to a general bitwise AND, rather than logical AND of boolean mask operands. But the redundancy holds if the operand is all-false, rather than restricting to a single zero bit.

Contributor Author

Not sure if I am missing something, but this here doesn't match the opcode of the recipe (that is matched above to be a select), it matches the operation that the matcher was instantiated to match.

Collaborator

Trying to see why the above "Trying the match constant with unexpected bitwidth" assert needs to be relaxed into a check, when applied to m_False/m_True whose operand is still expected to be boolean?

Contributor Author

It would also match other selects where the last operand is 0, e.g. select i1 %c, i32 %a, i32 0

Collaborator

This stems from searching for select i1 %c, i1 %a, i1 0 as the poison-preserving form of and i1 %c, %a? Would it be clearer for VPlan to represent Poison-Preserving AND/OR directly, possibly via a flag, whose execute() emits the desired select patterns, rather than reverse-engineer them?
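For context, a minimal LLVM IR sketch (hypothetical names) of the semantic difference behind the select form discussed here:

%a = and i1 %c, %p                  ; poison whenever %p is poison, even if %c is false
%l = select i1 %c, i1 %p, i1 false  ; false (not poison) whenever %c is false

The select only exposes poison from %p when %c is true, which is what makes it the poison-preserving spelling of the edge-mask AND.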

Collaborator

@ayalz ayalz May 10, 2024

Would doing so earlier simplify this "Simplify (X && Y) || (X && !Y) -> X" patch, and potentially additional mask optimizations, by keeping it focused on AND/OR/NOT only, w/o having to reverse engineer selects, nor affect how false/true are matched?
When considering operands of AND/OR, in general, redundancy of constant false/true operands extends beyond i1 (of logical AND/OR) to all-zero/all-ones (of bitwise AND/OR) - in contrast to operands of select, whose matching to identify logical AND is restricted to i1.
Regarding handling (all) input - is that needed or expected, given that this matching tries to simplify mask computations that were generated earlier by LV. I.e., the select introduced by VPRecipeBuilder::createEdgeMask(). Representing LogicalAnd/LogicalOr is also consistent with the representation of Not, introduced there (rather than its xor-with-true form, simply because it was introduced before VPlan could represent constants including true).
(Another option is for VPlan to represent an AND-NOT recipe, which is admittedly a non-basic logic gate.)

Independently, should createBlockInMask() generate logical OR's using selects as well, possibly by recipes that target IRBuilder::CreateLogicalOr(), rather than plain bitwise OR's via IRBuilder's CreateBinOp()?

The only AND/OR VPInstructions currently in use are those generated by create[Edge,BlockIn]Mask(), which are logical(ly dealing with masks), right? Actually, VPBuilder::createAnd() appears to be useless, after createEdgeMask() switched to calling VPBuilder::createSelect() instead, so no AND VPInstructions appear to exist, neither bitwise nor logical.

Note that the distinction between logical and bitwise effectively involves type analysis: is the bitwidth 1 or more. Vectorization turns logical boolean operations on (non-uniform) i1 values into bitwise ones.

Contributor Author

Put up to add a logical-and opcode and use that when creating edge masks: #91897

Independently, should createBlockInMask() generate logical OR's using selects as well, possibly by recipes that target IRBuilder::CreateLogicalOr(), rather than plain bitwise OR's via IRBuilder's CreateBinOp()?

Hmm, I am not sure. Would probably warrant a separate look.

The only AND/OR VPInstructions currently in use are those generated by create[Edge,BlockIn]Mask(), which are logical(ly dealing with masks), right? Actually, VPBuilder::createAnd() appears to be useless, after createEdgeMask() switched to calling VPBuilder::createSelect() instead, so no AND VPInstructions appear to exist, neither bitwise nor logical.

I think so yes, let me clean this up separately.

Collaborator

Independently, should createBlockInMask() generate logical OR's using selects as well, possibly by recipes that target IRBuilder::CreateLogicalOr(), rather than plain bitwise OR's via IRBuilder's CreateBinOp()?

Hmm, I am not sure. Would probably warrant a separate look.

Perhaps Not should also become a LogicalNot, generating a select %c, false, true instead of the natural xor %c, true?

Contributor Author

Updated now to be based on LogicalAnd.

Perhaps Not should also become a LogicalNot, generating a select %c, false, true instead of the natural xor %c, true?

I am not entirely sure about changing the IR we generate here; the XOR variant appears to be the canonical form expected by other passes (i.e., that's what IRBuilder.CreateNot generates).

Collaborator

Independently, should createBlockInMask() generate logical OR's using selects as well, possibly by recipes that target IRBuilder::CreateLogicalOr(), rather than plain bitwise OR's via IRBuilder's CreateBinOp()?

Hmm, I am not sure. Would probably warrant a separate look.

Looking at the original issue https://bugs.llvm.org/show_bug.cgi?id=48832 and its fix, it appears that masks of unreachable edges can no longer convey poison, so ORing them to produce block-in-masks should continue to be bitwise rather than logical. OTOH, logical ORs require their operands to be suitably ordered, whereas predecessors are in general unordered (unlike employing a logical AND for edge masks where the mask of the block naturally precedes the condition of its terminal branch).

Perhaps Not should also become a LogicalNot, generating a select %c, false, true instead of the natural xor %c, true?

I am not entirely sure about changing the IR we generate here, the XOR variant appears to be the canonical for expected by other passes (i.e. that's what IRBuilder.CreateNot generates)

The condition of a terminal branch may be poison, so may its negating NOT, provided the edge mask uses a logical AND to avoid propagating it in case the block is unreachable (has a false block-in-mask).

Note: better eliminate dead edges and nodes from the CFG prior to applying predication, once this is done via VPlan-to-VPlan transformation. Doing so may allow using bitwise ANDs rather than logical ones, as before.
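A minimal LLVM IR sketch of the ordering point (hypothetical names): the select-based logical or shields only its second operand, so it is not symmetric under poison, unlike the bitwise or:

%o1 = select i1 %a, i1 true, i1 %b  ; poison in %b is masked whenever %a is true
%o2 = select i1 %b, i1 true, i1 %a  ; not equivalent to %o1 if either input may be poison
%o3 = or i1 %a, %b                  ; bitwise: poison if either operand is poison

This is why block-in-masks over unordered predecessors can keep using the bitwise form.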

VPValue *B;
VPValue *C;
VPValue *D;
// Simplify (X && Y) || (X && !Y) -> X.
Collaborator

Keep comments and code consistent: have both use A,B,C,D or both X,Y,Z.

Contributor Author

Updated to use A, B, C, D. The intention was to avoid A == C && B == D in the description, but it is probably more confusing.

Collaborator

How about keeping the description (X && Y) || (X && !Y) -> X and use X, Y, X1, Y1 instead of A, B, C, D in the code, respectively?

Contributor Author

Excellent, updated!

VPValue *C;
VPValue *D;
// Simplify (X && Y) || (X && !Y) -> X.
if (match(&R,
Collaborator

More general and modular: first simplify (A && B) || (C && D) where A == C into A && (B || D). Also for A == D, B == C, B == D. Followed by simplifying (X || !X).
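Spelled out on the pattern at hand, the suggested modular sequence would be (a worked derivation, not code):

(X && Y) || (X && !Y)
-> X && (Y || !Y)  ; factor out the common operand X
-> X && true       ; Y || !Y is a tautology
-> X

The patch as written fuses these steps into a single match.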

Contributor Author

Could do, but we would also need logic to simplify newly created recipes. Left as is for now.

Collaborator

ok, worth leaving behind a TODO?

Contributor Author

Done, thanks!

@fhahn
Contributor Author

fhahn commented May 3, 2024

ping :)


github-actions bot commented May 9, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.


VPValue *Y;
VPValue *X1;
VPValue *Y1;
// Simplify (X && Y) || (X1 && !Y1) -> X.
Collaborator

Suggested change
-// Simplify (X && Y) || (X1 && !Y1) -> X.
+// Simplify (X && Y) || (X && !Y) -> X.

This now more closely matches the code, where repeated occurrences of X and Y are enumerated.

Contributor Author

Done, thanks!

fhahn added a commit to fhahn/llvm-project that referenced this pull request May 12, 2024
Add a new opcode to model non-poison propagating logical AND operations
used when generating edge masks. This follows the similar decision to
model Not as a dedicated opcode as well, to improve clarity.

This also helps to simplify the matchers for
llvm#89386.
"Trying the match constant with unexpected bitwidth.");
return APInt::isSameValue(CI->getValue(), Val);
return (BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
APInt::isSameValue(CI->getValue(), Val);
Collaborator

Is this change still needed, now that Logical Ands are matched whose operands must be single bits, rather than Selects?

Contributor Author

Restored, thanks!


bool match(const VPRecipeBase *R) {
if (!detail::MatchRecipeAndOpcode<Opcode, RecipeTys...>::match(R)) {
if (!detail::MatchRecipeAndOpcode<Instruction::Select,
Collaborator

Still need to match Select here?

Contributor Author

No, I just updated the PR based on the new LogicalAnd.


}

template <typename Op0_t, typename Op1_t>
inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true, VPWidenRecipe,
Collaborator

Suggested change
-inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true, VPWidenRecipe,
+inline LogicalRecipe_match<Op0_t, Op1_t, VPInstruction::LogicalAnd, true, VPWidenRecipe,

based on #91897?

Contributor Author

Just updated the PR based on the new LogicalAnd, thanks!

@@ -273,6 +309,24 @@ inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
m_Or(const Op0_t &Op0, const Op1_t &Op1) {
return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
}

template <typename Op0_t, typename Op1_t>
inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::Or, true, VPWidenRecipe,
Collaborator

Suggested change
-inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::Or, true, VPWidenRecipe,
+inline LogicalRecipe_match<Op0_t, Op1_t, VPInstruction::LogicalOr, true, VPWidenRecipe,

for consistency, and supporting createBlockInMask?

Worth commenting true /* Commutable */, here and/or below?

}

bool match(const VPRecipeBase *R) {
if (!detail::MatchRecipeAndOpcode<Opcode, RecipeTys...>::match(R)) {
Collaborator

Still need to consider R's with non matching Opcode?

Contributor Author

The whole matcher is now gone.

return false;
} else if (Opcode == Instruction::Or) {
if (!m_True().match(R->getOperand(1)))
return false;
Collaborator

Is this folding of And-with-true and Or-with-false redundancies an inherent part of folding the (X&&Y)||(X&&!Y)->X redundancy?


template <typename Op0_t, typename Op1_t>
inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true, VPWidenRecipe,
VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>
Collaborator

Are all these recipes relevant?

Contributor Author

Not for the LogicalAnd, but in terms of generality we could have ANDs/ORs in any of those recipe types, which could potentially benefit.

fhahn added a commit that referenced this pull request May 14, 2024
…91897)

Add a new opcode to model non-poison propagating logical AND operations
used when generating edge masks. This follows the similar decision to
model Not as a dedicated opcode as well, to improve clarity.

This also helps to simplify the matchers for
#89386.


PR: #91897
Contributor Author

@fhahn fhahn left a comment

The latest version has been simplified and updated to use LogicalAnd.

"Trying the match constant with unexpected bitwidth.");
return APInt::isSameValue(CI->getValue(), Val);
return (BitWidth == 0 || CI->getBitWidth() == BitWidth) &&
APInt::isSameValue(CI->getValue(), Val);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Restored, thanks!

RecipeTys...>::match(R))
return false;
if (Opcode == Instruction::And) {
if (!m_False().match(R->getOperand(2)))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated now to be based on LogicalAnd.

Perhaps Not should also become a LogicalNot, generating a select %c, false, true instead of the natural xor %c, true?

I am not entirely sure about changing the IR we generate here, the XOR variant appears to be the canonical for expected by other passes (i.e. that's what IRBuilder.CreateNot generates)


bool match(const VPRecipeBase *R) {
if (!detail::MatchRecipeAndOpcode<Opcode, RecipeTys...>::match(R)) {
if (!detail::MatchRecipeAndOpcode<Instruction::Select,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I just updated the PR based on the new LogicalAnd.

}

template <typename Op0_t, typename Op1_t>
inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true, VPWidenRecipe,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just updated the PR based on the new LogicalAnd, thanks!


template <typename Op0_t, typename Op1_t>
inline LogicalRecipe_match<Op0_t, Op1_t, Instruction::And, true, VPWidenRecipe,
VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for the logicalAnd, but in terms of generality we could have ANDs/OR with any of those opcodes, which could potentially benefit.

}

bool match(const VPRecipeBase *R) {
if (!detail::MatchRecipeAndOpcode<Opcode, RecipeTys...>::match(R)) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The whole matcher is now gone.

Contributor Author

@fhahn fhahn left a comment

The latest version is simplified by stripping out the logic to match commutative expressions and limiting the matching to the existing BinaryOr matcher (renamed to be more accurate) and the new LogicalAnd matcher.

I'll share a follow-up adding just the commutative matcher.

fhahn added a commit to fhahn/llvm-project that referenced this pull request May 17, 2024
fhahn added a commit to fhahn/llvm-project that referenced this pull request May 17, 2024
Split off from llvm#89386, this
extends the binary matcher to support matching commutative operations.
This is used for a new m_c_BinaryOr matcher, used in simplifyRecipe.
@fhahn
Contributor Author

fhahn commented May 17, 2024

The follow-up to add support for commutative matching is available now as well: #92539

@fhahn fhahn closed this May 17, 2024
@fhahn
Contributor Author

fhahn commented May 17, 2024

Reopened as I accidentally hit the close button...

@fhahn fhahn reopened this May 17, 2024
Collaborator

@ayalz ayalz left a comment

Thanks for simplifying and extracting the part that deals specifically with the X&&Y || X&&!Y pattern! This by itself results in various levels of subsequent cleanups. Adding a couple of nits.

@@ -270,9 +270,15 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) {

template <typename Op0_t, typename Op1_t>
inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
m_Or(const Op0_t &Op0, const Op1_t &Op1) {
m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
Collaborator

Why/Is this distinction of Binary important, here - to contrast LogicalAnd below (i.e., IllogicalOr ;-), emphasizing Bitwise vs. Logical (i.e., those allowed to introduce UB vs. those that prevent it), or to accommodate Ternary and multi-ary Or's in the future. Worth updating recipe to print binary-or or bitwise-or rather than simply or?

Contributor Author

This is to clarify the matcher now that we have a separate one for logical AND, and it is in line with the naming of the IR pattern matchers.

For printing, we rely on Instruction::getOpcodeName to print Instruction opcodes, so I think it would probably be good to keep printing it as a regular or, to stay consistent with the original/underlying IR.

Comment on lines 938 to 945
VPValue *X;
VPValue *Y;
VPValue *X1;
VPValue *Y1;
// Simplify (X && Y) || (X && !Y) -> X.
// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
// && (Y || Z) and (X || !X) into true. This requires queuing newly created
// recipes to be visited during simplification.
Collaborator

Suggested change
-VPValue *X;
-VPValue *Y;
-VPValue *X1;
-VPValue *Y1;
-// Simplify (X && Y) || (X && !Y) -> X.
-// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
-// && (Y || Z) and (X || !X) into true. This requires queuing newly created
-// recipes to be visited during simplification.
+// Simplify (X && Y) || (X && !Y) -> X.
+// TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
+// && (Y || Z) and (X || !X) into true. This requires queuing newly created
+// recipes to be visited during simplification.
+VPValue *X, *Y, *X1, *Y1;

nit: better place definitions together, and closer to their use.

Contributor Author

Done, thanks!

; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
; CHECK-NEXT: EMIT vp<[[MASK5:%.+]]> = logical-and vp<[[MASK4]]>, ir<%c.0>
; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.0>
Collaborator

Just noting that X is MASK1, Y is ir<%c.0>; all of !Y, X&&Y, (X&&Y) || (X&&!Y) are eliminated, the latter replaced by X, while X&&Y (MASK2) is retained due to another use - by BRANCH-ON-COUNT.

Contributor Author

Yep

; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP11]], ptr [[B]], ptr poison
Collaborator

Nice clean-up once TMP11 is replaced by TMP4, through extractelements, eliminating both logical ANDs and code generating Y (TMP5).

Contributor Author

Yep

@@ -223,10 +223,9 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP9]])
; TFCOMMON-NEXT: [[TMP11:%.*]] = xor <vscale x 2 x i1> [[TMP8]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
; TFCOMMON-NEXT: [[TMP12:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i1> zeroinitializer
; TFCOMMON-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i1> [[TMP9]], [[TMP12]]
Collaborator

This eliminates only the OR, leaving the two logical ANDs intact, as they are used elsewhere.

Contributor Author

Yep

Collaborator

@ayalz ayalz left a comment

Ship it!

@fhahn fhahn merged commit b050048 into llvm:main May 19, 2024
4 checks passed
@fhahn fhahn deleted the vplan-simplify-or-and branch May 19, 2024 15:45
fhahn added a commit that referenced this pull request May 20, 2024
Split off from #89386, this
extends the binary matcher to support matching commutative operations.
This is used for a new m_c_BinaryOr matcher, used in simplifyRecipe.

PR: #92539