diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index e86705e898890..5d03b66b0ce33 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -68,6 +68,9 @@ class VPBuilder { public: VPBuilder() = default; VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); } + VPBuilder(VPRecipeBase *InsertPt) { + setInsertPoint(InsertPt->getParent(), InsertPt->getIterator()); + } /// Clear the insertion point: created instructions will not be inserted into /// a block. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index d77c7554d50e4..d720f17a3a881 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1127,6 +1127,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { return WrapFlags.HasNSW; } + bool isDisjoint() const { + assert(OpType == OperationType::DisjointOp && + "recipe cannot have a disjoing flag"); + return DisjointFlags.IsDisjoint; + } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printFlags(raw_ostream &O) const; #endif diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index aa25359069451..a03a408686ef1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -261,6 +261,11 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) { return m_Binary(Op0, Op1); } +template +inline AllBinaryRecipe_match +m_Or(const Op0_t &Op0, const Op1_t &Op1) { + return m_Binary(Op0, Op1); +} } // namespace VPlanPatternMatch } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index a91ccefe4b6d7..c6ec99fbbf0a0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1216,6 +1216,23 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( // load/store. If the underlying instruction has poison-generating flags, // drop them directly. if (auto *RecWithFlags = dyn_cast(CurRec)) { + VPValue *A, *B; + using namespace llvm::VPlanPatternMatch; + // Dropping disjoint from an OR may yield incorrect results, as some + // analysis may have converted it to an Add implicitly (e.g. SCEV used + // for dependence analysis). Instead, replace it with an equivalent Add. + // This is possible as all users of the disjoint OR only access lanes + // where the operands are disjoint or poison otherwise. + if (match(RecWithFlags, m_Or(m_VPValue(A), m_VPValue(B))) && + RecWithFlags->isDisjoint()) { + VPBuilder Builder(RecWithFlags); + VPInstruction *New = Builder.createOverflowingOp( + Instruction::Add, {A, B}, {false, false}, + RecWithFlags->getDebugLoc()); + RecWithFlags->replaceAllUsesWith(New); + RecWithFlags->eraseFromParent(); + CurRec = New; + } RecWithFlags->dropPoisonGeneratingFlags(); } else { Instruction *Instr = dyn_cast_or_null( diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index 14acb6f57aa0c..3f38abc75a583 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -29,7 +29,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3