diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0669f9ad4dfc98..5e13c6e1fd1f65 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -581,7 +581,7 @@ class InnerLoopVectorizer { void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State); /// Clear NSW/NUW flags from reduction instructions if necessary. - void clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc, + void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR, VPTransformState &State); /// Fixup the LCSSA phi nodes in the unique exit block. This simply @@ -3884,7 +3884,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, Type *VecTy = State.get(LoopExitInstDef, 0)->getType(); // Wrap flags are in general invalid after vectorization, clear them. - clearReductionWrapFlags(RdxDesc, State); + clearReductionWrapFlags(PhiR, State); // Before each round, move the insertion point right between // the PHIs and the values we are going to write. @@ -4060,34 +4060,35 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); } -void InnerLoopVectorizer::clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc, +void InnerLoopVectorizer::clearReductionWrapFlags(VPReductionPHIRecipe *PhiR, VPTransformState &State) { + const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); RecurKind RK = RdxDesc.getRecurrenceKind(); if (RK != RecurKind::Add && RK != RecurKind::Mul) return; - Instruction *LoopExitInstr = RdxDesc.getLoopExitInstr(); - assert(LoopExitInstr && "null loop exit instruction"); - SmallVector Worklist; - SmallPtrSet Visited; - Worklist.push_back(LoopExitInstr); - Visited.insert(LoopExitInstr); + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(PhiR); + Visited.insert(PhiR); while (!Worklist.empty()) { - Instruction *Cur = Worklist.pop_back_val(); - if (isa(Cur)) - for (unsigned Part = 0; Part < UF; ++Part) { - // FIXME: Should not rely on getVPValue at this point. - Value *V = State.get(State.Plan->getVPValue(Cur, true), Part); - cast(V)->dropPoisonGeneratingFlags(); + VPValue *Cur = Worklist.pop_back_val(); + for (unsigned Part = 0; Part < UF; ++Part) { + Value *V = State.get(Cur, Part); + if (!isa(V)) + break; + cast(V)->dropPoisonGeneratingFlags(); } - for (User *U : Cur->users()) { - Instruction *UI = cast(U); - if ((Cur != LoopExitInstr || OrigLoop->contains(UI->getParent())) && - Visited.insert(UI).second) - Worklist.push_back(UI); - } + for (VPUser *U : Cur->users()) { + auto *UserRecipe = dyn_cast(U); + if (!UserRecipe) + continue; + for (VPValue *V : UserRecipe->definedValues()) + if (Visited.insert(V).second) + Worklist.push_back(V); + } } } diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index 862a3845b4a1bd..9a8e57d396a5a1 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -478,3 +478,38 @@ exit: store i32 %sum.lcssa, i32* %gep.dst.1, align 4 ret void } + +; Test for PR55540. +define void @test_drop_poison_generating_dead_recipe(i64* %dst) { +; CHECK-LABEL: @test_drop_poison_generating_dead_recipe( +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ [[TMP0:%.*]], %vector.body ] +; CHECK-NEXT: [[TMP0]] = add <4 x i64> [[VEC_PHI]], +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 360 +; CHECK-NEXT: br i1 [[TMP1]], label %middle.block, label %vector.body +; CHECK: middle.block: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP0]]) +; CHECK-NEXT: store i64 [[TMP2]], i64* [[DST:%.*]], align 8 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 363, 360 +; CHECK-NEXT: br i1 [[CMP_N]], label %exit, label %scalar.ph +; CHECK: scalar.ph: +; +entry: + br label %body + +body: + %red = phi i64 [ 0, %entry ], [ %red.next, %body ] + %iv = phi i32 [ 2, %entry ], [ %iv.next, %body ] + %add.1 = add nuw i64 %red, -23523 + store i64 %add.1, i64* %dst, align 8 + %red.next = add nuw i64 %red, -31364 + store i64 %red.next, i64* %dst, align 8 + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp ugt i32 %iv, 363 + br i1 %ec, label %exit, label %body + +exit: + ret void +}