diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index f79855f7e2c5f..e65547b932a84 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -29,6 +29,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" @@ -2968,7 +2969,8 @@ class LLVM_ABI_FOR_TEST VPBranchOnMaskRecipe : public VPRecipeBase { /// the expression is elevated to connect the non-expression recipe with the /// VPExpressionRecipe itself. class VPExpressionRecipe : public VPSingleDefRecipe { - /// Recipes included in this VPExpressionRecipe. + /// Recipes included in this VPExpressionRecipe. This could contain + /// duplicates. SmallVector ExpressionRecipes; /// Temporary VPValues used for external operands of the expression, i.e. @@ -3013,8 +3015,11 @@ class VPExpressionRecipe : public VPSingleDefRecipe { {Ext0, Ext1, Mul, Red}) {} ~VPExpressionRecipe() override { - for (auto *R : reverse(ExpressionRecipes)) - delete R; + SmallSet ExpressionRecipesSeen; + for (auto *R : reverse(ExpressionRecipes)) { + if (ExpressionRecipesSeen.insert(R).second) + delete R; + } for (VPValue *T : LiveInPlaceholders) delete T; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 8e9c3db50319f..79c94bf05291a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2761,9 +2761,8 @@ VPExpressionRecipe::VPExpressionRecipe( ExpressionTypes ExpressionType, ArrayRef ExpressionRecipes) : VPSingleDefRecipe(VPDef::VPExpressionSC, {}, {}), - ExpressionRecipes(SetVector( - ExpressionRecipes.begin(), ExpressionRecipes.end()) - .takeVector()), + ExpressionRecipes(SmallVector( + ExpressionRecipes.begin(), ExpressionRecipes.end())), ExpressionType(ExpressionType) { assert(!ExpressionRecipes.empty() && "Nothing to combine?"); assert( @@ -2801,21 +2800,31 @@ VPExpressionRecipe::VPExpressionRecipe( // create new temporary VPValues for all operands defined by a recipe outside // the expression. The original operands are added as operands of the // VPExpressionRecipe itself. + + SmallMapVector OperandPlaceholders; for (auto *R : ExpressionRecipes) { for (const auto &[Idx, Op] : enumerate(R->operands())) { auto *Def = Op->getDefiningRecipe(); if (Def && ExpressionRecipesAsSetOfUsers.contains(Def)) continue; addOperand(Op); - LiveInPlaceholders.push_back(new VPValue()); - R->setOperand(Idx, LiveInPlaceholders.back()); + VPValue *Tmp = new VPValue(); + OperandPlaceholders[Op] = Tmp; + LiveInPlaceholders.push_back(Tmp); } } + + for (auto *R : ExpressionRecipes) + for (auto &Entry : OperandPlaceholders) + R->replaceUsesOfWith(Entry.first, Entry.second); } void VPExpressionRecipe::decompose() { for (auto *R : ExpressionRecipes) - R->insertBefore(this); + // Since the list could contain duplicates, make sure the recipe hasn't + // already been inserted. + if (!R->getParent()) + R->insertBefore(this); for (const auto &[Idx, Op] : enumerate(operands())) LiveInPlaceholders[Idx]->replaceAllUsesWith(Op); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index 4e6ef0de6a9ed..db64eb32f0320 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -632,3 +632,50 @@ exit: %r.0.lcssa = phi i64 [ %rdx.next, %loop ] ret i64 %r.0.lcssa } + +define i64 @print_mulacc_duplicate_extends(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { +; CHECK-LABEL: 'print_mulacc_duplicate_extends' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<%n> = original trip-count +; CHECK-EMPTY: +; CHECK: vector.ph: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]> +; CHECK-NEXT: WIDEN ir<[[LOAD0:%.+]]> = load vp<[[ADDR0]]> +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.sub (mul nsw (ir<[[LOAD0]]> sext to i64), (ir<[[LOAD0]]> sext to i64)) +; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %rdx = phi i64 [ %rdx.next, %loop ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16, ptr %x, i32 %iv + %load0 = load i16, ptr %arrayidx, align 4 + %conv0 = sext i16 %load0 to i32 + %mul = mul nsw i32 %conv0, %conv0 + %conv = sext i32 %mul to i64 + %rdx.next = sub nsw i64 %rdx, %conv + %iv.next = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %exit, label %loop + +exit: + %r.0.lcssa = phi i64 [ %rdx.next, %loop ] + ret i64 %r.0.lcssa +}