[VPlan] Convert phis to blends after all block-in masks were introduced.

- Add VPPredicator::getMaskInsertPoint(), returning the position right after
  the recipe defining a block's in-mask when that recipe is in the block, and
  the block's first non-phi position otherwise.
- convertPhisToBlends() sets the builder's insert point to that position.
- introduceMasksAndLinearize() no longer converts phis where it creates the
  block-in masks; the phis of all non-header blocks are converted further
  below, visiting the blocks in reverse of RPOT.
- Add the blend_chain_non_trivial test to predicator.ll.

NOTE(review): the context lines of convertPhisToBlends are assumed to use
VPPhi as the phi recipe type (SmallVector<VPPhi *>, cast<VPPhi>); confirm
against base blob 2717b80e2eeaa before applying. The postimage blob id on the
first index line predates the comment-only edits in this revision.

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 2717b80e2eeaa..2878a619119de 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -69,6 +69,17 @@ class VPPredicator {
     return EdgeMaskCache[{Src, Dst}] = Mask;
   }
 
+  /// Returns where to insert new masks in \p VPBB: right after the recipe
+  /// defining the block-in mask of \p VPBB if that recipe is in \p VPBB,
+  /// otherwise at the first non-phi position of \p VPBB.
+  VPBasicBlock::iterator getMaskInsertPoint(VPBasicBlock *VPBB) {
+    if (VPValue *Mask = getBlockInMask(VPBB))
+      if (VPRecipeBase *MaskR = Mask->getDefiningRecipe())
+        if (MaskR->getParent() == VPBB) // In-mask may be the IDom's.
+          return std::next(MaskR->getIterator());
+    return VPBB->getFirstNonPhi();
+  }
+
 public:
   VPPredicator(VPlan &Plan) : VPDT(Plan), VPPDT(Plan) {}
 
@@ -225,6 +236,8 @@ void VPPredicator::createSwitchEdgeMasks(const VPInstruction *SI) {
 }
 
 void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
+  Builder.setInsertPoint(VPBB, getMaskInsertPoint(VPBB));
+
   SmallVector<VPPhi *> Phis;
   for (VPRecipeBase &R : VPBB->phis())
     Phis.push_back(cast<VPPhi>(&R));
@@ -276,10 +289,8 @@ void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan) {
-    // Introduce the mask for VPBB, which may introduce needed edge masks, and
-    // convert all phi recipes of VPBB to blend recipes unless VPBB is the
-    // header.
-    if (VPBB != Header) {
+    // Introduce the mask for VPBB, which may introduce needed edge masks,
+    // unless VPBB is the header. Phi recipes are converted to blend recipes
+    // separately, further below.
+    if (VPBB != Header)
       Predicator.createBlockInMask(VPBB);
-      Predicator.convertPhisToBlends(VPBB);
-    }
 
     VPValue *BlockMask = Predicator.getBlockInMask(VPBB);
     if (!BlockMask)
@@ -292,6 +303,12 @@ void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan) {
     }
   }
 
+  // Convert the phi recipes of all blocks except the header to blend recipes,
+  // visiting the blocks in reverse of RPOT.
+  for (VPBlockBase *VPB : reverse(RPOT))
+    if (VPB != Header)
+      Predicator.convertPhisToBlends(cast<VPBasicBlock>(VPB));
+
   // Linearize the blocks of the loop into one serial chain.
   VPBlockBase *PrevVPBB = nullptr;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
index 2e955de703cdf..6f33d05b044e6 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
@@ -638,3 +638,115 @@ bb3:
 exit:
   ret void
 }
+
+define void @blend_chain_non_trivial(ptr noalias %a, ptr noalias %b) {
+; CHECK-LABEL: VPlan for loop in 'blend_chain_non_trivial'
+; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = CANONICAL-IV
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0:%[0-9]+]]>
+; CHECK-NEXT: EMIT-SCALAR ir<%lb> = load ir<%b>
+; CHECK-NEXT: EMIT ir<%v1> = add ir<%iv>, ir<%lb>
+; CHECK-NEXT: EMIT ir<%v2> = mul ir<%iv>, ir<3>
+; CHECK-NEXT: EMIT ir<%gep> = getelementptr ir<%a>, ir<%iv>
+; CHECK-NEXT: EMIT ir<%c0> = icmp sle ir<%iv>, ir<0>
+; CHECK-NEXT: Successor(s): if.a
+; CHECK-EMPTY:
+; CHECK-NEXT: if.a:
+; CHECK-NEXT: EMIT ir<%ca> = icmp sle ir<%iv>, ir<8>, ir<%c0>
+; CHECK-NEXT: Successor(s): if.a.inner
+; CHECK-EMPTY:
+; CHECK-NEXT: if.a.inner:
+; CHECK-NEXT: EMIT vp<[[VP4:%[0-9]+]]> = logical-and ir<%c0>, ir<%ca>
+; CHECK-NEXT: Successor(s): merge.a.inner
+; CHECK-EMPTY:
+; CHECK-NEXT: merge.a.inner:
+; CHECK-NEXT: Successor(s): merge.a
+; CHECK-EMPTY:
+; CHECK-NEXT: merge.a:
+; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = not ir<%c0>
+; CHECK-NEXT: BLEND ir<%blend.a> = ir<%v1>/ir<%c0> ir<%v1>/vp<[[VP5]]>
+; CHECK-NEXT: EMIT ir<%d0> = icmp sgt ir<%iv>, ir<0>
+; CHECK-NEXT: Successor(s): if.b
+; CHECK-EMPTY:
+; CHECK-NEXT: if.b:
+; CHECK-NEXT: EMIT ir<%cb> = icmp sle ir<%iv>, ir<16>, ir<%d0>
+; CHECK-NEXT: Successor(s): if.b.inner
+; CHECK-EMPTY:
+; CHECK-NEXT: if.b.inner:
+; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = logical-and ir<%d0>, ir<%cb>
+; CHECK-NEXT: Successor(s): merge.b.inner
+; CHECK-EMPTY:
+; CHECK-NEXT: merge.b.inner:
+; CHECK-NEXT: Successor(s): merge.b
+; CHECK-EMPTY:
+; CHECK-NEXT: merge.b:
+; CHECK-NEXT: EMIT vp<[[VP7:%[0-9]+]]> = not ir<%d0>
+; CHECK-NEXT: BLEND ir<%blend.b> = ir<%v2>/ir<%d0> ir<%v2>/vp<[[VP7]]>
+; CHECK-NEXT: EMIT ir<%sum> = add ir<%blend.a>, ir<%blend.b>
+; CHECK-NEXT: EMIT store ir<%sum>, ir<%gep>
+; CHECK-NEXT: Successor(s): loop.latch
+; CHECK-EMPTY:
+; CHECK-NEXT: loop.latch:
+; CHECK-NEXT: EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1:%[0-9]+]]>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2:%[0-9]+]]>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lb = load i64, ptr %b
+  %v1 = add i64 %iv, %lb
+  %v2 = mul i64 %iv, 3
+  %gep = getelementptr i64, ptr %a, i64 %iv
+  %c0 = icmp sle i64 %iv, 0
+  br i1 %c0, label %if.a, label %merge.a
+
+if.a:
+  %ca = icmp sle i64 %iv, 8
+  br i1 %ca, label %if.a.inner, label %merge.a.inner
+
+if.a.inner:
+  br label %merge.a.inner
+
+merge.a.inner:
+  %blend.a.inner = phi i64 [ %v1, %if.a ], [ %v1, %if.a.inner ]
+  br label %merge.a
+
+merge.a:
+  %blend.a = phi i64 [ %v1, %loop.header ], [ %blend.a.inner, %merge.a.inner ]
+  %d0 = icmp sgt i64 %iv, 0
+  br i1 %d0, label %if.b, label %merge.b
+
+if.b:
+  %cb = icmp sle i64 %iv, 16
+  br i1 %cb, label %if.b.inner, label %merge.b.inner
+
+if.b.inner:
+  br label %merge.b.inner
+
+merge.b.inner:
+  %blend.b.inner = phi i64 [ %v2, %if.b ], [ %v2, %if.b.inner ]
+  br label %merge.b
+
+merge.b:
+  %blend.b = phi i64 [ %v2, %merge.a ], [ %blend.b.inner, %merge.b.inner ]
+  %sum = add i64 %blend.a, %blend.b
+  store i64 %sum, ptr %gep
+  br label %loop.latch
+
+loop.latch:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, 128
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret void
+}