-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LV] Bundle (partial) reductions with a mul of a constant #162503
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/SamTebbs33/expression-recipe-pred
Are you sure you want to change the base?
[LV] Bundle (partial) reductions with a mul of a constant #162503
Conversation
A reduction (including partial reductions) with a multiply of a constant value can be bundled by first converting it from `reduce.add(mul(ext, const))` to `reduce.add(mul(ext, ext(const)))` as long as it is safe to extend the constant. This PR adds such bundling by first truncating the constant to the source type of the other extend, then extending it to the destination type of the extend. The first truncate is necessary so that the types of each extend's operand are then the same, and the call to canConstantBeExtended proves that the extend following a truncate is safe to do. The truncate is removed by optimisations.
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Sam Tebbs (SamTebbs33) ChangesA reduction (including partial reductions) with a multiply of a constant value can be bundled by first converting it from This PR adds such bundling by first truncating the constant to the source type of the other extend, then extending it to the destination type of the extend. The first truncate is necessary so that the types of each extend's operand are then the same, and the call to canConstantBeExtended proves that the extend following a truncate is safe to do. The truncate is removed by optimisations. Full diff: https://github.com/llvm/llvm-project/pull/162503.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 24426c1a53835..4bf2eb3765080 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3597,6 +3597,32 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe());
+ // Match reduce.add(mul(ext, const)) and convert it to
+ // reduce.add(mul(ext, ext(const)))
+ if (RecipeA && !RecipeB && B->isLiveIn()) {
+ Type *NarrowTy = Ctx.Types.inferScalarType(RecipeA->getOperand(0));
+ Instruction::CastOps ExtOpc = RecipeA->getOpcode();
+ auto *Const = dyn_cast<ConstantInt>(B->getLiveInIRValue());
+ if (Const &&
+ llvm::canConstantBeExtended(
+ Const, NarrowTy, TTI::getPartialReductionExtendKind(ExtOpc))) {
+ // The truncate ensures that the type of each extended operand is the
+ // same, and it's been proven that the constant can be extended from
+ // NarrowTy safely. Necessary since RecipeA's extended operand would be
+ // e.g. an i8, while the const will likely be an i32. This will be
+ // elided by later optimisations.
+ auto *Trunc =
+ new VPWidenCastRecipe(Instruction::CastOps::Trunc, B, NarrowTy);
+ Trunc->insertBefore(*RecipeA->getParent(),
+ std::next(RecipeA->getIterator()));
+
+ Type *WideTy = Ctx.Types.inferScalarType(RecipeA);
+ RecipeB = new VPWidenCastRecipe(ExtOpc, Trunc, WideTy);
+ RecipeB->insertAfter(Trunc);
+ Mul->setOperand(1, RecipeB);
+ }
+ }
+
// Match reduce.add/sub(mul(ext, ext)).
if (RecipeA && RecipeB && match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
match(RecipeB, m_ZExtOrSExt(m_VPValue())) &&
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 06b044872c217..ddae9007b7620 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -800,3 +800,269 @@ exit:
%r.0.lcssa = phi i64 [ %rdx.next, %loop ]
ret i64 %r.0.lcssa
}
+
+define i32 @print_mulacc_extended_const(ptr %start, ptr %end) {
+; CHECK-LABEL: 'print_mulacc_extended_const'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: vp<%3> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1>
+; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9>
+; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7>
+; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%8>
+; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul (ir<%l> zext to i32), (ir<63> zext to i32))
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK-NEXT: IR %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = zext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63
+; CHECK-NEXT: IR %red.next = add i32 %red, %mul
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
+; CHECK-NEXT: Live-in ir<%1> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
+; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
+; CHECK-NEXT: IR %0 = add i64 %end1, 1
+; CHECK-NEXT: IR %1 = sub i64 %0, %start2
+; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4>
+; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4>
+; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf>
+; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1>
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep>
+; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = zext ir<%l> to i32
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<63>
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<scalar.ph>:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %red = phi i32 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = zext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63
+; CHECK-NEXT: IR %red.next = add i32 %red, %mul
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+entry:
+ br label %loop
+
+loop:
+ %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ]
+ %red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
+ %l = load i8, ptr %ptr.iv, align 1
+ %l.ext = zext i8 %l to i32
+ %mul = mul i32 %l.ext, 63
+ %red.next = add i32 %red, %mul
+ %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+ %ec = icmp eq ptr %ptr.iv, %end
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %red.next
+}
+
+; Constants >= 128 cannot be treated as sign-extended, so the expression shouldn't extend 128
+define i32 @print_mulacc_not_extended_const(ptr %start, ptr %end) {
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: vp<%3> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1>
+; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9>
+; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7>
+; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%8>
+; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
+; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul ir<%l.ext>, ir<128>)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK-NEXT: IR %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = sext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128
+; CHECK-NEXT: IR %red.next = add i32 %red, %mul
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
+; CHECK-NEXT: Live-in ir<%1> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
+; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
+; CHECK-NEXT: IR %0 = add i64 %end1, 1
+; CHECK-NEXT: IR %1 = sub i64 %0, %start2
+; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4>
+; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4>
+; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf>
+; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1>
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep>
+; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128>
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<scalar.ph>:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %red = phi i32 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = sext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128
+; CHECK-NEXT: IR %red.next = add i32 %red, %mul
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+entry:
+ br label %loop
+
+loop:
+ %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ]
+ %red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
+ %l = load i8, ptr %ptr.iv, align 1
+ %l.ext = sext i8 %l to i32
+ %mul = mul i32 %l.ext, 128
+ %red.next = add i32 %red, %mul
+ %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+ %ec = icmp eq ptr %ptr.iv, %end
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ %red.next.lcssa = phi i32 [ %red.next, %loop ]
+ ret i32 %red.next.lcssa
+}
|
A reduction (including partial reductions) with a multiply of a constant value can be bundled by first converting it from
reduce.add(mul(ext, const))
toreduce.add(mul(ext, ext(const)))
as long as it is safe to extend the constant.This PR adds such bundling by first truncating the constant to the source type of the other extend, then extending it to the destination type of the extend. The first truncate is necessary so that the types of each extend's operand are then the same, and the call to canConstantBeExtended proves that the extend following a truncate is safe to do. The truncate is removed by optimisations.