Skip to content

Commit

Permalink
[LoopUnroll] Fold add chains during unrolling
Browse files: browse the repository at this point in the history
Loop unrolling tends to produce chains of
`%x1 = add %x0, 1; %x2 = add %x1, 1; ...` with one add per unrolled
iteration. This patch simplifies these adds to `%xN = add %x0, N`
directly during unrolling, rather than waiting for InstCombine to do so.

The motivation for this is that having a single add (rather than
an add chain) on the induction variable makes it a simple recurrence,
which we specially recognize in a number of places. This allows
InstCombine to directly perform folds with that knowledge, instead
of first folding the add chains, and then doing other folds in another
InstCombine iteration.

Due to the reduced number of InstCombine iterations, this also
results in a small compile-time improvement.

Differential Revision: https://reviews.llvm.org/D153540
  • Loading branch information
nikic committed Jul 5, 2023
1 parent 2049b2a commit b9808e5
Show file tree
Hide file tree
Showing 33 changed files with 514 additions and 928 deletions.
27 changes: 27 additions & 0 deletions llvm/lib/Transforms/Utils/LoopUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/ValueHandle.h"
Expand Down Expand Up @@ -216,6 +217,8 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
AssumptionCache *AC,
const TargetTransformInfo *TTI) {
using namespace llvm::PatternMatch;

// Simplify any new induction variables in the partially unrolled loop.
if (SE && SimplifyIVs) {
SmallVector<WeakTrackingVH, 16> DeadInsts;
Expand All @@ -241,6 +244,30 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
Inst.replaceAllUsesWith(V);
if (isInstructionTriviallyDead(&Inst))
DeadInsts.emplace_back(&Inst);

// Fold (add (add X, C1), C2) to (add X, C1+C2). This is very common in
// unrolled loops, and handling this early allows following code to
// identify the IV as a "simple recurrence" without first folding away
// a long chain of adds.
{
Value *X;
const APInt *C1, *C2;
if (match(&Inst, m_Add(m_Add(m_Value(X), m_APInt(C1)), m_APInt(C2)))) {
auto *InnerI = dyn_cast<Instruction>(Inst.getOperand(0));
auto *InnerOBO = cast<OverflowingBinaryOperator>(Inst.getOperand(0));
bool SignedOverflow;
APInt NewC = C1->sadd_ov(*C2, SignedOverflow);
Inst.setOperand(0, X);
Inst.setOperand(1, ConstantInt::get(Inst.getType(), NewC));
Inst.setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap() &&
InnerOBO->hasNoUnsignedWrap());
Inst.setHasNoSignedWrap(Inst.hasNoSignedWrap() &&
InnerOBO->hasNoSignedWrap() &&
!SignedOverflow);
if (InnerI && isInstructionTriviallyDead(InnerI))
DeadInsts.emplace_back(InnerI);
}
}
}
// We can't do recursive deletion until we're done iterating, as we might
// have a phi which (potentially indirectly) uses instructions later in
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ declare void @use(i8 %x)
; CHECK: br label %for.body

; CHECK: for.body:
; CHECK: %inc.9 = add i8 %inc.8, 1
; CHECK: %inc.9 = add i8 %inc1, 10
; CHECK: %0 = add i8 %inc1, 10
; CHECK: br label %for.cond

Expand Down
9 changes: 3 additions & 6 deletions llvm/test/DebugInfo/unrolled-loop-remainder.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ define i32 @func_c() local_unnamed_addr #0 !dbg !14 {
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_PROL_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
; CHECK-NEXT: [[CONV_PROL_1:%.*]] = sext i32 [[TMP7]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[CONV_PROL_1]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ADD_PROL_1:%.*]] = add nsw i32 [[ADD_PROL]], 2, !dbg [[DBG29]]
; CHECK-NEXT: [[ADD_PROL_1:%.*]] = add nsw i32 [[DOTPR]], 4, !dbg [[DBG29]]
; CHECK-NEXT: [[PROL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]], !dbg [[DBG24]]
; CHECK-NEXT: br i1 [[PROL_ITER_CMP_1]], label [[FOR_BODY_PROL_2:%.*]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]]
; CHECK: for.body.prol.2:
; CHECK-NEXT: [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 1, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_PROL_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
; CHECK-NEXT: [[CONV_PROL_2:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[CONV_PROL_2]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ADD_PROL_2:%.*]] = add nsw i32 [[ADD_PROL_1]], 2, !dbg [[DBG29]]
; CHECK-NEXT: [[ADD_PROL_2:%.*]] = add nsw i32 [[DOTPR]], 6, !dbg [[DBG29]]
; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]
; CHECK: for.body.prol.loopexit.unr-lcssa:
; CHECK-NEXT: [[DOTLCSSA_UNR_PH:%.*]] = phi ptr [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP8]], [[FOR_BODY_PROL_1]] ], [ [[TMP10]], [[FOR_BODY_PROL_2]] ]
Expand All @@ -72,22 +72,19 @@ define i32 @func_c() local_unnamed_addr #0 !dbg !14 {
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[CONV]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 2, !dbg [[DBG29]]
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 1, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
; CHECK-NEXT: [[CONV_1:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[CONV_1]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[ADD]], 2, !dbg [[DBG29]]
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 1, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
; CHECK-NEXT: [[CONV_2:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[CONV_2]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[ADD_1]], 2, !dbg [[DBG29]]
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 1, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
; CHECK-NEXT: [[CONV_3:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG28]]
; CHECK-NEXT: [[TMP21]] = inttoptr i64 [[CONV_3]] to ptr, !dbg [[DBG28]]
; CHECK-NEXT: [[ADD_3]] = add nsw i32 [[ADD_2]], 2, !dbg [[DBG29]]
; CHECK-NEXT: [[ADD_3]] = add nsw i32 [[TMP13]], 8, !dbg [[DBG29]]
; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[ADD_3]], 0, !dbg [[DBG24]]
; CHECK-NEXT: br i1 [[TOBOOL_3]], label [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA:%.*]], label [[FOR_BODY]], !dbg [[DBG24]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK: for.cond.for.end_crit_edge.unr-lcssa:
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ define i32 @test2(ptr nocapture %p, i32 %n) nounwind readonly {
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: br label [[BB1_1:%.*]]
; CHECK: bb1.1:
; CHECK-NEXT: [[INDVAR_NEXT_1:%.*]] = add nuw nsw i64 [[INDVAR_NEXT]], 1
; CHECK-NEXT: [[INDVAR_NEXT_1:%.*]] = add nuw nsw i64 [[INDVAR]], 2
; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp ne i64 [[INDVAR_NEXT_1]], [[TMP]]
; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[BB_2:%.*]], label [[BB1_BB2_CRIT_EDGE]]
; CHECK: bb.2:
Expand All @@ -92,7 +92,7 @@ define i32 @test2(ptr nocapture %p, i32 %n) nounwind readonly {
; CHECK-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT: br label [[BB1_2:%.*]]
; CHECK: bb1.2:
; CHECK-NEXT: [[INDVAR_NEXT_2:%.*]] = add nuw nsw i64 [[INDVAR_NEXT_1]], 1
; CHECK-NEXT: [[INDVAR_NEXT_2:%.*]] = add nuw nsw i64 [[INDVAR]], 3
; CHECK-NEXT: [[EXITCOND_2:%.*]] = icmp ne i64 [[INDVAR_NEXT_2]], [[TMP]]
; CHECK-NEXT: br i1 [[EXITCOND_2]], label [[BB_3:%.*]], label [[BB1_BB2_CRIT_EDGE]]
; CHECK: bb.3:
Expand All @@ -101,7 +101,7 @@ define i32 @test2(ptr nocapture %p, i32 %n) nounwind readonly {
; CHECK-NEXT: [[TMP8]] = add nsw i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: br label [[BB1_3]]
; CHECK: bb1.3:
; CHECK-NEXT: [[INDVAR_NEXT_3]] = add i64 [[INDVAR_NEXT_2]], 1
; CHECK-NEXT: [[INDVAR_NEXT_3]] = add i64 [[INDVAR]], 4
; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp ne i64 [[INDVAR_NEXT_3]], [[TMP]]
; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[BB]], label [[BB1_BB2_CRIT_EDGE]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: bb1.bb2_crit_edge:
Expand Down
26 changes: 2 additions & 24 deletions llvm/test/Transforms/LoopUnroll/AArch64/partial.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,7 @@ exit:
ret void
}

; CHECK: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK: add{{.*}}, 8
; CHECK-NEXT: icmp

; Partial unroll 16 times for this loop.
Expand Down Expand Up @@ -57,20 +50,5 @@ exit:



; CHECK: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK: add{{.*}}, 16
; CHECK-NEXT: icmp
12 changes: 6 additions & 6 deletions llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ define void @test_i32_add_optsize(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-V8-NEXT: [[RES_1:%.*]] = add i32 [[DATA_A_1]], [[DATA_B_1]]
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
; CHECK-V8-NEXT: store i32 [[RES_1]], ptr [[ADDR_C_1]], align 4
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[COUNT]], 1
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK-V8: exit:
Expand Down Expand Up @@ -69,7 +69,7 @@ define void @test_i32_add_minsize(ptr %a, ptr %b, ptr %c) #1 {
; CHECK-V8-NEXT: [[RES_1:%.*]] = add i32 [[DATA_A_1]], [[DATA_B_1]]
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
; CHECK-V8-NEXT: store i32 [[RES_1]], ptr [[ADDR_C_1]], align 4
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[COUNT]], 1
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK-V8: exit:
Expand Down Expand Up @@ -116,7 +116,7 @@ define void @test_i64_add_optsize(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-V8-NEXT: [[RES_1:%.*]] = add i64 [[DATA_A_1]], [[DATA_B_1]]
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i64, ptr [[C]], i32 [[COUNT]]
; CHECK-V8-NEXT: store i64 [[RES_1]], ptr [[ADDR_C_1]], align 4
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[COUNT]], 1
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK-V8: exit:
Expand Down Expand Up @@ -163,7 +163,7 @@ define void @test_i64_add_minsize(ptr %a, ptr %b, ptr %c) #1 {
; CHECK-V8-NEXT: [[RES_1:%.*]] = add i64 [[DATA_A_1]], [[DATA_B_1]]
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i64, ptr [[C]], i32 [[COUNT]]
; CHECK-V8-NEXT: store i64 [[RES_1]], ptr [[ADDR_C_1]], align 4
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[COUNT]], 1
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK-V8: exit:
Expand Down Expand Up @@ -215,7 +215,7 @@ define i32 @test_i32_select_optsize(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC_NEXT]]
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
; CHECK-V8-NEXT: store i32 [[UMAX_1]], ptr [[ADDR_C_1]], align 4
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[COUNT]], 1
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK-V8: exit:
Expand Down Expand Up @@ -271,7 +271,7 @@ define i32 @test_i32_select_minsize(ptr %a, ptr %b, ptr %c) #1 {
; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC_NEXT]]
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
; CHECK-V8-NEXT: store i32 [[UMAX_1]], ptr [[ADDR_C_1]], align 4
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[COUNT]], 1
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK-V8: exit:
Expand Down
46 changes: 23 additions & 23 deletions llvm/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,27 @@ for.body:

; CHECK-NOUNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV2:%[a-z.0-9]+]], %for.body ]
; CHECK-NOUNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
; CHECK-NOUNROLL: [[IV2]] = add nuw nsw i32 [[IV1]], 1
; CHECK-NOUNROLL: [[IV2]] = add nuw nsw i32 [[IV0]], 2
; CHECK-NOUNROLL: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV2]], 1024
; CHECK-NOUNROLL: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body

; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV16:%[a-z.0-9]+]], %for.body ]
; CHECK-UNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
; CHECK-UNROLL: [[IV4:%[a-z.0-9]+]] = add nuw nsw i32 [[IV3]], 1
; CHECK-UNROLL: [[IV5:%[a-z.0-9]+]] = add nuw nsw i32 [[IV4]], 1
; CHECK-UNROLL: [[IV6:%[a-z.0-9]+]] = add nuw nsw i32 [[IV5]], 1
; CHECK-UNROLL: [[IV7:%[a-z.0-9]+]] = add nuw nsw i32 [[IV6]], 1
; CHECK-UNROLL: [[IV8:%[a-z.0-9]+]] = add nuw nsw i32 [[IV7]], 1
; CHECK-UNROLL: [[IV9:%[a-z.0-9]+]] = add nuw nsw i32 [[IV8]], 1
; CHECK-UNROLL: [[IV10:%[a-z.0-9]+]] = add nuw nsw i32 [[IV9]], 1
; CHECK-UNROLL: [[IV11:%[a-z.0-9]+]] = add nuw nsw i32 [[IV10]], 1
; CHECK-UNROLL: [[IV12:%[a-z.0-9]+]] = add nuw nsw i32 [[IV11]], 1
; CHECK-UNROLL: [[IV13:%[a-z.0-9]+]] = add nuw nsw i32 [[IV12]], 1
; CHECK-UNROLL: [[IV14:%[a-z.0-9]+]] = add nuw nsw i32 [[IV13]], 1
; CHECK-UNROLL: [[IV15:%[a-z.0-9]+]] = add nuw nsw i32 [[IV14]], 1
; CHECK-UNROLL: [[IV16]] = add nuw nsw i32 [[IV15]], 1
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 2
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 3
; CHECK-UNROLL: [[IV4:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 4
; CHECK-UNROLL: [[IV5:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 5
; CHECK-UNROLL: [[IV6:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 6
; CHECK-UNROLL: [[IV7:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 7
; CHECK-UNROLL: [[IV8:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 8
; CHECK-UNROLL: [[IV9:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 9
; CHECK-UNROLL: [[IV10:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 10
; CHECK-UNROLL: [[IV11:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 11
; CHECK-UNROLL: [[IV12:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 12
; CHECK-UNROLL: [[IV13:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 13
; CHECK-UNROLL: [[IV14:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 14
; CHECK-UNROLL: [[IV15:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 15
; CHECK-UNROLL: [[IV16]] = add nuw nsw i32 [[IV0]], 16
; CHECK-UNROLL: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV16]], 1024
; CHECK-UNROLL: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body

Expand Down Expand Up @@ -65,14 +65,14 @@ entry:
for.body:
; CHECK-NOUNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z.0-9]+]] ], [ [[IV2:%[a-z.0-9]+]], %for.body ]
; CHECK-NOUNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
; CHECK-NOUNROLL: [[IV2]] = add nuw i32 [[IV1]], 1
; CHECK-NOUNROLL: [[IV2]] = add nuw i32 [[IV0]], 2
; CHECK-NOUNROLL: br

; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z.0-9]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body ]
; CHECK-UNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
; CHECK-UNROLL: [[IV4]] = add nuw i32 [[IV3]], 1
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 2
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 3
; CHECK-UNROLL: [[IV4]] = add nuw i32 [[IV0]], 4
; CHECK-UNROLL: br

; CHECK-UNROLL: for.body.epil:
Expand Down Expand Up @@ -125,9 +125,9 @@ for.body4:
; CHECK-UNROLL: for.body4.epil.2:
; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z0-9.]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body4 ]
; CHECK-UNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
; CHECK-UNROLL: [[IV4]] = add nuw i32 [[IV3]], 1
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 2
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 3
; CHECK-UNROLL: [[IV4]] = add nuw i32 [[IV0]], 4
; CHECK-UNROLL: br

%w.024 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ]
Expand Down
Loading

0 comments on commit b9808e5

Please sign in to comment.