diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 09b126d35bde0..bd57d1192eb94 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3215,6 +3215,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
 return getZeroExtendExpr(Res, Ops[1]->getType(), Depth + 1);
 };
 }
+
+ // Try to fold (C * D /u C) -> D, if C is a power-of-2 and D is a multiple
+ // of C.
+ const SCEV *D;
+ if (match(Ops[1], m_scev_UDiv(m_SCEV(D), m_scev_Specific(LHSC))) &&
+ LHSC->getAPInt().isPowerOf2() &&
+ LHSC->getAPInt().logBase2() <= getMinTrailingZeros(D)) {
+ return D;
+ }
 }
 }
 
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
index 8c77d704eac6a..4e5033b7a2f7f 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
@@ -12,9 +12,9 @@ define void @rewrite_zext(i32 %n) {
 ; CHECK-NEXT: %n.vec = and i64 %ext, -8
 ; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8)) U: [0,4294967289) S: [0,4294967289)
 ; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,17) S: [0,17) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,17) S: [0,17) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8
-; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,25) S: [8,25) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8))) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,25) S: [8,25) Exits: (8 * ((zext i32 %n to i64) /u 8)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext
 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8)
 ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2
@@ -52,11 +52,11 @@ define i32 @rewrite_zext_min_max(i32 %N, ptr %arr) {
 ; CHECK-NEXT: %n.vec = and i64 %ext, 28
 ; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4)) U: [0,17) S: [0,17)
 ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
 ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_min_max
 ; CHECK-NEXT: Loop %loop:
backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -98,11 +98,11 @@ define i32 @rewrite_min_max_zext(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %umin, 28 ; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4)) U: [0,17) S: [0,17) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_zext ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -144,11 +144,11 @@ define i32 @rewrite_sext_min_max(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nsw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_min_max ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -190,11 +190,11 @@ define i32 @rewrite_min_max_sext(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %smin, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: 
%index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nsw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_sext ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -288,9 +288,9 @@ define i32 @rewrite_zext_no_icmp_ne(i32 %N) { ; CHECK-NEXT: %n.vec = and i64 %n.rnd.up, 8589934588 ; CHECK-NEXT: --> (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4)) U: [4,4294967297) S: [4,4294967297) ; CHECK-NEXT: %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,4294967293) S: [0,4294967293) Exits: (4 * ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,4294967293) S: [0,4294967293) Exits: (-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,4294967297) S: [4,4294967297) Exits: (4 + (4 * ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,4294967297) S: [4,4294967297) Exits: (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_no_icmp_ne ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((4 + (zext i32 (-1 + (zext i2 (trunc i32 %N to i2) to i32)) to i64)) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 1073741823 @@ -328,9 +328,9 @@ define void @rewrite_zext_and_base_1(i32 %n) { ; CHECK-NEXT: %n.vec = and i64 %ext, -8 ; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8)) U: [0,4294967289) S: [0,4294967289) ; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,25) S: [0,25) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,25) S: [0,25) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = 
add nuw nsw i64 %index, 8 -; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,33) S: [8,33) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,33) S: [8,33) Exits: (8 * ((zext i32 %n to i64) /u 8)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_and_base_1 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -371,9 +371,9 @@ define void @rewrite_zext_and_base_2(i32 %n) { ; CHECK-NEXT: %n.vec = and i64 %ext, -8 ; CHECK-NEXT: --> (8 * ((zext i32 %n to i64) /u 8)) U: [0,4294967289) S: [0,4294967289) ; CHECK-NEXT: %index = phi i64 [ 0, %check ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,25) S: [0,25) Exits: (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,8}<%loop> U: [0,25) S: [0,25) Exits: (-8 + (8 * ((zext i32 %n to i64) /u 8))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw nsw i64 %index, 8 -; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,33) S: [8,33) Exits: (8 + (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {8,+,8}<%loop> U: [8,33) S: [8,33) Exits: (8 * ((zext i32 %n to i64) /u 8)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_and_base_2 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -412,9 +412,9 @@ define void @guard_pessimizes_analysis_step2(i1 %c, i32 %N) { ; CHECK-NEXT: %init = phi i64 [ 2, %entry ], [ 4, %bb1 ] ; CHECK-NEXT: --> %init U: [2,5) S: [2,5) ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %init, %loop.ph ] -; CHECK-NEXT: --> {%init,+,2}<%loop> U: [2,17) S: [2,17) Exits: ((2 * ((14 + (-1 * %init)) /u 2)) + %init) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%init,+,2}<%loop> U: [2,17) S: [2,17) Exits: 14 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 2 -; CHECK-NEXT: --> {(2 + %init),+,2}<%loop> U: [4,19) S: [4,19) Exits: (2 + (2 * ((14 + (-1 * %init)) /u 2)) + %init) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(2 + %init),+,2}<%loop> U: [4,19) S: [4,19) Exits: 16 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @guard_pessimizes_analysis_step2 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((14 + (-1 * %init)) /u 2) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 6 @@ -456,11 +456,11 @@ define i32 @rewrite_sext_slt_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: 
--> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_slt_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -500,11 +500,11 @@ define i32 @rewrite_zext_ult_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_ult_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -544,11 +544,11 @@ define i32 @rewrite_zext_ule_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to 
i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_ule_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((4 umax (zext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -588,11 +588,11 @@ define i32 @rewrite_zext_sle_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_sle_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((zext i32 (4 smax %N) to i64) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -632,11 +632,11 @@ define i32 @rewrite_zext_uge_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4)) U: [0,17) S: [0,17) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4))) LoopDispositions: 
{ %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_uge_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -676,11 +676,11 @@ define i32 @rewrite_sext_sge_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_sge_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -720,11 +720,11 @@ define i32 @rewrite_zext_ugt_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4)) U: [0,17) S: [0,17) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * ((16 umin (zext i32 %N to i64)) /u 4)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_ugt_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))) /u 4) ; 
CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -764,11 +764,11 @@ define i32 @rewrite_sext_sgt_narrow_check(i32 %N, ptr %arr) { ; CHECK-NEXT: %n.vec = and i64 %ext, 28 ; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) U: [0,29) S: [0,29) ; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ] -; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,4}<%loop> U: [0,13) S: [0,13) Exits: (-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index ; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4)) + %arr) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index.next = add nuw i64 %index, 4 -; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {4,+,4}<%loop> U: [4,17) S: [4,17) Exits: (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_sgt_narrow_check ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 3 @@ -808,9 +808,9 @@ define void @rewrite_add_rec() { ; CHECK-NEXT: %n.vec = and i64 %sub, -2 ; CHECK-NEXT: --> (2 * ({9,+,-1}<%outer.header> /u 2)) U: [0,9) S: [0,9) Exits: 0 LoopDispositions: { %outer.header: Computable, %inner: Invariant } ; CHECK-NEXT: %inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ] -; CHECK-NEXT: --> {0,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ((-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) /u 2)) LoopDispositions: { %inner: Computable, %outer.header: Variant } +; CHECK-NEXT: --> {0,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) LoopDispositions: { %inner: Computable, %outer.header: Variant } ; CHECK-NEXT: %inner.iv.next = add i64 %inner.iv, 2 -; CHECK-NEXT: --> {2,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 + (2 * ((-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) /u 2))) LoopDispositions: { %inner: Computable, %outer.header: Variant } +; CHECK-NEXT: --> {2,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ({9,+,-1}<%outer.header> /u 2)) LoopDispositions: { %inner: Computable, %outer.header: Variant } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%outer.header> U: [1,11) S: [1,11) Exits: 10 LoopDispositions: { %outer.header: Computable, %inner: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_add_rec diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index 7cdf3a2d5fd58..4024c986dd11d 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ 
b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -1390,9 +1390,9 @@ define void @ptr_induction_eq_2(ptr %a, i64 %n) { ; CHECK-NEXT: %b = getelementptr inbounds ptr, ptr %a, i64 %n ; CHECK-NEXT: --> ((8 * %n) + %a) U: full-set S: full-set ; CHECK-NEXT: %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ] -; CHECK-NEXT: --> {%a,+,8}<%loop> U: full-set S: full-set Exits: ((8 * ((-8 + (8 * %n)) /u 8)) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,8}<%loop> U: full-set S: full-set Exits: (-8 + (8 * %n) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 8 -; CHECK-NEXT: --> {(8 + %a),+,8}<%loop> U: full-set S: full-set Exits: (8 + (8 * ((-8 + (8 * %n)) /u 8)) + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(8 + %a),+,8}<%loop> U: full-set S: full-set Exits: ((8 * %n) + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @ptr_induction_eq_2 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-8 + (8 * %n)) /u 8) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693951 diff --git a/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll b/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll index c79befac2fb1d..1c108bd7318e9 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll @@ -17,67 +17,67 @@ define i32 @pr58402_large_number_of_zext(ptr %dst) { ; CHECK-NEXT: %add7 = add i32 %i, 4 ; CHECK-NEXT: --> (4 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [4,5) S: [4,5) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i1 = and i32 %add7, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2)) U: [4,5) S: [4,5) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (4 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [4,5) S: [4,5) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.1 = add i32 %i1, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) U: [8,9) S: [8,9) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (8 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [8,9) S: [8,9) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i2 = and i32 %add7.1, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2)) U: [8,9) S: [8,9) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (8 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [8,9) S: [8,9) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.2 = add i32 %i2, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) U: [12,13) S: [12,13) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (12 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [12,13) S: [12,13) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i3 = and i32 %add7.2, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2)) U: [12,13) S: [12,13) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (12 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [12,13) S: [12,13) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.3 = add i32 %i3, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) 
/u 2))) /u 2))) /u 2))) /u 2))) U: [16,17) S: [16,17) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (16 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [16,17) S: [16,17) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i4 = and i32 %add7.3, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [16,17) S: [16,17) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (16 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [16,17) S: [16,17) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.4 = add i32 %i4, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [20,21) S: [20,21) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (20 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [20,21) S: [20,21) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i5 = and i32 %add7.4, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [20,21) S: [20,21) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (20 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [20,21) S: [20,21) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.5 = add i32 %i5, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [24,25) S: [24,25) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (24 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [24,25) S: [24,25) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i6 = and i32 %add7.5, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [24,25) S: [24,25) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (24 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [24,25) S: [24,25) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.6 = add i32 %i6, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [28,29) S: [28,29) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (28 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [28,29) S: [28,29) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i7 = and i32 %add7.6, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [28,29) S: [28,29) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (28 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [28,29) S: [28,29) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.7 = add i32 %i7, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [32,33) S: [32,33) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (32 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [32,33) S: [32,33) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i8 = and i32 %add7.7, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * 
((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [32,33) S: [32,33) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (32 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [32,33) S: [32,33) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.8 = add i32 %i8, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [36,37) S: [36,37) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (36 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [36,37) S: [36,37) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i9 = and i32 %add7.8, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [36,37) S: [36,37) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (36 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [36,37) S: [36,37) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.9 = add i32 %i9, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [40,41) S: [40,41) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (40 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [40,41) S: [40,41) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i10 = and i32 %add7.9, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [40,41) S: [40,41) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (40 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [40,41) S: [40,41) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.10 = add i32 %i10, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [44,45) S: [44,45) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (44 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [44,45) S: [44,45) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i11 = and i32 %add7.10, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [44,45) S: [44,45) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (44 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [44,45) S: [44,45) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.11 = add i32 %i11, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 
2))) U: [48,49) S: [48,49) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (48 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [48,49) S: [48,49) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i12 = and i32 %add7.11, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [48,49) S: [48,49) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (48 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [48,49) S: [48,49) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.12 = add i32 %i12, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [52,53) S: [52,53) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (52 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [52,53) S: [52,53) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i13 = and i32 %add7.12, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [52,53) S: [52,53) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (52 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [52,53) S: [52,53) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.13 = add i32 %i13, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [56,57) S: [56,57) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (56 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [56,57) S: [56,57) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i14 = and i32 %add7.13, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [56,57) S: [56,57) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (56 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [56,57) S: [56,57) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.14 = add i32 %i14, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [60,61) S: [60,61) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (60 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [60,61) S: [60,61) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i15 = and i32 %add7.14, -2 -; 
CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [60,61) S: [60,61) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (60 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [60,61) S: [60,61) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %add7.15 = add i32 %i15, 4 -; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [64,65) S: [64,65) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (64 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [64,65) S: [64,65) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: %i16 = and i32 %add7.15, -2 -; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [64,65) S: [64,65) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: --> (64 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [64,65) S: [64,65) Exits: <> LoopDispositions: { %header: Variant } ; CHECK-NEXT: Determining loop execution counts for: @pr58402_large_number_of_zext ; CHECK-NEXT: Loop %header: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %header: Unpredictable constant max backedge-taken count. 
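
The SCEV test updates above all follow from the same arithmetic fact the new getMulExpr fold relies on: for an unsigned value D and a power-of-two constant C, C * (D /u C) == D whenever D has at least log2(C) trailing zero bits, because the unsigned division only shifts out bits that are already zero. That is why, for example, (8 * ((-8 + (8 * ((zext i32 %n to i64) /u 8))) /u 8)) now prints as (-8 + (8 * ((zext i32 %n to i64) /u 8))): the inner expression is provably a multiple of 8. The sketch below is only illustrative and is not part of the patch; it checks the same condition with plain 64-bit integers and Clang/GCC builtins instead of LLVM's APInt/SCEV APIs, and the helper name foldIsExact is made up here.

#include <cassert>
#include <cstdint>

// Mirrors the guard in the new fold: C must be a power of two and D must
// have at least log2(C) trailing zero bits (i.e. D is a multiple of C),
// which is exactly when C * (D /u C) == D holds.
static bool foldIsExact(uint64_t C, uint64_t D) {
  bool IsPow2 = C != 0 && (C & (C - 1)) == 0;
  if (!IsPow2)
    return false;
  unsigned Log2C = 63 - __builtin_clzll(C);
  unsigned TrailingZeros = D == 0 ? 64 : __builtin_ctzll(D);
  return Log2C <= TrailingZeros;
}

int main() {
  // D is a multiple of 8, so the division round-trips and the fold is exact.
  assert(foldIsExact(8, 64) && 8 * (64 / 8) == 64);
  // D is not a multiple of 8: the division drops the low bits, so no fold.
  assert(!foldIsExact(8, 65) && 8 * (65 / 8) != 65);
  return 0;
}

In the patch itself the same condition is written as LHSC->getAPInt().isPowerOf2() && LHSC->getAPInt().logBase2() <= getMinTrailingZeros(D), with getMinTrailingZeros supplying a conservative lower bound on D's trailing zero bits, so the fold only fires when the udiv is provably exact.
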
diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll index b71a360d1be12..8283e7bac3457 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -721,6 +721,13 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64 ; CHECK-LABEL: spill_reduce_succ: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpdi r6, 0 +; CHECK-NEXT: ble cr0, .LBB7_9 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: sldi r6, r6, 2 +; CHECK-NEXT: li r11, 1 +; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; CHECK-NEXT: mr r26, r10 +; CHECK-NEXT: cmpdi r6, 1 ; CHECK-NEXT: std r14, -144(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r15, -136(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r16, -128(r1) # 8-byte Folded Spill @@ -733,231 +740,232 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64 ; CHECK-NEXT: std r23, -72(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r24, -64(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r31, -8(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r2, -152(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r9, -184(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill -; CHECK-NEXT: ble cr0, .LBB7_7 -; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: sldi r6, r6, 2 -; CHECK-NEXT: li r7, 1 -; CHECK-NEXT: mr r30, r10 -; CHECK-NEXT: cmpdi r6, 1 -; CHECK-NEXT: iselgt r7, r6, r7 -; CHECK-NEXT: addi r8, r7, -1 -; CHECK-NEXT: clrldi r6, r7, 63 -; CHECK-NEXT: cmpldi r8, 3 -; CHECK-NEXT: blt cr0, .LBB7_4 +; CHECK-NEXT: iselgt r11, r6, r11 +; CHECK-NEXT: addi r12, r11, -1 +; CHECK-NEXT: cmpldi r12, 3 +; CHECK-NEXT: clrldi r6, r11, 63 +; CHECK-NEXT: blt cr0, .LBB7_5 ; CHECK-NEXT: # %bb.2: # %for.body.preheader.new -; CHECK-NEXT: ld r14, -168(r1) # 8-byte Folded Reload -; CHECK-NEXT: mulli r24, r30, 24 -; CHECK-NEXT: ld r16, -184(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r15, -176(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload -; CHECK-NEXT: rldicl r0, r7, 62, 2 -; CHECK-NEXT: sldi r11, r30, 5 -; CHECK-NEXT: sldi r19, r30, 4 -; CHECK-NEXT: sldi r7, r14, 3 -; CHECK-NEXT: add r14, r30, r14 -; CHECK-NEXT: sldi r10, r16, 3 -; CHECK-NEXT: sldi r12, r15, 3 -; CHECK-NEXT: add r16, r30, r16 -; CHECK-NEXT: add r15, r30, r15 -; CHECK-NEXT: add r27, r11, r7 -; CHECK-NEXT: add r22, r24, r7 -; CHECK-NEXT: add r17, r19, r7 -; CHECK-NEXT: sldi r2, r14, 3 -; CHECK-NEXT: add r26, r24, r10 -; CHECK-NEXT: add r25, r24, r12 -; CHECK-NEXT: add r21, r19, r10 -; CHECK-NEXT: add r20, r19, r12 -; CHECK-NEXT: add r8, r11, r10 -; CHECK-NEXT: sldi r16, r16, 3 -; CHECK-NEXT: add r29, r5, r27 -; CHECK-NEXT: add r28, r4, r27 -; CHECK-NEXT: add r27, r3, r27 -; CHECK-NEXT: add r24, r5, r22 -; CHECK-NEXT: add r23, r4, r22 -; CHECK-NEXT: add r22, r3, r22 -; CHECK-NEXT: add r19, r5, r17 -; CHECK-NEXT: add r18, r4, r17 -; CHECK-NEXT: add r17, r3, r17 -; CHECK-NEXT: add r14, r5, r2 -; CHECK-NEXT: add r31, r4, r2 -; CHECK-NEXT: add r2, r3, r2 -; CHECK-NEXT: add r9, r5, r8 -; CHECK-NEXT: add r8, r11, r12 +; 
CHECK-NEXT: rldicl r11, r11, 62, 2 +; CHECK-NEXT: sldi r20, r8, 3 +; CHECK-NEXT: mr r14, r7 +; CHECK-NEXT: sldi r7, r7, 3 +; CHECK-NEXT: sldi r21, r9, 3 +; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r9, -208(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r8, -184(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r5, -200(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r4, -168(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r11, -192(r1) # 8-byte Folded Spill +; CHECK-NEXT: sldi r11, r10, 5 +; CHECK-NEXT: add r0, r11, r20 +; CHECK-NEXT: add r12, r11, r21 +; CHECK-NEXT: add r30, r5, r0 +; CHECK-NEXT: add r0, r11, r7 +; CHECK-NEXT: std r21, -216(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, -224(r1) # 8-byte Folded Spill +; CHECK-NEXT: add r12, r5, r12 +; CHECK-NEXT: add r29, r5, r0 +; CHECK-NEXT: add r28, r4, r0 +; CHECK-NEXT: add r27, r3, r0 +; CHECK-NEXT: mulli r0, r10, 24 +; CHECK-NEXT: std r14, -176(r1) # 8-byte Folded Spill +; CHECK-NEXT: add r26, r0, r21 +; CHECK-NEXT: add r25, r0, r20 +; CHECK-NEXT: add r0, r0, r7 +; CHECK-NEXT: add r24, r5, r0 +; CHECK-NEXT: add r23, r4, r0 +; CHECK-NEXT: add r22, r3, r0 +; CHECK-NEXT: sldi r0, r10, 4 ; CHECK-NEXT: add r26, r5, r26 ; CHECK-NEXT: add r25, r5, r25 +; CHECK-NEXT: add r21, r0, r21 +; CHECK-NEXT: add r20, r0, r20 +; CHECK-NEXT: add r0, r0, r7 +; CHECK-NEXT: add r19, r5, r0 +; CHECK-NEXT: add r18, r4, r0 +; CHECK-NEXT: add r17, r3, r0 +; CHECK-NEXT: add r0, r10, r9 ; CHECK-NEXT: add r21, r5, r21 ; CHECK-NEXT: add r20, r5, r20 -; CHECK-NEXT: add r16, r5, r16 -; CHECK-NEXT: add r8, r5, r8 -; CHECK-NEXT: rldicl r3, r0, 2, 1 -; CHECK-NEXT: addi r3, r3, -4 -; CHECK-NEXT: sub r0, r12, r7 -; CHECK-NEXT: sub r12, r10, r7 -; CHECK-NEXT: li r7, 0 -; CHECK-NEXT: mr r10, r30 -; CHECK-NEXT: sldi r15, r15, 3 -; CHECK-NEXT: add r15, r5, r15 -; CHECK-NEXT: rldicl r3, r3, 62, 2 -; CHECK-NEXT: addi r3, r3, 1 -; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: add r16, r5, r0 +; CHECK-NEXT: add r0, r10, r8 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: add r15, r5, r0 +; CHECK-NEXT: add r0, r10, r14 +; CHECK-NEXT: sldi r0, r0, 3 +; CHECK-NEXT: add r2, r3, r0 +; CHECK-NEXT: ld r3, -224(r1) # 8-byte Folded Reload +; CHECK-NEXT: add r14, r5, r0 +; CHECK-NEXT: add r31, r4, r0 +; CHECK-NEXT: sub r0, r3, r7 +; CHECK-NEXT: ld r3, -192(r1) # 8-byte Folded Reload +; CHECK-NEXT: rldicl r9, r3, 2, 1 +; CHECK-NEXT: ld r3, -216(r1) # 8-byte Folded Reload +; CHECK-NEXT: addi r8, r9, -4 +; CHECK-NEXT: rldicl r8, r8, 62, 2 +; CHECK-NEXT: sub r7, r3, r7 +; CHECK-NEXT: ori r3, r9, 1 +; CHECK-NEXT: addi r8, r8, 1 +; CHECK-NEXT: mulld r3, r10, r3 +; CHECK-NEXT: mtctr r8 +; CHECK-NEXT: li r8, 0 +; CHECK-NEXT: std r10, -192(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r3, -216(r1) # 8-byte Folded Spill ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB7_3: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: lfd f0, 0(r2) ; CHECK-NEXT: lfd f1, 0(r31) -; CHECK-NEXT: add r3, r10, r30 -; CHECK-NEXT: add r3, r3, r30 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfd f1, 0(r14) -; CHECK-NEXT: add r3, r3, r30 -; CHECK-NEXT: add r10, r3, r30 ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfd f0, 0(r14) ; CHECK-NEXT: add r14, r14, r11 ; CHECK-NEXT: lfdx f0, r2, r0 ; CHECK-NEXT: lfdx f1, r31, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r15, r7 +; CHECK-NEXT: lfdx f1, r15, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r15, r7 -; CHECK-NEXT: lfdx f0, r2, r12 -; CHECK-NEXT: lfdx f1, r31, r12 +; CHECK-NEXT: stfdx f0, r15, r8 +; CHECK-NEXT: lfdx 
f0, r2, r7 +; CHECK-NEXT: lfdx f1, r31, r7 ; CHECK-NEXT: add r2, r2, r11 ; CHECK-NEXT: add r31, r31, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r16, r7 +; CHECK-NEXT: lfdx f1, r16, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r16, r7 +; CHECK-NEXT: stfdx f0, r16, r8 ; CHECK-NEXT: lfd f0, 0(r17) ; CHECK-NEXT: lfd f1, 0(r18) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r19, r7 +; CHECK-NEXT: lfdx f1, r19, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r19, r7 +; CHECK-NEXT: stfdx f0, r19, r8 ; CHECK-NEXT: lfdx f0, r17, r0 ; CHECK-NEXT: lfdx f1, r18, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r20, r7 +; CHECK-NEXT: lfdx f1, r20, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r20, r7 -; CHECK-NEXT: lfdx f0, r17, r12 -; CHECK-NEXT: lfdx f1, r18, r12 +; CHECK-NEXT: stfdx f0, r20, r8 +; CHECK-NEXT: lfdx f0, r17, r7 +; CHECK-NEXT: lfdx f1, r18, r7 ; CHECK-NEXT: add r17, r17, r11 ; CHECK-NEXT: add r18, r18, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r21, r7 +; CHECK-NEXT: lfdx f1, r21, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r21, r7 +; CHECK-NEXT: stfdx f0, r21, r8 ; CHECK-NEXT: lfd f0, 0(r22) ; CHECK-NEXT: lfd f1, 0(r23) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r24, r7 +; CHECK-NEXT: lfdx f1, r24, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r24, r7 +; CHECK-NEXT: stfdx f0, r24, r8 ; CHECK-NEXT: lfdx f0, r22, r0 ; CHECK-NEXT: lfdx f1, r23, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r25, r7 +; CHECK-NEXT: lfdx f1, r25, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r25, r7 -; CHECK-NEXT: lfdx f0, r22, r12 -; CHECK-NEXT: lfdx f1, r23, r12 +; CHECK-NEXT: stfdx f0, r25, r8 +; CHECK-NEXT: lfdx f0, r22, r7 +; CHECK-NEXT: lfdx f1, r23, r7 ; CHECK-NEXT: add r22, r22, r11 ; CHECK-NEXT: add r23, r23, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r26, r7 +; CHECK-NEXT: lfdx f1, r26, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r26, r7 +; CHECK-NEXT: stfdx f0, r26, r8 ; CHECK-NEXT: lfd f0, 0(r27) ; CHECK-NEXT: lfd f1, 0(r28) ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r29, r7 +; CHECK-NEXT: lfdx f1, r29, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r29, r7 +; CHECK-NEXT: stfdx f0, r29, r8 ; CHECK-NEXT: lfdx f0, r27, r0 ; CHECK-NEXT: lfdx f1, r28, r0 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r8, r7 +; CHECK-NEXT: lfdx f1, r30, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r8, r7 -; CHECK-NEXT: lfdx f0, r27, r12 -; CHECK-NEXT: lfdx f1, r28, r12 +; CHECK-NEXT: stfdx f0, r30, r8 +; CHECK-NEXT: lfdx f0, r27, r7 +; CHECK-NEXT: lfdx f1, r28, r7 ; CHECK-NEXT: add r27, r27, r11 ; CHECK-NEXT: add r28, r28, r11 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r9, r7 +; CHECK-NEXT: lfdx f1, r12, r8 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r9, r7 -; CHECK-NEXT: add r7, r7, r11 +; CHECK-NEXT: stfdx f0, r12, r8 +; CHECK-NEXT: add r8, r8, r11 ; CHECK-NEXT: bdnz .LBB7_3 -; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r4, -168(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r7, -176(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r8, -184(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r10, -192(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r5, -200(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r9, -208(r1) # 8-byte Folded 
Reload +; CHECK-NEXT: ld r26, -216(r1) # 8-byte Folded Reload +; CHECK-NEXT: .LBB7_5: # %for.cond.cleanup.loopexit.unr-lcssa ; CHECK-NEXT: cmpldi r6, 0 -; CHECK-NEXT: beq cr0, .LBB7_7 -; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader -; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload -; CHECK-NEXT: sldi r8, r30, 3 -; CHECK-NEXT: add r3, r10, r3 -; CHECK-NEXT: sldi r3, r3, 3 -; CHECK-NEXT: add r7, r5, r3 -; CHECK-NEXT: add r9, r4, r3 -; CHECK-NEXT: add r11, r0, r3 -; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r3, r10, r3 -; CHECK-NEXT: sldi r3, r3, 3 -; CHECK-NEXT: add r12, r5, r3 -; CHECK-NEXT: add r30, r4, r3 -; CHECK-NEXT: add r29, r0, r3 -; CHECK-NEXT: ld r3, -168(r1) # 8-byte Folded Reload -; CHECK-NEXT: add r3, r10, r3 -; CHECK-NEXT: li r10, 0 -; CHECK-NEXT: sldi r3, r3, 3 -; CHECK-NEXT: add r5, r5, r3 -; CHECK-NEXT: add r4, r4, r3 -; CHECK-NEXT: add r3, r0, r3 +; CHECK-NEXT: beq cr0, .LBB7_8 +; CHECK-NEXT: # %bb.6: # %for.body.epil.preheader +; CHECK-NEXT: add r11, r26, r9 +; CHECK-NEXT: add r12, r26, r8 +; CHECK-NEXT: add r9, r26, r7 +; CHECK-NEXT: sldi r27, r10, 3 +; CHECK-NEXT: sldi r11, r11, 3 +; CHECK-NEXT: sldi r0, r12, 3 +; CHECK-NEXT: sldi r9, r9, 3 +; CHECK-NEXT: add r28, r5, r11 +; CHECK-NEXT: add r10, r4, r11 +; CHECK-NEXT: add r11, r3, r11 +; CHECK-NEXT: add r12, r5, r0 +; CHECK-NEXT: add r30, r4, r0 +; CHECK-NEXT: add r29, r3, r0 +; CHECK-NEXT: add r5, r5, r9 +; CHECK-NEXT: add r4, r4, r9 +; CHECK-NEXT: add r3, r3, r9 +; CHECK-NEXT: li r9, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB7_6: # %for.body.epil +; CHECK-NEXT: .LBB7_7: # %for.body.epil ; CHECK-NEXT: # -; CHECK-NEXT: lfdx f0, r3, r10 -; CHECK-NEXT: lfdx f1, r4, r10 +; CHECK-NEXT: lfdx f0, r3, r9 +; CHECK-NEXT: lfdx f1, r4, r9 ; CHECK-NEXT: addi r6, r6, -1 ; CHECK-NEXT: cmpldi r6, 0 ; CHECK-NEXT: xsmuldp f0, f0, f1 ; CHECK-NEXT: lfd f1, 0(r5) ; CHECK-NEXT: xsadddp f0, f1, f0 ; CHECK-NEXT: stfd f0, 0(r5) -; CHECK-NEXT: add r5, r5, r8 -; CHECK-NEXT: lfdx f0, r29, r10 -; CHECK-NEXT: lfdx f1, r30, r10 +; CHECK-NEXT: add r5, r5, r27 +; CHECK-NEXT: lfdx f0, r29, r9 +; CHECK-NEXT: lfdx f1, r30, r9 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r12, r10 +; CHECK-NEXT: lfdx f1, r12, r9 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r12, r10 -; CHECK-NEXT: lfdx f0, r11, r10 -; CHECK-NEXT: lfdx f1, r9, r10 +; CHECK-NEXT: stfdx f0, r12, r9 +; CHECK-NEXT: lfdx f0, r11, r9 +; CHECK-NEXT: lfdx f1, r10, r9 ; CHECK-NEXT: xsmuldp f0, f0, f1 -; CHECK-NEXT: lfdx f1, r7, r10 +; CHECK-NEXT: lfdx f1, r28, r9 ; CHECK-NEXT: xsadddp f0, f1, f0 -; CHECK-NEXT: stfdx f0, r7, r10 -; CHECK-NEXT: add r10, r10, r8 -; CHECK-NEXT: bne cr0, .LBB7_6 -; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup +; CHECK-NEXT: stfdx f0, r28, r9 +; CHECK-NEXT: add r9, r9, r27 +; CHECK-NEXT: bne cr0, .LBB7_7 +; CHECK-NEXT: .LBB7_8: ; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r31, -8(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload @@ -973,6 +981,8 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64 ; CHECK-NEXT: ld r16, -128(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r15, -136(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r14, -144(r1) # 8-byte 
+; CHECK-NEXT: .LBB7_9: # %for.cond.cleanup
+; CHECK-NEXT: li r3, 0
 ; CHECK-NEXT: blr
 entry:
 %cmp49 = icmp sgt i64 %m, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 48845c54c5603..b1a8bf4c43c2b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -912,7 +912,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
 ; CHECK-NEXT: addi a6, a4, 1
 ; CHECK-NEXT: andi a7, a6, -32
 ; CHECK-NEXT: add a4, a7, a2
-; CHECK-NEXT: add a2, a4, a0
+; CHECK-NEXT: add a2, a0, a4
 ; CHECK-NEXT: li t1, 5
 ; CHECK-NEXT: vsetvli zero, t2, e8, m1, ta, ma
 ; CHECK-NEXT: .LBB14_3: # %bb15
@@ -1019,10 +1019,7 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr
 ; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: beqz a2, .LBB15_3
 ; CHECK-NEXT: # %bb.1: # %bb2
-; CHECK-NEXT: addi a2, a2, -16
-; CHECK-NEXT: andi a2, a2, -16
-; CHECK-NEXT: add a2, a2, a0
-; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: add a2, a0, a2
 ; CHECK-NEXT: li a3, 5
 ; CHECK-NEXT: vsetivli zero, 16, e8, mf2, ta, ma
 ; CHECK-NEXT: .LBB15_2: # %bb4
diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll
index 8fdaac3fdffe3..e8ea912246728 100644
--- a/llvm/test/Transforms/LoopIdiom/basic.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic.ll
@@ -682,10 +682,7 @@ define void @PR14241(ptr %s, i64 %size) {
 ; CHECK-NEXT: [[END_PTR:%.*]] = getelementptr inbounds i32, ptr [[S:%.*]], i64 [[END_IDX]]
 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[S]], i64 4
 ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SIZE]], 2
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -8
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], -4
 ; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 4 [[S]], ptr align 4 [[SCEVGEP]], i64 [[TMP4]], i1 false)
 ; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
 ; CHECK: while.body:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/zext-signed-addrec.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/zext-signed-addrec.ll
index fb5cfc7bd93be..f2fa771ac6f29 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/zext-signed-addrec.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/zext-signed-addrec.ll
@@ -31,25 +31,27 @@ define i32 @foo() {
 ; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[INC:%.*]], %[[OUTER_LATCH:.*]] ], [ [[DOTPR]], %[[OUTER_HEADER_PREHEADER]] ]
 ; CHECK-NEXT: br label %[[INNER_LOOP:.*]]
 ; CHECK: [[INNER_LOOP]]:
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], %[[INNER_LOOP]] ], [ 258, %[[OUTER_HEADER]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], %[[INNER_LOOP]] ], [ 516, %[[OUTER_HEADER]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = phi i8 [ 1, %[[OUTER_HEADER]] ], [ [[DEC:%.*]], %[[INNER_LOOP]] ]
-; CHECK-NEXT: [[SHL:%.*]] = and i32 [[LSR_IV]], 510
+; CHECK-NEXT: [[SHL:%.*]] = add i32 [[LSR_IV]], -258
 ; CHECK-NEXT: store i32 [[SHL]], ptr @c, align 4
 ; CHECK-NEXT: [[DEC]] = add i8 [[TMP2]], -1
 ; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV]], -258
 ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i8 [[DEC]], -1
 ; CHECK-NEXT: br i1 [[CMP2]], label %[[INNER_LOOP]], label %[[OUTER_LATCH]]
 ; CHECK: [[OUTER_LATCH]]:
+; CHECK-NEXT: [[LSR_IV_NEXT_LCSSA:%.*]] = phi i32 [ [[LSR_IV_NEXT]], %[[INNER_LOOP]] ]
 ; CHECK-NEXT: store i32 0, ptr @d, align 4
 ; CHECK-NEXT: [[INC]] = add nsw i32 [[TMP1]], 1
 ; CHECK-NEXT: store i32 [[INC]], ptr @b, align 4
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label %[[OUTER_HEADER]], label %[[OUTER_EXIT:.*]]
 ; CHECK: [[OUTER_EXIT]]:
+; CHECK-NEXT: [[LSR_IV_NEXT_LCSSA_LCSSA:%.*]] = phi i32 [ [[LSR_IV_NEXT_LCSSA]], %[[OUTER_LATCH]] ]
 ; CHECK-NEXT: store i8 [[DEC]], ptr @e, align 1
 ; CHECK-NEXT: br label %[[MERGE]]
 ; CHECK: [[MERGE]]:
-; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[DOTPRE]], %[[ENTRY_ELSE]] ], [ [[SHL]], %[[OUTER_EXIT]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[DOTPRE]], %[[ENTRY_ELSE]] ], [ [[LSR_IV_NEXT_LCSSA_LCSSA]], %[[OUTER_EXIT]] ]
 ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar(i32 [[TMP3]])
 ; CHECK-NEXT: br label %[[RETURN:.*]]
 ; CHECK: [[P_ELSE]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index 07ff8e2f9f8e4..8156bb976228d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -591,7 +591,7 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
 ; AVX512-LABEL: @test_gather_not_profitable_pr48429(
 ; AVX512-NEXT: entry:
 ; AVX512-NEXT: [[IDX_EXT:%.*]] = sext i32 [[D:%.*]] to i64
-; AVX512-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i64 [[IDX_EXT]]
+; AVX512-NEXT: [[SCEVGEP1:%.*]] = getelementptr float, ptr [[PTR:%.*]], i64 [[IDX_EXT]]
 ; AVX512-NEXT: [[CMP_NOT10:%.*]] = icmp eq i32 [[D]], 0
 ; AVX512-NEXT: br i1 [[CMP_NOT10]], label [[FOR_END:%.*]], label [[ITER_CHECK:%.*]]
 ; AVX512: iter.check:
@@ -610,17 +610,12 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
 ; AVX512-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 6
 ; AVX512-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 8
 ; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DEST:%.*]], i64 [[TMP8]]
-; AVX512-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP6]], 2
-; AVX512-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 4
-; AVX512-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP10]]
 ; AVX512-NEXT: [[TMP11:%.*]] = mul nsw i64 [[IDX_EXT]], -4
 ; AVX512-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP11]]
-; AVX512-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP4]]
-; AVX512-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP12]]
 ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP1]]
 ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR]], [[SCEVGEP]]
 ; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; AVX512-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP3]]
+; AVX512-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[PTR]]
 ; AVX512-NEXT: [[BOUND15:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP]]
 ; AVX512-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
 ; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
@@ -643,9 +638,9 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
 ; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]]
 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP17]], align 4, !alias.scope [[META8:![0-9]+]]
 ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> [[TMP14]], i32 4, <16 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]]
-; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP16]], align 4, !alias.scope [[META15:![0-9]+]]
+; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x float>, ptr [[TMP16]], align 4, !alias.scope [[META15:![0-9]+]]
 ; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, <16 x ptr> [[TMP14]], i64 1
-; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD8]], <16 x ptr> [[TMP20]], i32 4, <16 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]]
+; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD6]], <16 x ptr> [[TMP20]], i32 4, <16 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]]
 ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; AVX512-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 1024
 ; AVX512-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -660,7 +655,7 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
 ; AVX512-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP38]]
 ; AVX512-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]]
 ; AVX512-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
-; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF17:![0-9]+]]
 ; AVX512: vec.epilog.ph:
 ; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -678,15 +673,15 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
 ; AVX512-NEXT: [[OFFSET_IDX21:%.*]] = mul i64 [[INDEX18]], 4
 ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[OFFSET_IDX21]]
 ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM]]
-; AVX512-NEXT: [[WIDE_LOAD16:%.*]] = load <8 x float>, ptr [[TMP29]], align 4, !alias.scope [[META8]]
-; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD16]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]]
-; AVX512-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x float>, ptr [[TMP28]], align 4, !alias.scope [[META15]]
+; AVX512-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x float>, ptr [[TMP29]], align 4, !alias.scope [[META8]]
+; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD13]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]]
+; AVX512-NEXT: [[WIDE_LOAD14:%.*]] = load <8 x float>, ptr [[TMP28]], align 4, !alias.scope [[META15]]
 ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP26]], i64 1
-; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD17]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]]
+; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD14]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]]
 ; AVX512-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX18]], 8
 ; AVX512-NEXT: [[PTR_IND20]] = getelementptr i8, ptr [[POINTER_PHI19]], i64 512
 ; AVX512-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC10]]
-; AVX512-NEXT: br i1 [[TMP33]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; AVX512-NEXT: br i1 [[TMP33]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; AVX512: vec.epilog.middle.block:
 ; AVX512-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC10]]
 ; AVX512-NEXT: br i1 [[CMP_N17]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -705,15 +700,15 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
 ; AVX512-NEXT: store float [[TMP35]], ptr [[ARRAYIDX5]], align 4
 ; AVX512-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 1
 ; AVX512-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 16
-; AVX512-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[ADD_PTR]]
-; AVX512-NEXT: br i1 [[CMP_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; AVX512-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP1]]
+; AVX512-NEXT: br i1 [[CMP_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; AVX512: for.end:
 ; AVX512-NEXT: ret void
 ;
 ; FVW2-LABEL: @test_gather_not_profitable_pr48429(
 ; FVW2-NEXT: entry:
 ; FVW2-NEXT: [[IDX_EXT:%.*]] = sext i32 [[D:%.*]] to i64
-; FVW2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i64 [[IDX_EXT]]
+; FVW2-NEXT: [[SCEVGEP1:%.*]] = getelementptr float, ptr [[PTR:%.*]], i64 [[IDX_EXT]]
 ; FVW2-NEXT: [[CMP_NOT10:%.*]] = icmp eq i32 [[D]], 0
 ; FVW2-NEXT: br i1 [[CMP_NOT10]], label [[FOR_END:%.*]], label [[FOR_BODY_LR_PH:%.*]]
 ; FVW2: for.body.lr.ph:
@@ -732,17 +727,12 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
 ; FVW2-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 6
 ; FVW2-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 8
 ; FVW2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DEST:%.*]], i64 [[TMP8]]
-; FVW2-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP6]], 2
-; FVW2-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 4
-; FVW2-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP10]]
 ; FVW2-NEXT: [[TMP11:%.*]] = mul nsw i64 [[IDX_EXT]], -4
 ; FVW2-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP11]]
-; FVW2-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP4]]
-; FVW2-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP12]]
 ; FVW2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP1]]
 ; FVW2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR]], [[SCEVGEP]]
 ; FVW2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; FVW2-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP3]]
+; FVW2-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[PTR]]
 ; FVW2-NEXT: [[BOUND15:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP]]
 ; FVW2-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
 ; FVW2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
@@ -798,7 +788,7 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
 ; FVW2-NEXT: store float [[TMP32]], ptr [[ARRAYIDX5]], align 4
 ; FVW2-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 1
 ; FVW2-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 16
-; FVW2-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[ADD_PTR]]
+; FVW2-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP1]]
 ; FVW2-NEXT: br i1 [[CMP_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; FVW2: for.end:
 ; FVW2-NEXT: ret void
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
index 67082c518dc82..371a59874519e 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
@@ -315,24 +315,60 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_p
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 2) ]
 ; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT: [[MUL:%.*]] = shl nuw nsw i64 [[N_EXT]], 1
+; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[N_EXT]], 1
 ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 [[MUL]]) ]
 ; CHECK-NEXT: [[A_END:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL]]
 ; CHECK-NEXT: [[PRE:%.*]] = icmp eq i32 [[N]], 0
 ; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_HEADER_PREHEADER:%.*]]
 ; CHECK: loop.header.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[MUL]], -2
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 2
+; CHECK-NEXT: [[IV_NEXT1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER1]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_LATCH1:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]]
+; CHECK-NEXT: br label [[EXIT_LOOPEXIT]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[IV1:%.*]] = phi ptr [ [[IV_NEXT1]], [[LOOP_LATCH1]] ], [ [[A]], [[LOOP_HEADER_PREHEADER]] ]
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
 ; CHECK: loop.header:
-; CHECK-NEXT: [[IV1:%.*]] = phi ptr [ [[IV_NEXT1:%.*]], [[LOOP_LATCH1:%.*]] ], [ [[A]], [[LOOP_HEADER_PREHEADER]] ]
-; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[IV1]], align 2
+; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ [[IV1]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[IV]], align 2
 ; CHECK-NEXT: [[C_0:%.*]] = icmp eq i16 [[L]], 0
-; CHECK-NEXT: br i1 [[C_0]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_LATCH1]]
+; CHECK-NEXT: br i1 [[C_0]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH]]
 ; CHECK: loop.latch:
-; CHECK-NEXT: [[IV_NEXT1]] = getelementptr inbounds nuw i8, ptr [[IV1]], i64 2
-; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT1]], [[A_END]]
-; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER1]]
+; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[IV]], i64 2
+; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[A_END]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK: exit.loopexit:
-; CHECK-NEXT: [[P_PH:%.*]] = phi ptr [ [[A_END]], [[LOOP_LATCH1]] ], [ [[IV1]], [[LOOP_HEADER1]] ]
+; CHECK-NEXT: [[P_PH:%.*]] = phi ptr [ [[A_END]], [[LOOP_LATCH]] ], [ [[IV]], [[LOOP_HEADER]] ], [ [[A_END]], [[LOOP_LATCH1]] ], [ [[TMP13]], [[VECTOR_EARLY_EXIT]] ]
 ; CHECK-NEXT: br label [[EXIT]]
 ; CHECK: exit:
 ; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[P_PH]], [[EXIT_LOOPEXIT]] ]