diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 79f43572cb9b0..cd3b6c1a095a7 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -73,7 +73,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA) { + LoopInfo *LI, bool PreserveLCSSA, + ScalarEvolution &SE) { // Loop structure should be the following: // Preheader // PrologHeader @@ -133,6 +134,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, PN.setIncomingValueForBlock(NewPreHeader, NewPN); else PN.addIncoming(NewPN, PrologExit); + SE.forgetValue(&PN); } } @@ -927,7 +929,7 @@ bool llvm::UnrollRuntimeLoopRemainder( // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, - NewPreHeader, VMap, DT, LI, PreserveLCSSA); + NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE); } // If this loop is nested, then the loop unroller changes the code in the any diff --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll index 7a29f1cd6d35b..379e9528c3faa 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='loop-unroll' -S %s | FileCheck %s +; RUN: opt -passes='loop-unroll' -S %s | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-ni:1-p2:32:8:8:32-ni:2" @@ -162,3 +162,220 @@ exit.deopt: call void (...) @llvm.experimental.deoptimize.isVoid(i32 0) [ "deopt"() ] ret void } + +declare void @bar() +declare void @use.2(ptr, i32) + +define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 { +; CHECK-LABEL: @pr56286( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[X:%.*]], i64 1) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[SMAX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[X]] +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[OUTER_P:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[L_1_LCSSA:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[INNER_1_HEADER_PROL_PREHEADER:%.*]], label [[INNER_1_HEADER_PROL_LOOPEXIT:%.*]] +; CHECK: inner.1.header.prol.preheader: +; CHECK-NEXT: br label [[INNER_1_HEADER_PROL:%.*]] +; CHECK: inner.1.header.prol: +; CHECK-NEXT: [[INNER_1_IV_PROL:%.*]] = phi i64 [ [[X]], [[INNER_1_HEADER_PROL_PREHEADER]] ], [ [[INNER_1_IV_NEXT_PROL:%.*]], [[INNER_1_LATCH_PROL:%.*]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[INNER_1_HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[INNER_1_LATCH_PROL]] ] +; CHECK-NEXT: [[CMP_1_PROL:%.*]] = icmp sgt i32 [[OUTER_P]], 0 +; CHECK-NEXT: br i1 [[CMP_1_PROL]], label [[EXIT_DEOPT_LOOPEXIT1:%.*]], label [[INNER_1_LATCH_PROL]] +; CHECK: inner.1.latch.prol: +; CHECK-NEXT: [[L_1_PROL:%.*]] = load i32, ptr [[SRC:%.*]], align 4 +; CHECK-NEXT: store i32 [[L_1_PROL]], ptr [[DST:%.*]], align 8 +; CHECK-NEXT: [[INNER_1_IV_NEXT_PROL]] = add i64 [[INNER_1_IV_PROL]], 1 +; CHECK-NEXT: [[CMP_2_PROL:%.*]] = icmp sgt i64 [[INNER_1_IV_PROL]], 0 +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[INNER_1_HEADER_PROL]], label [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: inner.1.header.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[L_1_LCSSA_UNR_PH:%.*]] = phi i32 [ [[L_1_PROL]], [[INNER_1_LATCH_PROL]] ] +; CHECK-NEXT: [[INNER_1_IV_UNR_PH:%.*]] = phi i64 [ [[INNER_1_IV_NEXT_PROL]], [[INNER_1_LATCH_PROL]] ] +; CHECK-NEXT: br label [[INNER_1_HEADER_PROL_LOOPEXIT]] +; CHECK: inner.1.header.prol.loopexit: +; CHECK-NEXT: [[L_1_LCSSA_UNR:%.*]] = phi i32 [ undef, [[OUTER_HEADER]] ], [ [[L_1_LCSSA_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[INNER_1_IV_UNR:%.*]] = phi i64 [ [[X]], [[OUTER_HEADER]] ], [ [[INNER_1_IV_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 7 +; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]] +; CHECK: outer.header.new: +; CHECK-NEXT: br label [[INNER_1_HEADER:%.*]] +; CHECK: inner.1.header: +; CHECK-NEXT: [[INNER_1_IV:%.*]] = phi i64 [ [[INNER_1_IV_UNR]], [[OUTER_HEADER_NEW]] ], [ [[INNER_1_IV_NEXT_7:%.*]], [[INNER_1_LATCH_7:%.*]] ] +; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[OUTER_P]], 0 +; CHECK-NEXT: br i1 [[CMP_1]], label [[EXIT_DEOPT_LOOPEXIT:%.*]], label [[INNER_1_LATCH:%.*]] +; CHECK: inner.1.latch: +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[L_1]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INNER_1_IV_NEXT:%.*]] = add i64 [[INNER_1_IV]], 1 +; CHECK-NEXT: [[CMP_1_1:%.*]] = icmp sgt i32 [[OUTER_P]], 0 +; CHECK-NEXT: br i1 [[CMP_1_1]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_1:%.*]] +; CHECK: inner.1.latch.1: +; CHECK-NEXT: [[L_1_1:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[L_1_1]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INNER_1_IV_NEXT_1:%.*]] = add i64 [[INNER_1_IV_NEXT]], 1 +; CHECK-NEXT: [[CMP_1_2:%.*]] = icmp sgt i32 [[OUTER_P]], 0 +; CHECK-NEXT: br i1 [[CMP_1_2]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_2:%.*]] +; CHECK: inner.1.latch.2: +; CHECK-NEXT: [[L_1_2:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[L_1_2]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INNER_1_IV_NEXT_2:%.*]] = add i64 [[INNER_1_IV_NEXT_1]], 1 +; CHECK-NEXT: [[CMP_1_3:%.*]] = icmp sgt i32 [[OUTER_P]], 0 +; CHECK-NEXT: br i1 [[CMP_1_3]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_3:%.*]] +; CHECK: inner.1.latch.3: +; CHECK-NEXT: [[L_1_3:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[L_1_3]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INNER_1_IV_NEXT_3:%.*]] = add i64 [[INNER_1_IV_NEXT_2]], 1 +; CHECK-NEXT: [[CMP_1_4:%.*]] = icmp sgt i32 [[OUTER_P]], 0 +; CHECK-NEXT: br i1 [[CMP_1_4]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_4:%.*]] +; CHECK: inner.1.latch.4: +; CHECK-NEXT: [[L_1_4:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[L_1_4]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INNER_1_IV_NEXT_4:%.*]] = add i64 [[INNER_1_IV_NEXT_3]], 1 +; CHECK-NEXT: [[CMP_1_5:%.*]] = icmp sgt i32 [[OUTER_P]], 0 +; CHECK-NEXT: br i1 [[CMP_1_5]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_5:%.*]] +; CHECK: inner.1.latch.5: +; CHECK-NEXT: [[L_1_5:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[L_1_5]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INNER_1_IV_NEXT_5:%.*]] = add i64 [[INNER_1_IV_NEXT_4]], 1 +; CHECK-NEXT: [[CMP_1_6:%.*]] = icmp sgt i32 [[OUTER_P]], 0 +; CHECK-NEXT: br i1 [[CMP_1_6]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_6:%.*]] +; CHECK: inner.1.latch.6: +; CHECK-NEXT: [[L_1_6:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[L_1_6]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INNER_1_IV_NEXT_6:%.*]] = add i64 [[INNER_1_IV_NEXT_5]], 1 +; CHECK-NEXT: [[CMP_1_7:%.*]] = icmp sgt i32 [[OUTER_P]], 0 +; CHECK-NEXT: br i1 [[CMP_1_7]], label [[EXIT_DEOPT_LOOPEXIT]], label [[INNER_1_LATCH_7]] +; CHECK: inner.1.latch.7: +; CHECK-NEXT: [[L_1_7:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[L_1_7]], ptr [[DST]], align 8 +; CHECK-NEXT: [[INNER_1_IV_NEXT_7]] = add i64 [[INNER_1_IV_NEXT_6]], 1 +; CHECK-NEXT: [[CMP_2_7:%.*]] = icmp sgt i64 [[INNER_1_IV_NEXT_6]], 0 +; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF5:![0-9]+]] +; CHECK: outer.middle.unr-lcssa: +; CHECK-NEXT: [[L_1_LCSSA_PH:%.*]] = phi i32 [ [[L_1_7]], [[INNER_1_LATCH_7]] ] +; CHECK-NEXT: br label [[OUTER_MIDDLE]] +; CHECK: outer.middle: +; CHECK-NEXT: [[L_1_LCSSA]] = phi i32 [ [[L_1_LCSSA_UNR]], [[INNER_1_HEADER_PROL_LOOPEXIT]] ], [ [[L_1_LCSSA_PH]], [[OUTER_MIDDLE_UNR_LCSSA]] ] +; CHECK-NEXT: br label [[INNER_2:%.*]] +; CHECK: inner.2: +; CHECK-NEXT: [[INNER_2_IV:%.*]] = phi i32 [ [[L_1_LCSSA]], [[OUTER_MIDDLE]] ], [ [[INNER_2_IV_NEXT_2:%.*]], [[INNER_2]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ 0, [[OUTER_MIDDLE]] ], [ [[TMP33_2:%.*]], [[INNER_2]] ] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[SRC]], align 8 +; CHECK-NEXT: [[INNER_2_IV_NEXT:%.*]] = add i32 [[INNER_2_IV]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = load ptr, ptr [[PTR_SRC:%.*]], align 8 +; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[INNER_2_IV]], [[L_2]] +; CHECK-NEXT: [[TMP281:%.*]] = call i32 @use.2(ptr [[TMP27]], i32 [[ADD_1]]) +; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP15]], 16 +; CHECK-NEXT: [[TMP32:%.*]] = add nuw i32 [[TMP31]], 262144 +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[L_2_1:%.*]] = load i32, ptr [[SRC]], align 8 +; CHECK-NEXT: [[INNER_2_IV_NEXT_1:%.*]] = add i32 [[INNER_2_IV_NEXT]], 1 +; CHECK-NEXT: [[TMP27_1:%.*]] = load ptr, ptr [[PTR_SRC]], align 8 +; CHECK-NEXT: [[ADD_1_1:%.*]] = add i32 [[INNER_2_IV_NEXT]], [[L_2_1]] +; CHECK-NEXT: [[TMP281_1:%.*]] = call i32 @use.2(ptr [[TMP27_1]], i32 [[ADD_1_1]]) +; CHECK-NEXT: [[TMP32_1:%.*]] = add nuw i32 [[TMP32]], 262144 +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[L_2_2:%.*]] = load i32, ptr [[SRC]], align 8 +; CHECK-NEXT: [[INNER_2_IV_NEXT_2]] = add i32 [[INNER_2_IV_NEXT_1]], 1 +; CHECK-NEXT: [[TMP27_2:%.*]] = load ptr, ptr [[PTR_SRC]], align 8 +; CHECK-NEXT: [[ADD_1_2:%.*]] = add i32 [[INNER_2_IV_NEXT_1]], [[L_2_2]] +; CHECK-NEXT: [[TMP281_2:%.*]] = call i32 @use.2(ptr [[TMP27_2]], i32 [[ADD_1_2]]) +; CHECK-NEXT: [[TMP32_2:%.*]] = add nuw i32 [[TMP32_1]], 262144 +; CHECK-NEXT: [[TMP33_2]] = ashr exact i32 [[TMP32_2]], 16 +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP_3_2:%.*]] = icmp sgt i32 [[TMP32_1]], 2031616 +; CHECK-NEXT: br i1 [[CMP_3_2]], label [[OUTER_LATCH]], label [[INNER_2]] +; CHECK: outer.latch: +; CHECK-NEXT: br label [[OUTER_HEADER]] +; CHECK: exit.deopt.loopexit: +; CHECK-NEXT: br label [[EXIT_DEOPT:%.*]] +; CHECK: exit.deopt.loopexit1: +; CHECK-NEXT: br label [[EXIT_DEOPT]] +; CHECK: exit.deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 0) [ "deopt"() ] +; CHECK-NEXT: ret void +; +bb: + br label %outer.header + +outer.header: + %outer.p = phi i32 [ 0, %bb ], [ %l.1, %outer.latch ] + br label %inner.1.header + +inner.1.header: + %inner.1.iv = phi i64 [ %x, %outer.header ], [ %inner.1.iv.next, %inner.1.latch ] + %cmp.1 = icmp sgt i32 %outer.p, 0 + br i1 %cmp.1, label %exit.deopt, label %inner.1.latch + +inner.1.latch: + %l.1 = load i32, ptr %src, align 4 + store i32 %l.1, ptr %dst, align 8 + %inner.1.iv.next = add i64 %inner.1.iv, 1 + %cmp.2 = icmp sgt i64 %inner.1.iv, 0 + br i1 %cmp.2, label %outer.middle, label %inner.1.header, !prof !1 + +outer.middle: + br label %inner.2 + +inner.2: + %inner.2.iv = phi i32 [ %l.1, %outer.middle ], [ %inner.2.iv.next, %inner.2 ] + %tmp15 = phi i32 [ 0, %outer.middle ], [ %tmp33, %inner.2 ] + %l.2 = load i32, ptr %src , align 8 + %l.3 = load i32, ptr %dst, align 4 + %inner.2.iv.next = add i32 %inner.2.iv, 1 + %tmp27 = load ptr, ptr %ptr.src + %add.1 = add i32 %inner.2.iv, %l.2 + %add.2 = add i32 %add.1, %l.3 + %tmp281 = call i32 @use.2(ptr %tmp27, i32 %add.1) + %tmp31 = shl nuw nsw i32 %tmp15, 16 + %tmp32 = add nuw i32 %tmp31, 262144 + %tmp33 = ashr exact i32 %tmp32, 16 + call void @bar() + call void @bar() + call void @bar() + call void @bar() + call void @bar() + call void @bar() + call void @bar() + call void @bar() + %cmp.3 = icmp sgt i32 %tmp31, 2031616 + br i1 %cmp.3, label %outer.latch, label %inner.2 + +outer.latch: + br label %outer.header + +exit.deopt: + call void (...) @llvm.experimental.deoptimize.isVoid(i32 0) [ "deopt"() ] + ret void +} + +!0 = !{!"function_entry_count", i64 32768} +!1 = !{!"branch_weights", i32 1, i32 32}