diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 0c8d6fa47b9ae..2abc56d95a393 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -200,6 +200,21 @@ static bool profitableToRotateLoopExitingLatch(Loop *L) {
   return false;
 }
 
+// Returns true if rotating the loop is expected to make the latch exit count
+// computable, i.e. SCEV cannot compute the exit count of the current latch but
+// can compute the one of the header. This form is beneficial to runtime loop
+// unrolling as well as loop vectorization, which requires the loop to be
+// bottom-tested. Always returns false when no ScalarEvolution is provided.
+static bool rotationMakesLoopComputable(Loop *L, ScalarEvolution *SE) {
+  BasicBlock *Header = L->getHeader();
+  // Only the assert reads BI; the attribute keeps NDEBUG builds warning-free.
+  [[maybe_unused]] auto *BI = dyn_cast<BranchInst>(Header->getTerminator());
+  assert(BI && BI->isConditional() && "need header with conditional exit");
+  return SE &&
+         isa<SCEVCouldNotCompute>(SE->getExitCount(L, L->getLoopLatch())) &&
+         !isa<SCEVCouldNotCompute>(SE->getExitCount(L, Header));
+}
+
 static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI,
                                 bool HasConditionalPreHeader,
                                 bool SuccsSwapped) {
@@ -364,7 +379,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
   // Rotate if the loop latch was just simplified. Or if it makes the loop exit
   // count computable. Or if we think it will be profitable.
   if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
-      !profitableToRotateLoopExitingLatch(L))
+      !profitableToRotateLoopExitingLatch(L) &&
+      !rotationMakesLoopComputable(L, SE))
     return Rotated;
 
   // Check size of original header and reject loop if it is very big or we can't
diff --git a/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate-if-computable.ll b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate-if-computable.ll
new file mode 100644
index 0000000000000..2a408fbb364da
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate-if-computable.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt --passes='loop(loop-rotate),loop-unroll' -unroll-runtime=true -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s
+; RUN: opt --passes='loop-unroll' -unroll-runtime=true -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s -check-prefix=NO-ROTATE
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Test that loop gets unrolled if rotated (becomes computable after rotation).
+define void @test(i64 %0, ptr %1) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i64 [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[B1:%.*]] = icmp eq i64 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[B1]], label %[[AFTER:.*]], label %[[BODY_LR_PH:.*]]
+; CHECK: [[BODY_LR_PH]]:
+; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[TMP5]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 7
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[BODY_PROL_PREHEADER:.*]], label %[[BODY_PROL_LOOPEXIT:.*]]
+; CHECK: [[BODY_PROL_PREHEADER]]:
+; CHECK-NEXT: br label %[[BODY_PROL:.*]]
+; CHECK: [[BODY_PROL]]:
+; CHECK-NEXT: [[A2_PROL:%.*]] = phi i64 [ [[TMP0]], %[[BODY_PROL_PREHEADER]] ], [ [[A_PROL:%.*]], %[[HEADER_PROL:.*]] ]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[HEADER_PROL]] ]
+; CHECK-NEXT: [[C_PROL:%.*]] = add i64 [[A2_PROL]], 1
+; CHECK-NEXT: [[D_PROL:%.*]] = load i32, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[E_PROL:%.*]] = icmp eq i32 [[D_PROL]], 0
+; CHECK-NEXT: br i1 [[E_PROL]], label %[[END_LOOPEXIT3:.*]], label %[[HEADER_PROL]]
+; CHECK: [[HEADER_PROL]]:
+; CHECK-NEXT: [[A_PROL]] = phi i64 [ [[C_PROL]], %[[BODY_PROL]] ]
+; CHECK-NEXT: [[B_PROL:%.*]] = icmp eq i64 [[A_PROL]], 0
+; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[BODY_PROL]], label %[[BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[BODY_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT: [[A2_UNR_PH:%.*]] = phi i64 [ [[A_PROL]], %[[HEADER_PROL]] ]
+; CHECK-NEXT: br label %[[BODY_PROL_LOOPEXIT]]
+; CHECK: [[BODY_PROL_LOOPEXIT]]:
+; CHECK-NEXT: [[A2_UNR:%.*]] = phi i64 [ [[TMP0]], %[[BODY_LR_PH]] ], [ [[A2_UNR_PH]], %[[BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP3]], 7
+; CHECK-NEXT: br i1 [[TMP6]], label %[[HEADER_AFTER_CRIT_EDGE:.*]], label %[[BODY_LR_PH_NEW:.*]]
+; CHECK: [[BODY_LR_PH_NEW]]:
+; CHECK-NEXT: br label %[[BODY:.*]]
+; CHECK: [[HEADER:.*]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT:.*]], label %[[HEADER_1:.*]]
+; CHECK: [[HEADER_1]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_2:.*]]
+; CHECK: [[HEADER_2]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_3:.*]]
+; CHECK: [[HEADER_3]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_4:.*]]
+; CHECK: [[HEADER_4]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_5:.*]]
+; CHECK: [[HEADER_5]]:
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_6:.*]]
+; CHECK: [[HEADER_6]]:
+; CHECK-NEXT: [[C_7:%.*]] = add i64 [[A2:%.*]], 8
+; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_7:.*]]
+; CHECK: [[HEADER_7]]:
+; CHECK-NEXT: [[B_7:%.*]] = icmp eq i64 [[C_7]], 0
+; CHECK-NEXT: br i1 [[B_7]], label %[[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA:.*]], label %[[BODY]]
+; CHECK: [[BODY]]:
+; CHECK-NEXT: [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_7]] ]
+; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[D]], 0
+; CHECK-NEXT: br i1 [[E]], label %[[END_LOOPEXIT]], label %[[HEADER]]
+; CHECK: [[END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[END:.*]]
+; CHECK: [[END_LOOPEXIT3]]:
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+; CHECK: [[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA]]:
+; CHECK-NEXT: br label %[[HEADER_AFTER_CRIT_EDGE]]
+; CHECK: [[HEADER_AFTER_CRIT_EDGE]]:
+; CHECK-NEXT: br label %[[AFTER]]
+; CHECK: [[AFTER]]:
+; CHECK-NEXT: ret void
+;
+; NO-ROTATE-LABEL: define void @test(
+; NO-ROTATE-SAME: i64 [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
+; NO-ROTATE-NEXT: [[ENTRY:.*]]:
+; NO-ROTATE-NEXT: br label %[[HEADER:.*]]
+; NO-ROTATE: [[HEADER]]:
+; NO-ROTATE-NEXT: [[A_PROL:%.*]] = phi i64 [ [[TMP0]], %[[ENTRY]] ], [ [[C:%.*]], %[[BODY:.*]] ]
+; NO-ROTATE-NEXT: [[B_PROL:%.*]] = icmp eq i64 [[A_PROL]], 0
+; NO-ROTATE-NEXT: br i1 [[B_PROL]], label %[[AFTER:.*]], label %[[BODY]]
+; NO-ROTATE: [[BODY]]:
+; NO-ROTATE-NEXT: [[C]] = add i64 [[A_PROL]], 1
+; NO-ROTATE-NEXT: [[D:%.*]] = load i32, ptr [[TMP1]], align 4
+; NO-ROTATE-NEXT: [[E:%.*]] = icmp eq i32 [[D]], 0
+; NO-ROTATE-NEXT: br i1 [[E]], label %[[END:.*]], label %[[BODY]]
+; NO-ROTATE: [[END]]:
+; NO-ROTATE-NEXT: ret void
+; NO-ROTATE: [[AFTER]]:
+; NO-ROTATE-NEXT: ret void
+;
+entry:
+  br label %header
+
+header:                                           ; loop header; its branch can leave the loop to %after
+  %a = phi i64 [ %0, %entry ], [ %c, %body ]
+  %b = icmp eq i64 %a, 0                          ; header exit is taken once the counter %a equals 0
+  br i1 %b, label %after, label %body
+
+body:                                             ; latch: branches back to %header or exits to %end
+  %c = add i64 %a, 1
+  %d = load i32, ptr %1, align 4
+  %e = icmp eq i32 %d, 0                          ; latch exit depends on the value loaded from %1
+  br i1 %e, label %end, label %header
+
+end:
+  ret void
+
+after:
+  ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
+;.