Skip to content

Commit

Permalink
[SimpleLoopUnswitch] Skip non-trivial unswitching of cold functions
Browse files Browse the repository at this point in the history
On the current main branch, non-trivial unswitching is skipped for all cold loops. As reported in D129599, skipping these cold loops causes a regression in the SPEC benchmark.
Thus, instead of skipping all cold loops, we now only skip loops in cold functions.

Reviewed By: alexgatea, aeubanks

Differential Revision: https://reviews.llvm.org/D133275
  • Loading branch information
drcut committed Sep 6, 2022
1 parent bb6d12b commit fb45f3c
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 102 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Expand Up @@ -3086,7 +3086,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
// Skip loops in cold functions, as unswitching them brings little benefit
// but increases the code size
if (PSI && PSI->hasProfileSummary() && BFI &&
PSI->isColdBlock(L.getHeader(), BFI)) {
PSI->isFunctionColdInCallGraph(L.getHeader()->getParent(), *BFI)) {
LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
return false;
}
Expand Down
80 changes: 8 additions & 72 deletions llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll
Expand Up @@ -6,89 +6,27 @@

declare i32 @a()
declare i32 @b()

; Check that non-trivial loop unswitching is not applied to loops in cold functions
define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !0 {
; CHECK-LABEL: @f1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[ENTRY_HOT_LOOP:%.*]]
; CHECK: entry_hot_loop:
; CHECK-NEXT: br i1 [[HOT_COND:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER:%.*]], label [[HOT_LOOP_EXIT:%.*]], !prof [[PROF15:![0-9]+]]
; CHECK: hot_loop_begin.preheader:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[HOT_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
; CHECK: hot_loop_begin.preheader.split.us:
; CHECK-NEXT: br label [[HOT_LOOP_BEGIN_US:%.*]]
; CHECK: hot_loop_begin.us:
; CHECK-NEXT: br label [[HOT_LOOP_A_US:%.*]]
; CHECK: hot_loop_a.us:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
; CHECK-NEXT: br label [[HOT_LOOP_LATCH_US:%.*]]
; CHECK: hot_loop_latch.us:
; CHECK-NEXT: [[V1_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1
; CHECK-NEXT: br i1 [[V1_US]], label [[HOT_LOOP_BEGIN_US]], label [[HOT_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
; CHECK: hot_loop_exit.loopexit.split.us:
; CHECK-NEXT: br label [[HOT_LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK: hot_loop_begin.preheader.split:
; CHECK-NEXT: br label [[HOT_LOOP_BEGIN:%.*]]
; CHECK: hot_loop_begin:
; CHECK-NEXT: br label [[HOT_LOOP_B:%.*]]
; CHECK: hot_loop_b:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b()
; CHECK-NEXT: br label [[HOT_LOOP_LATCH:%.*]]
; CHECK: hot_loop_latch:
; CHECK-NEXT: [[V1:%.*]] = load i1, i1* [[PTR]], align 1
; CHECK-NEXT: br i1 [[V1]], label [[HOT_LOOP_BEGIN]], label [[HOT_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
; CHECK: hot_loop_exit.loopexit.split:
; CHECK-NEXT: br label [[HOT_LOOP_EXIT_LOOPEXIT]]
; CHECK: hot_loop_exit.loopexit:
; CHECK-NEXT: br label [[HOT_LOOP_EXIT]]
; CHECK: hot_loop_exit:
; CHECK-NEXT: br label [[ENTRY_COLD_LOOP:%.*]]
; CHECK: entry_cold_loop:
; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF16:![0-9]+]]
; CHECK: cold_loop_begin.preheader:
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]]
; CHECK: cold_loop_begin:
; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
; CHECK: cold_loop_a:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
; CHECK: cold_loop_b:
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @b()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH]]
; CHECK: cold_loop_latch:
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK: cold_loop_exit.loopexit:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT]]
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR:%.*]], align 1
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT:%.*]]
; CHECK: cold_loop_exit:
; CHECK-NEXT: ret void
;
entry:
br label %entry_hot_loop

entry_hot_loop:
br i1 %hot_cond, label %hot_loop_begin, label %hot_loop_exit, !prof !15

hot_loop_begin:
br i1 %cond, label %hot_loop_a, label %hot_loop_b

hot_loop_a:
call i32 @a()
br label %hot_loop_latch

hot_loop_b:
call i32 @b()
br label %hot_loop_latch

hot_loop_latch:
%v1 = load i1, i1* %ptr
br i1 %v1, label %hot_loop_begin, label %hot_loop_exit

hot_loop_exit:
br label %entry_cold_loop

entry_cold_loop:
br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !16
br label %cold_loop_begin

cold_loop_begin:
br i1 %cond, label %cold_loop_a, label %cold_loop_b
Expand All @@ -110,7 +48,7 @@ cold_loop_exit:
}

!llvm.module.flags = !{!1}
!0 = !{!"function_entry_count", i64 400}
!0 = !{!"function_entry_count", i64 0}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
Expand All @@ -125,5 +63,3 @@ cold_loop_exit:
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
!15 = !{!"branch_weights", i32 100, i32 0}
!16 = !{!"branch_weights", i32 0, i32 100}
71 changes: 42 additions & 29 deletions llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll
Expand Up @@ -8,25 +8,38 @@ declare i32 @b()
; Check that non-trivial loop unswitching is applied to loops in a non-cold function,
; even if the loop headers are cold

define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !0 {
define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !14 {
; CHECK-LABEL: @f1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[ENTRY_COLD_LOOP:%.*]]
; CHECK: entry_cold_loop:
; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF15:![0-9]+]]
; CHECK: cold_loop_begin.preheader:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
; CHECK: cold_loop_begin.preheader.split.us:
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN_US:%.*]]
; CHECK: cold_loop_begin.us:
; CHECK-NEXT: br label [[COLD_LOOP_A_US:%.*]]
; CHECK: cold_loop_a.us:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH_US:%.*]]
; CHECK: cold_loop_latch.us:
; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR:%.*]], align 1
; CHECK-NEXT: br i1 [[V2_US]], label [[COLD_LOOP_BEGIN_US]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
; CHECK: cold_loop_exit.loopexit.split.us:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK: cold_loop_begin.preheader.split:
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]]
; CHECK: cold_loop_begin:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
; CHECK: cold_loop_a:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
; CHECK-NEXT: br label [[COLD_LOOP_B:%.*]]
; CHECK: cold_loop_b:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b()
; CHECK-NEXT: br label [[COLD_LOOP_LATCH]]
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
; CHECK: cold_loop_latch:
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR:%.*]], align 1
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
; CHECK: cold_loop_exit.loopexit.split:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT]]
; CHECK: cold_loop_exit.loopexit:
; CHECK-NEXT: br label [[COLD_LOOP_EXIT]]
; CHECK: cold_loop_exit:
Expand All @@ -36,17 +49,17 @@ entry:
br label %entry_cold_loop

entry_cold_loop:
br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !16
br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !15

cold_loop_begin:
br i1 %cond, label %cold_loop_a, label %cold_loop_b

cold_loop_a:
call i32 @a()
%0 = call i32 @a()
br label %cold_loop_latch

cold_loop_b:
call i32 @b()
%1 = call i32 @b()
br label %cold_loop_latch

cold_loop_latch:
Expand All @@ -57,21 +70,21 @@ cold_loop_exit:
ret void
}

!llvm.module.flags = !{!1}
!0 = !{!"function_entry_count", i64 400}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 10}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
!15 = !{!"branch_weights", i32 100, i32 0}
!16 = !{!"branch_weights", i32 0, i32 100}
!llvm.module.flags = !{!0}

!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 400}
!15 = !{!"branch_weights", i32 0, i32 100}

0 comments on commit fb45f3c

Please sign in to comment.