Skip to content

Commit 2fef685

Browse files
authored
[llvm][loop-rotate] Allow forcing loop-rotation (#82828)
Many profitable optimizations cannot be performed at -Oz because loops are left unrotated. While rotating loops is itself (minimally) worse for size, many of the optimizations it enables significantly reduce code size, such as memcpy optimizations and other patterns found by loop idiom recognition. Related discussion can be found in issue #50308. This patch adds an experimental, backend-only flag to allow loop header duplication, regardless of the optimization level. Downstream consumers can experiment with this flag, and if it is profitable, we can adjust the compiler's defaults accordingly, and expose any useful frontend flags to opt into the new behavior.
1 parent 91895f5 commit 2fef685

File tree

3 files changed

+77
-4
lines changed

3 files changed

+77
-4
lines changed

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,15 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
209209
cl::Hidden,
210210
cl::desc("Enable the LoopFlatten Pass"));
211211

212+
// Experimentally allow loop header duplication. This should allow for better
213+
// optimization at Oz, since loop-idiom recognition can then recognize things
214+
// like memcpy. If this ends up being useful for many targets, we should drop
215+
// this flag and make a code generation option that can be controlled
216+
// independent of the opt level and exposed through the frontend.
217+
static cl::opt<bool> EnableLoopHeaderDuplication(
218+
"enable-loop-header-duplication", cl::init(false), cl::Hidden,
219+
cl::desc("Enable loop header duplication at any optimization level"));
220+
212221
static cl::opt<bool>
213222
EnableDFAJumpThreading("enable-dfa-jump-thread",
214223
cl::desc("Enable DFA jump threading"),
@@ -630,8 +639,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
630639
/*AllowSpeculation=*/false));
631640

632641
// Disable header duplication in loop rotation at -Oz unless explicitly enabled.
633-
LPM1.addPass(
634-
LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
642+
LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
643+
Level != OptimizationLevel::Oz,
644+
isLTOPreLink(Phase)));
635645
// TODO: Investigate promotion cap for O1.
636646
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
637647
/*AllowSpeculation=*/true));
@@ -812,7 +822,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
812822
// Disable header duplication in loop rotation at -Oz unless explicitly enabled.
813823
MPM.addPass(createModuleToFunctionPassAdaptor(
814824
createFunctionToLoopPassAdaptor(
815-
LoopRotatePass(Level != OptimizationLevel::Oz),
825+
LoopRotatePass(EnableLoopHeaderDuplication ||
826+
Level != OptimizationLevel::Oz),
816827
/*UseMemorySSA=*/false,
817828
/*UseBlockFrequencyInfo=*/false),
818829
PTO.EagerlyInvalidateAnalyses));
@@ -1422,7 +1433,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
14221433
LoopPassManager LPM;
14231434
// First rotate loops that may have been un-rotated by prior passes.
14241435
// Disable header duplication at -Oz unless explicitly enabled.
1425-
LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
1436+
LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
1437+
Level != OptimizationLevel::Oz,
1438+
LTOPreLink));
14261439
// Some loops may have become dead by now. Try to delete them.
14271440
// FIXME: see discussion in https://reviews.llvm.org/D112851,
14281441
// this may need to be revisited once we run GVN before loop deletion

llvm/test/Transforms/LoopRotate/oz-disable.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
; RUN: opt < %s -S -passes='default<Os>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
55
; RUN: opt < %s -S -passes='default<Oz>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
66

7+
;; Make sure -enable-loop-header-duplication overrides the default behavior at Oz
8+
; RUN: opt < %s -S -passes='default<Oz>' -enable-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
9+
710
; Loop should be rotated for -Os but not for -Oz, unless -enable-loop-header-duplication is passed.
811
; OS: rotating Loop at depth 1
912
; OZ-NOT: rotating Loop at depth 1
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
3+
;; Check that -enable-loop-header-duplication at Oz enables certain types of
4+
;; optimizations, for example replacing the loop body w/ a call to memset. If
5+
;; loop idiom recognition begins to recognize unrotated loops, this test will
6+
;; need to be updated.
7+
8+
; RUN: opt -passes='default<Oz>' -S < %s | FileCheck %s --check-prefix=NOROTATION
9+
; RUN: opt -passes='default<Oz>' -S -enable-loop-header-duplication < %s | FileCheck %s --check-prefix=ROTATION
10+
; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s --check-prefix=ROTATION
11+
12+
define void @test(i8* noalias nonnull align 1 %start, i8* %end) unnamed_addr {
13+
; NOROTATION-LABEL: define void @test(
14+
; NOROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
15+
; NOROTATION-NEXT: entry:
16+
; NOROTATION-NEXT: br label [[LOOP_HEADER:%.*]]
17+
; NOROTATION: loop.header:
18+
; NOROTATION-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
19+
; NOROTATION-NEXT: [[_12_I:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
20+
; NOROTATION-NEXT: br i1 [[_12_I]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
21+
; NOROTATION: loop.latch:
22+
; NOROTATION-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
23+
; NOROTATION-NEXT: store i8 1, ptr [[PTR_IV]], align 1
24+
; NOROTATION-NEXT: br label [[LOOP_HEADER]]
25+
; NOROTATION: exit:
26+
; NOROTATION-NEXT: ret void
27+
;
28+
; ROTATION-LABEL: define void @test(
29+
; ROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
30+
; ROTATION-NEXT: entry:
31+
; ROTATION-NEXT: [[_12_I1:%.*]] = icmp eq ptr [[START]], [[END]]
32+
; ROTATION-NEXT: br i1 [[_12_I1]], label [[EXIT:%.*]], label [[LOOP_LATCH_PREHEADER:%.*]]
33+
; ROTATION: loop.latch.preheader:
34+
; ROTATION-NEXT: [[END3:%.*]] = ptrtoint ptr [[END]] to i64
35+
; ROTATION-NEXT: [[START4:%.*]] = ptrtoint ptr [[START]] to i64
36+
; ROTATION-NEXT: [[TMP0:%.*]] = sub i64 [[END3]], [[START4]]
37+
; ROTATION-NEXT: tail call void @llvm.memset.p0.i64(ptr nonnull align 1 [[START]], i8 1, i64 [[TMP0]], i1 false)
38+
; ROTATION-NEXT: br label [[EXIT]]
39+
; ROTATION: exit:
40+
; ROTATION-NEXT: ret void
41+
;
42+
entry:
43+
br label %loop.header
44+
45+
loop.header:
46+
%ptr.iv = phi i8* [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
47+
%_12.i = icmp eq i8* %ptr.iv, %end
48+
br i1 %_12.i, label %exit, label %loop.latch
49+
50+
loop.latch:
51+
%ptr.iv.next = getelementptr inbounds i8, i8* %ptr.iv, i64 1
52+
store i8 1, i8* %ptr.iv, align 1
53+
br label %loop.header
54+
55+
exit:
56+
ret void
57+
}

0 commit comments

Comments
 (0)