Skip to content

Commit

Permalink
[MachineSink] Add a loop sink limit
Browse files Browse the repository at this point in the history
To make sure compile-times don't regress, add an option to restrict the number
of instructions considered for sinking as alias analysis can be expensive and
for the same reason also skip large blocks.

Differential Revision: https://reviews.llvm.org/D96485
  • Loading branch information
Sjoerd Meijer committed Feb 17, 2021
1 parent ac6c13b commit 7f3170e
Show file tree
Hide file tree
Showing 2 changed files with 195 additions and 0 deletions.
17 changes: 17 additions & 0 deletions llvm/lib/CodeGen/MachineSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ SinkInstsIntoLoop("sink-insts-to-avoid-spills",
"register spills"),
cl::init(false), cl::Hidden);

static cl::opt<unsigned> SinkIntoLoopLimit(
"machine-sink-loop-limit",
cl::desc("The maximum number of instructions considered for loop sinking."),
cl::init(50), cl::Hidden);

STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop");
STATISTIC(NumSplit, "Number of critical edges split");
Expand Down Expand Up @@ -468,7 +473,15 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {

// Walk the candidates in reverse order so that we start with the use
// of a def-use chain, if there is any.
// TODO: Sort the candidates using a cost-model.
unsigned i = 0;
for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
if (i++ == SinkIntoLoopLimit) {
LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to "
"be analysed.");
break;
}

MachineInstr *I = *It;
if (!SinkIntoLoop(L, *I))
break;
Expand Down Expand Up @@ -1243,6 +1256,10 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
return false;
}
if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) {
LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n");
return false;
}

LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
Expand Down
178 changes: 178 additions & 0 deletions llvm/test/CodeGen/AArch64/loop-sink-limit.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \
# RUN: -machine-sink-loop-limit=1 -verify-machineinstrs %s -o - 2>&1 | \
# RUN: FileCheck %s --check-prefix=SINK1
#
# RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \
# RUN: -machine-sink-loop-limit=2 -verify-machineinstrs %s -o - 2>&1 | \
# RUN: FileCheck %s --check-prefix=SINK2

--- |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

@A = external dso_local global [100 x i32], align 4
%struct.A = type { i32, i32, i32, i32, i32, i32 }

define i32 @do_sink_use_is_not_a_copy(i32 %n) {
entry:
%cmp63 = icmp sgt i32 %n, 0
br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
%0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0), align 4
br label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
%sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
ret i32 %sum.0.lcssa

for.body: ; preds = %for.body, %for.body.preheader
%lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
%sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
%div = sdiv i32 %sum.065, %0
%lsr.iv.next = add i32 %lsr.iv, -1
%exitcond.not = icmp eq i32 %lsr.iv.next, 0
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

...
---
name: do_sink_use_is_not_a_copy
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: gpr32, preferred-register: '' }
- { id: 1, class: gpr32all, preferred-register: '' }
- { id: 2, class: gpr32sp, preferred-register: '' }
- { id: 3, class: gpr32, preferred-register: '' }
- { id: 4, class: gpr32all, preferred-register: '' }
- { id: 5, class: gpr32all, preferred-register: '' }
- { id: 6, class: gpr32common, preferred-register: '' }
- { id: 7, class: gpr32, preferred-register: '' }
- { id: 8, class: gpr64common, preferred-register: '' }
- { id: 9, class: gpr32, preferred-register: '' }
- { id: 10, class: gpr32, preferred-register: '' }
- { id: 11, class: gpr32, preferred-register: '' }
liveins:
- { reg: '$w0', virtual-reg: '%6' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
; SINK1-LABEL: name: do_sink_use_is_not_a_copy
; SINK1: bb.0.entry:
; SINK1: successors: %bb.1(0x50000000), %bb.2(0x30000000)
; SINK1: liveins: $w0
; SINK1: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
; SINK1: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
; SINK1: Bcc 11, %bb.2, implicit $nzcv
; SINK1: B %bb.1
; SINK1: bb.1.for.body.preheader:
; SINK1: successors: %bb.3(0x80000000)
; SINK1: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
; SINK1: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
; SINK1: B %bb.3
; SINK1: bb.2.for.cond.cleanup:
; SINK1: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3
; SINK1: $w0 = COPY [[PHI]]
; SINK1: RET_ReallyLR implicit $w0
; SINK1: bb.3.for.body:
; SINK1: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
; SINK1: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3
; SINK1: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3
; SINK1: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]]
; SINK1: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
; SINK1: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
; SINK1: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
; SINK1: Bcc 0, %bb.2, implicit $nzcv
; SINK1: B %bb.3
; SINK2-LABEL: name: do_sink_use_is_not_a_copy
; SINK2: bb.0.entry:
; SINK2: successors: %bb.1(0x50000000), %bb.2(0x30000000)
; SINK2: liveins: $w0
; SINK2: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
; SINK2: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
; SINK2: Bcc 11, %bb.2, implicit $nzcv
; SINK2: B %bb.1
; SINK2: bb.1.for.body.preheader:
; SINK2: successors: %bb.3(0x80000000)
; SINK2: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
; SINK2: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
; SINK2: B %bb.3
; SINK2: bb.2.for.cond.cleanup:
; SINK2: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3
; SINK2: $w0 = COPY [[PHI]]
; SINK2: RET_ReallyLR implicit $w0
; SINK2: bb.3.for.body:
; SINK2: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
; SINK2: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3
; SINK2: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3
; SINK2: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]]
; SINK2: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
; SINK2: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
; SINK2: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
; SINK2: Bcc 0, %bb.2, implicit $nzcv
; SINK2: B %bb.3
bb.0.entry:
successors: %bb.1(0x50000000), %bb.2(0x30000000)
liveins: $w0
%6:gpr32common = COPY $w0
%7:gpr32 = SUBSWri %6, 1, 0, implicit-def $nzcv
Bcc 11, %bb.2, implicit $nzcv
B %bb.1
bb.1.for.body.preheader:
successors: %bb.3(0x80000000)
%8:gpr64common = ADRP target-flags(aarch64-page) @A
%9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
B %bb.3
bb.2.for.cond.cleanup:
%1:gpr32all = PHI %6, %bb.0, %4, %bb.3
$w0 = COPY %1
RET_ReallyLR implicit $w0
bb.3.for.body:
successors: %bb.2(0x04000000), %bb.3(0x7c000000)
%2:gpr32sp = PHI %6, %bb.1, %5, %bb.3
%3:gpr32 = PHI %6, %bb.1, %4, %bb.3
%10:gpr32 = SDIVWr %3, %9
%4:gpr32all = COPY %10
%11:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv
%5:gpr32all = COPY %11
Bcc 0, %bb.2, implicit $nzcv
B %bb.3
...

0 comments on commit 7f3170e

Please sign in to comment.