Skip to content

Commit

Permalink
[AArch64] Fix resource length computation for STP. (#81749)
Browse files Browse the repository at this point in the history
On some uArchs, `STP [s|d], [s|d]` first combines the 2 input registers
into a single register using a vector execution unit. IIUC,
AArch64StorePairSuppress tries to prevent forming STPs in case the
critical resources are the vector units, in order to prevent adding more
pressure on those units.

The implementation, however, simply computes the new critical resource
length by adding the resources for another STP. If load/store units are the
critical resource, this means we increase that length by one, and
incorrectly prevent forming the STP.

This patch adjusts the resource computation by also removing 2 STRs, as
introducing an STP will remove 2 single stores. This should more
accurately reflect the resource usage after introducing an STP, and does
not prevent forming STPs if load/store units are the critical resources;
in those cases, STP can actually help to reduce resource usage.

PR: #81749
  • Loading branch information
fhahn committed Feb 16, 2024
1 parent 0b1c25c commit 2f083b3
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 58 deletions.
18 changes: 13 additions & 5 deletions llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,23 @@ bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB)
MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
unsigned ResLength = BBTrace.getResourceLength();

// Get the machine model's scheduling class for STPQi.
// Get the machine model's scheduling class for STPDi and STRDui.
// Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
const MCSchedClassDesc *SCDesc =
const MCSchedClassDesc *PairSCDesc =
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);

// If a subtarget does not define resources for STPQi, bail here.
if (SCDesc->isValid() && !SCDesc->isVariant()) {
unsigned ResLenWithSTP = BBTrace.getResourceLength(std::nullopt, SCDesc);
unsigned SCIdx2 = TII->get(AArch64::STRDui).getSchedClass();
const MCSchedClassDesc *SingleSCDesc =
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx2);

// If a subtarget does not define resources for STPDi, bail here.
if (PairSCDesc->isValid() && !PairSCDesc->isVariant() &&
SingleSCDesc->isValid() && !SingleSCDesc->isVariant()) {
// Compute the new critical resource length after replacing 2 separate
// STRDui with one STPDi.
unsigned ResLenWithSTP = BBTrace.getResourceLength(
std::nullopt, PairSCDesc, {SingleSCDesc, SingleSCDesc});
if (ResLenWithSTP > ResLength) {
LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
<< " resources " << ResLength << " -> " << ResLenWithSTP
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AArch64/arm64-stur.ll
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,8 @@ declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind

; CHECK-LABEL: unaligned:
; CHECK-NOT: str q0
; CHECK: str d[[REG:[0-9]+]], [x0]
; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG]], v[[REG]], #8
; CHECK: str d[[REG2]], [x0, #8]
; CHECK: ext.16b v[[REG2:[0-9]+]], v[[REG:[0-9]+]], v[[REG]], #8
; CHECK: stp d[[REG]], d[[REG2]], [x0]
define void @unaligned(ptr %p, <4 x i32> %v) nounwind {
store <4 x i32> %v, ptr %p, align 4
ret void
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/merge-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ define void @merge_vec_extract_stores(<4 x float> %v1, ptr %ptr) {
; SPLITTING-LABEL: merge_vec_extract_stores:
; SPLITTING: // %bb.0:
; SPLITTING-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; SPLITTING-NEXT: str d0, [x0, #24]
; SPLITTING-NEXT: str d1, [x0, #32]
; SPLITTING-NEXT: stp d0, d1, [x0, #24]
; SPLITTING-NEXT: ret
;
; MISALIGNED-LABEL: merge_vec_extract_stores:
Expand Down
72 changes: 24 additions & 48 deletions llvm/test/CodeGen/AArch64/storepairsuppress.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s0, s5, s0, s1
; SUPPRESS-NEXT: fadd s1, s4, s2
; SUPPRESS-NEXT: fadd s5, s0, s3
; SUPPRESS-NEXT: str s1, [x8]
; SUPPRESS-NEXT: str s5, [x8, #4]
; SUPPRESS-NEXT: stp s1, s5, [x8]
; SUPPRESS-NEXT: fsub s2, s2, s4
; SUPPRESS-NEXT: fsub s0, s3, s0
; SUPPRESS-NEXT: str s2, [x8, #8]
; SUPPRESS-NEXT: str s0, [x8, #12]
; SUPPRESS-NEXT: stp s2, s0, [x8, #8]
; SUPPRESS-NEXT: ldr x9, [x0, #8]
; SUPPRESS-NEXT: ldp s3, s4, [x9]
; SUPPRESS-NEXT: ldp s6, s7, [x8, #16]
Expand All @@ -60,12 +58,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s3, s17, s3, s4
; SUPPRESS-NEXT: fadd s4, s16, s6
; SUPPRESS-NEXT: fadd s17, s3, s7
; SUPPRESS-NEXT: str s4, [x8, #16]
; SUPPRESS-NEXT: str s17, [x8, #20]
; SUPPRESS-NEXT: stp s4, s17, [x8, #16]
; SUPPRESS-NEXT: fsub s6, s6, s16
; SUPPRESS-NEXT: fsub s3, s7, s3
; SUPPRESS-NEXT: str s6, [x8, #24]
; SUPPRESS-NEXT: str s3, [x8, #28]
; SUPPRESS-NEXT: stp s6, s3, [x8, #24]
; SUPPRESS-NEXT: ldr x9, [x0, #8]
; SUPPRESS-NEXT: ldp s7, s16, [x9]
; SUPPRESS-NEXT: fmul s18, s16, s17
Expand All @@ -74,12 +70,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s4, s16, s4, s17
; SUPPRESS-NEXT: fadd s16, s7, s1
; SUPPRESS-NEXT: fadd s17, s4, s5
; SUPPRESS-NEXT: str s16, [x8]
; SUPPRESS-NEXT: str s17, [x8, #4]
; SUPPRESS-NEXT: stp s16, s17, [x8]
; SUPPRESS-NEXT: fsub s1, s1, s7
; SUPPRESS-NEXT: fsub s4, s5, s4
; SUPPRESS-NEXT: str s1, [x8, #16]
; SUPPRESS-NEXT: str s4, [x8, #20]
; SUPPRESS-NEXT: stp s1, s4, [x8, #16]
; SUPPRESS-NEXT: ldr x10, [x0, #8]
; SUPPRESS-NEXT: lsl x9, x3, #4
; SUPPRESS-NEXT: add x10, x10, x9
Expand All @@ -90,12 +84,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s3, s4, s6, s3
; SUPPRESS-NEXT: fadd s4, s1, s2
; SUPPRESS-NEXT: fadd s5, s3, s0
; SUPPRESS-NEXT: str s4, [x8, #8]
; SUPPRESS-NEXT: str s5, [x8, #12]
; SUPPRESS-NEXT: stp s4, s5, [x8, #8]
; SUPPRESS-NEXT: fsub s1, s2, s1
; SUPPRESS-NEXT: fsub s0, s0, s3
; SUPPRESS-NEXT: str s1, [x8, #24]
; SUPPRESS-NEXT: str s0, [x8, #28]
; SUPPRESS-NEXT: stp s1, s0, [x8, #24]
; SUPPRESS-NEXT: ldr x10, [x0, #8]
; SUPPRESS-NEXT: ldp s0, s1, [x10]
; SUPPRESS-NEXT: ldp s2, s3, [x8, #32]
Expand All @@ -106,12 +98,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s0, s5, s0, s1
; SUPPRESS-NEXT: fadd s1, s4, s2
; SUPPRESS-NEXT: fadd s5, s0, s3
; SUPPRESS-NEXT: str s1, [x8, #32]
; SUPPRESS-NEXT: str s5, [x8, #36]
; SUPPRESS-NEXT: stp s1, s5, [x8, #32]
; SUPPRESS-NEXT: fsub s2, s2, s4
; SUPPRESS-NEXT: fsub s3, s3, s0
; SUPPRESS-NEXT: str s2, [x8, #40]
; SUPPRESS-NEXT: str s3, [x8, #44]
; SUPPRESS-NEXT: stp s2, s3, [x8, #40]
; SUPPRESS-NEXT: ldr x10, [x0, #8]
; SUPPRESS-NEXT: ldp s0, s4, [x10]
; SUPPRESS-NEXT: ldp s6, s7, [x8, #48]
Expand All @@ -122,12 +112,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s0, s17, s0, s4
; SUPPRESS-NEXT: fadd s4, s16, s6
; SUPPRESS-NEXT: fadd s17, s0, s7
; SUPPRESS-NEXT: str s4, [x8, #48]
; SUPPRESS-NEXT: str s17, [x8, #52]
; SUPPRESS-NEXT: stp s4, s17, [x8, #48]
; SUPPRESS-NEXT: fsub s6, s6, s16
; SUPPRESS-NEXT: fsub s0, s7, s0
; SUPPRESS-NEXT: str s6, [x8, #56]
; SUPPRESS-NEXT: str s0, [x8, #60]
; SUPPRESS-NEXT: stp s6, s0, [x8, #56]
; SUPPRESS-NEXT: ldr x10, [x0, #8]
; SUPPRESS-NEXT: ldp s7, s16, [x10]
; SUPPRESS-NEXT: fmul s18, s16, s17
Expand All @@ -136,12 +124,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s4, s16, s4, s17
; SUPPRESS-NEXT: fadd s16, s7, s1
; SUPPRESS-NEXT: fadd s17, s4, s5
; SUPPRESS-NEXT: str s16, [x8, #32]
; SUPPRESS-NEXT: str s17, [x8, #36]
; SUPPRESS-NEXT: stp s16, s17, [x8, #32]
; SUPPRESS-NEXT: fsub s7, s1, s7
; SUPPRESS-NEXT: fsub s4, s5, s4
; SUPPRESS-NEXT: str s7, [x8, #48]
; SUPPRESS-NEXT: str s4, [x8, #52]
; SUPPRESS-NEXT: stp s7, s4, [x8, #48]
; SUPPRESS-NEXT: ldr x10, [x0, #8]
; SUPPRESS-NEXT: add x9, x10, x9
; SUPPRESS-NEXT: ldp s1, s5, [x9]
Expand All @@ -151,12 +137,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s5, s5, s6, s0
; SUPPRESS-NEXT: fadd s6, s1, s2
; SUPPRESS-NEXT: fadd s18, s5, s3
; SUPPRESS-NEXT: str s6, [x8, #40]
; SUPPRESS-NEXT: str s18, [x8, #44]
; SUPPRESS-NEXT: stp s6, s18, [x8, #40]
; SUPPRESS-NEXT: fsub s0, s2, s1
; SUPPRESS-NEXT: fsub s1, s3, s5
; SUPPRESS-NEXT: str s0, [x8, #56]
; SUPPRESS-NEXT: str s1, [x8, #60]
; SUPPRESS-NEXT: stp s0, s1, [x8, #56]
; SUPPRESS-NEXT: ldr x9, [x0, #8]
; SUPPRESS-NEXT: ldp s2, s3, [x9]
; SUPPRESS-NEXT: ldp s5, s19, [x8]
Expand All @@ -166,12 +150,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s2, s17, s2, s3
; SUPPRESS-NEXT: fadd s3, s16, s5
; SUPPRESS-NEXT: fadd s17, s2, s19
; SUPPRESS-NEXT: str s3, [x8]
; SUPPRESS-NEXT: str s17, [x8, #4]
; SUPPRESS-NEXT: stp s3, s17, [x8]
; SUPPRESS-NEXT: fsub s3, s5, s16
; SUPPRESS-NEXT: fsub s2, s19, s2
; SUPPRESS-NEXT: str s3, [x8, #32]
; SUPPRESS-NEXT: str s2, [x8, #36]
; SUPPRESS-NEXT: stp s3, s2, [x8, #32]
; SUPPRESS-NEXT: ldr x9, [x0, #8]
; SUPPRESS-NEXT: add x9, x9, w3, sxtw #3
; SUPPRESS-NEXT: ldp s2, s3, [x9]
Expand All @@ -182,12 +164,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s2, s18, s2, s3
; SUPPRESS-NEXT: fadd s3, s6, s5
; SUPPRESS-NEXT: fadd s17, s2, s16
; SUPPRESS-NEXT: str s3, [x8, #8]
; SUPPRESS-NEXT: str s17, [x8, #12]
; SUPPRESS-NEXT: stp s3, s17, [x8, #8]
; SUPPRESS-NEXT: fsub s3, s5, s6
; SUPPRESS-NEXT: fsub s2, s16, s2
; SUPPRESS-NEXT: str s3, [x8, #40]
; SUPPRESS-NEXT: str s2, [x8, #44]
; SUPPRESS-NEXT: stp s3, s2, [x8, #40]
; SUPPRESS-NEXT: lsl x9, x3, #33
; SUPPRESS-NEXT: ldr x10, [x0, #8]
; SUPPRESS-NEXT: add x9, x10, x9, asr #29
Expand All @@ -199,12 +179,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s2, s4, s2, s3
; SUPPRESS-NEXT: fadd s3, s7, s5
; SUPPRESS-NEXT: fadd s4, s2, s6
; SUPPRESS-NEXT: str s3, [x8, #16]
; SUPPRESS-NEXT: str s4, [x8, #20]
; SUPPRESS-NEXT: stp s3, s4, [x8, #16]
; SUPPRESS-NEXT: fsub s3, s5, s7
; SUPPRESS-NEXT: fsub s2, s6, s2
; SUPPRESS-NEXT: str s3, [x8, #48]
; SUPPRESS-NEXT: str s2, [x8, #52]
; SUPPRESS-NEXT: stp s3, s2, [x8, #48]
; SUPPRESS-NEXT: add w9, w3, w3, lsl #1
; SUPPRESS-NEXT: ldr x10, [x0, #8]
; SUPPRESS-NEXT: add x9, x10, w9, sxtw #3
Expand All @@ -216,12 +194,10 @@ define void @load_store_units_critical(ptr %arg, ptr noundef %arg1, i64 noundef
; SUPPRESS-NEXT: fmadd s1, s1, s2, s3
; SUPPRESS-NEXT: fadd s2, s0, s4
; SUPPRESS-NEXT: fadd s3, s1, s5
; SUPPRESS-NEXT: str s2, [x8, #24]
; SUPPRESS-NEXT: str s3, [x8, #28]
; SUPPRESS-NEXT: stp s2, s3, [x8, #24]
; SUPPRESS-NEXT: fsub s0, s4, s0
; SUPPRESS-NEXT: fsub s1, s5, s1
; SUPPRESS-NEXT: str s0, [x8, #56]
; SUPPRESS-NEXT: str s1, [x8, #60]
; SUPPRESS-NEXT: stp s0, s1, [x8, #56]
; SUPPRESS-NEXT: ret
;
; NOSUPPRESS-LABEL: load_store_units_critical:
Expand Down

0 comments on commit 2f083b3

Please sign in to comment.