Skip to content

Commit

Permalink
[AArch64] MI Scheduler STP combine (#80188)
Browse files Browse the repository at this point in the history
Add opcodes for different store instructions to the target hook that can
enable more STP pairs. This is split off from the patch that does the
same for some load instructions (#79003).

Patch co-authored by Cameron McInally.
  • Loading branch information
sjoerdmeijer committed Feb 6, 2024
1 parent c6b5ea3 commit 35904ec
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 1 deletion.
15 changes: 15 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4206,6 +4206,21 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
switch (FirstOpc) {
default:
return false;
case AArch64::STRSui:
case AArch64::STURSi:
return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
case AArch64::STRDui:
case AArch64::STURDi:
return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
case AArch64::STRQui:
case AArch64::STURQi:
return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
case AArch64::STRWui:
case AArch64::STURWi:
return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
case AArch64::STRXui:
case AArch64::STURXi:
return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
case AArch64::LDRSui:
case AArch64::LDURSi:
return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
Expand Down
72 changes: 71 additions & 1 deletion llvm/test/CodeGen/AArch64/arm64-ldp-cluster.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefixes=CHECK,CHECK-A57
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m3 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s

; Test ldr clustering.
Expand Down Expand Up @@ -227,3 +227,73 @@ entry:
store i64 %r53, ptr %wb
ret void
}

; CHECK: ********** MI Scheduling **********
; CHECK: STURWi_STRWui:%bb.0 entry
; CHECK: Cluster ld/st SU(3) - SU(4)
; CHECK: SU(3): STURWi %{{[0-9]+}}:gpr32
; CHECK: SU(4): STRWui %{{[0-9]+}}:gpr32
;
; Stores two i32 values to adjacent 4-byte slots: %b at byte offset -4 from
; %arg and %c at %arg itself. The FileCheck lines preceding this function
; expect the two stores to be reported as a ld/st cluster by the machine
; scheduler.
;
; %arg is written through (directly and via the derived pointer %r51), so it
; must not carry the `readonly` parameter attribute: writing through a
; readonly pointer argument is undefined behavior.
define void @STURWi_STRWui(ptr nocapture %arg, i32 %b, i32 %c) {
entry:
  ; Address of the lower slot: 4 bytes below %arg.
  %r51 = getelementptr i8, ptr %arg, i64 -4
  store i32 %b, ptr %r51
  store i32 %c, ptr %arg
  ret void
}

; CHECK: ********** MI Scheduling **********
; CHECK: STURXi_STRXui:%bb.0 entry
; CHECK: Cluster ld/st SU(3) - SU(4)
; CHECK: SU(3): STURXi %{{[0-9]+}}:gpr64
; CHECK: SU(4): STRXui %{{[0-9]+}}:gpr64
;
; Stores two i64 values to adjacent 8-byte slots: %b at byte offset -8 from
; %arg and %c at %arg itself. The FileCheck lines preceding this function
; expect the two stores to be reported as a ld/st cluster by the machine
; scheduler.
;
; %arg is written through (directly and via the derived pointer %r51), so it
; must not carry the `readonly` parameter attribute: writing through a
; readonly pointer argument is undefined behavior.
define void @STURXi_STRXui(ptr nocapture %arg, i64 %b, i64 %c) {
entry:
  ; Address of the lower slot: 8 bytes below %arg.
  %r51 = getelementptr i8, ptr %arg, i64 -8
  store i64 %b, ptr %r51
  store i64 %c, ptr %arg
  ret void
}

; CHECK-A57: ********** MI Scheduling **********
; CHECK-A57: STURSi_STRSui:%bb.0 entry
; CHECK-A57: Cluster ld/st SU(3) - SU(4)
; CHECK-A57: SU(3): STURSi %{{[0-9]+}}:fpr32
; CHECK-A57: SU(4): STRSui %{{[0-9]+}}:fpr32
;
; Stores two float values to adjacent 4-byte slots: %b at byte offset -4 from
; %arg and %c at %arg itself. The FileCheck lines preceding this function
; expect the two stores to be reported as a ld/st cluster by the machine
; scheduler.
;
; %arg is written through (directly and via the derived pointer %r51), so it
; must not carry the `readonly` parameter attribute: writing through a
; readonly pointer argument is undefined behavior.
define void @STURSi_STRSui(ptr nocapture %arg, float %b, float %c) {
entry:
  ; Address of the lower slot: 4 bytes below %arg.
  %r51 = getelementptr i8, ptr %arg, i64 -4
  store float %b, ptr %r51
  store float %c, ptr %arg
  ret void
}

; CHECK-A57: ********** MI Scheduling **********
; CHECK-A57: STURDi_STRDui:%bb.0 entry
; CHECK-A57: Cluster ld/st SU(3) - SU(4)
; CHECK-A57: SU(3): STURDi %{{[0-9]+}}:fpr64
; CHECK-A57: SU(4): STRDui %{{[0-9]+}}:fpr64
;
; Stores two <2 x float> values (8 bytes each) to adjacent slots: %b at byte
; offset -8 from %arg and %c at %arg itself. The FileCheck lines preceding
; this function expect the two stores to be reported as a ld/st cluster by the
; machine scheduler.
;
; %arg is written through (directly and via the derived pointer %r51), so it
; must not carry the `readonly` parameter attribute: writing through a
; readonly pointer argument is undefined behavior.
define void @STURDi_STRDui(ptr nocapture %arg, <2 x float> %b, <2 x float> %c) {
entry:
  ; Address of the lower slot: 8 bytes below %arg.
  %r51 = getelementptr i8, ptr %arg, i64 -8
  store <2 x float> %b, ptr %r51
  store <2 x float> %c, ptr %arg
  ret void
}

; CHECK-A57: ********** MI Scheduling **********
; CHECK-A57: STURQi_STRQui:%bb.0 entry
; CHECK-A57: Cluster ld/st SU(3) - SU(4)
; CHECK-A57: SU(3): STURQi %{{[0-9]+}}:fpr128
; CHECK-A57: SU(4): STRQui %{{[0-9]+}}:fpr128
;
; Stores two <2 x double> values (16 bytes each) to adjacent slots: %b at byte
; offset -16 from %arg and %c at %arg itself. The FileCheck lines preceding
; this function expect the two stores to be reported as a ld/st cluster by the
; machine scheduler.
;
; %arg is written through (directly and via the derived pointer %r51), so it
; must not carry the `readonly` parameter attribute: writing through a
; readonly pointer argument is undefined behavior.
define void @STURQi_STRQui(ptr nocapture %arg, <2 x double> %b, <2 x double> %c) {
entry:
  ; Address of the lower slot: 16 bytes below %arg.
  %r51 = getelementptr i8, ptr %arg, i64 -16
  store <2 x double> %b, ptr %r51
  store <2 x double> %c, ptr %arg
  ret void
}

0 comments on commit 35904ec

Please sign in to comment.