Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1144,6 +1144,28 @@ static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
return Is.size() <= 2;
}

// Decide whether a COPY instruction should be treated as cheap as a move.
//
// A COPY whose destination and source register classes have no common
// sub-class (e.g. GPR <-> FPR) is reported as not cheap. Every other COPY
// defers to the generic MachineInstr::isAsCheapAsAMove() answer.
//
// \p MI must be a COPY (asserted). \p RI supplies the register-class queries.
//
// NOTE(review): sub-register indices on the operands are not inspected here;
// confirm that same-bank copies through a sub-register (classes of different
// width) are classified as intended.
static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI) {
  assert(MI.isCopy() && "Expected COPY instruction");
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();

  // Map a register to its register class, or nullptr when none is known.
  // getRegClassOrNull() is used for virtual registers so that a register
  // without an assigned class yields nullptr (which the check below already
  // tolerates) instead of tripping the assertion inside getRegClass().
  auto GetRegClass = [&](Register Reg) -> const TargetRegisterClass * {
    if (Reg.isVirtual())
      return MRI.getRegClassOrNull(Reg);
    if (Reg.isPhysical())
      return RI.getMinimalPhysRegClass(Reg);
    return nullptr;
  };

  // Cross-bank copies (e.g., between GPR and FPR) are expensive on AArch64,
  // typically requiring an FMOV instruction with a 2-6 cycle latency.
  const TargetRegisterClass *DstRC = GetRegClass(MI.getOperand(0).getReg());
  const TargetRegisterClass *SrcRC = GetRegClass(MI.getOperand(1).getReg());
  if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
    return false;

  // Same-bank copy, or a class could not be determined: use the generic
  // answer.
  return MI.isAsCheapAsAMove();
}

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
Expand All @@ -1157,6 +1179,9 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
default:
return MI.isAsCheapAsAMove();

case TargetOpcode::COPY:
return isCheapCopy(MI, RI);

case AArch64::ADDWrs:
case AArch64::ADDXrs:
case AArch64::SUBWrs:
Expand Down
197 changes: 197 additions & 0 deletions llvm/test/CodeGen/AArch64/licm-regclass-copy.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64 -run-pass=early-machinelicm -o - %s | FileCheck %s

# This test verifies that cross-bank copies (e.g., GPR to FPR, FPR to GPR)
# are hoisted out of loops by MachineLICM, as they are expensive on AArch64.

--- |
; External callees targeted by the BL instructions in the machine functions
; below.
declare void @use_float(float)
declare void @use_int(i32)

; Empty IR functions carrying the same names as the machine functions below.
define void @gpr_to_fpr_virtual_copy_hoisted() {
ret void
}

define void @gpr_to_fpr_physical_copy_hoisted() {
ret void
}

define void @fpr_to_gpr_virtual_copy_hoisted() {
ret void
}
...
# Virtual gpr32 -> virtual fpr32 copy: the input places the COPY in the loop
# body (bb.2); the CHECK lines expect it in bb.0, ahead of the loop.
---
name: gpr_to_fpr_virtual_copy_hoisted
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: gpr_to_fpr_virtual_copy_hoisted
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY $wzr
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32all = COPY [[COPY2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[COPY1]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.0, %5, %bb.2
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv
; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $s0 = COPY [[COPY4]]
; CHECK-NEXT: BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: RET_ReallyLR
bb.0:
liveins: $w0, $w1
%1:gpr32 = COPY $w0
%0:gpr32 = COPY $w1
%3:gpr32all = COPY $wzr
%2:gpr32all = COPY %3:gpr32all

bb.1:
%4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2
%6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv
Bcc 1, %bb.3, implicit $nzcv
B %bb.2

bb.2:
%7:fpr32 = COPY %0:gpr32
$s0 = COPY %7:fpr32
BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
%8:gpr32sp = ADDWri %4:gpr32common, 1, 0
%5:gpr32all = COPY %8:gpr32sp
B %bb.1

bb.3:
RET_ReallyLR

...
# Physical $wzr -> virtual fpr32 copy: the input places the COPY in the loop
# body (bb.2); the CHECK lines expect it in bb.0, ahead of the loop.
---
name: gpr_to_fpr_physical_copy_hoisted
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: gpr_to_fpr_physical_copy_hoisted
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY $wzr
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $wzr
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY2]], %bb.0, %4, %bb.2
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv
; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $s0 = COPY [[COPY3]]
; CHECK-NEXT: BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: RET_ReallyLR
bb.0:
liveins: $w0
%1:gpr32 = COPY $w0
%3:gpr32all = COPY $wzr
%2:gpr32all = COPY %3:gpr32all

bb.1:
%4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2
%6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv
Bcc 1, %bb.3, implicit $nzcv
B %bb.2

bb.2:
%7:fpr32 = COPY $wzr
$s0 = COPY %7:fpr32
BL @use_float, implicit-def dead $lr, implicit $sp, implicit $s0, implicit-def $sp
%8:gpr32sp = ADDWri %4:gpr32common, 1, 0
%5:gpr32all = COPY %8:gpr32sp
B %bb.1

bb.3:
RET_ReallyLR

...
# Virtual fpr32 -> virtual gpr32 copy: the input places the COPY in the loop
# body (bb.2); the CHECK lines expect it in bb.0, ahead of the loop.
---
name: fpr_to_gpr_virtual_copy_hoisted
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: fpr_to_gpr_virtual_copy_hoisted
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w0, $s0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY $wzr
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32all = COPY [[COPY2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY1]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32common = PHI [[COPY3]], %bb.0, %5, %bb.2
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[PHI]], [[COPY]], implicit-def $nzcv
; CHECK-NEXT: Bcc 1, %bb.3, implicit $nzcv
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $w0 = COPY [[COPY4]]
; CHECK-NEXT: BL @use_int, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[PHI]], 1, 0
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr32all = COPY [[ADDWri]]
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: RET_ReallyLR
bb.0:
liveins: $w0, $s0
%1:gpr32 = COPY $w0
%0:fpr32 = COPY $s0
%3:gpr32all = COPY $wzr
%2:gpr32all = COPY %3:gpr32all

bb.1:
%4:gpr32common = PHI %2:gpr32all, %bb.0, %5:gpr32all, %bb.2
%6:gpr32 = SUBSWrr %4:gpr32common, %1:gpr32, implicit-def $nzcv
Bcc 1, %bb.3, implicit $nzcv
B %bb.2

bb.2:
%7:gpr32 = COPY %0:fpr32
$w0 = COPY %7:gpr32
BL @use_int, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp
%8:gpr32sp = ADDWri %4:gpr32common, 1, 0
%5:gpr32all = COPY %8:gpr32sp
B %bb.1

bb.3:
RET_ReallyLR

...
Loading