diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 4b4073365483e..ca0272cd54869 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1043,6 +1043,26 @@ static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
   return Is.size() <= 2;
 }
 
+// Check if a COPY instruction is cheap.
+static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI) {
+  assert(MI.isCopy() && "Expected COPY instruction");
+  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+
+  // Cross-register-class copies (e.g., between GPR and FPR) are expensive on
+  // AArch64, typically requiring an FMOV instruction with a 2-6 cycle latency.
+  auto getRegClass = [&](Register Reg) -> const TargetRegisterClass * {
+    return Reg.isVirtual()    ? MRI.getRegClass(Reg)
+           : Reg.isPhysical() ? RI.getMinimalPhysRegClass(Reg)
+                              : nullptr;
+  };
+  const TargetRegisterClass *DstRC = getRegClass(MI.getOperand(0).getReg());
+  const TargetRegisterClass *SrcRC = getRegClass(MI.getOperand(1).getReg());
+  if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
+    return false;
+
+  return MI.isAsCheapAsAMove();
+}
+
 // FIXME: this implementation should be micro-architecture dependent, so a
 // micro-architecture target hook should be introduced here in future.
 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
@@ -1056,6 +1076,9 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
   default:
     return MI.isAsCheapAsAMove();
 
+  case TargetOpcode::COPY:
+    return isCheapCopy(MI, RI);
+
   case AArch64::ADDWrs:
   case AArch64::ADDXrs:
   case AArch64::SUBWrs:
diff --git a/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir b/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir
new file mode 100644
index 0000000000000..b552790cd7c52
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir
@@ -0,0 +1,49 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=early-machinelicm -o - %s | FileCheck %s
+
+# This test verifies that cross-register-class copies (e.g., between GPR and FPR)
+# are hoisted out of loops by MachineLICM, as they translate to expensive
+# instructions like FMOV (2-6 cycles) on AArch64.
+
+--- |
+  define void @cross_regclass_copy_hoisted() {
+    ret void
+  }
+...
+---
+name: cross_regclass_copy_hoisted
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cross_regclass_copy_hoisted
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+  ; CHECK-NEXT:   [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK-NEXT:   B %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   FCMPSrr [[COPY]], [[FMOVS0_]], implicit-def $nzcv, implicit $fpcr
+  ; CHECK-NEXT:   Bcc 11, %bb.1, implicit $nzcv
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   RET_ReallyLR
+  bb.0:
+    B %bb.1
+
+  bb.1:
+    ; High latency copy.
+    %0:gpr32 = MOVi32imm 2143289344
+    %1:fpr32 = COPY %0:gpr32
+    %2:fpr32 = FMOVS0
+    FCMPSrr %1, %2, implicit-def $nzcv, implicit $fpcr
+    Bcc 11, %bb.1, implicit $nzcv
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+...