diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index a7e44602066fd..0bceb322726d1 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1102,67 +1102,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return true; } -static unsigned selectIntToFPConvOpc(unsigned GenericOpc, LLT DstTy, - LLT SrcTy) { - if (!DstTy.isScalar() || !SrcTy.isScalar()) - return GenericOpc; - - const unsigned DstSize = DstTy.getSizeInBits(); - const unsigned SrcSize = SrcTy.getSizeInBits(); - - switch (DstSize) { - case 32: - switch (SrcSize) { - case 32: - switch (GenericOpc) { - case TargetOpcode::G_SITOFP: - return AArch64::SCVTFUWSri; - case TargetOpcode::G_UITOFP: - return AArch64::UCVTFUWSri; - default: - return GenericOpc; - } - case 64: - switch (GenericOpc) { - case TargetOpcode::G_SITOFP: - return AArch64::SCVTFUXSri; - case TargetOpcode::G_UITOFP: - return AArch64::UCVTFUXSri; - default: - return GenericOpc; - } - default: - return GenericOpc; - } - case 64: - switch (SrcSize) { - case 32: - switch (GenericOpc) { - case TargetOpcode::G_SITOFP: - return AArch64::SCVTFUWDri; - case TargetOpcode::G_UITOFP: - return AArch64::UCVTFUWDri; - default: - return GenericOpc; - } - case 64: - switch (GenericOpc) { - case TargetOpcode::G_SITOFP: - return AArch64::SCVTFUXDri; - case TargetOpcode::G_UITOFP: - return AArch64::UCVTFUXDri; - default: - return GenericOpc; - } - default: - return GenericOpc; - } - default: - return GenericOpc; - }; - return GenericOpc; -} - MachineInstr * AArch64InstructionSelector::emitSelect(Register Dst, Register True, Register False, AArch64CC::CondCode CC, @@ -3509,21 +3448,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return true; } - case TargetOpcode::G_SITOFP: - case TargetOpcode::G_UITOFP: { - const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), - SrcTy = MRI.getType(I.getOperand(1).getReg()); - const unsigned NewOpc = selectIntToFPConvOpc(Opcode, DstTy, SrcTy); - if (NewOpc == Opcode) - return false; - - I.setDesc(TII.get(NewOpc)); - constrainSelectedInstRegOperands(I, TII, TRI, RBI); - I.setFlags(MachineInstr::NoFPExcept); - - return true; - } - case TargetOpcode::G_FREEZE: return selectCopy(I, TII, MRI, TRI, RBI); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 31954e7954c03..1b919abd222e4 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -13,6 +13,7 @@ #include "AArch64RegisterBankInfo.h" #include "AArch64RegisterInfo.h" +#include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -492,7 +493,7 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI, bool AArch64RegisterBankInfo::isPHIWithFPConstraints( const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, const unsigned Depth) const { + const AArch64RegisterInfo &TRI, const unsigned Depth) const { if (!MI.isPHI() || Depth > MaxFPRSearchDepth) return false; @@ -506,7 +507,7 @@ bool AArch64RegisterBankInfo::isPHIWithFPConstraints( bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, + const AArch64RegisterInfo &TRI, unsigned Depth) const { unsigned Op = MI.getOpcode(); if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI)) @@ -544,7 +545,7 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, + const AArch64RegisterInfo &TRI, unsigned Depth) const { switch (MI.getOpcode()) { case TargetOpcode::G_FPTOSI: @@ -582,7 +583,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, + const AArch64RegisterInfo &TRI, unsigned Depth) const { switch (MI.getOpcode()) { case AArch64::G_DUP: @@ -618,6 +619,19 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, return hasFPConstraints(MI, MRI, TRI, Depth); } +bool AArch64RegisterBankInfo::prefersFPUse(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const AArch64RegisterInfo &TRI, + unsigned Depth) const { + switch (MI.getOpcode()) { + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: + return MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == + MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + } + return onlyDefinesFP(MI, MRI, TRI, Depth); +} + bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const { // GMemOperation because we also want to match indexed loads. auto *MemOp = cast(&MI); @@ -671,8 +685,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetSubtargetInfo &STI = MF.getSubtarget(); - const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + const AArch64Subtarget &STI = MF.getSubtarget(); + const AArch64RegisterInfo &TRI = *STI.getRegisterInfo(); switch (Opc) { // G_{F|S|U}REM are not listed because they are not legal. @@ -826,7 +840,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Integer to FP conversions don't necessarily happen between GPR -> FPR // regbanks. They can also be done within an FPR register. Register SrcReg = MI.getOperand(1).getReg(); - if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank) + if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank && + MRI.getType(SrcReg).getSizeInBits() == + MRI.getType(MI.getOperand(0).getReg()).getSizeInBits()) OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; else OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; @@ -895,13 +911,13 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // instruction. // // Int->FP conversion operations are also captured in - // onlyDefinesFP(). + // prefersFPUse(). if (isPHIWithFPConstraints(UseMI, MRI, TRI)) return true; return onlyUsesFP(UseMI, MRI, TRI) || - onlyDefinesFP(UseMI, MRI, TRI); + prefersFPUse(UseMI, MRI, TRI); })) OpRegBankIdx[0] = PMI_FirstFPR; break; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h index 3abbc1b68b5be..b2de031772108 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -22,6 +22,7 @@ namespace llvm { class TargetRegisterInfo; +class AArch64RegisterInfo; class AArch64GenRegisterBankInfo : public RegisterBankInfo { protected: @@ -123,21 +124,26 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo { /// \returns true if \p MI is a PHI that its def is used by /// any instruction that onlyUsesFP. bool isPHIWithFPConstraints(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - unsigned Depth = 0) const; + const MachineRegisterInfo &MRI, + const AArch64RegisterInfo &TRI, + unsigned Depth = 0) const; /// \returns true if \p MI only uses and defines FPRs. bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const AArch64RegisterInfo &TRI, + unsigned Depth = 0) const; /// \returns true if \p MI only uses FPRs. bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const AArch64RegisterInfo &TRI, unsigned Depth = 0) const; /// \returns true if \p MI only defines FPRs. bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + const AArch64RegisterInfo &TRI, unsigned Depth = 0) const; + + /// \returns true if \p MI can take both fpr and gpr uses, but prefers fp. + bool prefersFPUse(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const AArch64RegisterInfo &TRI, unsigned Depth = 0) const; /// \returns true if the load \p MI is likely loading from a floating-point /// type. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir index 0ee0d9587e88b..a58492114003a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir @@ -357,31 +357,6 @@ body: | $d0 = COPY %1(s64) ... ---- -name: sitofp_s64_s32_fpr_both -legalized: true -regBankSelected: true - -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - -body: | - bb.0: - liveins: $s0 - - ; CHECK-LABEL: name: sitofp_s64_s32_fpr_both - ; CHECK: liveins: $s0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] - ; CHECK-NEXT: [[SCVTFUWDri:%[0-9]+]]:fpr64 = nofpexcept SCVTFUWDri [[COPY1]] - ; CHECK-NEXT: $d0 = COPY [[SCVTFUWDri]] - %0(s32) = COPY $s0 - %1(s64) = G_SITOFP %0 - $d0 = COPY %1(s64) -... - --- name: sitofp_s64_s64_fpr legalized: true diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index 84c1b97a34e88..caf87a13f283b 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -4,11 +4,6 @@ ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16,CHECK-NOFP16-GI ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-GI -; CHECK-FP16-GI: warning: Instruction selection used fallback path for stofp_load_i64_f16 -; CHECK-FP16-GI-NEXT: warning: Instruction selection used fallback path for utofp_load_i64_f16 -; CHECK-FP16-GI-NEXT: warning: Instruction selection used fallback path for stofp_load_i32_f16 -; CHECK-FP16-GI-NEXT: warning: Instruction selection used fallback path for utofp_load_i32_f16 - define double @stofp_i64_f64(i64 %a) { ; CHECK-LABEL: stofp_i64_f64: ; CHECK: // %bb.0: // %entry @@ -844,31 +839,11 @@ entry: } define double @stofp_load_i32_f64(ptr %p) { -; CHECK-NOFP16-SD-LABEL: stofp_load_i32_f64: -; CHECK-NOFP16-SD: // %bb.0: // %entry -; CHECK-NOFP16-SD-NEXT: ldr w8, [x0] -; CHECK-NOFP16-SD-NEXT: scvtf d0, w8 -; CHECK-NOFP16-SD-NEXT: ret -; -; CHECK-FP16-SD-LABEL: stofp_load_i32_f64: -; CHECK-FP16-SD: // %bb.0: // %entry -; CHECK-FP16-SD-NEXT: ldr w8, [x0] -; CHECK-FP16-SD-NEXT: scvtf d0, w8 -; CHECK-FP16-SD-NEXT: ret -; -; CHECK-NOFP16-GI-LABEL: stofp_load_i32_f64: -; CHECK-NOFP16-GI: // %bb.0: // %entry -; CHECK-NOFP16-GI-NEXT: ldr s0, [x0] -; CHECK-NOFP16-GI-NEXT: fmov w8, s0 -; CHECK-NOFP16-GI-NEXT: scvtf d0, w8 -; CHECK-NOFP16-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: stofp_load_i32_f64: -; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: ldr s0, [x0] -; CHECK-FP16-GI-NEXT: fmov w8, s0 -; CHECK-FP16-GI-NEXT: scvtf d0, w8 -; CHECK-FP16-GI-NEXT: ret +; CHECK-LABEL: stofp_load_i32_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: scvtf d0, w8 +; CHECK-NEXT: ret entry: %a = load i32, ptr %p %c = sitofp i32 %a to double @@ -936,31 +911,11 @@ entry: } define float @stofp_load_i64_f32(ptr %p) { -; CHECK-NOFP16-SD-LABEL: stofp_load_i64_f32: -; CHECK-NOFP16-SD: // %bb.0: // %entry -; CHECK-NOFP16-SD-NEXT: ldr x8, [x0] -; CHECK-NOFP16-SD-NEXT: scvtf s0, x8 -; CHECK-NOFP16-SD-NEXT: ret -; -; CHECK-FP16-SD-LABEL: stofp_load_i64_f32: -; CHECK-FP16-SD: // %bb.0: // %entry -; CHECK-FP16-SD-NEXT: ldr x8, [x0] -; CHECK-FP16-SD-NEXT: scvtf s0, x8 -; CHECK-FP16-SD-NEXT: ret -; -; CHECK-NOFP16-GI-LABEL: stofp_load_i64_f32: -; CHECK-NOFP16-GI: // %bb.0: // %entry -; CHECK-NOFP16-GI-NEXT: ldr d0, [x0] -; CHECK-NOFP16-GI-NEXT: fmov x8, d0 -; CHECK-NOFP16-GI-NEXT: scvtf s0, x8 -; CHECK-NOFP16-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: stofp_load_i64_f32: -; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: ldr d0, [x0] -; CHECK-FP16-GI-NEXT: fmov x8, d0 -; CHECK-FP16-GI-NEXT: scvtf s0, x8 -; CHECK-FP16-GI-NEXT: ret +; CHECK-LABEL: stofp_load_i64_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: scvtf s0, x8 +; CHECK-NEXT: ret entry: %a = load i64, ptr %p %c = sitofp i64 %a to float @@ -968,31 +923,11 @@ entry: } define float @utofp_load_i64_f32(ptr %p) { -; CHECK-NOFP16-SD-LABEL: utofp_load_i64_f32: -; CHECK-NOFP16-SD: // %bb.0: // %entry -; CHECK-NOFP16-SD-NEXT: ldr x8, [x0] -; CHECK-NOFP16-SD-NEXT: ucvtf s0, x8 -; CHECK-NOFP16-SD-NEXT: ret -; -; CHECK-FP16-SD-LABEL: utofp_load_i64_f32: -; CHECK-FP16-SD: // %bb.0: // %entry -; CHECK-FP16-SD-NEXT: ldr x8, [x0] -; CHECK-FP16-SD-NEXT: ucvtf s0, x8 -; CHECK-FP16-SD-NEXT: ret -; -; CHECK-NOFP16-GI-LABEL: utofp_load_i64_f32: -; CHECK-NOFP16-GI: // %bb.0: // %entry -; CHECK-NOFP16-GI-NEXT: ldr d0, [x0] -; CHECK-NOFP16-GI-NEXT: fmov x8, d0 -; CHECK-NOFP16-GI-NEXT: ucvtf s0, x8 -; CHECK-NOFP16-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: utofp_load_i64_f32: -; CHECK-FP16-GI: // %bb.0: // %entry -; CHECK-FP16-GI-NEXT: ldr d0, [x0] -; CHECK-FP16-GI-NEXT: fmov x8, d0 -; CHECK-FP16-GI-NEXT: ucvtf s0, x8 -; CHECK-FP16-GI-NEXT: ret +; CHECK-LABEL: utofp_load_i64_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ucvtf s0, x8 +; CHECK-NEXT: ret entry: %a = load i64, ptr %p %c = uitofp i64 %a to float @@ -1072,26 +1007,18 @@ entry: } define half @stofp_load_i64_f16(ptr %p) { -; CHECK-NOFP16-SD-LABEL: stofp_load_i64_f16: -; CHECK-NOFP16-SD: // %bb.0: // %entry -; CHECK-NOFP16-SD-NEXT: ldr x8, [x0] -; CHECK-NOFP16-SD-NEXT: scvtf s0, x8 -; CHECK-NOFP16-SD-NEXT: fcvt h0, s0 -; CHECK-NOFP16-SD-NEXT: ret +; CHECK-NOFP16-LABEL: stofp_load_i64_f16: +; CHECK-NOFP16: // %bb.0: // %entry +; CHECK-NOFP16-NEXT: ldr x8, [x0] +; CHECK-NOFP16-NEXT: scvtf s0, x8 +; CHECK-NOFP16-NEXT: fcvt h0, s0 +; CHECK-NOFP16-NEXT: ret ; ; CHECK-FP16-LABEL: stofp_load_i64_f16: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: ldr x8, [x0] ; CHECK-FP16-NEXT: scvtf h0, x8 ; CHECK-FP16-NEXT: ret -; -; CHECK-NOFP16-GI-LABEL: stofp_load_i64_f16: -; CHECK-NOFP16-GI: // %bb.0: // %entry -; CHECK-NOFP16-GI-NEXT: ldr d0, [x0] -; CHECK-NOFP16-GI-NEXT: fmov x8, d0 -; CHECK-NOFP16-GI-NEXT: scvtf s0, x8 -; CHECK-NOFP16-GI-NEXT: fcvt h0, s0 -; CHECK-NOFP16-GI-NEXT: ret entry: %a = load i64, ptr %p %c = sitofp i64 %a to half @@ -1099,26 +1026,18 @@ entry: } define half @utofp_load_i64_f16(ptr %p) { -; CHECK-NOFP16-SD-LABEL: utofp_load_i64_f16: -; CHECK-NOFP16-SD: // %bb.0: // %entry -; CHECK-NOFP16-SD-NEXT: ldr x8, [x0] -; CHECK-NOFP16-SD-NEXT: ucvtf s0, x8 -; CHECK-NOFP16-SD-NEXT: fcvt h0, s0 -; CHECK-NOFP16-SD-NEXT: ret +; CHECK-NOFP16-LABEL: utofp_load_i64_f16: +; CHECK-NOFP16: // %bb.0: // %entry +; CHECK-NOFP16-NEXT: ldr x8, [x0] +; CHECK-NOFP16-NEXT: ucvtf s0, x8 +; CHECK-NOFP16-NEXT: fcvt h0, s0 +; CHECK-NOFP16-NEXT: ret ; ; CHECK-FP16-LABEL: utofp_load_i64_f16: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: ldr x8, [x0] ; CHECK-FP16-NEXT: ucvtf h0, x8 ; CHECK-FP16-NEXT: ret -; -; CHECK-NOFP16-GI-LABEL: utofp_load_i64_f16: -; CHECK-NOFP16-GI: // %bb.0: // %entry -; CHECK-NOFP16-GI-NEXT: ldr d0, [x0] -; CHECK-NOFP16-GI-NEXT: fmov x8, d0 -; CHECK-NOFP16-GI-NEXT: ucvtf s0, x8 -; CHECK-NOFP16-GI-NEXT: fcvt h0, s0 -; CHECK-NOFP16-GI-NEXT: ret entry: %a = load i64, ptr %p %c = uitofp i64 %a to half