diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 807cec3c177d9..c4174cee5e10c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -555,5 +555,9 @@ void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, /// debug users of \p MI by writing the effect of \p MI in a DIExpression. void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI); +/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, +/// having only floating-point operands. +bool isPreISelGenericFloatingPointOpcode(unsigned Opc); + } // End namespace llvm. #endif diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index c3bc3203b6360..ae43e9ccf6112 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -1665,3 +1665,47 @@ void llvm::salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI) { } } } + +bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) { + switch (Opc) { + case TargetOpcode::G_FABS: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FCANONICALIZE: + case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FCOPYSIGN: + case TargetOpcode::G_FCOS: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FEXP: + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FLOG10: + case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FLOG: + case TargetOpcode::G_FMA: + case TargetOpcode::G_FMAD: + case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPOW: + case TargetOpcode::G_FPTRUNC: + case 
TargetOpcode::G_FREM: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_INTRINSIC_ROUND: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_INTRINSIC_TRUNC: + return true; + default: + return false; + } +} diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index d39de770eaf16..d5c4ce1888e78 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -424,43 +424,6 @@ void AArch64RegisterBankInfo::applyMappingImpl( } } -/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, -/// having only floating-point operands. -static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { - switch (Opc) { - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FMA: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - case TargetOpcode::G_FCEIL: - case TargetOpcode::G_FFLOOR: - case TargetOpcode::G_FNEARBYINT: - case TargetOpcode::G_FNEG: - case TargetOpcode::G_FCOS: - case TargetOpcode::G_FSIN: - case TargetOpcode::G_FLOG10: - case TargetOpcode::G_FLOG: - case TargetOpcode::G_FLOG2: - case TargetOpcode::G_FSQRT: - case TargetOpcode::G_FABS: - case TargetOpcode::G_FEXP: - case TargetOpcode::G_FRINT: - case TargetOpcode::G_INTRINSIC_TRUNC: - case TargetOpcode::G_INTRINSIC_ROUND: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: - case TargetOpcode::G_FMAXNUM: - case TargetOpcode::G_FMINNUM: - case TargetOpcode::G_FMAXIMUM: - case TargetOpcode::G_FMINIMUM: - return true; - } - return false; -} - const RegisterBankInfo::InstructionMapping & AArch64RegisterBankInfo::getSameKindOfOperandsMapping( const MachineInstr &MI) const { diff --git 
a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp index 6af1fd8c88e57..62b58cba9f24a 100644 --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -104,26 +104,6 @@ MipsRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, } } -// Instructions where all register operands are floating point. -static bool isFloatingPointOpcode(unsigned Opc) { - switch (Opc) { - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_FABS: - case TargetOpcode::G_FSQRT: - case TargetOpcode::G_FCEIL: - case TargetOpcode::G_FFLOOR: - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - return true; - default: - return false; - } -} - // Instructions where use operands are floating point registers. // Def operands are general purpose. static bool isFloatingPointOpcodeUse(unsigned Opc) { @@ -133,7 +113,7 @@ static bool isFloatingPointOpcodeUse(unsigned Opc) { case TargetOpcode::G_FCMP: return true; default: - return isFloatingPointOpcode(Opc); + return isPreISelGenericFloatingPointOpcode(Opc); } } @@ -145,7 +125,7 @@ static bool isFloatingPointOpcodeDef(unsigned Opc) { case TargetOpcode::G_UITOFP: return true; default: - return isFloatingPointOpcode(Opc); + return isPreISelGenericFloatingPointOpcode(Opc); } } diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp index 6aeef145e3078..125a49de7b27d 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp @@ -13,6 +13,7 @@ #include "PPCRegisterBankInfo.h" #include "PPCRegisterInfo.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" 
#include "llvm/Support/Debug.h" @@ -239,44 +240,6 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getInstructionMapping(MappingID, Cost, OperandsMapping, NumOperands); } -/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, -/// having only floating-point operands. -/// FIXME: this is copied from target AArch64. Needs some code refactor here to -/// put this function in GlobalISel/Utils.cpp. -static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { - switch (Opc) { - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FMA: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - case TargetOpcode::G_FCEIL: - case TargetOpcode::G_FFLOOR: - case TargetOpcode::G_FNEARBYINT: - case TargetOpcode::G_FNEG: - case TargetOpcode::G_FCOS: - case TargetOpcode::G_FSIN: - case TargetOpcode::G_FLOG10: - case TargetOpcode::G_FLOG: - case TargetOpcode::G_FLOG2: - case TargetOpcode::G_FSQRT: - case TargetOpcode::G_FABS: - case TargetOpcode::G_FEXP: - case TargetOpcode::G_FRINT: - case TargetOpcode::G_INTRINSIC_TRUNC: - case TargetOpcode::G_INTRINSIC_ROUND: - case TargetOpcode::G_FMAXNUM: - case TargetOpcode::G_FMINNUM: - case TargetOpcode::G_FMAXIMUM: - case TargetOpcode::G_FMINIMUM: - return true; - } - return false; -} - /// \returns true if a given intrinsic \p ID only uses and defines FPRs. static bool isFPIntrinsic(unsigned ID) { // TODO: Add more intrinsics. 
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 86e44343b5086..cc534f29685f2 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -154,46 +154,6 @@ static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) { return &RISCV::ValueMappings[Idx]; } -/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode, -/// having only floating-point operands. -/// FIXME: this is copied from target AArch64. Needs some code refactor here to -/// put this function in GlobalISel/Utils.cpp. -static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { - switch (Opc) { - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FMA: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_FCONSTANT: - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - case TargetOpcode::G_FCEIL: - case TargetOpcode::G_FFLOOR: - case TargetOpcode::G_FNEARBYINT: - case TargetOpcode::G_FNEG: - case TargetOpcode::G_FCOPYSIGN: - case TargetOpcode::G_FCOS: - case TargetOpcode::G_FSIN: - case TargetOpcode::G_FLOG10: - case TargetOpcode::G_FLOG: - case TargetOpcode::G_FLOG2: - case TargetOpcode::G_FSQRT: - case TargetOpcode::G_FABS: - case TargetOpcode::G_FEXP: - case TargetOpcode::G_FRINT: - case TargetOpcode::G_INTRINSIC_TRUNC: - case TargetOpcode::G_INTRINSIC_ROUND: - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: - case TargetOpcode::G_FMAXNUM: - case TargetOpcode::G_FMINNUM: - case TargetOpcode::G_FMAXIMUM: - case TargetOpcode::G_FMINIMUM: - return true; - } - return false; -} - // TODO: Make this more like AArch64? 
bool RISCVRegisterBankInfo::hasFPConstraints( const MachineInstr &MI, const MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp index e7c9e60ba95f1..9e85424e76e62 100644 --- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp @@ -13,10 +13,13 @@ #include "X86RegisterBankInfo.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterBank.h" #include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/IntrinsicsX86.h" #define GET_TARGET_REGBANK_IMPL #include "X86GenRegisterBank.inc" @@ -68,6 +71,98 @@ X86RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, llvm_unreachable("Unsupported register kind yet."); } +// \returns true if a given intrinsic only uses and defines FPRs. +static bool isFPIntrinsic(const MachineRegisterInfo &MRI, + const MachineInstr &MI) { + // TODO: Add more intrinsics. + switch (cast<GIntrinsic>(MI).getIntrinsicID()) { + default: + return false; + // SSE1 + case Intrinsic::x86_sse_rcp_ss: + case Intrinsic::x86_sse_rcp_ps: + case Intrinsic::x86_sse_rsqrt_ss: + case Intrinsic::x86_sse_rsqrt_ps: + case Intrinsic::x86_sse_min_ss: + case Intrinsic::x86_sse_min_ps: + case Intrinsic::x86_sse_max_ss: + case Intrinsic::x86_sse_max_ps: + return true; + } + return false; +} + +bool X86RegisterBankInfo::hasFPConstraints(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { + unsigned Op = MI.getOpcode(); + if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI)) + return true; + + // Do we have an explicit floating point instruction? + if (isPreISelGenericFloatingPointOpcode(Op)) + return true; + + // No. 
Check if we have a copy-like instruction. If we do, then we could + // still be fed by floating point instructions. + if (Op != TargetOpcode::COPY && !MI.isPHI() && + !isPreISelGenericOptimizationHint(Op)) + return false; + + // Check if we already know the register bank. + auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI); + if (RB == &getRegBank(X86::PSRRegBankID)) + return true; + if (RB == &getRegBank(X86::GPRRegBankID)) + return false; + + // We don't know anything. + // + // If we have a phi, we may be able to infer that it will be assigned a fp + // type based off of its inputs. + if (!MI.isPHI() || Depth > MaxFPRSearchDepth) + return false; + + return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) { + return Op.isReg() && + onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1); + }); +} + +bool X86RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { + switch (MI.getOpcode()) { + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FCMP: + case TargetOpcode::G_LROUND: + case TargetOpcode::G_LLROUND: + case TargetOpcode::G_INTRINSIC_TRUNC: + case TargetOpcode::G_INTRINSIC_ROUND: + return true; + default: + break; + } + return hasFPConstraints(MI, MRI, TRI, Depth); +} + +bool X86RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth) const { + switch (MI.getOpcode()) { + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: + return true; + default: + break; + } + return hasFPConstraints(MI, MRI, TRI, Depth); +} + X86GenRegisterBankInfo::PartialMappingIdx X86GenRegisterBankInfo::getPartialMappingIdx(const MachineInstr &MI, const LLT &Ty, bool isFP) { @@ -180,11 +275,13 @@ X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, const RegisterBankInfo::InstructionMapping & 
X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); const MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned Opc = MI.getOpcode(); - // Try the default logic for non-generic instructions that are either copies - // or already have some operands assigned to banks. + // Try the default logic for non-generic instructions that are either + // copies or already have some operands assigned to banks. if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) { const InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) @@ -221,13 +318,14 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_FPEXT: case TargetOpcode::G_FPTRUNC: case TargetOpcode::G_FCONSTANT: - // Instruction having only floating-point operands (all scalars in VECRReg) + // Instruction having only floating-point operands (all scalars in + // VECRReg) getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx); break; case TargetOpcode::G_SITOFP: case TargetOpcode::G_FPTOSI: { - // Some of the floating-point instructions have mixed GPR and FP operands: - // fine-tune the computed mapping. + // Some of the floating-point instructions have mixed GPR and FP + // operands: fine-tune the computed mapping. auto &Op0 = MI.getOperand(0); auto &Op1 = MI.getOperand(1); const LLT Ty0 = MRI.getType(Op0.getReg()); @@ -271,9 +369,36 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ isFPTrunc || isFPAnyExt, OpRegBankIdx); - } break; + break; + } + case TargetOpcode::G_LOAD: { + // Check if that load feeds fp instructions. + // In that case, we want the default mapping to be on FPR + // instead of blind map every scalar to GPR. 
+ bool IsFP = any_of(MRI.use_nodbg_instructions(cast<GLoad>(MI).getDstReg()), + [&](const MachineInstr &UseMI) { + // If we have at least one direct use in a FP + // instruction, assume this was a floating point load + // in the IR. If it was not, we would have had a + // bitcast before reaching that instruction. + return onlyUsesFP(UseMI, MRI, TRI); + }); + getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx); + break; + } + case TargetOpcode::G_STORE: { + // Check if that store is fed by fp instructions. + Register VReg = cast<GStore>(MI).getValueReg(); + if (!VReg) + break; + MachineInstr *DefMI = MRI.getVRegDef(VReg); + bool IsFP = onlyDefinesFP(*DefMI, MRI, TRI); + getInstrPartialMappingIdxs(MI, MRI, IsFP, OpRegBankIdx); + break; + } default: - // Track the bank of each register, use NotFP mapping (all scalars in GPRs) + // Track the bank of each register, use NotFP mapping (all scalars in + // GPRs) getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ false, OpRegBankIdx); break; } diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h index 989c5956ad591..8f38e717e36b0 100644 --- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h +++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.h @@ -62,6 +62,22 @@ class X86RegisterBankInfo final : public X86GenRegisterBankInfo { const SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx, SmallVectorImpl<const ValueMapping *> &OpdsMapping); + // Maximum recursion depth for hasFPConstraints. + const unsigned MaxFPRSearchDepth = 2; + + /// \returns true if \p MI only uses and defines FPRs. + bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + unsigned Depth = 0) const; + + /// \returns true if \p MI only uses FPRs. + bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + + /// \returns true if \p MI only defines FPRs. 
+ bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, unsigned Depth = 0) const; + public: X86RegisterBankInfo(const TargetRegisterInfo &TRI); diff --git a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll index a9b2037e9947a..8d2ee3c50f215 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll @@ -10,27 +10,22 @@ define void @test_float(ptr %a , float %b) { ; CHECK64_SMALL: # %bb.0: # %entry ; CHECK64_SMALL-NEXT: movss {{.*#+}} xmm1 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0] ; CHECK64_SMALL-NEXT: addss %xmm0, %xmm1 -; CHECK64_SMALL-NEXT: movd %xmm1, %eax -; CHECK64_SMALL-NEXT: movl %eax, (%rdi) +; CHECK64_SMALL-NEXT: movss %xmm1, (%rdi) ; CHECK64_SMALL-NEXT: retq ; ; CHECK64_LARGE-LABEL: test_float: ; CHECK64_LARGE: # %bb.0: # %entry ; CHECK64_LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax ; CHECK64_LARGE-NEXT: addss (%rax), %xmm0 -; CHECK64_LARGE-NEXT: movd %xmm0, %eax -; CHECK64_LARGE-NEXT: movl %eax, (%rdi) +; CHECK64_LARGE-NEXT: movss %xmm0, (%rdi) ; CHECK64_LARGE-NEXT: retq ; ; CHECK32-LABEL: test_float: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK32-NEXT: movss {{.*#+}} xmm0 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0] -; CHECK32-NEXT: movd %ecx, %xmm1 -; CHECK32-NEXT: addss %xmm0, %xmm1 -; CHECK32-NEXT: movd %xmm1, %ecx -; CHECK32-NEXT: movl %ecx, (%eax) +; CHECK32-NEXT: addss {{[0-9]+}}(%esp), %xmm0 +; CHECK32-NEXT: movss %xmm0, (%eax) ; CHECK32-NEXT: retl entry: %aa = fadd float 5.500000e+00, %b diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll new file mode 100644 index 0000000000000..3388af605d969 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll @@ -0,0 +1,153 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=i686-- -mattr=+sse -global-isel -stop-after=regbankselect | FileCheck %s + +define void @test_x86_sse_max_ps(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_max_ps + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.max.ps), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %a1 = load <4 x float>, ptr %p2, align 16 + %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_max_ss(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_max_ss + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) + ; 
CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.max.ss), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %a1 = load <4 x float>, ptr %p2, align 16 + %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_min_ps(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_min_ps + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.min.ps), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %a1 = load <4 x float>, ptr %p2, align 16 + %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + store <4 x float> %res, 
ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_min_ss(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_min_ss + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.min.ss), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %a1 = load <4 x float>, ptr %p2, align 16 + %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_rcp_ps(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_rcp_ps + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rcp.ps), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: G_STORE 
[[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone + + +define void @test_x86_sse_rcp_ss(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_rcp_ss + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rcp.ss), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone + + +define void @test_x86_sse_rsqrt_ps(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_rsqrt_ps + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rsqrt.ps), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %res = call <4 x float> 
@llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone + + +define void @test_x86_sse_rsqrt_ss(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_rsqrt_ss + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rsqrt.ss), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll index d09db0f2474c9..99d458a183a9b 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll @@ -142,7 +142,7 @@ define float @f4(float %val) { ; X86-LABEL: name: f4 ; X86: bb.1 (%ir-block.0): ; X86-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86-NEXT: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0) + ; X86-NEXT: [[LOAD:%[0-9]+]]:psr(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0) ; X86-NEXT: $fp0 = COPY [[LOAD]](s32) ; X86-NEXT: RET 0, implicit $fp0 ; @@ -187,13 +187,10 @@ define void @f5(ptr %a, ptr %b) { ; X64-NEXT: {{ $}} ; X64-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY 
$rdi ; X64-NEXT: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi - ; X64-NEXT: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.a) - ; X64-NEXT: [[LOAD1:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.b) - ; X64-NEXT: [[COPY2:%[0-9]+]]:psr(s64) = COPY [[LOAD]](s64) - ; X64-NEXT: [[COPY3:%[0-9]+]]:psr(s64) = COPY [[LOAD1]](s64) - ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s64) = G_FADD [[COPY2]], [[COPY3]] - ; X64-NEXT: [[COPY4:%[0-9]+]]:gpr(s64) = COPY [[FADD]](s64) - ; X64-NEXT: G_STORE [[COPY4]](s64), [[COPY]](p0) :: (store (s64) into %ir.a) + ; X64-NEXT: [[LOAD:%[0-9]+]]:psr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.a) + ; X64-NEXT: [[LOAD1:%[0-9]+]]:psr(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.b) + ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s64) = G_FADD [[LOAD]], [[LOAD1]] + ; X64-NEXT: G_STORE [[FADD]](s64), [[COPY]](p0) :: (store (s64) into %ir.a) ; X64-NEXT: RET 0 %load1 = load double, ptr %a, align 8 %load2 = load double, ptr %b, align 8 @@ -210,11 +207,9 @@ define void @f6(ptr %0, ptr %1) { ; X86-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 ; X86-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) ; X86-NEXT: [[C:%[0-9]+]]:psr(s32) = G_FCONSTANT float 2.000000e+01 - ; X86-NEXT: [[LOAD2:%[0-9]+]]:gpr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.0) - ; X86-NEXT: [[COPY:%[0-9]+]]:psr(s32) = COPY [[LOAD2]](s32) - ; X86-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[COPY]], [[C]] - ; X86-NEXT: [[COPY1:%[0-9]+]]:gpr(s32) = COPY [[FADD]](s32) - ; X86-NEXT: G_STORE [[COPY1]](s32), [[LOAD1]](p0) :: (store (s32) into %ir.1) + ; X86-NEXT: [[LOAD2:%[0-9]+]]:psr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.0) + ; X86-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[LOAD2]], [[C]] + ; X86-NEXT: G_STORE [[FADD]](s32), [[LOAD1]](p0) :: (store (s32) into %ir.1) ; X86-NEXT: RET 0 ; ; X64-LABEL: name: f6 @@ -224,11 +219,9 @@ define void @f6(ptr %0, ptr %1) { ; 
X64-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi ; X64-NEXT: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi ; X64-NEXT: [[C:%[0-9]+]]:psr(s32) = G_FCONSTANT float 2.000000e+01 - ; X64-NEXT: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.0) - ; X64-NEXT: [[COPY2:%[0-9]+]]:psr(s32) = COPY [[LOAD]](s32) - ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[COPY2]], [[C]] - ; X64-NEXT: [[COPY3:%[0-9]+]]:gpr(s32) = COPY [[FADD]](s32) - ; X64-NEXT: G_STORE [[COPY3]](s32), [[COPY1]](p0) :: (store (s32) into %ir.1) + ; X64-NEXT: [[LOAD:%[0-9]+]]:psr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.0) + ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[LOAD]], [[C]] + ; X64-NEXT: G_STORE [[FADD]](s32), [[COPY1]](p0) :: (store (s32) into %ir.1) ; X64-NEXT: RET 0 %load1 = load float, ptr %0 %add = fadd float %load1, 20.0