diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 53ec7125a6490..72cde46b2c5f5 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -312,6 +312,55 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
     }
   }
 
+  const int RegBankSize = 16;
+
+  // Special case GPR16 -> XMM
+  if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID &&
+      (DstRegBank.getID() == X86::VECRRegBankID)) {
+
+    const DebugLoc &DL = I.getDebugLoc();
+
+    // Any-extend GPR16 -> GPR32.
+    Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass);
+    BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::SUBREG_TO_REG),
+            ExtReg)
+        .addImm(0)
+        .addReg(SrcReg)
+        .addImm(X86::sub_16bit);
+
+    // Copy GR32 -> XMM.
+    BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
+        .addReg(ExtReg);
+
+    I.eraseFromParent();
+  }
+
+  // Special case XMM -> GR16
+  if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID &&
+      (SrcRegBank.getID() == X86::VECRRegBankID)) {
+
+    const DebugLoc &DL = I.getDebugLoc();
+
+    // Move XMM to a GR32 register.
+    Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass);
+    BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32)
+        .addReg(SrcReg);
+
+    // Extract the lower 16 bits.
+    if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit,
+                                                 &X86::GR32RegClass)) {
+      // Optimization for a physical Dst (e.g. AX): copy to EAX directly.
+      BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32)
+          .addReg(Temp32);
+    } else {
+      // Handle the case where there is no matching 32-bit super-register.
+      BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
+          .addReg(Temp32, 0, X86::sub_16bit);
+    }
+
+    I.eraseFromParent();
+  }
+
   return true;
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index cb0208a4a5f32..e03b3ae68a7df 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4294,10 +4294,11 @@ static unsigned CopyToFromAsymmetricReg(Register DestReg, Register SrcReg,
 
   if (X86::VR128XRegClass.contains(DestReg) &&
       X86::GR32RegClass.contains(SrcReg))
-    // Copy from a VR128 register to a VR128 register.
+    // Copy from a GR32 register to a VR128 register.
     return HasAVX512 ? X86::VMOVDI2PDIZrr
            : HasAVX  ? X86::VMOVDI2PDIrr
                      : X86::MOVDI2PDIrr;
+
   return 0;
 }
 
@@ -4366,6 +4367,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   else if (X86::VK16RegClass.contains(DestReg, SrcReg))
     Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
                              : (HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
+
   if (!Opc)
     Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
 
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
new file mode 100644
index 0000000000000..dad33cac66dc7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
+
+define half @test_i16_to_half(i16 %0) {
+; SSE2-LABEL: test_i16_to_half:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movd %edi, %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_i16_to_half:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_i16_to_half:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vmovd %edi, %xmm0
+; AVX512-NEXT:    retq
+entry:
+  %2 = bitcast i16 %0 to half
+  ret half %2
+}
+
+define i16 @test_half_to_i16(half %0) {
+; SSE2-LABEL: test_half_to_i16:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    # kill: def $eax killed $eax def $ax
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: test_half_to_i16:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    # kill: def $eax killed $eax def $ax
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: test_half_to_i16:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    vmovd %xmm0, %eax
+; AVX512-NEXT:    # kill: def $eax killed $eax def $ax
+; AVX512-NEXT:    retq
+entry:
+  %2 = bitcast half %0 to i16
+  ret i16 %2
+}