Skip to content
49 changes: 49 additions & 0 deletions llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,55 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I,
}
}

// Bit width that triggers the special-case GPR16 <-> XMM copy lowering below.
// x86 has no direct 16-bit GPR<->XMM move instruction, so both directions are
// routed through a 32-bit GPR.
const int RegBankSize = 16;

// Special case GPR16 -> XMM: widen the 16-bit GPR value to 32 bits, then emit
// a GR32 -> XMM copy (lowered later to MOVD/VMOVD by copyPhysReg).
// The two special cases below are mutually exclusive (this one requires the
// destination bank to be VECR, the next requires it to be GPR), so falling
// through after eraseFromParent() never touches the erased instruction.
if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID &&
(DstRegBank.getID() == X86::VECRRegBankID)) {

const DebugLoc &DL = I.getDebugLoc();

// Any-extend GPR16 -> GPR32.
// NOTE(review): SUBREG_TO_REG with imm 0 asserts to the backend that the
// upper 16 bits are already zero-defined; for a pure any-extend an
// IMPLICIT_DEF + INSERT_SUBREG sequence is the usual pattern — confirm the
// zero-upper-bits invariant actually holds here.
Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass);
BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::SUBREG_TO_REG),
ExtReg)
.addImm(0)
.addReg(SrcReg)
.addImm(X86::sub_16bit);

// Copy the widened GR32 value into the XMM destination.
BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
.addReg(ExtReg);

// The original COPY has been fully replaced; remove it.
I.eraseFromParent();
}

// Special case XMM -> GR16: move through a 32-bit GPR, then narrow to 16 bits.
if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID &&
(SrcRegBank.getID() == X86::VECRRegBankID)) {

const DebugLoc &DL = I.getDebugLoc();

// Move the XMM value into a fresh GR32 virtual register (lowered later to
// MOVD/VMOVD by copyPhysReg).
Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass);
BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32)
.addReg(SrcReg);

// Extract the lower 16 bits into the destination.
// NOTE(review): TRI.getMatchingSuperReg expects a physical register; if
// DstReg can be virtual at this point, converting it to MCRegister may
// assert — confirm selectCopy only reaches here with a physical Dst, or
// guard with DstReg.isPhysical() before this call.
if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit,
&X86::GR32RegClass)) {
// Physical destination with a 32-bit super-register (e.g. AX -> EAX):
// copy the full 32 bits directly; the 16-bit alias receives the value.
BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32)
.addReg(Temp32);
} else {
// No matching super-register (e.g. virtual Dst): copy just the low
// 16-bit sub-register of Temp32 into DstReg.
BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg)
.addReg(Temp32, 0, X86::sub_16bit);
}

// The original COPY has been fully replaced; remove it.
I.eraseFromParent();
}

return true;
}

Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4294,10 +4294,11 @@ static unsigned CopyToFromAsymmetricReg(Register DestReg, Register SrcReg,

if (X86::VR128XRegClass.contains(DestReg) &&
X86::GR32RegClass.contains(SrcReg))
// Copy from a GR32 register to a VR128 register (MOVD: GPR32 -> XMM low
// lane, upper lanes zeroed). Pick the encoding matching the subtarget's
// widest available VEX/EVEX support.
return HasAVX512 ? X86::VMOVDI2PDIZrr
: HasAVX ? X86::VMOVDI2PDIrr
: X86::MOVDI2PDIrr;

// No asymmetric-register move applies to this (DestReg, SrcReg) pair.
return 0;
}

Expand Down Expand Up @@ -4366,6 +4367,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (X86::VK16RegClass.contains(DestReg, SrcReg))
Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
: (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk);

if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);

Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512

; Exercises the GlobalISel GPR16 -> XMM copy special case: an i16 -> half
; bitcast must lower to a single 32-bit GPR -> XMM move (movd/vmovd).
define half @test_i16_to_half(i16 %0) {
; SSE2-LABEL: test_i16_to_half:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: test_i16_to_half:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_i16_to_half:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovd %edi, %xmm0
; AVX512-NEXT: retq
entry:
%2 = bitcast i16 %0 to half
ret half %2
}

; Exercises the GlobalISel XMM -> GPR16 copy special case: a half -> i16
; bitcast must lower to an XMM -> 32-bit GPR move (movd/vmovd) whose 16-bit
; alias (%ax) carries the return value (hence the "kill" implicit-def note).
define i16 @test_half_to_i16(half %0) {
; SSE2-LABEL: test_half_to_i16:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $eax killed $eax def $ax
; SSE2-NEXT: retq
;
; AVX-LABEL: test_half_to_i16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: def $eax killed $eax def $ax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_half_to_i16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: # kill: def $eax killed $eax def $ax
; AVX512-NEXT: retq
entry:
%2 = bitcast half %0 to i16
ret i16 %2
}