From d750cc2e41361752535d74084cead772da1d36cf Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Mon, 17 Nov 2025 22:32:26 +0000 Subject: [PATCH 01/10] [X86][GISel] Fix crash on casting i16 <-> half. --- llvm/lib/Target/X86/X86InstrInfo.cpp | 40 ++++++++++++++-- .../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 46 +++++++++++++++++++ 2 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index cb0208a4a5f32..30c2e535a9a35 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4294,10 +4294,28 @@ static unsigned CopyToFromAsymmetricReg(Register DestReg, Register SrcReg, if (X86::VR128XRegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) - // Copy from a VR128 register to a VR128 register. + // Copy from a GR32 register to a VR128 register. return HasAVX512 ? X86::VMOVDI2PDIZrr : HasAVX ? X86::VMOVDI2PDIrr : X86::MOVDI2PDIrr; + + // SrcReg(VR128) -> DestReg(GR16) + // SrcReg(GR16) -> DestReg(VR128) + + if (X86::GR16RegClass.contains(DestReg) && + X86::VR128XRegClass.contains(SrcReg)) + // Copy from a VR128 register to a GR16 register. + return HasAVX512 ? X86::VPEXTRWZrri + : HasAVX ? X86::VPEXTRWrri + : X86::PEXTRWrri; + + if (X86::VR128XRegClass.contains(DestReg) && + X86::GR16RegClass.contains(SrcReg)) + // Copy from a GR16 register to a VR128 register. + return HasAVX512 ? X86::VPINSRWZrri + : HasAVX ? X86::VPINSRWrri + : X86::PINSRWrri; + return 0; } @@ -4370,8 +4388,24 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget); if (Opc) { - BuildMI(MBB, MI, DL, get(Opc), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + auto MIB = BuildMI(MBB, MI, DL, get(Opc), DestReg); + switch (Opc) { + case X86::VPINSRWZrri: + case X86::VPINSRWrri: + case X86::PINSRWrri: + MIB.addReg(DestReg, RegState::Undef) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); + break; + case X86::VPEXTRWZrri: + case X86::VPEXTRWrri: + case X86::PEXTRWrri: + MIB.addReg(SrcReg, getKillRegState(KillSrc)).addImm(0); + break; + default: + MIB.addReg(SrcReg, getKillRegState(KillSrc)); + break; + } return; } diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll new file mode 100644 index 0000000000000..1d2bd50dbc368 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 + +define dso_local noundef half @bar(i16 %0) { +; SSE2-LABEL: bar: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pinsrw $0, %di, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: bar: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpinsrw $0, %di, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: bar: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpinsrw $0, %di, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %2 = bitcast i16 %0 to half + ret half %2 +} + +define dso_local noundef i16 @test_half_to_i16(half %0) { +; SSE2-LABEL: test_half_to_i16: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pextrw $0, %xmm0, %ax +; SSE2-NEXT: retq +; +; AVX-LABEL: test_half_to_i16: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpextrw $0, %xmm0, %ax +; AVX-NEXT: retq +; +; AVX512-LABEL: test_half_to_i16: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpextrw $0, %xmm0, %ax +; AVX512-NEXT: retq +entry: + %2 = bitcast half %0 to i16 + ret i16 %2 +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} From 84f4536a41fa7a1258695f2a50dc90505cc2d1e5 Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Mon, 17 Nov 2025 22:58:11 +0000 Subject: [PATCH 02/10] [X86][GISel] Updated fp-bitcast test name --- llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll index 1d2bd50dbc368..7d44841df1cf9 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll @@ -3,18 +3,18 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 -define dso_local noundef half @bar(i16 %0) { -; SSE2-LABEL: bar: +define dso_local noundef half @test_i16_to_half(i16 %0) { +; SSE2-LABEL: test_i16_to_half: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: pinsrw $0, %di, %xmm0 ; SSE2-NEXT: retq ; -; AVX-LABEL: bar: +; AVX-LABEL: test_i16_to_half: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vpinsrw $0, %di, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: bar: +; AVX512-LABEL: test_i16_to_half: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vpinsrw $0, %di, %xmm0, %xmm0 ; AVX512-NEXT: retq From 4d6d6e90933da209c2fc294a55677de2765aa2e6 Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Mon, 17 Nov 2025 23:05:11 +0000 Subject: [PATCH 03/10] [X86][GISel] Removed redundant check from tests. --- llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll index 7d44841df1cf9..c41f2cd596ff1 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s --check-prefixes=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 define dso_local noundef half @test_i16_to_half(i16 %0) { ; SSE2-LABEL: test_i16_to_half: @@ -42,5 +42,3 @@ entry: %2 = bitcast half %0 to i16 ret i16 %2 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} From 7860e6e12e7b556fdafc04dc734d41b9a43e673f Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Tue, 18 Nov 2025 19:54:30 +0000 Subject: [PATCH 04/10] [X86][GISel] Changed insert/extract functions to use movd instructions --- llvm/lib/Target/X86/X86InstrInfo.cpp | 80 +++++++++++-------- .../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 19 +++-- 2 files changed, 56 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 30c2e535a9a35..20363917045cd 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4299,23 +4299,6 @@ static unsigned CopyToFromAsymmetricReg(Register DestReg, Register SrcReg, : HasAVX ? X86::VMOVDI2PDIrr : X86::MOVDI2PDIrr; - // SrcReg(VR128) -> DestReg(GR16) - // SrcReg(GR16) -> DestReg(VR128) - - if (X86::GR16RegClass.contains(DestReg) && - X86::VR128XRegClass.contains(SrcReg)) - // Copy from a VR128 register to a GR16 register. - return HasAVX512 ? X86::VPEXTRWZrri - : HasAVX ? X86::VPEXTRWrri - : X86::PEXTRWrri; - - if (X86::VR128XRegClass.contains(DestReg) && - X86::GR16RegClass.contains(SrcReg)) - // Copy from a GR16 register to a VR128 register. - return HasAVX512 ? X86::VPINSRWZrri - : HasAVX ? X86::VPINSRWrri - : X86::PINSRWrri; - return 0; } @@ -4384,28 +4367,55 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (X86::VK16RegClass.contains(DestReg, SrcReg)) Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk) : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk); + + else if (X86::GR16RegClass.contains(DestReg) && + X86::VR128XRegClass.contains(SrcReg)) { + // Special case for moving xmm to GPR16 registers, get super reg and fall + // use CopyToFromAsymmetricReg + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + DestReg = + TRI->getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass); + } else if (X86::VR128XRegClass.contains(DestReg) && + X86::GR16RegClass.contains(SrcReg)) { + + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + + // Zero extend GPR16 register to GPR32 + Register Src32 = + TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass); + + BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32) + .addReg(SrcReg, getKillRegState(KillSrc)); + + // Assign Src32 to SrcReg and use CopyToFromAsymmetricReg + SrcReg = Src32; + } + if (!Opc) Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget); if (Opc) { - auto MIB = BuildMI(MBB, MI, DL, get(Opc), DestReg); - switch (Opc) { - case X86::VPINSRWZrri: - case X86::VPINSRWrri: - case X86::PINSRWrri: - MIB.addReg(DestReg, RegState::Undef) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0); - break; - case X86::VPEXTRWZrri: - case X86::VPEXTRWrri: - case X86::PEXTRWrri: - MIB.addReg(SrcReg, getKillRegState(KillSrc)).addImm(0); - break; - default: - MIB.addReg(SrcReg, getKillRegState(KillSrc)); - break; - } + BuildMI(MBB, MI, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + // Special case for moving GPR16 to xmm registers + if (X86::VR128XRegClass.contains(DestReg) && + X86::GR16RegClass.contains(SrcReg)) { + + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + + // Zero extend GPR16 register to GPR32 + Register Src32 = + TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass); + + BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32) + .addReg(SrcReg, getKillRegState(KillSrc)); + + unsigned Opc = CopyToFromAsymmetricReg(DestReg, Src32, Subtarget); + BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(Src32, RegState::Kill); + return; } diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll index c41f2cd596ff1..96ee93fd18762 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll @@ -3,40 +3,43 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx | FileCheck %s --check-prefixes=AVX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -define dso_local noundef half @test_i16_to_half(i16 %0) { +define half @test_i16_to_half(i16 %0) { ; SSE2-LABEL: test_i16_to_half: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pinsrw $0, %di, %xmm0 +; SSE2-NEXT: movzwl %di, %edi +; SSE2-NEXT: movd %edi, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_i16_to_half: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vpinsrw $0, %di, %xmm0, %xmm0 +; AVX-NEXT: movzwl %di, %edi +; AVX-NEXT: vmovd %edi, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_i16_to_half: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpinsrw $0, %di, %xmm0, %xmm0 +; AVX512-NEXT: movzwl %di, %edi +; AVX512-NEXT: vmovd %edi, %xmm0 ; AVX512-NEXT: retq entry: %2 = bitcast i16 %0 to half ret half %2 } -define dso_local noundef i16 @test_half_to_i16(half %0) { +define i16 @test_half_to_i16(half %0) { ; SSE2-LABEL: test_half_to_i16: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pextrw $0, %xmm0, %ax +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; ; AVX-LABEL: test_half_to_i16: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vpextrw $0, %xmm0, %ax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_half_to_i16: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpextrw $0, %xmm0, %ax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: retq entry: %2 = bitcast half %0 to i16 From 29941cb87e7124a48ad5837f235d3033eac8015e Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Tue, 18 Nov 2025 20:27:56 +0000 Subject: [PATCH 05/10] [X86][GISel] Replace variable TRI with RI --- llvm/lib/Target/X86/X86InstrInfo.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 20363917045cd..11bfcedd37e11 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4372,17 +4372,14 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, X86::VR128XRegClass.contains(SrcReg)) { // Special case for moving xmm to GPR16 registers, get super reg and fall // use CopyToFromAsymmetricReg - const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); DestReg = - TRI->getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass); + RI.getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass); } else if (X86::VR128XRegClass.contains(DestReg) && X86::GR16RegClass.contains(SrcReg)) { - const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); - // Zero extend GPR16 register to GPR32 Register Src32 = - TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass); + RI.getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass); BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32) .addReg(SrcReg, getKillRegState(KillSrc)); From e35ca3c44d9e5081dc6ea294822471ac496bed79 Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Wed, 19 Nov 2025 00:16:19 +0000 Subject: [PATCH 06/10] [X86][GISel] Moved special GPR16 <-> XMM case to X86InstructionSelector::selectCopy --- .../X86/GISel/X86InstructionSelector.cpp | 46 +++++++++++++++++++ llvm/lib/Target/X86/X86InstrInfo.cpp | 20 -------- .../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 3 ++ 3 files changed, 49 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 53ec7125a6490..a2a1644677ccb 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -310,6 +310,52 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, I.getOperand(1).setReg(ExtSrc); } + + const int RegBankSize = 16; + + // Special case GPR16 -> XMM + if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID && + (SrcRegBank.getID() == X86::VECRRegBankID)) { + + const DebugLoc &DL = I.getDebugLoc(); + + // Zero extend GP16 -> GP32 + Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass); + BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), ExtReg) + .addReg(SrcReg); + + // Copy GPR32 -> XMM + BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg) + .addReg(ExtReg); + + I.eraseFromParent(); + } + + // Special case XMM -> GPR16 + if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID && + (SrcRegBank.getID() == X86::VECRRegBankID)) { + + const DebugLoc &DL = I.getDebugLoc(); + + // Move XMM to GPR32 register. + Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass); + BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32) + .addReg(SrcReg); + + // Extract the lower 16 bits + if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit, + &X86::GR32RegClass)) { + // Optimization for Physical Dst (e.g. AX): Copy to EAX directly. + BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32) + .addReg(Temp32); + } else { + // Handle if there is no super. + BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg) + .addReg(Temp32, 0, X86::sub_16bit); + } + + I.eraseFromParent(); + } } return true; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 11bfcedd37e11..7e8823ee8761e 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4368,26 +4368,6 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk) : (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVWkk); - else if (X86::GR16RegClass.contains(DestReg) && - X86::VR128XRegClass.contains(SrcReg)) { - // Special case for moving xmm to GPR16 registers, get super reg and fall - // use CopyToFromAsymmetricReg - DestReg = - RI.getMatchingSuperReg(DestReg, X86::sub_16bit, &X86::GR32RegClass); - } else if (X86::VR128XRegClass.contains(DestReg) && - X86::GR16RegClass.contains(SrcReg)) { - - // Zero extend GPR16 register to GPR32 - Register Src32 = - RI.getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass); - - BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32) - .addReg(SrcReg, getKillRegState(KillSrc)); - - // Assign Src32 to SrcReg and use CopyToFromAsymmetricReg - SrcReg = Src32; - } - if (!Opc) Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget); diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll index 96ee93fd18762..a5d3c02b29a92 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll @@ -30,16 +30,19 @@ define i16 @test_half_to_i16(half %0) { ; SSE2-LABEL: test_half_to_i16: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: # kill: def $eax killed $eax def $ax ; SSE2-NEXT: retq ; ; AVX-LABEL: test_half_to_i16: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: # kill: def $eax killed $eax def $ax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_half_to_i16: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: # kill: def $eax killed $eax def $ax ; AVX512-NEXT: retq entry: %2 = bitcast half %0 to i16 From 3455c83ecd1694247ab99a2fe32c6e94fdc21b73 Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Wed, 19 Nov 2025 10:25:24 +0000 Subject: [PATCH 07/10] [X86][GISel] Removed missed code and updated test. --- .../X86/GISel/X86InstructionSelector.cpp | 76 +++++++++---------- llvm/lib/Target/X86/X86InstrInfo.cpp | 19 ----- .../test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 12 +-- 3 files changed, 44 insertions(+), 63 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index a2a1644677ccb..7cddef10146c7 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -310,52 +310,52 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, I.getOperand(1).setReg(ExtSrc); } + } - const int RegBankSize = 16; + const int RegBankSize = 16; - // Special case GPR16 -> XMM - if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID && - (SrcRegBank.getID() == X86::VECRRegBankID)) { + // Special case GPR16 -> XMM + if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID && + (DstRegBank.getID() == X86::VECRRegBankID)) { - const DebugLoc &DL = I.getDebugLoc(); + const DebugLoc &DL = I.getDebugLoc(); - // Zero extend GP16 -> GP32 - Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass); - BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), ExtReg) - .addReg(SrcReg); + // Zero extend GPR16 -> GPR32 + Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass); + BuildMI(*I.getParent(), I, DL, TII.get(X86::MOVZX32rr16), ExtReg) + .addReg(SrcReg); - // Copy GPR32 -> XMM - BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg) - .addReg(ExtReg); + // Copy to GPR32 -> XMM + BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg) + .addReg(ExtReg); - I.eraseFromParent(); - } + I.eraseFromParent(); + } - // Special case XMM -> GPR16 - if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID && - (SrcRegBank.getID() == X86::VECRRegBankID)) { - - const DebugLoc &DL = I.getDebugLoc(); - - // Move XMM to GPR32 register. - Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass); - BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32) - .addReg(SrcReg); - - // Extract the lower 16 bits - if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit, - &X86::GR32RegClass)) { - // Optimization for Physical Dst (e.g. AX): Copy to EAX directly. - BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32) - .addReg(Temp32); - } else { - // Handle if there is no super. - BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg) - .addReg(Temp32, 0, X86::sub_16bit); - } - - I.eraseFromParent(); + // Special case XMM -> GPR16 + if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID && + (SrcRegBank.getID() == X86::VECRRegBankID)) { + + const DebugLoc &DL = I.getDebugLoc(); + + // Move XMM to GPR32 register. + Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass); + BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32) + .addReg(SrcReg); + + // Extract the lower 16 bits + if (Register Dst32 = TRI.getMatchingSuperReg(DstReg, X86::sub_16bit, + &X86::GR32RegClass)) { + // Optimization for Physical Dst (e.g. AX): Copy to EAX directly. + BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Dst32) + .addReg(Temp32); + } else { + // Handle if there is no super. + BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg) + .addReg(Temp32, 0, X86::sub_16bit); } + + I.eraseFromParent(); } return true; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 7e8823ee8761e..e03b3ae68a7df 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4377,25 +4377,6 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - // Special case for moving GPR16 to xmm registers - if (X86::VR128XRegClass.contains(DestReg) && - X86::GR16RegClass.contains(SrcReg)) { - - const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); - - // Zero extend GPR16 register to GPR32 - Register Src32 = - TRI->getMatchingSuperReg(SrcReg, X86::sub_16bit, &X86::GR32RegClass); - - BuildMI(MBB, MI, DL, get(X86::MOVZX32rr16), Src32) - .addReg(SrcReg, getKillRegState(KillSrc)); - - unsigned Opc = CopyToFromAsymmetricReg(DestReg, Src32, Subtarget); - BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(Src32, RegState::Kill); - - return; - } - if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) { // FIXME: We use a fatal error here because historically LLVM has tried // lower some of these physreg copies and we want to ensure we get diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll index a5d3c02b29a92..12728bf82f55e 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll @@ -6,20 +6,20 @@ define half @test_i16_to_half(i16 %0) { ; SSE2-LABEL: test_i16_to_half: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movzwl %di, %edi -; SSE2-NEXT: movd %edi, %xmm0 +; SSE2-NEXT: movzwl %di, %eax +; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_i16_to_half: ; AVX: # %bb.0: # %entry -; AVX-NEXT: movzwl %di, %edi -; AVX-NEXT: vmovd %edi, %xmm0 +; AVX-NEXT: movzwl %di, %eax +; AVX-NEXT: vmovd %eax, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_i16_to_half: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: movzwl %di, %edi -; AVX512-NEXT: vmovd %edi, %xmm0 +; AVX512-NEXT: movzwl %di, %eax +; AVX512-NEXT: vmovd %eax, %xmm0 ; AVX512-NEXT: retq entry: %2 = bitcast i16 %0 to half From 2b1ff3c94d68ac79d8a89d5813cb0786e43219ff Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Wed, 19 Nov 2025 13:25:15 +0000 Subject: [PATCH 08/10] [X86][GISel] Changed explicit zero extend to any extend --- .../Target/X86/GISel/X86InstructionSelector.cpp | 15 +++++++++------ llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll | 9 +++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 7cddef10146c7..72cde46b2c5f5 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -320,25 +320,28 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, const DebugLoc &DL = I.getDebugLoc(); - // Zero extend GPR16 -> GPR32 + // Any extend GPR16 -> GPR32 Register ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass); - BuildMI(*I.getParent(), I, DL, TII.get(X86::MOVZX32rr16), ExtReg) - .addReg(SrcReg); + BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::SUBREG_TO_REG), + ExtReg) + .addImm(0) + .addReg(SrcReg) + .addImm(X86::sub_16bit); - // Copy to GPR32 -> XMM + // Copy GR32 -> XMM BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), DstReg) .addReg(ExtReg); I.eraseFromParent(); } - // Special case XMM -> GPR16 + // Special case XMM -> GR16 if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID && (SrcRegBank.getID() == X86::VECRRegBankID)) { const DebugLoc &DL = I.getDebugLoc(); - // Move XMM to GPR32 register. + // Move XMM to GR32 register. Register Temp32 = MRI.createVirtualRegister(&X86::GR32RegClass); BuildMI(*I.getParent(), I, DL, TII.get(TargetOpcode::COPY), Temp32) .addReg(SrcReg); diff --git a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll index 12728bf82f55e..dad33cac66dc7 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/fp-bitcast.ll @@ -6,20 +6,17 @@ define half @test_i16_to_half(i16 %0) { ; SSE2-LABEL: test_i16_to_half: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movzwl %di, %eax -; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movd %edi, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: test_i16_to_half: ; AVX: # %bb.0: # %entry -; AVX-NEXT: movzwl %di, %eax -; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vmovd %edi, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_i16_to_half: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: movzwl %di, %eax -; AVX512-NEXT: vmovd %eax, %xmm0 +; AVX512-NEXT: vmovd %edi, %xmm0 ; AVX512-NEXT: retq entry: %2 = bitcast i16 %0 to half From d786665d97f14985cb24bdbbca72c0de2bd5318d Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin <130710602+GrumpyPigSkin@users.noreply.github.com> Date: Sat, 22 Nov 2025 20:38:11 +0000 Subject: [PATCH 09/10] [X86][GISel] Apply suggestions from code review Co-authored-by: Simon Pilgrim --- llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 72cde46b2c5f5..090bd3bf3a68b 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -315,7 +315,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, const int RegBankSize = 16; // Special case GPR16 -> XMM - if (SrcSize == RegBankSize && SrcRegBank.getID() == X86::GPRRegBankID && + if (SrcSize == 16 && SrcRegBank.getID() == X86::GPRRegBankID && (DstRegBank.getID() == X86::VECRRegBankID)) { const DebugLoc &DL = I.getDebugLoc(); @@ -336,7 +336,7 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, } // Special case XMM -> GR16 - if (DstSize == RegBankSize && DstRegBank.getID() == X86::GPRRegBankID && + if (DstSize == 16 && DstRegBank.getID() == X86::GPRRegBankID && (SrcRegBank.getID() == X86::VECRRegBankID)) { const DebugLoc &DL = I.getDebugLoc(); From 7e98fb9044208f63dccecfb3f776043900dbbb20 Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Sat, 22 Nov 2025 23:23:37 +0000 Subject: [PATCH 10/10] [X86][GISel] Removed unused var --- llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 090bd3bf3a68b..f499e6f9d0799 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -312,8 +312,6 @@ bool X86InstructionSelector::selectCopy(MachineInstr &I, } } - const int RegBankSize = 16; - // Special case GPR16 -> XMM if (SrcSize == 16 && SrcRegBank.getID() == X86::GPRRegBankID && (DstRegBank.getID() == X86::VECRRegBankID)) {