diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index be7e8db95b98e..bdd86e48fa543 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -625,8 +625,10 @@ void X86DomainReassignment::initConverters() {
   createReplacerDstCOPY(X86::MOVZX64rm16,
                         HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
 
-  createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
-  createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
+  createReplacerDstCOPY(X86::MOVZX32rr16,
+                        HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
+  createReplacerDstCOPY(X86::MOVZX64rr16,
+                        HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
 
   if (STI->hasDQI()) {
     createReplacerDstCOPY(X86::MOVZX16rm8,
@@ -636,9 +638,12 @@ void X86DomainReassignment::initConverters() {
     createReplacerDstCOPY(X86::MOVZX64rm8,
                           HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
 
-    createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
-    createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
-    createReplacerDstCOPY(X86::MOVZX64rr8, X86::KMOVBkk);
+    createReplacerDstCOPY(X86::MOVZX16rr8,
+                          HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+    createReplacerDstCOPY(X86::MOVZX32rr8,
+                          HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+    createReplacerDstCOPY(X86::MOVZX64rr8,
+                          HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
   }
 
   auto createReplacer = [&](unsigned From, unsigned To) {
@@ -647,7 +652,7 @@ void X86DomainReassignment::initConverters() {
 
   createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
   createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
-  createReplacer(X86::MOV16rr, X86::KMOVWkk);
+  createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
   createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
   createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
   createReplacer(X86::NOT16r, X86::KNOTWrr);
@@ -662,8 +667,8 @@ void X86DomainReassignment::initConverters() {
     createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
     createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
 
-    createReplacer(X86::MOV32rr, X86::KMOVDkk);
-    createReplacer(X86::MOV64rr, X86::KMOVQkk);
+    createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
+    createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
 
     createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
     createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
@@ -703,7 +708,7 @@ void X86DomainReassignment::initConverters() {
 
     createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
     createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
-    createReplacer(X86::MOV8rr, X86::KMOVBkk);
+    createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
 
     createReplacer(X86::NOT8r, X86::KNOTBrr);
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 583f8ec73a036..ea3bf1f101c1e 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4070,6 +4070,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // First deal with the normal symmetric copies.
   bool HasAVX = Subtarget.hasAVX();
   bool HasVLX = Subtarget.hasVLX();
+  bool HasEGPR = Subtarget.hasEGPR();
   unsigned Opc = 0;
   if (X86::GR64RegClass.contains(DestReg, SrcReg))
     Opc = X86::MOV64rr;
@@ -4124,7 +4125,10 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // All KMASK RegClasses hold the same k registers, can be tested against
   // anyone.
+  // Only use the 64-bit KMOVQ when BWI is available; otherwise copy with the
+  // 16-bit KMOVW. With EGPR, pick the matching _EVEX opcode in either case.
   else if (X86::VK16RegClass.contains(DestReg, SrcReg))
-    Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
+    Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
+                             : (HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
   if (!Opc)
     Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
 
diff --git a/llvm/test/CodeGen/X86/apx/kmov-kk.ll b/llvm/test/CodeGen/X86/apx/kmov-kk.ll
new file mode 100644
index 0000000000000..639a35f4546fc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-kk.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR-BWI %s
+
+; Without BWI a mask-register copy must be emitted as kmovw, because kmovq is
+; only available with BWI.
+define <16 x i32> @kmovkk(ptr %base, <16 x i32> %ind, i16 %mask) {
+; EGPR: kmovw %k1, %k2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x90,0xd1]
+; EGPR-BWI: kmovq %k1, %k2 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x90,0xd1]
+  %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+  %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+  %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
+  %imask = bitcast i16 %mask to <16 x i1>
+  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
+  %res = add <16 x i32> %gt1, %gt2
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>)