diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index aadbc3845b79c..cab2f0a2e1c1a 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -377,7 +377,8 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O,
     O << "\t{vex2}";
   else if (Flags & X86::IP_USE_VEX3)
     O << "\t{vex3}";
-  else if (Flags & X86::IP_USE_EVEX)
+  else if ((Flags & X86::IP_USE_EVEX) ||
+           (TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitEVEXPrefix)
     O << "\t{evex}";
 
   if (Flags & X86::IP_USE_DISP8)
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index fa8d5c752a3d2..be7e8db95b98e 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -619,16 +619,22 @@ void X86DomainReassignment::initConverters() {
         std::make_unique<InstrReplacerDstCOPY>(From, To);
   };
 
-  createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
-  createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm);
+  bool HasEGPR = STI->hasEGPR();
+  createReplacerDstCOPY(X86::MOVZX32rm16,
+                        HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+  createReplacerDstCOPY(X86::MOVZX64rm16,
+                        HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
 
   createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
   createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
 
   if (STI->hasDQI()) {
-    createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm);
-    createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm);
-    createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm);
+    createReplacerDstCOPY(X86::MOVZX16rm8,
+                          HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+    createReplacerDstCOPY(X86::MOVZX32rm8,
+                          HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+    createReplacerDstCOPY(X86::MOVZX64rm8,
+                          HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
 
     createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
     createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
@@ -639,8 +645,8 @@ void X86DomainReassignment::initConverters() {
     Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
   };
 
-  createReplacer(X86::MOV16rm, X86::KMOVWkm);
-  createReplacer(X86::MOV16mr, X86::KMOVWmk);
+  createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+  createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
   createReplacer(X86::MOV16rr, X86::KMOVWkk);
   createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
   createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
@@ -650,11 +656,11 @@ void X86DomainReassignment::initConverters() {
   createReplacer(X86::XOR16rr, X86::KXORWrr);
 
   if (STI->hasBWI()) {
-    createReplacer(X86::MOV32rm, X86::KMOVDkm);
-    createReplacer(X86::MOV64rm, X86::KMOVQkm);
+    createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
+    createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
 
-    createReplacer(X86::MOV32mr, X86::KMOVDmk);
-    createReplacer(X86::MOV64mr, X86::KMOVQmk);
+    createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
+    createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
 
     createReplacer(X86::MOV32rr, X86::KMOVDkk);
     createReplacer(X86::MOV64rr, X86::KMOVQkk);
@@ -695,8 +701,8 @@ void X86DomainReassignment::initConverters() {
 
     createReplacer(X86::AND8rr, X86::KANDBrr);
 
-    createReplacer(X86::MOV8rm, X86::KMOVBkm);
-    createReplacer(X86::MOV8mr, X86::KMOVBmk);
+    createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+    createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
     createReplacer(X86::MOV8rr, X86::KMOVBkk);
 
     createReplacer(X86::NOT8r, X86::KNOTBrr);
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 085fa9280b0ea..ecc7208e76072 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -264,6 +264,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
   MachineInstr &MI = *MBBI;
   unsigned Opcode = MI.getOpcode();
   const DebugLoc &DL = MBBI->getDebugLoc();
+  bool HasEGPR = STI->hasEGPR();
   switch (Opcode) {
   default:
     return false;
@@ -466,10 +467,14 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
     Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
 
-    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
-      .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
-    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
-      .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
+    auto MIBLo =
+        BuildMI(MBB, MBBI, DL,
+                TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
+            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
+    auto MIBHi =
+        BuildMI(MBB, MBBI, DL,
+                TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
+            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
 
     for (int i = 0; i < X86::AddrNumOperands; ++i) {
       MIBLo.add(MBBI->getOperand(1 + i));
@@ -500,8 +505,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
     Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
 
-    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
-    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
+    auto MIBLo = BuildMI(MBB, MBBI, DL,
+                         TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
+    auto MIBHi = BuildMI(MBB, MBBI, DL,
+                         TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
 
     for (int i = 0; i < X86::AddrNumOperands; ++i) {
       MIBLo.add(MBBI->getOperand(i));
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b5dac7a0c65af..f325f47d46464 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2853,46 +2853,56 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, E
 // - copy from GPR to mask register and vice versa
 //
 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
-                           string OpcodeStr, RegisterClass KRC,
-                           ValueType vvt, X86MemOperand x86memop> {
+                           string OpcodeStr, RegisterClass KRC, ValueType vvt,
+                           X86MemOperand x86memop, string Suffix = ""> {
+  let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
   let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
-  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
-             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}")>,
-           Sched<[WriteMove]>;
-  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
-             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-             [(set KRC:$dst, (vvt (load addr:$src)))]>,
-           Sched<[WriteLoad]>;
-  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
-             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-             [(store KRC:$src, addr:$dst)]>,
-           Sched<[WriteStore]>;
+  def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}")>,
+                  Sched<[WriteMove]>;
+  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
+                  Sched<[WriteLoad]>;
+  def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(store KRC:$src, addr:$dst)]>,
+                  Sched<[WriteStore]>;
+  }
 }
 
 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
-                               string OpcodeStr,
-                               RegisterClass KRC, RegisterClass GRC> {
-  let hasSideEffects = 0 in {
-    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
-               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}")>,
-             Sched<[WriteMove]>;
-    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
-               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}")>,
-             Sched<[WriteMove]>;
+                               string OpcodeStr, RegisterClass KRC,
+                               RegisterClass GRC, string Suffix = ""> {
+  let hasSideEffects = 0, explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
+    def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
+                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}")>,
+                    Sched<[WriteMove]>;
+    def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
+                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}")>,
+                    Sched<[WriteMove]>;
   }
 }
 
-let Predicates = [HasDQI] in
+let Predicates = [HasDQI, NoEGPR] in
   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
                VEX, PD;
+let Predicates = [HasDQI, HasEGPR, In64BitMode] in
+  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
+               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
+               EVEX, PD;
 
-let Predicates = [HasAVX512] in
+let Predicates = [HasAVX512, NoEGPR] in
   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
                VEX, PS;
+let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
+  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
+               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
+               EVEX, PS;
 
-let Predicates = [HasBWI] in {
+let Predicates = [HasBWI, NoEGPR] in {
   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
                VEX, PD, REX_W;
   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
@@ -2902,6 +2912,16 @@ let Predicates = [HasBWI] in {
   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
                VEX, XD, REX_W;
 }
+let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
+  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
+               EVEX, PD, REX_W;
+  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
+               EVEX, XD;
+  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
+               EVEX, PS, REX_W;
+  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
+               EVEX, XD, REX_W;
+}
 
 // GR from/to mask register
 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 2d15e93e0d23c..b75c00effead0 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -495,10 +495,12 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
     return false;
   case X86::MOV8rm:
   case X86::KMOVBkm:
+  case X86::KMOVBkm_EVEX:
    MemBytes = 1;
     return true;
   case X86::MOV16rm:
   case X86::KMOVWkm:
+  case X86::KMOVWkm_EVEX:
   case X86::VMOVSHZrm:
   case X86::VMOVSHZrm_alt:
     MemBytes = 2;
@@ -511,6 +513,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
   case X86::VMOVSSZrm:
   case X86::VMOVSSZrm_alt:
   case X86::KMOVDkm:
+  case X86::KMOVDkm_EVEX:
     MemBytes = 4;
     return true;
   case X86::MOV64rm:
@@ -524,6 +527,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
   case X86::MMX_MOVD64rm:
   case X86::MMX_MOVQ64rm:
   case X86::KMOVQkm:
+  case X86::KMOVQkm_EVEX:
     MemBytes = 8;
     return true;
   case X86::MOVAPSrm:
@@ -593,10 +597,12 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
     return false;
   case X86::MOV8mr:
   case X86::KMOVBmk:
+  case X86::KMOVBmk_EVEX:
     MemBytes = 1;
     return true;
   case X86::MOV16mr:
   case X86::KMOVWmk:
+  case X86::KMOVWmk_EVEX:
   case X86::VMOVSHZmr:
     MemBytes = 2;
     return true;
@@ -605,6 +611,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
   case X86::VMOVSSmr:
   case X86::VMOVSSZmr:
   case X86::KMOVDmk:
+  case X86::KMOVDmk_EVEX:
     MemBytes = 4;
     return true;
   case X86::MOV64mr:
@@ -616,6 +623,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
   case X86::MMX_MOVQ64mr:
   case X86::MMX_MOVNTQmr:
   case X86::KMOVQmk:
+  case X86::KMOVQmk_EVEX:
     MemBytes = 8;
     return true;
   case X86::MOVAPSmr:
@@ -3519,6 +3527,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
                                         const X86Subtarget &Subtarget) {
   bool HasAVX = Subtarget.hasAVX();
   bool HasAVX512 = Subtarget.hasAVX512();
+  bool HasEGPR = Subtarget.hasEGPR();
 
   // SrcReg(MaskReg) -> DestReg(GR64)
   // SrcReg(MaskReg) -> DestReg(GR32)
@@ -3527,10 +3536,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
   if (X86::VK16RegClass.contains(SrcReg)) {
     if (X86::GR64RegClass.contains(DestReg)) {
       assert(Subtarget.hasBWI());
-      return X86::KMOVQrk;
+      return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
     }
     if (X86::GR32RegClass.contains(DestReg))
-      return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
+      return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
+                                : (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
   }
   // SrcReg(GR64) -> DestReg(MaskReg)
   // SrcReg(GR32) -> DestReg(MaskReg)
@@ -3540,10 +3550,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
 
   if (X86::VK16RegClass.contains(DestReg)) {
     if (X86::GR64RegClass.contains(SrcReg)) {
      assert(Subtarget.hasBWI());
-      return X86::KMOVQkr;
+      return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
     }
     if (X86::GR32RegClass.contains(SrcReg))
-      return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
+      return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
+                                : (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
   }
 
@@ -3710,6 +3721,7 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
   bool HasAVX = STI.hasAVX();
   bool HasAVX512 = STI.hasAVX512();
   bool HasVLX = STI.hasVLX();
+  bool HasEGPR = STI.hasEGPR();
 
   assert(RC != nullptr && "Invalid target register class");
   switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
@@ -3725,7 +3737,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
     return Load ? X86::MOV8rm : X86::MOV8mr;
   case 2:
     if (X86::VK16RegClass.hasSubClassEq(RC))
-      return Load ? X86::KMOVWkm : X86::KMOVWmk;
+      return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
+                  : (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
     assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
     return Load ? X86::MOV16rm : X86::MOV16mr;
   case 4:
@@ -3743,7 +3756,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
       return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
     if (X86::VK32RegClass.hasSubClassEq(RC)) {
       assert(STI.hasBWI() && "KMOVD requires BWI");
-      return Load ? X86::KMOVDkm : X86::KMOVDmk;
+      return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
+                  : (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
     }
     // All of these mask pair classes have the same spill size, the same kind
     // of kmov instructions can be used with all of them.
@@ -3774,7 +3788,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
       return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
     if (X86::VK64RegClass.hasSubClassEq(RC)) {
       assert(STI.hasBWI() && "KMOVQ requires BWI");
-      return Load ? X86::KMOVQkm : X86::KMOVQmk;
+      return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
+                  : (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
     }
     llvm_unreachable("Unknown 8-byte regclass");
   case 10:
@@ -7717,9 +7732,13 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
     case X86::VMOVDQA64Zrm:
     case X86::VMOVDQU64Zrm:
     case X86::KMOVBkm:
+    case X86::KMOVBkm_EVEX:
     case X86::KMOVWkm:
+    case X86::KMOVWkm_EVEX:
    case X86::KMOVDkm:
+    case X86::KMOVDkm_EVEX:
    case X86::KMOVQkm:
+    case X86::KMOVQkm_EVEX:
       return true;
     }
   };
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 9046b6af463ac..9ec09ac3d28e2 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -878,9 +878,10 @@ def relocImm : ComplexPattern<i64, 2, "selectRelocImm",
                               [X86Wrapper], [], 0>;
 
+def HasEGPR      : Predicate<"Subtarget->hasEGPR()">;
+def NoEGPR       : Predicate<"!Subtarget->hasEGPR()">;
 def HasCMOV      : Predicate<"Subtarget->canUseCMOV()">;
 def NoCMOV       : Predicate<"!Subtarget->canUseCMOV()">;
-
 def HasNOPL      : Predicate<"Subtarget->hasNOPL()">;
 def HasMMX       : Predicate<"Subtarget->hasMMX()">;
 def Has3DNow     : Predicate<"Subtarget->hasThreeDNow()">;
diff --git a/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll b/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
new file mode 100644
index 0000000000000..747b288ec2f03
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s
+
+define void @kmov(i1 %cmp23.not) {
+; CHECK-LABEL: kmov:
+; CHECK: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+;
+; EGPR-LABEL: kmov:
+; EGPR: kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
+entry:
+  %0 = select i1 %cmp23.not, double 1.000000e+00, double 0.000000e+00
+  store double %0, ptr null, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll b/llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll
new file mode 100644
index 0000000000000..b09a14cee9574
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-domain-assignment.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-domain-reassignment -mattr=+avx512f,+avx512bw,+avx512dq,+egpr | FileCheck %s
+
+define void @test_fcmp_storei1(i1 %cond, ptr %fptr, ptr %iptr, float %f1, float %f2, float %f3, float %f4) {
+  ; CHECK-LABEL: name: test_fcmp_storei1
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $edi, $rdx, $xmm0, $xmm1, $xmm2, $xmm3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:fr32x = COPY $xmm3
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:fr32x = COPY $xmm2
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:fr32x = COPY $xmm1
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fr32x = COPY $xmm0
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gr64 = COPY $rdx
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gr32 = COPY $edi
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:gr8 = COPY [[COPY5]].sub_8bit
+  ; CHECK-NEXT:   TEST8ri killed [[COPY6]], 1, implicit-def $eflags
+  ; CHECK-NEXT:   JCC_1 %bb.2, 4, implicit $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.if:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[VCMPSSZrr:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrr [[COPY3]], [[COPY2]], 0, implicit $mxcsr
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vk16 = COPY [[VCMPSSZrr]]
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:vk32 = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:vk8 = COPY [[COPY8]]
+  ; CHECK-NEXT:   JMP_1 %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.else:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[VCMPSSZrr1:%[0-9]+]]:vk1 = nofpexcept VCMPSSZrr [[COPY1]], [[COPY]], 0, implicit $mxcsr
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:vk16 = COPY [[VCMPSSZrr1]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:vk32 = COPY [[COPY10]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:vk8 = COPY [[COPY11]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.exit:
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:vk8 = PHI [[COPY12]], %bb.2, [[COPY9]], %bb.1
+  ; CHECK-NEXT:   KMOVBmk_EVEX [[COPY4]], 1, $noreg, 0, $noreg, [[PHI]]
+  ; CHECK-NEXT:   RET 0
+entry:
+  br i1 %cond, label %if, label %else
+
+if:
+  %cmp1 = fcmp oeq float %f1, %f2
+  br label %exit
+
+else:
+  %cmp2 = fcmp oeq float %f3, %f4
+  br label %exit
+
+exit:
+  %val = phi i1 [%cmp1, %if], [%cmp2, %else]
+  store i1 %val, ptr %iptr
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/apx/kmov-isel.ll b/llvm/test/CodeGen/X86/apx/kmov-isel.ll
new file mode 100644
index 0000000000000..882269b498261
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-isel.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+egpr --show-mc-encoding | FileCheck --check-prefix=AVX512 %s
+
+define void @bitcast_16i8_store(ptr %p, <16 x i8> %a0) {
+; AVX512-LABEL: bitcast_16i8_store:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512-NEXT:    vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
+; AVX512-NEXT:    kmovw %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x91,0x07]
+; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT:    retq # encoding: [0xc3]
+  %a1 = icmp slt <16 x i8> %a0, zeroinitializer
+  %a2 = bitcast <16 x i1> %a1 to i16
+  store i16 %a2, ptr %p
+  ret void
+}
+
+define void @bitcast_32i8_store(ptr %p, <32 x i8> %a0) {
+; AVX512-LABEL: bitcast_32i8_store:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
+; AVX512-NEXT:    kmovd %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x91,0x07]
+; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT:    retq # encoding: [0xc3]
+  %a1 = icmp slt <32 x i8> %a0, zeroinitializer
+  %a2 = bitcast <32 x i1> %a1 to i32
+  store i32 %a2, ptr %p
+  ret void
+}
+
+define void @bitcast_64i8_store(ptr %p, <64 x i8> %a0) {
+; AVX512-LABEL: bitcast_64i8_store:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
+; AVX512-NEXT:    kmovq %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x91,0x07]
+; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT:    retq # encoding: [0xc3]
+  %a1 = icmp slt <64 x i8> %a0, zeroinitializer
+  %a2 = bitcast <64 x i1> %a1 to i64
+  store i64 %a2, ptr %p
+  ret void
+}
+
+define <16 x i1> @bitcast_16i8_load(ptr %p, <16 x i1> %a, <16 x i1> %b) {
+; AVX512-LABEL: bitcast_16i8_load:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsllw $7, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x71,0xf1,0x07]
+; AVX512-NEXT:    vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
+; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x07]
+; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; AVX512-NEXT:    kmovw (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x90,0x0f]
+; AVX512-NEXT:    vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
+; AVX512-NEXT:    kandnw %k0, %k1, %k0 # encoding: [0xc5,0xf4,0x42,0xc0]
+; AVX512-NEXT:    korw %k0, %k2, %k0 # encoding: [0xc5,0xec,0x45,0xc0]
+; AVX512-NEXT:    vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT:    retq # encoding: [0xc3]
+  %mask = load i16, ptr %p
+  %vmask = bitcast i16 %mask to <16 x i1>
+  %res = select <16 x i1> %vmask, <16 x i1> %a, <16 x i1> %b
+  ret <16 x i1> %res
+}
+
+define <32 x i1> @bitcast_32i8_load(ptr %p, <32 x i1> %a, <32 x i1> %b) {
+; AVX512-LABEL: bitcast_32i8_load:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsllw $7, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x71,0xf1,0x07]
+; AVX512-NEXT:    vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
+; AVX512-NEXT:    vpsllw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xf0,0x07]
+; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; AVX512-NEXT:    kmovd (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x90,0x0f]
+; AVX512-NEXT:    vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
+; AVX512-NEXT:    kandnd %k0, %k1, %k0 # encoding: [0xc4,0xe1,0xf5,0x42,0xc0]
+; AVX512-NEXT:    kord %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x45,0xc0]
+; AVX512-NEXT:    vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT:    retq # encoding: [0xc3]
+  %mask = load i32, ptr %p
+  %vmask = bitcast i32 %mask to <32 x i1>
+  %res = select <32 x i1> %vmask, <32 x i1> %a, <32 x i1> %b
+  ret <32 x i1> %res
+}
+
+define <64 x i1> @bitcast_64i8_load(ptr %p, <64 x i1> %a, <64 x i1> %b) {
+; AVX512-LABEL: bitcast_64i8_load:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsllw $7, %zmm1, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x71,0xf1,0x07]
+; AVX512-NEXT:    vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
+; AVX512-NEXT:    vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
+; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; AVX512-NEXT:    kmovq (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x90,0x0f]
+; AVX512-NEXT:    vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
+; AVX512-NEXT:    kandnq %k0, %k1, %k0 # encoding: [0xc4,0xe1,0xf4,0x42,0xc0]
+; AVX512-NEXT:    korq %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xec,0x45,0xc0]
+; AVX512-NEXT:    vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
+; AVX512-NEXT:    retq # encoding: [0xc3]
+  %mask = load i64, ptr %p
+  %vmask = bitcast i64 %mask to <64 x i1>
+  %res = select <64 x i1> %vmask, <64 x i1> %a, <64 x i1> %b
+  ret <64 x i1> %res
+}
diff --git a/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll b/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll
new file mode 100644
index 0000000000000..7e1ce02ed8e89
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=AVX512 %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+egpr -show-mc-encoding | FileCheck --check-prefix=AVX512BW %s
+
+define void @kmovkr_1(i1 %cmp23.not) {
+; AVX512-LABEL: kmovkr_1:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
+; AVX512-NEXT:    vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x10,0x05,A,A,A,A]
+; AVX512-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX512-NEXT:    vmovsd %xmm0, 0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
+; AVX512-NEXT:    retq # encoding: [0xc3]
+;
+; AVX512BW-LABEL: kmovkr_1:
+; AVX512BW:       # %bb.0: # %entry
+; AVX512BW-NEXT:    kmovd %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x92,0xcf]
+; AVX512BW-NEXT:    vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x10,0x05,A,A,A,A]
+; AVX512BW-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX512BW-NEXT:    vmovsd %xmm0, 0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
+; AVX512BW-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = select i1 %cmp23.not, double 1.000000e+00, double 0.000000e+00
+  store double %0, ptr null, align 8
+  ret void
+}
+
+define void @kmovkr_2() {
+; AVX512-LABEL: kmovkr_2:
+; AVX512:       # %bb.0: # %alloca_21
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; AVX512-NEXT:    movw $3, %ax # encoding: [0x66,0xb8,0x03,0x00]
+; AVX512-NEXT:    kmovw %eax, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xc8]
+; AVX512-NEXT:    vmovups %zmm0, 0 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
+; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT:    retq # encoding: [0xc3]
+;
+; AVX512BW-LABEL: kmovkr_2:
+; AVX512BW:       # %bb.0: # %alloca_21
+; AVX512BW-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; AVX512BW-NEXT:    movw $3, %ax # encoding: [0x66,0xb8,0x03,0x00]
+; AVX512BW-NEXT:    kmovd %eax, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x92,0xc8]
+; AVX512BW-NEXT:    vmovups %zmm0, 0 {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
+; AVX512BW-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512BW-NEXT:    retq # encoding: [0xc3]
+alloca_21:
+  call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr null, i32 1, <4 x i1> <i1 true, i1 true, i1 false, i1 false>)
+  ret void
+}
+
+define i32 @kmovrk_1(<4 x ptr> %arg) {
+; AVX512-LABEL: kmovrk_1:
+; AVX512:       # %bb.0: # %bb
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
+; AVX512-NEXT:    kmovw %k0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x93,0xc0]
+; AVX512-NEXT:    testb $15, %al # encoding: [0xa8,0x0f]
+; AVX512-NEXT:    jne .LBB2_1 # encoding: [0x75,A]
+; AVX512-NEXT:    # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
+; AVX512-NEXT:  # %bb.2: # %bb3
+; AVX512-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT:    retq # encoding: [0xc3]
+; AVX512-NEXT:  .LBB2_1: # %bb2
+;
+; AVX512BW-LABEL: kmovrk_1:
+; AVX512BW:       # %bb.0: # %bb
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
+; AVX512BW-NEXT:    kmovd %k0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x93,0xc0]
+; AVX512BW-NEXT:    testb $15, %al # encoding: [0xa8,0x0f]
+; AVX512BW-NEXT:    jne .LBB2_1 # encoding: [0x75,A]
+; AVX512BW-NEXT:    # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
+; AVX512BW-NEXT:  # %bb.2: # %bb3
+; AVX512BW-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX512BW-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512BW-NEXT:    retq # encoding: [0xc3]
+; AVX512BW-NEXT:  .LBB2_1: # %bb2
+bb:
+  %icmp = icmp ne <4 x ptr> %arg, zeroinitializer
+  %freeze = freeze <4 x i1> %icmp
+  %bitcast = bitcast <4 x i1> %freeze to i4
+  %icmp1 = icmp ne i4 %bitcast, 0
+  br i1 %icmp1, label %bb2, label %bb3
+bb2:
+  unreachable
+bb3:
+  ret i32 0
+}
+
+declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr nocapture, i32 immarg, <4 x i1>)
diff --git a/llvm/test/MC/Disassembler/X86/apx/kmov.txt b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
new file mode 100644
index 0000000000000..d089ef192230a
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
@@ -0,0 +1,82 @@
+# RUN: llvm-mc -triple x86_64 -disassemble %s | FileCheck %s --check-prefix=ATT
+# RUN: llvm-mc -triple x86_64 -disassemble -output-asm-variant=1 %s | FileCheck %s --check-prefix=INTEL
+
+# ATT:   kmovb %r16d, %k1
+# INTEL: kmovb k1, r16d
+0x62,0xf9,0x7d,0x08,0x92,0xc8
+
+# ATT:   kmovw %r16d, %k1
+# INTEL: kmovw k1, r16d
+0x62,0xf9,0x7c,0x08,0x92,0xc8
+
+# ATT:   kmovd %r16d, %k1
+# INTEL: kmovd k1, r16d
+0x62,0xf9,0x7f,0x08,0x92,0xc8
+
+# ATT:   kmovq %r16, %k1
+# INTEL: kmovq k1, r16
+0x62,0xf9,0xff,0x08,0x92,0xc8
+
+# ATT:   kmovb %k1, %r16d
+# INTEL: kmovb r16d, k1
+0x62,0xe1,0x7d,0x08,0x93,0xc1
+
+# ATT:   kmovw %k1, %r16d
+# INTEL: kmovw r16d, k1
+0x62,0xe1,0x7c,0x08,0x93,0xc1
+
+# ATT:   kmovd %k1, %r16d
+# INTEL: kmovd r16d, k1
+0x62,0xe1,0x7f,0x08,0x93,0xc1
+
+# ATT:   kmovq %k1, %r16
+# INTEL: kmovq r16, k1
+0x62,0xe1,0xff,0x08,0x93,0xc1
+
+# ATT:   kmovb (%r16,%r17), %k1
+# INTEL: kmovb k1, byte ptr [r16 + r17]
+0x62,0xf9,0x79,0x08,0x90,0x0c,0x08
+
+# ATT:   kmovw (%r16,%r17), %k1
+# INTEL: kmovw k1, word ptr [r16 + r17]
+0x62,0xf9,0x78,0x08,0x90,0x0c,0x08
+
+# ATT:   kmovd (%r16,%r17), %k1
+# INTEL: kmovd k1, dword ptr [r16 + r17]
+0x62,0xf9,0xf9,0x08,0x90,0x0c,0x08
+
+# ATT:   kmovq (%r16,%r17), %k1
+# INTEL: kmovq k1, qword ptr [r16 + r17]
+0x62,0xf9,0xf8,0x08,0x90,0x0c,0x08
+
+# ATT:   kmovb %k1, (%r16,%r17)
+# INTEL: kmovb byte ptr [r16 + r17], k1
+0x62,0xf9,0x79,0x08,0x91,0x0c,0x08
+
+# ATT:   kmovw %k1, (%r16,%r17)
+# INTEL: kmovw word ptr [r16 + r17], k1
+0x62,0xf9,0x78,0x08,0x91,0x0c,0x08
+
+# ATT:   kmovd %k1, (%r16,%r17)
+# INTEL: kmovd dword ptr [r16 + r17], k1
+0x62,0xf9,0xf9,0x08,0x91,0x0c,0x08
+
+# ATT:   kmovq %k1, (%r16,%r17)
+# INTEL: kmovq qword ptr [r16 + r17], k1
+0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08
+
+# ATT:   {evex} kmovb %k1, %k2
+# INTEL: {evex} kmovb k2, k1
+0x62,0xf1,0x7d,0x08,0x90,0xd1
+
+# ATT:   {evex} kmovw %k1, %k2
+# INTEL: {evex} kmovw k2, k1
+0x62,0xf1,0x7c,0x08,0x90,0xd1
+
+# ATT:   {evex} kmovd %k1, %k2
+# INTEL: {evex} kmovd k2, k1
+0x62,0xf1,0xfd,0x08,0x90,0xd1
+
+# ATT:   {evex} kmovq %k1, %k2
+# INTEL: {evex} kmovq k2, k1
+0x62,0xf1,0xfc,0x08,0x90,0xd1
diff --git a/llvm/test/MC/X86/apx/kmov-att.s b/llvm/test/MC/X86/apx/kmov-att.s
new file mode 100644
index 0000000000000..be5042cf0a30c
--- /dev/null
+++ b/llvm/test/MC/X86/apx/kmov-att.s
@@ -0,0 +1,69 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-20: error:
+# ERROR-NOT: error:
+# CHECK: kmovb %r16d, %k1
+# CHECK: encoding: [0x62,0xf9,0x7d,0x08,0x92,0xc8]
+         kmovb %r16d, %k1
+# CHECK: kmovw %r16d, %k1
+# CHECK: encoding: [0x62,0xf9,0x7c,0x08,0x92,0xc8]
+         kmovw %r16d, %k1
+# CHECK: kmovd %r16d, %k1
+# CHECK: encoding: [0x62,0xf9,0x7f,0x08,0x92,0xc8]
+         kmovd %r16d, %k1
+# CHECK: kmovq %r16, %k1
+# CHECK: encoding: [0x62,0xf9,0xff,0x08,0x92,0xc8]
+         kmovq %r16, %k1
+
+# CHECK: kmovb %k1, %r16d
+# CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x93,0xc1]
+         kmovb %k1, %r16d
+# CHECK: kmovw %k1, %r16d
+# CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x93,0xc1]
+         kmovw %k1, %r16d
+# CHECK: kmovd %k1, %r16d
+# CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x93,0xc1]
+         kmovd %k1, %r16d
+# CHECK: kmovq %k1, %r16
+# CHECK: encoding: [0x62,0xe1,0xff,0x08,0x93,0xc1]
+         kmovq %k1, %r16
+
+# CHECK: kmovb (%r16,%r17), %k1
+# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x90,0x0c,0x08]
+         kmovb (%r16,%r17), %k1
+# CHECK: kmovw (%r16,%r17), %k1
+# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x90,0x0c,0x08]
+         kmovw (%r16,%r17), %k1
+# CHECK: kmovd (%r16,%r17), %k1
+# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x90,0x0c,0x08]
+         kmovd (%r16,%r17), %k1
+# CHECK: kmovq (%r16,%r17), %k1
+# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x90,0x0c,0x08]
+         kmovq (%r16,%r17), %k1
+
+# CHECK: kmovb %k1, (%r16,%r17)
+# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x91,0x0c,0x08]
+         kmovb %k1, (%r16,%r17)
+# CHECK: kmovw %k1, (%r16,%r17)
+# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x91,0x0c,0x08]
+         kmovw %k1, (%r16,%r17)
+# CHECK: kmovd %k1, (%r16,%r17)
+# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x91,0x0c,0x08]
+         kmovd %k1, (%r16,%r17)
+# CHECK: kmovq %k1, (%r16,%r17)
+# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08]
+         kmovq %k1, (%r16,%r17)
+
+# CHECK: {evex} kmovb %k1, %k2
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1]
+         {evex} kmovb %k1, %k2
+# CHECK: {evex} kmovw %k1, %k2
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0xd1]
+         {evex} kmovw %k1, %k2
+# CHECK: {evex} kmovd %k1, %k2
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0xd1]
+         {evex} kmovd %k1, %k2
+# CHECK: {evex} kmovq %k1, %k2
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
+         {evex} kmovq %k1, %k2
diff --git a/llvm/test/MC/X86/apx/kmov-intel.s b/llvm/test/MC/X86/apx/kmov-intel.s
new file mode 100644
index 0000000000000..8ceb29d32dba6
--- /dev/null
+++ b/llvm/test/MC/X86/apx/kmov-intel.s
@@ -0,0 +1,66 @@
+# RUN: llvm-mc -triple x86_64 -show-encoding -x86-asm-syntax=intel -output-asm-variant=1 %s | FileCheck %s
+
+# CHECK: kmovb k1, r16d
+# CHECK: encoding: [0x62,0xf9,0x7d,0x08,0x92,0xc8]
+         kmovb k1, r16d
+# CHECK: kmovw k1, r16d
+# CHECK: encoding: [0x62,0xf9,0x7c,0x08,0x92,0xc8]
+         kmovw k1, r16d
+# CHECK: kmovd k1, r16d
+# CHECK: encoding: [0x62,0xf9,0x7f,0x08,0x92,0xc8]
+         kmovd k1, r16d
+# CHECK: kmovq k1, r16
+# CHECK: encoding: [0x62,0xf9,0xff,0x08,0x92,0xc8]
+         kmovq k1, r16
+
+# CHECK: kmovb r16d, k1
+# CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x93,0xc1]
+         kmovb r16d, k1
+# CHECK: kmovw r16d, k1
+# CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x93,0xc1]
+         kmovw r16d, k1
+# CHECK: kmovd r16d, k1
+# CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x93,0xc1]
+         kmovd r16d, k1
+# CHECK: kmovq r16, k1
+# CHECK: encoding: [0x62,0xe1,0xff,0x08,0x93,0xc1]
+         kmovq r16, k1
+
+# CHECK: kmovb k1, byte ptr [r16 + r17]
+# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x90,0x0c,0x08]
+         kmovb k1, byte ptr [r16 + r17]
+# CHECK: kmovw k1, word ptr [r16 + r17]
+# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x90,0x0c,0x08]
+         kmovw k1, word ptr [r16 + r17]
+# CHECK: kmovd k1, dword ptr [r16 + r17]
+# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x90,0x0c,0x08]
+         kmovd k1, dword ptr [r16 + r17]
+# CHECK: kmovq k1, qword ptr [r16 + r17]
+# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x90,0x0c,0x08]
+         kmovq k1, qword ptr [r16 + r17]
+
+# CHECK: kmovb byte ptr [r16 + r17], k1
+# CHECK: encoding: [0x62,0xf9,0x79,0x08,0x91,0x0c,0x08]
+         kmovb byte ptr [r16 + r17], k1
+# CHECK: kmovw word ptr [r16 + r17], k1
+# CHECK: encoding: [0x62,0xf9,0x78,0x08,0x91,0x0c,0x08]
+         kmovw word ptr [r16 + r17], k1
+# CHECK: kmovd dword ptr [r16 + r17], k1
+# CHECK: encoding: [0x62,0xf9,0xf9,0x08,0x91,0x0c,0x08]
+         kmovd dword ptr [r16 + r17], k1
+# CHECK: kmovq qword ptr [r16 + r17], k1
+# CHECK: encoding: [0x62,0xf9,0xf8,0x08,0x91,0x0c,0x08]
+         kmovq qword ptr [r16 + r17], k1
+
+# CHECK: {evex} kmovb k2, k1
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1]
+         {evex} kmovb k2, k1
+# CHECK: {evex} kmovw k2, k1
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0xd1]
+         {evex} kmovw k2, k1
+# CHECK: {evex} kmovd k2, k1
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0xd1]
+         {evex} kmovd k2, k1
+# CHECK: {evex} kmovq k2, k1
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
+         {evex} kmovq k2, k1
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index d4515161ee085..a0d35655ae4e6 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -487,9 +487,13 @@ static const X86FoldTableEntry Table1[] = {
   {X86::IMUL64rri32, X86::IMUL64rmi32, 0},
   {X86::IMUL64rri8, X86::IMUL64rmi8, 0},
   {X86::KMOVBkk, X86::KMOVBkm, TB_NO_REVERSE},
+  {X86::KMOVBkk_EVEX, X86::KMOVBkm_EVEX, TB_NO_REVERSE},
   {X86::KMOVDkk, X86::KMOVDkm, 0},
+  {X86::KMOVDkk_EVEX, X86::KMOVDkm_EVEX, 0},
   {X86::KMOVQkk, X86::KMOVQkm, 0},
+  {X86::KMOVQkk_EVEX, X86::KMOVQkm_EVEX, 0},
   {X86::KMOVWkk, X86::KMOVWkm, 0},
+  {X86::KMOVWkk_EVEX, X86::KMOVWkm_EVEX, 0},
   {X86::LWPINS32rri, X86::LWPINS32rmi, 0},
   {X86::LWPINS64rri, X86::LWPINS64rmi, 0},
   {X86::LWPVAL32rri, X86::LWPVAL32rmi, 0},