diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index 0b7a98ad6341d..5cd2a8e40d0d5 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -8,7 +8,7 @@ tablegen(LLVM X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
 tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM X86GenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
+tablegen(LLVM X86GenEVEX2NonEVEXTables.inc -gen-x86-EVEX2NonEVEX-tables)
 tablegen(LLVM X86GenExegesis.inc -gen-exegesis)
 tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
@@ -61,7 +61,7 @@ set(sources
   X86InstrFMA3Info.cpp
   X86InstrFoldTables.cpp
   X86InstrInfo.cpp
-  X86EvexToVex.cpp
+  X86EvexToNonEvex.cpp
   X86LoadValueInjectionLoadHardening.cpp
   X86LoadValueInjectionRetHardening.cpp
   X86MCInstLower.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 485afbc1dfbc2..9f2c641cce3ae 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -131,9 +131,9 @@ FunctionPass *createX86FixupBWInsts();
 /// to another, when profitable.
 FunctionPass *createX86DomainReassignmentPass();
 
-/// This pass replaces EVEX encoded of AVX-512 instructiosn by VEX
+/// This pass replaces EVEX-encoded AVX-512 instructions by non-EVEX
 /// encoding when possible in order to reduce code size.
-FunctionPass *createX86EvexToVexInsts();
+FunctionPass *createX86EvexToNonEvexInsts();
 
 /// This pass creates the thunks for the retpoline feature.
 FunctionPass *createX86IndirectThunksPass();
@@ -167,7 +167,7 @@ FunctionPass *createX86SpeculativeLoadHardeningPass();
 FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
 FunctionPass *createX86ArgumentStackSlotPass();
 
-void initializeEvexToVexInstPassPass(PassRegistry &);
+void initializeEvexToNonEvexInstPassPass(PassRegistry &);
 void initializeFPSPass(PassRegistry &);
 void initializeFixupBWInstPassPass(PassRegistry &);
 void initializeFixupLEAPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86EvexToNonEvex.cpp
similarity index 75%
rename from llvm/lib/Target/X86/X86EvexToVex.cpp
rename to llvm/lib/Target/X86/X86EvexToNonEvex.cpp
index c425c37b41868..444180b34661b 100644
--- a/llvm/lib/Target/X86/X86EvexToVex.cpp
+++ b/llvm/lib/Target/X86/X86EvexToNonEvex.cpp
@@ -1,5 +1,6 @@
-//===- X86EvexToVex.cpp ---------------------------------------------------===//
-// Compress EVEX instructions to VEX encoding when possible to reduce code size
+//===- X86EvexToNonEvex.cpp -----------------------------------------------===//
+// Compress EVEX instructions to Non-EVEX encoding when possible to reduce code
+// size.
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -16,7 +17,11 @@
 /// accessed by instruction is less than 512 bits and when it does not use
 //  the xmm or the mask registers or xmm/ymm registers with indexes higher
 //  than 15.
-/// The pass applies code reduction on the generated code for AVX-512 instrs.
+// APX-promoted instrs use EVEX encoding, which lets them use r16-r31; if they
+// do not use an eGPR, we can compress them back to legacy encoding to save
+// code size.
+/// The pass applies code reduction on the generated code for AVX-512 instrs
+/// and APX-promoted instrs.
 //
 //===----------------------------------------------------------------------===//
@@ -38,34 +43,35 @@
 
 using namespace llvm;
 
-// Including the generated EVEX2VEX tables.
-struct X86EvexToVexCompressTableEntry {
+// Including the generated EVEX2NonEVEX tables.
+struct X86EvexToNonEvexCompressTableEntry {
   uint16_t EvexOpc;
-  uint16_t VexOpc;
+  uint16_t NonEvexOpc;
 
-  bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
+  bool operator<(const X86EvexToNonEvexCompressTableEntry &RHS) const {
     return EvexOpc < RHS.EvexOpc;
   }
 
-  friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
+  friend bool operator<(const X86EvexToNonEvexCompressTableEntry &TE,
                         unsigned Opc) {
     return TE.EvexOpc < Opc;
   }
 };
-#include "X86GenEVEX2VEXTables.inc"
+#include "X86GenEVEX2NonEVEXTables.inc"
 
-#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible"
-#define EVEX2VEX_NAME "x86-evex-to-vex-compress"
+#define EVEX2NONEVEX_DESC                                                      \
+  "Compressing EVEX instrs to Non-EVEX encoding when possible"
+#define EVEX2NONEVEX_NAME "x86-evex-to-non-evex-compress"
 
-#define DEBUG_TYPE EVEX2VEX_NAME
+#define DEBUG_TYPE EVEX2NONEVEX_NAME
 
 namespace {
 
-class EvexToVexInstPass : public MachineFunctionPass {
+class EvexToNonEvexInstPass : public MachineFunctionPass {
 public:
   static char ID;
-  EvexToVexInstPass() : MachineFunctionPass(ID) {}
+  EvexToNonEvexInstPass() : MachineFunctionPass(ID) {}
 
-  StringRef getPassName() const override { return EVEX2VEX_DESC; }
+  StringRef getPassName() const override { return EVEX2NONEVEX_DESC; }
 
   /// Loop over all of the basic blocks, replacing EVEX instructions
   /// by equivalent VEX instructions when possible for reducing code size.
@@ -80,7 +86,7 @@ class EvexToVexInstPass : public MachineFunctionPass {
 
 } // end anonymous namespace
 
-char EvexToVexInstPass::ID = 0;
+char EvexToNonEvexInstPass::ID = 0;
 
 static bool usesExtendedRegister(const MachineInstr &MI) {
   auto isHiRegIdx = [](unsigned Reg) {
@@ -200,7 +206,7 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
   case X86::VRNDSCALESDZm_Int:
   case X86::VRNDSCALESSZr_Int:
   case X86::VRNDSCALESSZm_Int:
-    const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+    const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
     int64_t ImmVal = Imm.getImm();
     // Ensure that only bits 3:0 of the immediate are used.
     if ((ImmVal & 0xf) != ImmVal)
@@ -214,6 +220,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) {
 // For EVEX instructions that can be encoded using VEX encoding
 // replace them by the VEX encoding in order to reduce size.
 static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
+  if (!ST.hasAVX512())
+    return false;
   // VEX format.
   // # of bytes: 0,2,3  1      1      0,1     0,1,2,4  0,1
   //  [Prefixes]  [VEX]  OPCODE ModR/M [SIB]  [DISP]   [IMM]
@@ -239,7 +247,7 @@ static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
     return false;
 
   // Use the VEX.L bit to select the 128 or 256-bit table.
-  ArrayRef<X86EvexToVexCompressTableEntry> Table =
+  ArrayRef<X86EvexToNonEvexCompressTableEntry> Table =
      (Desc.TSFlags & X86II::VEX_L) ? ArrayRef(X86EvexToVex256CompressTable)
                                    : ArrayRef(X86EvexToVex128CompressTable);
@@ -252,15 +260,37 @@ static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) {
     return false;
   if (!checkVEXInstPredicate(EvexOpc, ST))
     return false;
-  if (!performCustomAdjustments(MI, I->VexOpc))
+  if (!performCustomAdjustments(MI, I->NonEvexOpc))
     return false;
 
-  MI.setDesc(ST.getInstrInfo()->get(I->VexOpc));
+  MI.setDesc(ST.getInstrInfo()->get(I->NonEvexOpc));
   MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
   return true;
 }
 
-bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
+// For APX-promoted instructions, if they do not use an eGPR, we can try to
+// use legacy encoding to save code size.
+static bool CompressEVEX2LegacyImpl(MachineInstr &MI, const X86Subtarget &ST) {
+  if (!ST.hasEGPR())
+    return false;
+  ArrayRef<X86EvexToNonEvexCompressTableEntry> Table =
+      X86EvexToLegacyCompressTable;
+  unsigned EvexOpc = MI.getOpcode();
+  const auto *I = llvm::lower_bound(Table, EvexOpc);
+  if (I == Table.end() || I->EvexOpc != EvexOpc)
+    return false;
+  unsigned NewOpc = I->NonEvexOpc;
+  for (unsigned Index = 0, Size = MI.getNumOperands(); Index < Size; Index++) {
+    const MachineOperand &Op = MI.getOperand(Index);
+    if (Op.isReg() && X86II::isApxExtendedReg(Op.getReg()))
+      return false;
+  }
+  MI.setDesc(ST.getInstrInfo()->get(NewOpc));
+  MI.setAsmPrinterFlag(X86::AC_EVEX_2_LEGACY);
+  return true;
+}
+
+bool EvexToNonEvexInstPass::runOnMachineFunction(MachineFunction &MF) {
 #ifndef NDEBUG
   // Make sure the tables are sorted.
   static std::atomic<bool> TableChecked(false);
@@ -269,28 +299,33 @@ bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
            "X86EvexToVex128CompressTable is not sorted!");
     assert(llvm::is_sorted(X86EvexToVex256CompressTable) &&
            "X86EvexToVex256CompressTable is not sorted!");
+    assert(llvm::is_sorted(X86EvexToLegacyCompressTable) &&
+           "X86EvexToLegacyCompressTable is not sorted!");
     TableChecked.store(true, std::memory_order_relaxed);
   }
 #endif
   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
-  if (!ST.hasAVX512())
+  if (!ST.hasAVX512() && !ST.hasEGPR())
     return false;
 
   bool Changed = false;
 
   /// Go over all basic blocks in function and replace
-  /// EVEX encoded instrs by VEX encoding when possible.
+  /// EVEX encoded instrs by VEX/legacy encoding when possible.
   for (MachineBasicBlock &MBB : MF) {
     // Traverse the basic block.
-    for (MachineInstr &MI : MBB)
+    for (MachineInstr &MI : MBB) {
       Changed |= CompressEvexToVexImpl(MI, ST);
+      Changed |= CompressEVEX2LegacyImpl(MI, ST);
+    }
  }
 
   return Changed;
 }
 
-INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
+INITIALIZE_PASS(EvexToNonEvexInstPass, EVEX2NONEVEX_NAME, EVEX2NONEVEX_DESC,
+                false, false)
 
-FunctionPass *llvm::createX86EvexToVexInsts() {
-  return new EvexToVexInstPass();
+FunctionPass *llvm::createX86EvexToNonEvexInsts() {
+  return new EvexToNonEvexInstPass();
 }
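
Note (review aid, not part of the patch): CompressEVEX2LegacyImpl above follows the same lookup/guard/rewrite shape as the existing VEX path. A minimal standalone sketch of that shape, with made-up opcodes and table contents (the real data comes from the generated X86GenEVEX2NonEVEXTables.inc):

  #include <algorithm>
  #include <cstdint>
  #include <iterator>

  struct Entry {
    uint16_t EvexOpc;    // APX-promoted (EVEX) opcode
    uint16_t NonEvexOpc; // legacy opcode it compresses to
  };

  // Sorted by EvexOpc, like the generated compress tables; placeholder values.
  static const Entry Table[] = {{0x100, 0x10}, {0x200, 0x20}};

  // Return 0 if there is no legacy mapping for EvexOpc.
  uint16_t lookupLegacyOpc(uint16_t EvexOpc) {
    const Entry *I = std::lower_bound(
        std::begin(Table), std::end(Table), EvexOpc,
        [](const Entry &E, uint16_t Opc) { return E.EvexOpc < Opc; });
    if (I == std::end(Table) || I->EvexOpc != EvexOpc)
      return 0;
    return I->NonEvexOpc;
  }

Besides the table hit, the only semantic guard is the operand scan: any register operand in r16-r31 (X86II::isApxExtendedReg) blocks the rewrite, since legacy encodings have no bits to address those registers.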
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index eac8d79eb8a32..87f4d3d72c3b7 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -30,7 +30,9 @@ namespace X86 {
 
 enum AsmComments {
   // For instr that was compressed from EVEX to VEX.
-  AC_EVEX_2_VEX = MachineInstr::TAsmComments
+  AC_EVEX_2_VEX = MachineInstr::TAsmComments,
+  // For instrs that were compressed from EVEX to legacy.
+  AC_EVEX_2_LEGACY = AC_EVEX_2_VEX << 1
 };
 
 /// Return a pair of condition code for the given predicate and whether
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index e1a67f61e7664..b3544bb5a278d 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2060,6 +2060,8 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
   if (TM.Options.MCOptions.ShowMCEncoding) {
     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
+    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
+      OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
   }
 
   // Add comments for values loaded from constant pool.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 5668b514d6dec..05f1dbd63f4f1 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -75,7 +75,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
   initializeGlobalISel(PR);
   initializeWinEHStatePassPass(PR);
   initializeFixupBWInstPassPass(PR);
-  initializeEvexToVexInstPassPass(PR);
+  initializeEvexToNonEvexInstPassPass(PR);
   initializeFixupLEAPassPass(PR);
   initializeFPSPass(PR);
   initializeX86FixupSetCCPassPass(PR);
@@ -575,7 +575,7 @@ void X86PassConfig::addPreEmitPass() {
     addPass(createX86FixupInstTuning());
     addPass(createX86FixupVectorConstants());
   }
-  addPass(createX86EvexToVexInsts());
+  addPass(createX86EvexToNonEvexInsts());
   addPass(createX86DiscriminateMemOpsPass());
   addPass(createX86InsertPrefetchPass());
   addPass(createX86InsertX87waitPass());
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 402645ed1e2e5..feec8d3db27e6 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -68,7 +68,7 @@
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
 ; CHECK-NEXT:       X86 Indirect Branch Tracking
 ; CHECK-NEXT:       X86 vzeroupper inserter
-; CHECK-NEXT:       Compressing EVEX instrs to VEX encoding when possible
+; CHECK-NEXT:       Compressing EVEX instrs to Non-EVEX encoding when possible
 ; CHECK-NEXT:       X86 Discriminate Memory Operands
 ; CHECK-NEXT:       X86 Insert Cache Prefetches
 ; CHECK-NEXT:       X86 insert wait instruction
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
index 873986e99777d..fe5182e5ef731 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
@@ -29,7 +29,7 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u8:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    crc32b %sil, %eax # EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i8
   %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
@@ -55,7 +55,7 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u16:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32w %si, %eax # EVEX TO LEGACY Compression encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i16
   %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
@@ -79,7 +79,7 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u32:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32l %esi, %eax # EVEX TO LEGACY Compression encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
   ret i32 %res
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
index 71d955bda7523..ba5f846c22db0 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
@@ -15,7 +15,7 @@ define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
 ;
 ; EGPR-LABEL: test_mm_crc64_u8:
 ; EGPR:       # %bb.0:
-; EGPR-NEXT:    crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
+; EGPR-NEXT:    crc32b %sil, %edi # EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe]
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i8
@@ -34,7 +34,7 @@ define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
 ; EGPR-LABEL: test_mm_crc64_u64:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
-; EGPR-NEXT:    crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32q %rsi, %rax # EVEX TO LEGACY Compression encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
   ret i64 %res
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
index 84c7f90cfe3c3..ea4e0ffb109ce 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
@@ -19,7 +19,7 @@ define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
 ; EGPR-LABEL: crc32_32_8:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    crc32b %sil, %eax ## EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
   ret i32 %tmp
@@ -42,7 +42,7 @@ define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
 ; EGPR-LABEL: crc32_32_16:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32w %si, %eax ## EVEX TO LEGACY Compression encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
   ret i32 %tmp
@@ -65,7 +65,7 @@ define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
 ; EGPR-LABEL: crc32_32_32:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32l %esi, %eax ## EVEX TO LEGACY Compression encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
   ret i32 %tmp
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
index bda26a15b277a..af2b590b1f6b2 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
@@ -15,7 +15,7 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
 ; EGPR-LABEL: crc32_64_8:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
-; EGPR-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    crc32b %sil, %eax ## EVEX TO LEGACY Compression encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
   ret i64 %tmp
@@ -31,7 +31,7 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
 ; EGPR-LABEL: crc32_64_64:
 ; EGPR:       ## %bb.0:
 ; EGPR-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
-; EGPR-NEXT:    crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; EGPR-NEXT:    crc32q %rsi, %rax ## EVEX TO LEGACY Compression encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
 ; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
   ret i64 %tmp
diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
index 06d3c1532c3ea..928ac700ee009 100644
--- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
+++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=x86_64-- -run-pass x86-evex-to-vex-compress -verify-machineinstrs -mcpu=skx -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -run-pass x86-evex-to-non-evex-compress -verify-machineinstrs -mcpu=skx -o - %s | FileCheck %s
 # This test verifies VEX encoding for AVX-512 instructions that use registers of low indexes and
 # do not use zmm or mask registers and have a corresponding AVX/AVX2 opcode
diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
index 4d03510ad5d4f..023dfb110502b 100644
--- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
+++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
@@ -18,7 +18,7 @@ define void @test_movdiri(ptr %p, i32 %v) {
 ;
 ; EGPR-LABEL: test_movdiri:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37]
+; EGPR-NEXT:    movdiri %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf9,0x37]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.directstore32(ptr %p, i32 %v)
@@ -42,7 +42,7 @@ define void @test_movdir64b(ptr %dst, ptr %src) {
 ;
 ; EGPR-LABEL: test_movdir64b:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e]
+; EGPR-NEXT:    movdir64b (%rsi), %rdi # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf8,0x3e]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.movdir64b(ptr %dst, ptr %src)
diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
index ddd44f6d73d59..e3736e29a582c 100644
--- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
+++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
@@ -10,7 +10,7 @@ define void @test_movdiri(ptr %p, i64 %v) {
 ;
 ; EGPR-LABEL: test_movdiri:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37]
+; EGPR-NEXT:    movdiri %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf9,0x37]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.directstore64(ptr %p, i64 %v)
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index fb8d2335b3410..a44e04e8ee41e 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -205,7 +205,7 @@
 ; CHECK-NEXT:       X86 LEA Fixup
 ; CHECK-NEXT:       X86 Fixup Inst Tuning
 ; CHECK-NEXT:       X86 Fixup Vector Constants
-; CHECK-NEXT:       Compressing EVEX instrs to VEX encoding when possible
+; CHECK-NEXT:       Compressing EVEX instrs to Non-EVEX encoding when possible
 ; CHECK-NEXT:       X86 Discriminate Memory Operands
 ; CHECK-NEXT:       X86 Insert Cache Prefetches
 ; CHECK-NEXT:       X86 insert wait instruction
diff --git a/llvm/test/CodeGen/X86/sha.ll b/llvm/test/CodeGen/X86/sha.ll
index d8fa354a39135..65222ba74023f 100644
--- a/llvm/test/CodeGen/X86/sha.ll
+++ b/llvm/test/CodeGen/X86/sha.ll
@@ -18,7 +18,7 @@ define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha1rnds4rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03]
+; EGPR-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x3a,0xcc,0xc1,0x03]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
@@ -38,7 +38,7 @@ define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1rnds4rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x3a,0xcc,0x07,0x03]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -61,7 +61,7 @@ define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha1nexterr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1]
+; EGPR-NEXT:    sha1nexte %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc8,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
@@ -81,7 +81,7 @@ define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1nexterm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07]
+; EGPR-NEXT:    sha1nexte (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc8,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -104,7 +104,7 @@ define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg1rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1]
+; EGPR-NEXT:    sha1msg1 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc9,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
@@ -124,7 +124,7 @@ define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg1rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07]
+; EGPR-NEXT:    sha1msg1 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xc9,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -147,7 +147,7 @@ define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg2rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1]
+; EGPR-NEXT:    sha1msg2 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xca,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
@@ -167,7 +167,7 @@ define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1msg2rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07]
+; EGPR-NEXT:    sha1msg2 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xca,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -198,7 +198,7 @@ define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
 ; EGPR-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
-; EGPR-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9]
+; EGPR-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcb,0xd9]
 ; EGPR-NEXT:    movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
@@ -227,7 +227,7 @@ define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable {
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0]
 ; EGPR-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
-; EGPR-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17]
+; EGPR-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcb,0x17]
 ; EGPR-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
@@ -251,7 +251,7 @@ define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha256msg1rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1]
+; EGPR-NEXT:    sha256msg1 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcc,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
@@ -271,7 +271,7 @@ define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha256msg1rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07]
+; EGPR-NEXT:    sha256msg1 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcc,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -294,7 +294,7 @@ define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable
 ;
 ; EGPR-LABEL: test_sha256msg2rr:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1]
+; EGPR-NEXT:    sha256msg2 %xmm1, %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcd,0xc1]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
@@ -314,7 +314,7 @@ define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha256msg2rm:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07]
+; EGPR-NEXT:    sha256msg2 (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xcd,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
@@ -338,7 +338,7 @@ define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwtable {
 ;
 ; EGPR-LABEL: test_sha1rnds4_zero_extend:
 ; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # EVEX TO LEGACY Compression encoding: [0x0f,0x3a,0xcc,0x07,0x03]
 ; EGPR-NEXT:    xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
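
Note (review aid, not part of the patch): the encoding updates above show the size effect directly. From the CHECK lines: crc32l %esi, %eax shrinks from the 6-byte EVEX form [0x62,0xf4,0x7c,0x08,0xf1,0xc6] to the 5-byte legacy form [0xf2,0x0f,0x38,0xf1,0xc6]; movdiri %esi, (%rdi) goes from 6 bytes to 4 ([0x0f,0x38,0xf9,0x37]); sha256msg1 %xmm1, %xmm0 from 6 bytes to 4 ([0x0f,0x38,0xcc,0xc1]). The crc32b %sil, %eax case is size-neutral (6 bytes either way, since %sil forces a REX prefix in the legacy form), but the compression is still harmless.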
diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt
index 071ea3bc07054..32332c121604e 100644
--- a/llvm/utils/TableGen/CMakeLists.txt
+++ b/llvm/utils/TableGen/CMakeLists.txt
@@ -81,7 +81,7 @@ add_tablegen(llvm-tblgen LLVM
   Types.cpp
   VarLenCodeEmitterGen.cpp
   X86DisassemblerTables.cpp
-  X86EVEX2VEXTablesEmitter.cpp
+  X86EVEX2NonEVEXTablesEmitter.cpp
   X86FoldTablesEmitter.cpp
   X86MnemonicTables.cpp
   X86ModRMFilters.cpp
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
similarity index 51%
rename from llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
rename to llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
index c80d9a199fa3c..54c2564dd3d4f 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp
@@ -1,4 +1,4 @@
-//===- utils/TableGen/X86EVEX2VEXTablesEmitter.cpp - X86 backend-*- C++ -*-===//
+//=- utils/TableGen/X86EVEX2NonEVEXTablesEmitter.cpp - X86 backend-*- C++ -*-//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -23,7 +23,12 @@ using namespace X86Disassembler;
 
 namespace {
 
-class X86EVEX2VEXTablesEmitter {
+const std::map<StringRef, StringRef> ManualMap = {
+#define EVEXENTRY(EVEX, NonEVEXInstStr) {#EVEX, #NonEVEXInstStr},
+#include "X86ManualEVEXCompressTables.def"
+};
+
+class X86EVEX2NonEVEXTablesEmitter {
   RecordKeeper &Records;
   CodeGenTarget Target;
 
@@ -40,28 +45,54 @@ class X86EVEX2VEXTablesEmitter {
   std::vector<Entry> EVEX2VEX128;
   std::vector<Entry> EVEX2VEX256;
 
+  // Hold all possibly compressed APX instructions, including only ND and EGPR
+  // instructions so far.
+  std::vector<const CodeGenInstruction *> APXInsts;
+  // Hold all X86 instructions, divided into groups with the same opcode
+  // to make the search more efficient.
+  std::map<uint64_t, std::vector<const CodeGenInstruction *>> LegacyInsts;
+  // Represent the EVEX to Legacy compress table.
+  std::vector<Entry> EVEX2LegacyTable;
+
 public:
-  X86EVEX2VEXTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+  X86EVEX2NonEVEXTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
 
   // run - Output X86 EVEX2VEX tables.
   void run(raw_ostream &OS);
 
 private:
   // Prints the given table as a C++ array of type
-  // X86EvexToVexCompressTableEntry
+  // X86EvexToNonEvexCompressTableEntry
   void printTable(const std::vector<Entry> &Table, raw_ostream &OS);
+  // X86EVEXToLegacyCompressTableEntry
+  void printEVEX2LegacyTable(const std::vector<Entry> &Table, raw_ostream &OS);
+  void addManualEntry(const CodeGenInstruction *EVEXInstr,
+                      const CodeGenInstruction *LegacyInstr,
+                      const char *TableName);
 };
 
-void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
-                                          raw_ostream &OS) {
-  StringRef Size = (Table == EVEX2VEX128) ? "128" : "256";
+void X86EVEX2NonEVEXTablesEmitter::printTable(const std::vector<Entry> &Table,
+                                              raw_ostream &OS) {
+
+  StringRef TargetEnc;
+  StringRef TableName;
+  StringRef Size;
+  if (Table == EVEX2LegacyTable) {
+    TargetEnc = "Legacy";
+    TableName = "X86EvexToLegacy";
+  } else {
+    TargetEnc = "VEX";
+    TableName = "X86EvexToVex";
+    Size = (Table == EVEX2VEX128) ? "128" : "256";
+  }
 
-  OS << "// X86 EVEX encoded instructions that have a VEX " << Size
-     << " encoding\n"
-     << "// (table format: <EVEX opcode, VEX-" << Size << " opcode>).\n"
-     << "static const X86EvexToVexCompressTableEntry X86EvexToVex" << Size
+  OS << "// X86 EVEX encoded instructions that have a " << TargetEnc << " "
+     << Size << " encoding\n"
+     << "// (table format: <EVEX opcode, " << TargetEnc << Size
+     << " opcode>).\n"
+     << "static const X86EvexToNonEvexCompressTableEntry " << TableName << Size
      << "CompressTable[] = {\n"
-     << "  // EVEX scalar with corresponding VEX.\n";
+     << "  // EVEX scalar with corresponding " << TargetEnc << ".\n";
 
   // Print all entries added to the table
   for (const auto &Pair : Table) {
@@ -85,6 +116,31 @@ static inline uint64_t getValueFromBitsInit(const BitsInit *B) {
   return Value;
 }
 
+static bool checkMatchable(const CodeGenInstruction *EVEXInst,
+                           const CodeGenInstruction *NonEVEXInst) {
+  for (unsigned I = 0, E = NonEVEXInst->Operands.size(); I < E; I++) {
+    Record *OpRec1 = EVEXInst->Operands[I].Rec;
+    Record *OpRec2 = NonEVEXInst->Operands[I].Rec;
+
+    if (OpRec1 == OpRec2)
+      continue;
+
+    if (isRegisterOperand(OpRec1) && isRegisterOperand(OpRec2)) {
+      if (getRegOperandSize(OpRec1) != getRegOperandSize(OpRec2))
+        return false;
+    } else if (isMemoryOperand(OpRec1) && isMemoryOperand(OpRec2)) {
+      if (getMemOperandSize(OpRec1) != getMemOperandSize(OpRec2))
+        return false;
+    } else if (isImmediateOperand(OpRec1) && isImmediateOperand(OpRec2)) {
+      if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type")) {
+        return false;
+      }
+    } else
+      return false;
+  }
+  return true;
+}
+
 // Function object - Operator() returns true if the given VEX instruction
 // matches the EVEX instruction of this object.
 class IsMatch {
@@ -98,8 +154,8 @@ class IsMatch {
     RecognizableInstrBase EVEXRI(*EVEXInst);
     bool VEX_W = VEXRI.HasREX_W;
     bool EVEX_W = EVEXRI.HasREX_W;
-    bool VEX_WIG  = VEXRI.IgnoresW;
-    bool EVEX_WIG  = EVEXRI.IgnoresW;
+    bool VEX_WIG = VEXRI.IgnoresW;
+    bool EVEX_WIG = EVEXRI.IgnoresW;
     bool EVEX_W1_VEX_W0 = EVEXInst->TheDef->getValueAsBit("EVEX_W1_VEX_W0");
 
     if (VEXRI.IsCodeGenOnly != EVEXRI.IsCodeGenOnly ||
@@ -121,31 +177,43 @@ class IsMatch {
     // Also for instructions that their EVEX version was upgraded to work with
    // k-registers. For example VPCMPEQBrm (xmm output register) and
    // VPCMPEQBZ128rm (k register output register).
-    for (unsigned i = 0, e = EVEXInst->Operands.size(); i < e; i++) {
-      Record *OpRec1 = EVEXInst->Operands[i].Rec;
-      Record *OpRec2 = VEXInst->Operands[i].Rec;
+    return checkMatchable(EVEXInst, VEXInst);
+  }
+};
 
-      if (OpRec1 == OpRec2)
-        continue;
+class IsMatchAPX {
+  const CodeGenInstruction *EVEXInst;
 
-      if (isRegisterOperand(OpRec1) && isRegisterOperand(OpRec2)) {
-        if (getRegOperandSize(OpRec1) != getRegOperandSize(OpRec2))
-          return false;
-      } else if (isMemoryOperand(OpRec1) && isMemoryOperand(OpRec2)) {
-        return false;
-      } else if (isImmediateOperand(OpRec1) && isImmediateOperand(OpRec2)) {
-        if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type")) {
-          return false;
-        }
-      } else
-        return false;
-    }
+public:
+  IsMatchAPX(const CodeGenInstruction *EVEXInst) : EVEXInst(EVEXInst) {}
+
+  bool operator()(const CodeGenInstruction *LegacyInst) {
+    Record *RecEVEX = EVEXInst->TheDef;
+    Record *RecLegacy = LegacyInst->TheDef;
+    if (RecLegacy->getValueAsDef("OpSize")->getName() == "OpSize16" &&
+        RecEVEX->getValueAsDef("OpPrefix")->getName() != "PD")
+      return false;
+
+    if (RecLegacy->getValueAsDef("OpSize")->getName() == "OpSize32" &&
+        RecEVEX->getValueAsDef("OpPrefix")->getName() != "PS")
+      return false;
+
+    if (RecEVEX->getValueAsBit("hasREX_W") !=
+        RecLegacy->getValueAsBit("hasREX_W"))
+      return false;
+
+    if (RecLegacy->getValueAsDef("Form") != RecEVEX->getValueAsDef("Form"))
+      return false;
 
-    return true;
+    if (RecLegacy->getValueAsBit("isCodeGenOnly") !=
+        RecEVEX->getValueAsBit("isCodeGenOnly"))
+      return false;
+
+    return checkMatchable(EVEXInst, LegacyInst);
   }
 };
 
-void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
+void X86EVEX2NonEVEXTablesEmitter::run(raw_ostream &OS) {
   emitSourceFileHeader("X86 EVEX2VEX tables", OS);
 
   ArrayRef<const CodeGenInstruction *> NumberedInstructions =
@@ -169,18 +237,29 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
     else if (RI.Encoding == X86Local::EVEX && !RI.HasEVEX_K && !RI.HasEVEX_B &&
              !RI.HasEVEX_L2 && !Def->getValueAsBit("notEVEX2VEXConvertible"))
       EVEXInsts.push_back(Inst);
+
+    if (RI.Encoding == X86Local::EVEX && RI.OpMap == X86Local::T_MAP4 &&
+        !RI.HasEVEX_NF &&
+        !getValueFromBitsInit(
+            Def->getValueAsBitsInit("explicitOpPrefixBits"))) {
+      APXInsts.push_back(Inst);
+    } else if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncNormal") {
+      uint64_t Opcode =
+          getValueFromBitsInit(Inst->TheDef->getValueAsBitsInit("Opcode"));
+      LegacyInsts[Opcode].push_back(Inst);
+    }
   }
 
   for (const CodeGenInstruction *EVEXInst : EVEXInsts) {
-    uint64_t Opcode = getValueFromBitsInit(EVEXInst->TheDef->
-                                           getValueAsBitsInit("Opcode"));
+    uint64_t Opcode =
+        getValueFromBitsInit(EVEXInst->TheDef->getValueAsBitsInit("Opcode"));
     // For each EVEX instruction look for a VEX match in the appropriate vector
     // (instructions with the same opcode) using function object IsMatch.
     // Allow EVEX2VEXOverride to explicitly specify a match.
     const CodeGenInstruction *VEXInst = nullptr;
     if (!EVEXInst->TheDef->isValueUnset("EVEX2VEXOverride")) {
       StringRef AltInstStr =
-        EVEXInst->TheDef->getValueAsString("EVEX2VEXOverride");
+          EVEXInst->TheDef->getValueAsString("EVEX2VEXOverride");
       Record *AltInstRec = Records.getDef(AltInstStr);
       assert(AltInstRec && "EVEX2VEXOverride instruction not found!");
       VEXInst = &Target.getInstruction(AltInstRec);
@@ -203,8 +282,33 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
   // Print both tables
   printTable(EVEX2VEX128, OS);
   printTable(EVEX2VEX256, OS);
+
+  for (const CodeGenInstruction *EVEXInst : APXInsts) {
+    // REV instrs should not appear before encoding optimization.
+    if (EVEXInst->TheDef->getName().ends_with("_REV"))
+      continue;
+    const CodeGenInstruction *LegacyInst = nullptr;
+    if (ManualMap.count(EVEXInst->TheDef->getName())) {
+      auto NonEVEXInstStr =
+          ManualMap.at(StringRef(EVEXInst->TheDef->getName()));
+      Record *LegacyRec = Records.getDef(NonEVEXInstStr);
+      LegacyInst = &(Target.getInstruction(LegacyRec));
+    } else {
+      uint64_t Opcode =
+          getValueFromBitsInit(EVEXInst->TheDef->getValueAsBitsInit("Opcode"));
+      auto Match = llvm::find_if(LegacyInsts[Opcode], IsMatchAPX(EVEXInst));
+      if (Match != LegacyInsts[Opcode].end())
+        LegacyInst = *Match;
+    }
+    if (LegacyInst) {
+      if (!EVEXInst->TheDef->getValueAsBit("hasEVEX_B"))
+        EVEX2LegacyTable.push_back(std::make_pair(EVEXInst, LegacyInst));
+    }
+  }
+  printTable(EVEX2LegacyTable, OS);
 }
 } // namespace
 
-static TableGen::Emitter::OptClass<X86EVEX2VEXTablesEmitter>
-    X("gen-x86-EVEX2VEX-tables", "Generate X86 EVEX to VEX compress tables");
+static TableGen::Emitter::OptClass<X86EVEX2NonEVEXTablesEmitter>
+    X("gen-x86-EVEX2NonEVEX-tables",
+      "Generate X86 EVEX to NonEVEX compress tables");
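
Note (review aid, not part of the patch): with these changes the emitter writes a third array alongside the two VEX tables. Its shape, with one illustrative entry (the actual contents are generated at build time from the .def file and the opcode matching above):

  // X86 EVEX encoded instructions that have a Legacy encoding
  // (table format: <EVEX opcode, Legacy opcode>).
  static const X86EvexToNonEvexCompressTableEntry
      X86EvexToLegacyCompressTable[] = {
    // EVEX scalar with corresponding Legacy.
    { X86::SHA1MSG1rr_EVEX, X86::SHA1MSG1rr }, // illustrative entry
  };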
diff --git a/llvm/utils/TableGen/X86ManualEVEXCompressTables.def b/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
new file mode 100644
index 0000000000000..856b770591ad8
--- /dev/null
+++ b/llvm/utils/TableGen/X86ManualEVEXCompressTables.def
@@ -0,0 +1,37 @@
+//===- X86ManualEVEXCompressTables.def ---------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// \file
+// This file defines all the entries in the X86 encoding compress tables that
+// need special handling.
+//===----------------------------------------------------------------------===//
+
+#ifndef EVEXENTRY
+#define EVEXENTRY(EVEX, LEGACY)
+#endif
+// The following entries are added manually because:
+// 1. The prefix is used specially, like movdir64b, where the prefix is what
+//    identifies the instr.
+// 2. The opcode can change when an instr is promoted to map4, like the sha
+//    instrs.
+EVEXENTRY(MOVDIR64B32_EVEX, MOVDIR64B32)
+EVEXENTRY(MOVDIR64B64_EVEX, MOVDIR64B64)
+
+EVEXENTRY(SHA1MSG1rm_EVEX, SHA1MSG1rm)
+EVEXENTRY(SHA1MSG1rr_EVEX, SHA1MSG1rr)
+EVEXENTRY(SHA1MSG2rm_EVEX, SHA1MSG2rm)
+EVEXENTRY(SHA1MSG2rr_EVEX, SHA1MSG2rr)
+EVEXENTRY(SHA1NEXTErm_EVEX, SHA1NEXTErm)
+EVEXENTRY(SHA1NEXTErr_EVEX, SHA1NEXTErr)
+EVEXENTRY(SHA1RNDS4rmi_EVEX, SHA1RNDS4rmi)
+EVEXENTRY(SHA1RNDS4rri_EVEX, SHA1RNDS4rri)
+EVEXENTRY(SHA256MSG1rm_EVEX, SHA256MSG1rm)
+EVEXENTRY(SHA256MSG1rr_EVEX, SHA256MSG1rr)
+EVEXENTRY(SHA256MSG2rm_EVEX, SHA256MSG2rm)
+EVEXENTRY(SHA256MSG2rr_EVEX, SHA256MSG2rr)
+EVEXENTRY(SHA256RNDS2rm_EVEX, SHA256RNDS2rm)
+EVEXENTRY(SHA256RNDS2rr_EVEX, SHA256RNDS2rr)
+#undef EVEXENTRY
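
Note (review aid, not part of the patch): the new .def file is consumed through the X-macro idiom shown at the top of the emitter — EVEXENTRY is defined to produce a brace-initialized pair before the #include, so every line becomes a map entry. A self-contained sketch of the expansion (std::string keys stand in for the emitter's StringRef):

  #include <map>
  #include <string>

  #define EVEXENTRY(EVEX, LEGACY) {#EVEX, #LEGACY},
  const std::map<std::string, std::string> ManualMap = {
      EVEXENTRY(MOVDIR64B32_EVEX, MOVDIR64B32)
      EVEXENTRY(SHA1MSG1rr_EVEX, SHA1MSG1rr)
  };
  #undef EVEXENTRY

  // ManualMap == {{"MOVDIR64B32_EVEX", "MOVDIR64B32"},
  //               {"SHA1MSG1rr_EVEX", "SHA1MSG1rr"}}

The emitter consults this map first and only falls back to opcode-based IsMatchAPX matching when an instruction has no manual entry.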