diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt
index 339692bcd65124..87f86dc5b725a4 100644
--- a/llvm/utils/TableGen/CMakeLists.txt
+++ b/llvm/utils/TableGen/CMakeLists.txt
@@ -53,6 +53,7 @@ add_tablegen(llvm-tblgen LLVM
   X86DisassemblerTables.cpp
   X86EVEX2VEXTablesEmitter.cpp
   X86FoldTablesEmitter.cpp
+  X86MnemonicTables.cpp
   X86ModRMFilters.cpp
   X86RecognizableInstr.cpp
   WebAssemblyDisassemblerEmitter.cpp
diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp
index 2d4a45f889be64..cde49919f54fe5 100644
--- a/llvm/utils/TableGen/TableGen.cpp
+++ b/llvm/utils/TableGen/TableGen.cpp
@@ -52,6 +52,7 @@ enum ActionType {
   GenGICombiner,
   GenX86EVEX2VEXTables,
   GenX86FoldTables,
+  GenX86MnemonicTables,
   GenRegisterBank,
   GenExegesis,
   GenAutomata,
@@ -130,6 +131,8 @@ cl::opt<ActionType> Action(
                    "Generate X86 EVEX to VEX compress tables"),
         clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
                    "Generate X86 fold tables"),
+        clEnumValN(GenX86MnemonicTables, "gen-x86-mnemonic-tables",
+                   "Generate X86 mnemonic tables"),
         clEnumValN(GenRegisterBank, "gen-register-bank",
                    "Generate registers bank descriptions"),
         clEnumValN(GenExegesis, "gen-exegesis",
@@ -257,6 +260,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenX86EVEX2VEXTables:
     EmitX86EVEX2VEXTables(Records, OS);
     break;
+  case GenX86MnemonicTables:
+    EmitX86MnemonicTables(Records, OS);
+    break;
   case GenX86FoldTables:
     EmitX86FoldTables(Records, OS);
     break;
diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h
index 71db8dc77b0525..224efa98bae16c 100644
--- a/llvm/utils/TableGen/TableGenBackends.h
+++ b/llvm/utils/TableGen/TableGenBackends.h
@@ -88,6 +88,7 @@ void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS);
 void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS);
 void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS);
 void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS);
+void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS);
 void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS);
 void EmitExegesis(RecordKeeper &RK, raw_ostream &OS);
 void EmitAutomata(RecordKeeper &RK, raw_ostream &OS);
diff --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp
new file mode 100644
index 00000000000000..8269e6b5b85c5e
--- /dev/null
+++ b/llvm/utils/TableGen/X86MnemonicTables.cpp
@@ -0,0 +1,116 @@
+//==- X86MnemonicTables.cpp - Generate mnemonic extraction tables. -*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend is responsible for emitting tables that group
+// instructions by their mnemonic name wrt AsmWriter Variant (e.g. isADD, etc).
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "X86DisassemblerTables.h"
+#include "X86RecognizableInstr.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/TableGenBackend.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Emits one `bool is<MNEMONIC>(unsigned Opcode)` predicate per X86 mnemonic.
+class X86MnemonicTablesEmitter {
+  CodeGenTarget Target;
+
+public:
+  explicit X86MnemonicTablesEmitter(RecordKeeper &R) : Target(R) {}
+
+  // Output X86 mnemonic tables.
+  void run(raw_ostream &OS);
+};
+
+void X86MnemonicTablesEmitter::run(raw_ostream &OS) {
+  emitSourceFileHeader("X86 Mnemonic tables", OS);
+  OS << "namespace llvm {\nnamespace X86 {\n\n";
+  Record *AsmWriter = Target.getAsmWriter();
+  unsigned Variant = AsmWriter->getValueAsInt("Variant");
+
+  // Hold all instructions grouped by mnemonic
+  StringMap<SmallVector<const CodeGenInstruction *, 0>> MnemonicToCGInstrMap;
+
+  // Required by the RecognizableInstr constructor; this backend never reads it.
+  X86Disassembler::DisassemblerTables Tables;
+  ArrayRef<const CodeGenInstruction *> NumberedInstructions =
+      Target.getInstructionsByEnumValue();
+  for (unsigned II = 0, IE = NumberedInstructions.size(); II != IE; ++II) {
+    const CodeGenInstruction *I = NumberedInstructions[II];
+    X86Disassembler::RecognizableInstr RI(Tables, *I, II);
+    Record *Def = I->TheDef;
+    bool IsCodeGenOnly = RI.IsCodeGenOnly;
+    bool ForceDisassemble = RI.ForceDisassemble;
+    uint8_t Form = RI.Form;
+    if ( // Filter non-X86 instructions
+        !Def->isSubClassOf("X86Inst") ||
+        // Skip pseudo instructions as they may contain non-alnum characters in
+        // mnemonic
+        (IsCodeGenOnly && !ForceDisassemble) ||
+        // Non-parsable instruction defs contain prefix as part of AsmString
+        Def->getValueAsString("AsmVariantName") == "NonParsable" ||
+        // Skip CodeGenInstructions that are not real standalone instructions
+        Form == X86Local::PrefixByte || Form == X86Local::Pseudo)
+      continue;
+    // Flatten an instruction assembly string.
+    std::string AsmString = I->FlattenAsmStringVariants(I->AsmString, Variant);
+    StringRef Mnemonic(AsmString);
+    // Extract a mnemonic assuming it's separated by \t
+    Mnemonic = Mnemonic.take_until([](char C) { return C == '\t'; });
+
+    // Special case: CMOVCC, JCC, SETCC have "${cond}" in mnemonic.
+    // Everything from "${cond}" to the end of the string is replaced with "CC",
+    // so any text following the placeholder is dropped from the mnemonic too.
+    size_t CondPos = Mnemonic.find("${cond}");
+    if (CondPos != StringRef::npos)
+      Mnemonic = AsmString.replace(CondPos, StringRef::npos, "CC");
+
+    // It's intentional that we put a std::string to the map (StringRef::upper
+    // returns a string) as AsmString is deallocated at the end of the iteration
+    MnemonicToCGInstrMap[Mnemonic.upper()].push_back(I);
+  }
+
+  OS << "#ifdef GET_X86_MNEMONIC_TABLES_H\n";
+  OS << "#undef GET_X86_MNEMONIC_TABLES_H\n\n";
+  for (StringRef Mnemonic : MnemonicToCGInstrMap.keys())
+    OS << "bool is" << Mnemonic << "(unsigned Opcode);\n";
+  OS << "#endif // GET_X86_MNEMONIC_TABLES_H\n\n";
+
+  OS << "#ifdef GET_X86_MNEMONIC_TABLES_CPP\n";
+  OS << "#undef GET_X86_MNEMONIC_TABLES_CPP\n\n";
+  for (const auto &Entry : MnemonicToCGInstrMap) {
+    OS << "bool is" << Entry.getKey() << "(unsigned Opcode) {\n";
+    // Bind by reference: copying the instruction list per mnemonic is wasteful.
+    const auto &Instrs = Entry.getValue();
+    if (Instrs.size() == 1) {
+      const CodeGenInstruction *CGI = *Instrs.begin();
+      OS << "\treturn Opcode == " << CGI->TheDef->getName() << ";\n}\n\n";
+    } else {
+      OS << "\tswitch (Opcode) {\n";
+      for (const CodeGenInstruction *CGI : Instrs)
+        OS << "\tcase " << CGI->TheDef->getName() << ":\n";
+      OS << "\t\treturn true;\n\t}\n\treturn false;\n}\n\n";
+    }
+  }
+  OS << "#endif // GET_X86_MNEMONIC_TABLES_CPP\n\n";
+  OS << "} // end namespace X86\n} // end namespace llvm\n";
+}
+
+} // namespace
+
+namespace llvm {
+void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS) {
+  X86MnemonicTablesEmitter(RK).run(OS);
+}
+} // namespace llvm
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index 8f557d9ee5f51f..debc6c516ab16c 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -163,7 +163,7 @@ class DisassemblerTables;
 /// to interpret the information available in the LLVM tables, and to emit the
 /// instruction into DisassemblerTables.
 class RecognizableInstr {
-private:
+public:
   /// The opcode of the instruction, as used in an MCInst
   InstrUID UID;
   /// The record from the .td files corresponding to this instruction
@@ -232,6 +232,7 @@ class RecognizableInstr {
   /// info table
   InstructionSpecifier* Spec;
 
+private:
   /// insnContext - Returns the primary context in which the instruction is
   /// valid.
   ///
@@ -339,6 +340,7 @@ class RecognizableInstr {
   /// decode information for the current instruction.
   void emitDecodePath(DisassemblerTables &tables) const;
 
+public:
   /// Constructor - Initializes a RecognizableInstr with the appropriate fields
   /// from a CodeGenInstruction.
   ///
@@ -348,7 +350,6 @@ class RecognizableInstr {
   RecognizableInstr(DisassemblerTables &tables,
                     const CodeGenInstruction &insn,
                     InstrUID uid);
-public:
   /// processInstr - Accepts a CodeGenInstruction and loads decode information
   /// for it into a DisassemblerTables if appropriate.
   ///