From b4ca7d31fe122bb2f49f9549fde07195c7dc7642 Mon Sep 17 00:00:00 2001 From: Jason Eckhardt Date: Fri, 2 Feb 2024 13:21:37 -0600 Subject: [PATCH 1/4] [TableGen] Extend direct lookup to instruction values in generic tables. Currently, for some tables involving a single primary key field which is integral and densely numbered, a direct lookup is generated rather than a binary search. This patch extends the direct lookup function generation to instructions, where the integral value corresponds to the instruction's enum value. While this isn't as common as for other tables, it does occur in at least one downstream backend and one in-tree backend. Added a unit test and minimally updated the documentation. --- llvm/docs/TableGen/BackEnds.rst | 4 +- .../TableGen/generic-tables-instruction.td | 61 ++++++++++++++++++- .../utils/TableGen/SearchableTableEmitter.cpp | 53 +++++++++++++--- 3 files changed, 106 insertions(+), 12 deletions(-) diff --git a/llvm/docs/TableGen/BackEnds.rst b/llvm/docs/TableGen/BackEnds.rst index 742fea51bcf32..901cb989a5edb 100644 --- a/llvm/docs/TableGen/BackEnds.rst +++ b/llvm/docs/TableGen/BackEnds.rst @@ -817,7 +817,9 @@ The table entries in ``ATable`` are sorted in order by ``Val1``, and within each of those values, by ``Val2``. This allows a binary search of the table, which is performed in the lookup function by ``std::lower_bound``. The lookup function returns a reference to the found table entry, or the null -pointer if no entry is found. +pointer if no entry is found. If the table has a single primary key field +which is integral and densely numbered, a direct lookup is generated rather +than a binary search. This example includes a field whose type TableGen cannot deduce. The ``Kind`` field uses the enumerated type ``CEnum`` defined above. To inform TableGen diff --git a/llvm/test/TableGen/generic-tables-instruction.td b/llvm/test/TableGen/generic-tables-instruction.td index a3bed650890bd..3be2462c9ab69 100644 --- a/llvm/test/TableGen/generic-tables-instruction.td +++ b/llvm/test/TableGen/generic-tables-instruction.td @@ -2,6 +2,10 @@ // XFAIL: vg_leak include "llvm/TableGen/SearchableTable.td" +include "llvm/Target/Target.td" + +def ArchInstrInfo : InstrInfo { } +def Arch : Target { let InstructionSet = ArchInstrInfo; } // CHECK-LABEL: GET_InstrTable_IMPL // CHECK: constexpr MyInstr InstrTable[] = { @@ -11,11 +15,20 @@ include "llvm/TableGen/SearchableTable.td" // CHECK: { D, 0x8 }, // CHECK: }; -class Instruction { - bit isPseudo = 0; -} +// A contiguous primary (Instruction) key should get a direct lookup instead of +// binary search. +// CHECK: const MyInstr *getCustomEncodingHelper(unsigned Opcode) { +// CHECK: if ((Opcode < B) || +// CHECK: (Opcode > D)) +// CHECK: return nullptr; +// CHECK: auto Table = ArrayRef(InstrTable); +// CHECK: size_t Idx = Opcode - B; +// CHECK: return &Table[Idx]; + class MyInstr : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); Instruction Opcode = !cast(NAME); bits<16> CustomEncoding = op; } @@ -34,3 +47,45 @@ def InstrTable : GenericTable { let PrimaryKey = ["Opcode"]; let PrimaryKeyName = "getCustomEncodingHelper"; } + + +// Non-contiguous instructions should get a binary search instead of direct +// lookup. +// CHECK: const MyInfoEntry *getTable2ByOpcode(unsigned Opcode) { +// CHECK: auto Idx = std::lower_bound(Table.begin(), Table.end(), Key, +// +// Verify contiguous check for SearchIndex. +// const MyInfoEntry *getTable2ByValue(uint8_t Value) { +// CHECK: if ((Value < 0xB) || +// CHECK: (Value > 0xD)) +// CHECK: return nullptr; +// CHECK: auto Table = ArrayRef(Index); +// CHECK: size_t Idx = Value - 0xB; +// CHECK: return &InstrTable2[Table[Idx]._index]; + + +class MyInfoEntry { + Instruction Opcode = !cast(NAME); + bits<4> Value = V; + string Name = S; +} + +let OutOperandList = (outs), InOperandList = (ins) in { +def W : Instruction, MyInfoEntry<12, "IW">; +def X : Instruction; +def Y : Instruction, MyInfoEntry<13, "IY">; +def Z : Instruction, MyInfoEntry<11, "IZ">; +} + +def InstrTable2 : GenericTable { + let FilterClass = "MyInfoEntry"; + let Fields = ["Opcode", "Value", "Name"]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getTable2ByOpcode"; +} + +def getTable2ByValue : SearchIndex { + let Table = InstrTable2; + let Key = ["Value"]; +} diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index 9987d1ec73d9f..0953ee977b3ed 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -7,12 +7,15 @@ //===----------------------------------------------------------------------===// // // This tablegen backend emits a generic array initialized by specified fields, -// together with companion index tables and lookup functions (binary search, -// currently). +// together with companion index tables and lookup functions. The lookup +// function generated is either a direct lookup (when a single primary key field +// is integral and densely numbered) or a binary search otherwise. // //===----------------------------------------------------------------------===// +#include "CodeGenInstruction.h" #include "CodeGenIntrinsics.h" +#include "CodeGenTarget.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -31,6 +34,8 @@ using namespace llvm; namespace { +using InstrEnumMapT = DenseMap; + int64_t getAsInt(Init *B) { return cast( B->convertInitializerTo(IntRecTy::get(B->getRecordKeeper()))) @@ -94,6 +99,7 @@ class SearchableTableEmitter { std::vector> Enums; DenseMap EnumMap; std::set PreprocessorGuards; + InstrEnumMapT InstrEnumValueMap; public: SearchableTableEmitter(RecordKeeper &R) : Records(R) {} @@ -207,12 +213,17 @@ class SearchableTableEmitter { // For search indices that consists of a single field whose numeric value is // known, return that numeric value. -static int64_t getNumericKey(const SearchIndex &Index, Record *Rec) { +static int64_t getNumericKey(const SearchIndex &Index, Record *Rec, + InstrEnumMapT &InstrEnumMap) { assert(Index.Fields.size() == 1); if (Index.Fields[0].Enum) { Record *EnumEntry = Rec->getValueAsDef(Index.Fields[0].Name); return Index.Fields[0].Enum->EntryMap[EnumEntry]->second; + } else if (Index.Fields[0].IsInstruction) { + Record *TheDef = Rec->getValueAsDef(Index.Fields[0].Name); + assert(!InstrEnumMap.empty()); + return InstrEnumMap[TheDef]; } return getInt(Rec, Index.Fields[0].Name); @@ -368,12 +379,16 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table, } bool IsContiguous = false; + int64_t FirstKeyVal = 0; if (Index.Fields.size() == 1 && - (Index.Fields[0].Enum || isa(Index.Fields[0].RecType))) { + (Index.Fields[0].Enum || isa(Index.Fields[0].RecType) || + Index.Fields[0].IsInstruction)) { + FirstKeyVal = getNumericKey(Index, IndexRows[0], InstrEnumValueMap); IsContiguous = true; for (unsigned i = 0; i < IndexRows.size(); ++i) { - if (getNumericKey(Index, IndexRows[i]) != i) { + if (getNumericKey(Index, IndexRows[i], InstrEnumValueMap) != + (FirstKeyVal + i)) { IsContiguous = false; break; } @@ -381,9 +396,18 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table, } if (IsContiguous) { + const GenericField &Field = Index.Fields[0]; + std::string FirstRepr = primaryRepresentation( + Index.Loc, Field, IndexRows[0]->getValueInit(Field.Name)); + std::string LastRepr = primaryRepresentation( + Index.Loc, Field, IndexRows.back()->getValueInit(Field.Name)); + OS << " if ((" << Field.Name << " < " << FirstRepr << ") ||\n"; + OS << " (" << Field.Name << " > " << LastRepr << "))\n"; + OS << " return nullptr;\n"; OS << " auto Table = ArrayRef(" << IndexName << ");\n"; - OS << " size_t Idx = " << Index.Fields[0].Name << ";\n"; - OS << " return Idx >= Table.size() ? nullptr : "; + OS << " size_t Idx = " << Index.Fields[0].Name << " - " << FirstRepr + << ";\n"; + OS << " return "; if (IsPrimary) OS << "&Table[Idx]"; else @@ -638,6 +662,7 @@ void SearchableTableEmitter::collectTableEntries( Record *IntrinsicClass = Records.getClass("Intrinsic"); Record *InstructionClass = Records.getClass("Instruction"); + bool SawInstructionField = false; for (auto &Field : Table.Fields) { if (!Field.RecType) PrintFatalError(Twine("Cannot determine type of field '") + Field.Name + @@ -646,11 +671,23 @@ void SearchableTableEmitter::collectTableEntries( if (auto RecordTy = dyn_cast(Field.RecType)) { if (IntrinsicClass && RecordTy->isSubClassOf(IntrinsicClass)) Field.IsIntrinsic = true; - else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass)) + else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass)) { Field.IsInstruction = true; + SawInstructionField = true; + } } } + // Build instruction-to-int map to check for contiguous instruction values. + // These are the same values emitted by InstrInfoEmitter. Do this on demand + // only after it is known that there are definitely instruction fields. + if (SawInstructionField && InstrEnumValueMap.empty()) { + CodeGenTarget Target(Records); + unsigned Num = 0; + for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) + InstrEnumValueMap[Inst->TheDef] = Num++; + } + SearchIndex Idx; std::copy(Table.Fields.begin(), Table.Fields.end(), std::back_inserter(Idx.Fields)); From c16b70e80d0a5832cf38cb718c2d58e870e07879 Mon Sep 17 00:00:00 2001 From: Jason Eckhardt Date: Sun, 4 Feb 2024 15:34:24 -0600 Subject: [PATCH 2/4] Address review comments. Move the instruction-to-int map into CodeGenTarget::InstrToIntMap and add CodeGenTarget::getInstrIntValue query method. Populate the map in CodeGenTarget::ComputeInstrsByEnum just below where the order is created in the first place. --- llvm/utils/TableGen/CodeGenTarget.cpp | 7 ++++ llvm/utils/TableGen/CodeGenTarget.h | 10 ++++++ .../utils/TableGen/SearchableTableEmitter.cpp | 36 ++++++------------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index 37fa30349eea9..cf0049e6d33e3 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -538,6 +538,13 @@ void CodeGenTarget::ComputeInstrsByEnum() const { return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) < std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName()); }); + + // Build the instruction-to-int map using the same values emitted by + // InstrInfoEmitter::emitEnums. + assert(InstrToIntMap.empty()); + unsigned Num = 0; + for (const CodeGenInstruction *Inst : InstrsByEnum) + InstrToIntMap[Inst->TheDef] = Num++; } diff --git a/llvm/utils/TableGen/CodeGenTarget.h b/llvm/utils/TableGen/CodeGenTarget.h index 64c3f4470d5e0..0bb8e45a56064 100644 --- a/llvm/utils/TableGen/CodeGenTarget.h +++ b/llvm/utils/TableGen/CodeGenTarget.h @@ -74,6 +74,7 @@ class CodeGenTarget { mutable StringRef InstNamespace; mutable std::vector InstrsByEnum; + mutable DenseMap InstrToIntMap; mutable unsigned NumPseudoInstructions = 0; public: CodeGenTarget(RecordKeeper &Records); @@ -192,6 +193,15 @@ class CodeGenTarget { return InstrsByEnum; } + /// Return the integer enum value corresponding to this instruction record. + unsigned getInstrIntValue(const Record *R) const { + if (InstrsByEnum.empty()) + ComputeInstrsByEnum(); + auto V = InstrToIntMap.find(R); + assert(V != InstrToIntMap.end() && "instr not in InstrToIntMap"); + return V->second; + } + typedef ArrayRef::const_iterator inst_iterator; inst_iterator inst_begin() const{return getInstructionsByEnumValue().begin();} inst_iterator inst_end() const { return getInstructionsByEnumValue().end(); } diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index 0953ee977b3ed..d4929f6044cb4 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenInstruction.h" #include "CodeGenIntrinsics.h" #include "CodeGenTarget.h" #include "llvm/ADT/ArrayRef.h" @@ -34,8 +33,6 @@ using namespace llvm; namespace { -using InstrEnumMapT = DenseMap; - int64_t getAsInt(Init *B) { return cast( B->convertInitializerTo(IntRecTy::get(B->getRecordKeeper()))) @@ -95,11 +92,11 @@ struct GenericTable { class SearchableTableEmitter { RecordKeeper &Records; + std::unique_ptr Target; DenseMap> Intrinsics; std::vector> Enums; DenseMap EnumMap; std::set PreprocessorGuards; - InstrEnumMapT InstrEnumValueMap; public: SearchableTableEmitter(RecordKeeper &R) : Records(R) {} @@ -207,14 +204,15 @@ class SearchableTableEmitter { const std::vector &Items); void collectTableEntries(GenericTable &Table, const std::vector &Items); + int64_t getNumericKey(const SearchIndex &Index, Record *Rec); }; } // End anonymous namespace. // For search indices that consists of a single field whose numeric value is // known, return that numeric value. -static int64_t getNumericKey(const SearchIndex &Index, Record *Rec, - InstrEnumMapT &InstrEnumMap) { +int64_t SearchableTableEmitter::getNumericKey(const SearchIndex &Index, + Record *Rec) { assert(Index.Fields.size() == 1); if (Index.Fields[0].Enum) { @@ -222,8 +220,7 @@ static int64_t getNumericKey(const SearchIndex &Index, Record *Rec, return Index.Fields[0].Enum->EntryMap[EnumEntry]->second; } else if (Index.Fields[0].IsInstruction) { Record *TheDef = Rec->getValueAsDef(Index.Fields[0].Name); - assert(!InstrEnumMap.empty()); - return InstrEnumMap[TheDef]; + return Target->getInstrIntValue(TheDef); } return getInt(Rec, Index.Fields[0].Name); @@ -379,16 +376,14 @@ void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table, } bool IsContiguous = false; - int64_t FirstKeyVal = 0; if (Index.Fields.size() == 1 && (Index.Fields[0].Enum || isa(Index.Fields[0].RecType) || Index.Fields[0].IsInstruction)) { - FirstKeyVal = getNumericKey(Index, IndexRows[0], InstrEnumValueMap); + int64_t FirstKeyVal = getNumericKey(Index, IndexRows[0]); IsContiguous = true; for (unsigned i = 0; i < IndexRows.size(); ++i) { - if (getNumericKey(Index, IndexRows[i], InstrEnumValueMap) != - (FirstKeyVal + i)) { + if (getNumericKey(Index, IndexRows[i]) != (FirstKeyVal + i)) { IsContiguous = false; break; } @@ -662,7 +657,6 @@ void SearchableTableEmitter::collectTableEntries( Record *IntrinsicClass = Records.getClass("Intrinsic"); Record *InstructionClass = Records.getClass("Instruction"); - bool SawInstructionField = false; for (auto &Field : Table.Fields) { if (!Field.RecType) PrintFatalError(Twine("Cannot determine type of field '") + Field.Name + @@ -671,23 +665,11 @@ void SearchableTableEmitter::collectTableEntries( if (auto RecordTy = dyn_cast(Field.RecType)) { if (IntrinsicClass && RecordTy->isSubClassOf(IntrinsicClass)) Field.IsIntrinsic = true; - else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass)) { + else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass)) Field.IsInstruction = true; - SawInstructionField = true; - } } } - // Build instruction-to-int map to check for contiguous instruction values. - // These are the same values emitted by InstrInfoEmitter. Do this on demand - // only after it is known that there are definitely instruction fields. - if (SawInstructionField && InstrEnumValueMap.empty()) { - CodeGenTarget Target(Records); - unsigned Num = 0; - for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) - InstrEnumValueMap[Inst->TheDef] = Num++; - } - SearchIndex Idx; std::copy(Table.Fields.begin(), Table.Fields.end(), std::back_inserter(Idx.Fields)); @@ -700,6 +682,8 @@ void SearchableTableEmitter::run(raw_ostream &OS) { // Emit tables in a deterministic order to avoid needless rebuilds. SmallVector, 4> Tables; DenseMap TableMap; + if (Records.getClass("Instruction")) + Target = std::make_unique(Records); // Collect all definitions first. for (auto *EnumRec : Records.getAllDerivedDefinitions("GenericEnum")) { From 70396ee1f11326ab5a93768510d98dcb5b57a8f5 Mon Sep 17 00:00:00 2001 From: Jason Eckhardt Date: Mon, 5 Feb 2024 07:38:31 -0600 Subject: [PATCH 3/4] Address review nit. Add derived definitions guard. --- llvm/utils/TableGen/SearchableTableEmitter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index d4929f6044cb4..cdb1a2da4a3dc 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -682,7 +682,8 @@ void SearchableTableEmitter::run(raw_ostream &OS) { // Emit tables in a deterministic order to avoid needless rebuilds. SmallVector, 4> Tables; DenseMap TableMap; - if (Records.getClass("Instruction")) + if (Records.getClass("Instruction") && + !Records.getAllDerivedDefinitions("Instruction").empty()) Target = std::make_unique(Records); // Collect all definitions first. From 1fae47081b75a6539fd4f7bb174154e2dd17de9f Mon Sep 17 00:00:00 2001 From: Jason Eckhardt Date: Tue, 6 Feb 2024 08:45:46 -0600 Subject: [PATCH 4/4] Address review comments. Use getAllDerivedDefinitionsIfDefined. --- llvm/utils/TableGen/SearchableTableEmitter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index cdb1a2da4a3dc..d75a9e95a7eb0 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -682,8 +682,7 @@ void SearchableTableEmitter::run(raw_ostream &OS) { // Emit tables in a deterministic order to avoid needless rebuilds. SmallVector, 4> Tables; DenseMap TableMap; - if (Records.getClass("Instruction") && - !Records.getAllDerivedDefinitions("Instruction").empty()) + if (!Records.getAllDerivedDefinitionsIfDefined("Instruction").empty()) Target = std::make_unique(Records); // Collect all definitions first.