diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h index 735ef3b781ad4..3cff9aec8c5eb 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h @@ -145,9 +145,8 @@ class LRTable { size_t bytes() const { return sizeof(*this) + Actions.capacity() * sizeof(Action) + - States.capacity() * sizeof(StateID) + - NontermOffset.capacity() * sizeof(uint32_t) + - TerminalOffset.capacity() * sizeof(uint32_t); + Symbols.capacity() * sizeof(SymbolID) + + StateOffset.capacity() * sizeof(uint32_t); } std::string dumpStatistics() const; @@ -170,17 +169,15 @@ class LRTable { // Conceptually the LR table is a multimap from (State, SymbolID) => Action. // Our physical representation is quite different for compactness. - // Index is nonterminal SymbolID, value is the offset into States/Actions - // where the entries for this nonterminal begin. - // Give a nonterminal id, the corresponding half-open range of StateIdx is - // [NontermIdx[id], NontermIdx[id+1]). - std::vector NontermOffset; - // Similar to NontermOffset, but for terminals, index is tok::TokenKind. - std::vector TerminalOffset; - // Parallel to Actions, the value is State (rows of the matrix). - // Grouped by the SymbolID, and only subranges are sorted. - std::vector States; - // A flat list of available actions, sorted by (SymbolID, State). + // Index is StateID, value is the offset into Symbols/Actions + // where the entries for this state begin. + // Given a state id, the corresponding half-open range of Symbols/Actions is + // [StateOffset[id], StateOffset[id+1]). + std::vector StateOffset; + // Parallel to Actions, the value is SymbolID (columns of the matrix). + // Grouped by the StateID, and only subranges are sorted. + std::vector Symbols; + // A flat list of available actions, sorted by (State, SymbolID).
std::vector Actions; // A sorted table, storing the start state for each target parsing symbol. std::vector> StartStates; diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp index 745ad44bafa6c..74e4fb0fedb53 100644 --- a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp @@ -34,27 +34,20 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) { } std::string LRTable::dumpStatistics() const { - StateID NumOfStates = 0; - for (StateID It : States) - NumOfStates = std::max(It, NumOfStates); return llvm::formatv(R"( Statistics of the LR parsing table: number of states: {0} number of actions: {1} size of the table (bytes): {2} )", - NumOfStates, Actions.size(), bytes()) + StateOffset.size() - 1, Actions.size(), bytes()) .str(); } std::string LRTable::dumpForTests(const Grammar &G) const { std::string Result; llvm::raw_string_ostream OS(Result); - StateID MaxState = 0; - for (StateID It : States) - MaxState = std::max(MaxState, It); - OS << "LRTable:\n"; - for (StateID S = 0; S <= MaxState; ++S) { + for (StateID S = 0; S < StateOffset.size() - 1; ++S) { OS << llvm::formatv("State {0}\n", S); for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) { SymbolID TokID = tokenSymbol(static_cast(Terminal)); @@ -97,26 +90,22 @@ LRTable::StateID LRTable::getGoToState(StateID State, } llvm::ArrayRef LRTable::find(StateID Src, SymbolID ID) const { - size_t Idx = isToken(ID) ? static_cast(symbolToToken(ID)) : ID; - assert(isToken(ID) ? Idx + 1 < TerminalOffset.size() - : Idx + 1 < NontermOffset.size()); - std::pair TargetStateRange = - isToken(ID) ? 
std::make_pair(TerminalOffset[Idx], TerminalOffset[Idx + 1]) - : std::make_pair(NontermOffset[Idx], NontermOffset[Idx + 1]); - auto TargetedStates = - llvm::makeArrayRef(States.data() + TargetStateRange.first, - States.data() + TargetStateRange.second); + assert(Src + 1 < StateOffset.size()); + std::pair Range = + std::make_pair(StateOffset[Src], StateOffset[Src + 1]); + auto SymbolRange = llvm::makeArrayRef(Symbols.data() + Range.first, + Symbols.data() + Range.second); - assert(llvm::is_sorted(TargetedStates) && - "subrange of the StateIdx should be sorted!"); - const LRTable::StateID *Start = llvm::partition_point( - TargetedStates, [&Src](LRTable::StateID S) { return S < Src; }); - if (Start == TargetedStates.end()) + assert(llvm::is_sorted(SymbolRange) && + "subrange of the Symbols should be sorted!"); + const LRTable::StateID *Start = + llvm::partition_point(SymbolRange, [&ID](SymbolID S) { return S < ID; }); + if (Start == SymbolRange.end()) return {}; const LRTable::StateID *End = Start; - while (End != TargetedStates.end() && *End == Src) + while (End != SymbolRange.end() && *End == ID) ++End; - return llvm::makeArrayRef(&Actions[Start - States.data()], + return llvm::makeArrayRef(&Actions[Start - Symbols.data()], /*length=*/End - Start); } diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp index 827eb986ab226..f59c8736fbec5 100644 --- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp @@ -44,7 +44,7 @@ class LRTable::Builder { : StartStates(StartStates) {} bool insert(Entry E) { return Entries.insert(std::move(E)).second; } - LRTable build(const GrammarTable >) && { + LRTable build(const GrammarTable >, unsigned NumStates) && { // E.g. 
given the following parsing table with 3 states and 3 terminals: // // a b c @@ -55,44 +55,34 @@ class LRTable::Builder { // +-------+----+-------+-+ // // The final LRTable: - // - TerminalOffset: [a] = 0, [b] = 1, [c] = 4, [d] = 4 (d is a sentinel) - // - States: [ 1, 0, 0, 2] - // Actions: [ acc, s0, r0, r1] - // ~~~ corresponding range for terminal a - // ~~~~~~~~~~ corresponding range for terminal b - // First step, we sort all entries by (Symbol, State, Action). + // - StateOffset: [s0] = 0, [s1] = 2, [s2] = 3, [sentinel] = 4 + // - Symbols: [ b, b, a, b] + // Actions: [ s0, r0, acc, r1] + // ~~~~~~ range for state 0 + // ~~~~ range for state 1 + // ~~ range for state 2 + // First step, we sort all entries by (State, Symbol, Action). std::vector Sorted(Entries.begin(), Entries.end()); llvm::sort(Sorted, [](const Entry &L, const Entry &R) { - return std::forward_as_tuple(L.Symbol, L.State, L.Act.opaque()) < - std::forward_as_tuple(R.Symbol, R.State, R.Act.opaque()); + return std::forward_as_tuple(L.State, L.Symbol, L.Act.opaque()) < + std::forward_as_tuple(R.State, R.Symbol, R.Act.opaque()); }); LRTable Table; Table.Actions.reserve(Sorted.size()); - Table.States.reserve(Sorted.size()); + Table.Symbols.reserve(Sorted.size()); // We are good to finalize the States and Actions. for (const auto &E : Sorted) { Table.Actions.push_back(E.Act); - Table.States.push_back(E.State); + Table.Symbols.push_back(E.Symbol); } // Initialize the terminal and nonterminal offset, all ranges are empty by // default. 
- Table.TerminalOffset = std::vector(GT.Terminals.size() + 1, 0); - Table.NontermOffset = std::vector(GT.Nonterminals.size() + 1, 0); + Table.StateOffset = std::vector(NumStates + 1, 0); size_t SortedIndex = 0; - for (SymbolID NonterminalID = 0; NonterminalID < Table.NontermOffset.size(); - ++NonterminalID) { - Table.NontermOffset[NonterminalID] = SortedIndex; - while (SortedIndex < Sorted.size() && - Sorted[SortedIndex].Symbol == NonterminalID) - ++SortedIndex; - } - for (size_t Terminal = 0; Terminal < Table.TerminalOffset.size(); - ++Terminal) { - Table.TerminalOffset[Terminal] = SortedIndex; - while (SortedIndex < Sorted.size() && - Sorted[SortedIndex].Symbol == - tokenSymbol(static_cast(Terminal))) + for (StateID State = 0; State < Table.StateOffset.size(); ++State) { + Table.StateOffset[State] = SortedIndex; + while (SortedIndex < Sorted.size() && Sorted[SortedIndex].State == State) ++SortedIndex; } Table.StartStates = std::move(StartStates); @@ -106,10 +96,13 @@ class LRTable::Builder { LRTable LRTable::buildForTests(const GrammarTable >, llvm::ArrayRef Entries) { + StateID MaxState = 0; + for (const auto &Entry : Entries) + MaxState = std::max(MaxState, Entry.State); Builder Build({}); for (const Entry &E : Entries) Build.insert(E); - return std::move(Build).build(GT); + return std::move(Build).build(GT, /*NumStates=*/MaxState + 1); } LRTable LRTable::buildSLR(const Grammar &G) { @@ -139,7 +132,7 @@ LRTable LRTable::buildSLR(const Grammar &G) { } } } - return std::move(Build).build(G.table()); + return std::move(Build).build(G.table(), Graph.states().size()); } } // namespace pseudo diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp index 1d3ab19b3c09d..cbb45504f40c8 100644 --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -111,6 +111,8 @@ int main(int argc, char *argv[]) { auto LRTable = clang::pseudo::LRTable::buildSLR(*G); if (PrintTable) 
llvm::outs() << LRTable.dumpForTests(*G); + if (PrintStatistics) + llvm::outs() << LRTable.dumpStatistics(); if (ParseableStream) { clang::pseudo::ForestArena Arena;