Skip to content

Commit

Permalink
[pseudo] Eliminate LRTable::Action. NFC
Browse files Browse the repository at this point in the history
The last remaining uses are in tests/test builders.
Replace with a builder struct.

Differential Revision: https://reviews.llvm.org/D129093
  • Loading branch information
sam-mccall committed Jul 5, 2022
1 parent dc96906 commit 9fbf110
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 243 deletions.
97 changes: 24 additions & 73 deletions clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
Expand Up @@ -39,6 +39,7 @@
#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
Expand All @@ -58,71 +59,15 @@ namespace pseudo {
// Unlike the typical LR parsing table which allows at most one available action
// per entry, conflicted actions are allowed in LRTable. The LRTable is designed
// to be used in nondeterministic LR parsers (e.g. GLR).
//
// There are no "accept" actions in the LRTable, instead the stack is inspected
// after parsing completes: is the state goto(StartState, StartSymbol)?
class LRTable {
public:
// StateID is only 13 bits wide.
using StateID = uint16_t;
static constexpr unsigned StateBits = 13;

// Action is one cell of the parsing table. It merges what the LR literature
// keeps in two separate tables: ACTION (transitions on terminals) and GOTO
// (transitions on nonterminals).
//
// FIXME: as we move away from a homogeneous table structure shared between
// action types, this class becomes less useful. Remove it.
class Action {
public:
  enum Kind : uint8_t {
    // Placeholder value (0); it does not describe a real transition.
    Sentinel = 0,

    // ---- Terminal actions: entries of the ACTION table. ----

    // Shift to state n: consume the lookahead token and push state n onto
    // the state stack. A shift is a forward transition; n is the state the
    // parser enters next.
    Shift,

    // NOTE: the LRTable has no conventional "accept" action. Acceptance is
    // handled by the parser itself: if, after a reduce, it reaches the target
    // state goto(StartState, StartSymbol) at the EOF token, the input has
    // been parsed successfully as the StartSymbol.

    // ---- Nonterminal actions: entries of the GOTO table. ----

    // Go to state n: push state n onto the state stack. Like Shift, but the
    // forward transition is labelled with a nonterminal.
    GoTo,
  };

  // Named constructors; the only way to create an Action.
  static Action sentinel() { return Action(Sentinel, 0); }
  static Action shift(StateID S) { return Action(Shift, S); }
  static Action goTo(StateID S) { return Action(GoTo, S); }

  // The kind of this action, recovered from the packed bit-field.
  Kind kind() const { return static_cast<Kind>(K); }

  // Target state of a Shift action. Must only be called on Shift actions.
  StateID getShiftState() const {
    assert(kind() == Shift);
    return Value;
  }
  // Target state of a GoTo action. Must only be called on GoTo actions.
  StateID getGoToState() const {
    assert(kind() == GoTo);
    return Value;
  }

  // Kind and value packed into one integer: kind in the bits above ValueBits,
  // value in the low ValueBits bits.
  uint16_t opaque() const { return (K << ValueBits) | Value; }
  // Two actions are equal iff both their kind and their value match.
  bool operator==(const Action &L) const { return opaque() == L.opaque(); }

private:
  Action(Kind TheKind, unsigned TheValue) : K(TheKind), Value(TheValue) {}

  static constexpr unsigned ValueBits = StateBits;
  static constexpr unsigned KindBits = 3;
  static_assert(ValueBits >= RuleBits, "Value must be able to store RuleID");
  static_assert(KindBits + ValueBits <= 16,
                "Must be able to store kind and value efficiently");

  // The action's Kind, stored in KindBits bits.
  uint16_t K : KindBits;
  // Payload of the action. The accessors above only ever read it as a
  // StateID; the static_assert additionally keeps it wide enough for a RuleID.
  uint16_t Value : ValueBits;
};

// Returns the state after we reduce a nonterminal.
// Expected to be called by LR parsers.
// If the nonterminal is invalid here, returns None.
Expand Down Expand Up @@ -184,20 +129,27 @@ class LRTable {
// Build a SLR(1) parsing table.
static LRTable buildSLR(const Grammar &G);

struct Builder;
// Represents an entry in the table, used for building the LRTable.
// Each entry describes one transition: in state `State`, on symbol `Symbol`,
// perform action `Act`.
struct Entry {
// State in which the action applies (the source of the transition).
StateID State;
// Symbol labelling the transition.
SymbolID Symbol;
// The action to take; for Shift/GoTo it carries the destination state.
Action Act;
};
struct ReduceEntry {
StateID State;
RuleID Rule;
// Collects the ingredients of a table (start states, transitions, reductions
// and follow sets) so that an LRTable with exactly those contents can be
// produced by build().
struct Builder {
  // Creates an empty builder. NumNonterminals stays 0 and FollowSets stays
  // empty, so the caller has to set them before calling build().
  Builder() = default;
  // Creates a builder whose NumNonterminals and FollowSets are derived from
  // the grammar G.
  Builder(const Grammar &G)
      : NumNonterminals(G.table().Nonterminals.size()),
        FollowSets(followSets(G)) {}

  // Number of nonterminals in the grammar; build() expects this to be nonzero.
  unsigned int NumNonterminals = 0;
  // (start symbol, state) pairs: the states representing `_ := . start` for
  // each start symbol.
  std::vector<std::pair<SymbolID, StateID>> StartStates;
  // State transitions: advancing over symbol D takes
  //   `X := ABC . D EFG`  to  `X := ABC D . EFG`.
  // Keyed by (initial state, D); the mapped value is the resulting state.
  llvm::DenseMap<std::pair<StateID, SymbolID>, StateID> Transition;
  // For each state, the rules that may be reduced there.
  llvm::DenseMap<StateID, llvm::SmallSet<RuleID, 4>> Reduce;
  // Indexed by nonterminal: FollowSets[NT] holds the terminals that can
  // follow NT.
  std::vector<llvm::DenseSet<SymbolID>> FollowSets;

  // Produces the table. Rvalue-qualified: call as `std::move(B).build()`.
  LRTable build() &&;
};
// Build a specified table for testing purposes.
static LRTable buildForTests(const Grammar &G, llvm::ArrayRef<Entry>,
llvm::ArrayRef<ReduceEntry>);

private:
unsigned numStates() const { return ReduceOffset.size() - 1; }
Expand Down Expand Up @@ -300,7 +252,6 @@ class LRTable {
// as an index: Nonterminal * NUM_TOKENS + Token.
llvm::BitVector FollowSets;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);

} // namespace pseudo
} // namespace clang
Expand Down
13 changes: 0 additions & 13 deletions clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
Expand Up @@ -11,25 +11,12 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

namespace clang {
namespace pseudo {

// Writes a human-readable description of a Shift or GoTo action to OS and
// returns OS. Printing a Sentinel (or any other kind) is treated as
// unreachable.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) {
  using Action = LRTable::Action;
  const Action::Kind ActionKind = A.kind();

  if (ActionKind == Action::Shift)
    return OS << llvm::formatv("shift state {0}", A.getShiftState());
  if (ActionKind == Action::GoTo)
    return OS << llvm::formatv("go to state {0}", A.getGoToState());

  // Nothing printable is left: distinguish the placeholder from garbage.
  if (ActionKind == Action::Sentinel)
    llvm_unreachable("unexpected Sentinel action kind!");
  llvm_unreachable("unexpected action kind!");
}

std::string LRTable::dumpStatistics() const {
return llvm::formatv(R"(
Statistics of the LR parsing table:
Expand Down
139 changes: 48 additions & 91 deletions clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
Expand Up @@ -13,109 +13,67 @@
#include "llvm/ADT/SmallSet.h"
#include <cstdint>

namespace llvm {
// DenseMapInfo specialization that lets LRTable::Entry be used as a key in
// llvm::DenseSet / llvm::DenseMap.
//
// The two reserved keys are told apart by their State field, which is set to
// the two largest values of the underlying StateID type. StateID is declared
// as only StateBits (13) bits wide, so these values do not collide with real
// states.
template <> struct DenseMapInfo<clang::pseudo::LRTable::Entry> {
  using Entry = clang::pseudo::LRTable::Entry;
  using StateID = clang::pseudo::LRTable::StateID;
  using Action = clang::pseudo::LRTable::Action;

  // Key marking a never-used bucket.
  // The first Entry field is `StateID State`, so the cast targets StateID
  // (it previously named SymbolID, which merely happened to convert).
  static inline Entry getEmptyKey() {
    return Entry{static_cast<StateID>(-1), 0, Action::sentinel()};
  }
  // Key marking a bucket whose element was erased.
  static inline Entry getTombstoneKey() {
    return Entry{static_cast<StateID>(-2), 0, Action::sentinel()};
  }
  // Hash over all three fields; the action contributes its packed
  // kind+value representation.
  static unsigned getHashValue(const Entry &I) {
    return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque());
  }
  // Field-wise equality, consistent with getHashValue.
  static bool isEqual(const Entry &LHS, const Entry &RHS) {
    return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol &&
           LHS.Act == RHS.Act;
  }
};
} // namespace llvm

namespace clang {
namespace pseudo {

struct LRTable::Builder {
std::vector<std::pair<SymbolID, StateID>> StartStates;
llvm::DenseSet<Entry> Entries;
llvm::DenseMap<StateID, llvm::SmallSet<RuleID, 4>> Reduces;
std::vector<llvm::DenseSet<SymbolID>> FollowSets;
LRTable LRTable::Builder::build() && {
assert(NumNonterminals != 0 && "Set NumNonterminals or init with grammar");
LRTable Table;

LRTable build(unsigned NumStates, unsigned NumNonterminals) && {
LRTable Table;
Table.StartStates = std::move(StartStates);
// Count number of states: every state has to be reachable somehow.
StateID MaxState = 0;
for (const auto &Entry : StartStates)
MaxState = std::max(MaxState, Entry.second);
for (const auto &Entry : Transition)
MaxState = std::max(MaxState, Entry.second);
unsigned NumStates = MaxState + 1;

// Compile the goto and shift actions into transition tables.
llvm::DenseMap<unsigned, SymbolID> Gotos;
llvm::DenseMap<unsigned, SymbolID> Shifts;
for (const auto &E : Entries) {
if (E.Act.kind() == Action::Shift)
Shifts.try_emplace(shiftIndex(E.State, E.Symbol, NumStates),
E.Act.getShiftState());
else if (E.Act.kind() == Action::GoTo)
Gotos.try_emplace(gotoIndex(E.State, E.Symbol, NumStates),
E.Act.getGoToState());
}
Table.Shifts = TransitionTable(Shifts, NumStates * NumTerminals);
Table.Gotos = TransitionTable(Gotos, NumStates * NumNonterminals);
Table.StartStates = std::move(StartStates);

// Compile the follow sets into a bitmap.
Table.FollowSets.resize(tok::NUM_TOKENS * FollowSets.size());
for (SymbolID NT = 0; NT < FollowSets.size(); ++NT)
for (SymbolID Follow : FollowSets[NT])
Table.FollowSets.set(NT * tok::NUM_TOKENS + symbolToToken(Follow));
// Compile the goto and shift actions into transition tables.
llvm::DenseMap<unsigned, SymbolID> Gotos;
llvm::DenseMap<unsigned, SymbolID> Shifts;
for (const auto &E : Transition) {
if (isToken(E.first.second))
Shifts.try_emplace(shiftIndex(E.first.first, E.first.second, NumStates),
E.second);
else
Gotos.try_emplace(gotoIndex(E.first.first, E.first.second, NumStates),
E.second);
}
Table.Shifts = TransitionTable(Shifts, NumStates * NumTerminals);
Table.Gotos = TransitionTable(Gotos, NumStates * NumNonterminals);

// Store the reduce actions in a vector partitioned by state.
Table.ReduceOffset.reserve(NumStates + 1);
std::vector<RuleID> StateRules;
for (StateID S = 0; S < NumStates; ++S) {
Table.ReduceOffset.push_back(Table.Reduces.size());
auto It = Reduces.find(S);
if (It == Reduces.end())
continue;
Table.Reduces.insert(Table.Reduces.end(), It->second.begin(),
It->second.end());
std::sort(Table.Reduces.begin() + Table.ReduceOffset.back(),
Table.Reduces.end());
}
Table.ReduceOffset.push_back(Table.Reduces.size());
// Compile the follow sets into a bitmap.
Table.FollowSets.resize(tok::NUM_TOKENS * FollowSets.size());
for (SymbolID NT = 0; NT < FollowSets.size(); ++NT)
for (SymbolID Follow : FollowSets[NT])
Table.FollowSets.set(NT * tok::NUM_TOKENS + symbolToToken(Follow));

return Table;
// Store the reduce actions in a vector partitioned by state.
Table.ReduceOffset.reserve(NumStates + 1);
std::vector<RuleID> StateRules;
for (StateID S = 0; S < NumStates; ++S) {
Table.ReduceOffset.push_back(Table.Reduces.size());
auto It = Reduce.find(S);
if (It == Reduce.end())
continue;
Table.Reduces.insert(Table.Reduces.end(), It->second.begin(),
It->second.end());
std::sort(Table.Reduces.begin() + Table.ReduceOffset.back(),
Table.Reduces.end());
}
};
Table.ReduceOffset.push_back(Table.Reduces.size());

LRTable LRTable::buildForTests(const Grammar &G, llvm::ArrayRef<Entry> Entries,
llvm::ArrayRef<ReduceEntry> Reduces) {
StateID MaxState = 0;
for (const auto &Entry : Entries) {
MaxState = std::max(MaxState, Entry.State);
if (Entry.Act.kind() == LRTable::Action::Shift)
MaxState = std::max(MaxState, Entry.Act.getShiftState());
if (Entry.Act.kind() == LRTable::Action::GoTo)
MaxState = std::max(MaxState, Entry.Act.getGoToState());
}
Builder Build;
Build.Entries.insert(Entries.begin(), Entries.end());
for (const ReduceEntry &E : Reduces)
Build.Reduces[E.State].insert(E.Rule);
Build.FollowSets = followSets(G);
return std::move(Build).build(/*NumStates=*/MaxState + 1,
G.table().Nonterminals.size());
return Table;
}

LRTable LRTable::buildSLR(const Grammar &G) {
auto Graph = LRGraph::buildLR0(G);
Builder Build;
Builder Build(G);
Build.StartStates = Graph.startStates();
for (const auto &T : Graph.edges()) {
Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
Build.Entries.insert({T.Src, T.Label, Act});
}
Build.FollowSets = followSets(G);
for (const auto &T : Graph.edges())
Build.Transition.try_emplace({T.Src, T.Label}, T.Dst);
assert(Graph.states().size() <= (1 << StateBits) &&
"Graph states execceds the maximum limit!");
// Add reduce actions.
Expand All @@ -129,11 +87,10 @@ LRTable LRTable::buildSLR(const Grammar &G) {
if (!I.hasNext())
// If we've reached the end of a rule A := ..., then we can reduce if
// the next token is in the follow set of A.
Build.Reduces[SID].insert(I.rule());
Build.Reduce[SID].insert(I.rule());
}
}
return std::move(Build).build(Graph.states().size(),
G.table().Nonterminals.size());
return std::move(Build).build();
}

} // namespace pseudo
Expand Down

0 comments on commit 9fbf110

Please sign in to comment.