Skip to content
Browse files

PDAs/NFAs can now use symbolic state names.

The format in the PDA/NFA files is to prefix the name with an @. This
makes it so that the stack language can be output as an NFA and then run
through the nfa-to-dot tool, producing almost the same output as the --dot
option of cfg-stack-lang (the only differences are in the order of
edges/vertices in the DOT output).
  • Loading branch information...
1 parent 456be03 commit 86c2a962289bbb0e6ba2f267e0d6faec9b4b8448 @pgoodman committed Feb 9, 2012
View
157 grail/include/cli/CFG_STACK_LANG.hpp
@@ -29,6 +29,11 @@
#ifndef Grail_Plus_CFG_STACK_LANG_HPP_
#define Grail_Plus_CFG_STACK_LANG_HPP_
+#include <cstdio>
+#include <map>
+#include <set>
+#include <utility>
+
#include "fltl/include/CFG.hpp"
#include "fltl/include/NFA.hpp"
@@ -38,10 +43,6 @@
#include "grail/include/cli/NFA_TO_DOT.hpp"
-#include <map>
-#include <set>
-#include <utility>
-
namespace grail { namespace cli {
template <typename AlphaT>
@@ -56,6 +57,7 @@ namespace grail { namespace cli {
static void declare(io::CommandLineOptions &opt, bool in_help) throw() {
opt.declare("partition", io::opt::OPTIONAL, io::opt::NO_VAL);
opt.declare("dot", io::opt::OPTIONAL, io::opt::NO_VAL);
+ opt.declare("label-states", io::opt::OPTIONAL, io::opt::NO_VAL);
if(!in_help) {
opt.declare_min_num_positional(1);
opt.declare_max_num_positional(1);
@@ -74,11 +76,14 @@ namespace grail { namespace cli {
" pairs where L --> * R * in the grammar, \n"
" instead of just mapping each variable to a\n"
" state in the NFA.\n"
- " --dot output the NFA as a DOT digraph.\n\n",
+ " --dot output the NFA as a DOT digraph.\n"
+ " --label-states label the states instead of the transitions.\n\n",
TOOL_NAME, TOOL_NAME
);
}
+ /// represents a pair of variables, e.g. If A --> \alpha B \Beta then
+ /// context=A and var=B.
struct variable_context {
cfg_variable_type context;
@@ -95,40 +100,14 @@ namespace grail { namespace cli {
}
};
- static int main(io::CommandLineOptions &options) throw() {
-
- // run the tool
- io::option_type file(options[0U]);
- io::option_type partition(options["partition"]);
- io::option_type dot(options["dot"]);
- const char *file_name(file.value());
-
- FILE *fp(fopen(file_name, "r"));
-
- if(0 == fp) {
- options.error(
- "Unable to open file containing context-free "
- "grammar for reading."
- );
- options.note("File specified here:", file);
-
- return 1;
- }
-
- cfg_cfg_type cfg;
- nfa_nfa_type nfa;
-
- // can't bring in the cfg :(
- if(!io::fread(fp, cfg, file_name)) {
- options.error(
- "Unable to read file containing context-free "
- "grammar."
- );
- options.note("File specified here:", file);
- return 1;
- }
-
- io::verbose("Opened '%s'\n", file_name);
+ /// extract the stack language from the CFG
+ static void extract(
+ cfg_cfg_type &cfg,
+ nfa_nfa_type &nfa,
+ bool partition,
+ bool label_states
+ ) throw() {
+ char name_buff[1024] = {'\0'};
std::map<variable_context, nfa_state_type> state_map;
std::map<cfg_variable_type, std::set<variable_context> > contexts;
@@ -137,6 +116,13 @@ namespace grail { namespace cli {
cfg_symbol_string_type syms;
cfg_generator_type cfg_vars(cfg.search((~L) --->* ~syms));
+ // if we're not partitioning then pre-emptively set the start
+ // state to refer to the start variable of the grammar.
+ if(!partition) {
+ nfa_state_type start_state(nfa.get_start_state());
+
+ }
+
// go make all states
for(; cfg_vars.match_next(); ) {
for(unsigned i(0); i < syms.length(); ++i) {
@@ -148,16 +134,33 @@ namespace grail { namespace cli {
variable_context ctx_var;
ctx_var.var = R;
- if(partition.is_valid()) {
+ if(partition) {
ctx_var.context = L;
}
- io::verbose("Added state for %s to %s\n", cfg.get_name(L), cfg.get_name(R));
- nfa_state_type state(nfa.add_state());
- state_map[ctx_var] = state;
- contexts[L].insert(ctx_var);
+ nfa_state_type state;
+
+ // create the state
+ if(0U == state_map.count(ctx_var)) {
+ io::verbose("Added state for %s to %s\n", cfg.get_name(L), cfg.get_name(R));
+ state = nfa.add_state();
+ state_map[ctx_var] = state;
+
+ if(label_states) {
+ if(partition) {
+ sprintf(name_buff, "%s:%s", cfg.get_name(L), cfg.get_name(R));
+ nfa.set_name(state, name_buff);
+ } else {
+ nfa.set_name(state, cfg.get_name(R));
+ }
+ }
+
+ // we've already created this state
+ } else {
+ state = state_map[ctx_var];
+ }
- nfa.set_name(state, cfg.get_name(R));
+ contexts[L].insert(ctx_var);
}
}
@@ -172,7 +175,7 @@ namespace grail { namespace cli {
variable_context ctx_var;
ctx_var.var = R;
- if(partition.is_valid()) {
+ if(partition) {
ctx_var.context = L;
}
@@ -185,25 +188,79 @@ namespace grail { namespace cli {
nfa_state_type from_state(state_map[ctx_var]);
nfa_state_type to_state(state_map[*it]);
- nfa.add_transition(from_state, nfa.epsilon(), to_state);
+ nfa.add_transition(
+ from_state,
+ label_states ? nfa.epsilon() : nfa.get_symbol(cfg.get_name(it->var)),
+ to_state
+ );
}
}
}
io::verbose("Adding in start state and transitions.\n");
// add in a start state
- nfa_state_type start_state(nfa.add_state());
+ nfa_state_type start_state(nfa.get_start_state());
+ cfg_variable_type start_var(cfg.get_start_variable());
+
nfa.set_start_state(start_state);
nfa.add_accept_state(start_state);
- std::set<variable_context> &related(contexts[cfg.get_start_variable()]);
+ if(label_states) {
+ nfa.set_name(start_state, cfg.get_name(start_var));
+ }
+
+ std::set<variable_context> &related(contexts[start_var]);
typename std::set<variable_context>::iterator it(related.begin());
for(; it != related.end(); ++it) {
nfa_state_type to_state(state_map[*it]);
- nfa.add_transition(start_state, nfa.epsilon(), to_state);
+ nfa.add_transition(
+ start_state,
+ label_states ? nfa.epsilon() : nfa.get_symbol(cfg.get_name(it->var)),
+ to_state
+ );
}
+ }
+
+ static int main(io::CommandLineOptions &options) throw() {
+
+ // run the tool
+ io::option_type file(options[0U]);
+ io::option_type partition(options["partition"]);
+ io::option_type dot(options["dot"]);
+ io::option_type label_states(options["label-states"]);
+
+ const char *file_name(file.value());
+
+ FILE *fp(fopen(file_name, "r"));
+
+ if(0 == fp) {
+ options.error(
+ "Unable to open file containing context-free "
+ "grammar for reading."
+ );
+ options.note("File specified here:", file);
+
+ return 1;
+ }
+
+ cfg_cfg_type cfg;
+ nfa_nfa_type nfa;
+
+ // can't bring in the cfg :(
+ if(!io::fread(fp, cfg, file_name)) {
+ options.error(
+ "Unable to read file containing context-free "
+ "grammar."
+ );
+ options.note("File specified here:", file);
+ return 1;
+ }
+
+ io::verbose("Opened '%s'\n", file_name);
+
+ extract(cfg, nfa, partition.is_valid(), label_states.is_valid());
// output
io::verbose("Outputting...\n");
View
103 grail/include/cli/NFA_DOMINATORS.hpp
@@ -6,10 +6,107 @@
* Version: $Id$
*/
-#ifndef Grail-Plus_NFA_DOMINATORS_HPP_
-#define Grail-Plus_NFA_DOMINATORS_HPP_
+#ifndef Grail_Plus_NFA_DOMINATORS_HPP_
+#define Grail_Plus_NFA_DOMINATORS_HPP_
+#include "fltl/include/NFA.hpp"
+namespace grail { namespace cli {
-#endif /* Grail-Plus_NFA_DOMINATORS_HPP_ */
+ template <typename AlphaT>
+ class NFA_DOMINATORS {
+ public:
+
+ static const char * const TOOL_NAME;
+
+ static void declare(io::CommandLineOptions &opt, bool in_help) throw() {
+ io::option_type in(opt.declare("stdin", io::opt::OPTIONAL, io::opt::NO_VAL));
+ if(!in_help) {
+ if(in.is_valid()) {
+ opt.declare_max_num_positional(0);
+ } else {
+ opt.declare_min_num_positional(1);
+ opt.declare_max_num_positional(1);
+ }
+ }
+ }
+
+ static void help(void) throw() {
+ // " | | | |"
+ printf(
+ " %s:\n"
+ " Converts a context-free grammar (CFG) into a Non-deterministic Pushdown\n"
+ " Automaton (PDA).\n\n"
+ " basic use options for %s:\n"
+ " --stdin Read a CFG from stdin. Typing a new\n"
+ " line followed by Ctrl-D or Ctrl-Z will\n"
+ " close stdin.\n"
+ " <file> read in a CFG from <file>.\n\n",
+ TOOL_NAME, TOOL_NAME
+ );
+ }
+
+ static int main(io::CommandLineOptions &options) throw() {
+
+ using fltl::CFG;
+ using fltl::PDA;
+
+ // run the tool
+ io::option_type file;
+ const char *file_name(0);
+
+ FILE *fp(0);
+
+ if(options["stdin"].is_valid()) {
+ file = options["stdin"];
+ fp = stdin;
+ file_name = "<stdin>";
+ } else {
+ file = options[0U];
+ file_name = file.value();
+ fp = fopen(file_name, "r");
+ }
+
+ if(0 == fp) {
+
+ options.error(
+ "Unable to open file containing context-free "
+ "grammar for reading."
+ );
+ options.note("File specified here:", file);
+
+ return 1;
+ }
+
+ CFG<AlphaT> cfg;
+ PDA<AlphaT> pda;
+ int ret(0);
+
+ if(io::fread(fp, cfg, file_name)) {
+ if(0 != cfg.num_variable_terminals()) {
+ options.error(
+ "There is at least one variable terminal "
+ "in the grammar file."
+ );
+ options.note("File specified here:", file);
+ ret = 1;
+ } else {
+
+ }
+ } else {
+ ret = 1;
+ }
+
+ fclose(fp);
+
+ return ret;
+ }
+ };
+
+ template <typename AlphaT>
+ const char * const NFA_DOMINATORS<AlphaT>::TOOL_NAME("nfa-dominators");
+}}
+
+
+#endif /* Grail_Plus_NFA_DOMINATORS_HPP_ */
View
33 grail/include/cli/NFA_TO_DOT.hpp
@@ -76,6 +76,8 @@ namespace grail { namespace cli {
static void print(FILE *outfile, nfa_type &nfa) throw() {
+ char name_buff[1024] = {'\0'};
+
state_type from, to;
symbol_type condition;
generator_type transitions(nfa.search(~from, ~condition, ~to));
@@ -93,33 +95,42 @@ namespace grail { namespace cli {
// print out the accept states
for(; states.match_next(); ) {
+
+ // how should we label the state?
+ const char *state_name(nfa.get_name(from));
+ if('\0' == state_name[0]) {
+ sprintf(name_buff, "%u", from.number());
+ } else {
+ sprintf(name_buff, "%s", state_name);
+ }
+
if(nfa.is_accept_state(from)) {
fprintf(outfile,
- " %u [shape=doublecircle label=\"%u %s\"]\n",
+ " %u [shape=doublecircle label=\"%s\"]\n",
from.number(),
- from.number(),
- nfa.get_name(from)
+ name_buff
);
} else {
fprintf(outfile,
- " %u [label=\"%u %s\"]\n",
- from.number(),
+ " %u [label=\"%s\"]\n",
from.number(),
- nfa.get_name(from)
+ name_buff
);
}
}
// print out the transitions
for(; transitions.match_next(); ) {
- const char *cond_str(0);
- const char *alpha("");
+ const char *cond_str("&epsilon;");
if(nfa.epsilon() != condition) {
- alpha = nfa.get_alpha(condition);
+ traits_type::unserialize(
+ nfa.get_alpha(condition),
+ cond_str
+ );
}
- traits_type::unserialize(alpha, cond_str);
- fprintf(outfile, " %u -> %u [label=\"%s\"]\n",
+
+ fprintf(outfile, " %u -> %u [label=<%s>]\n",
from.number(),
to.number(),
cond_str
View
36 grail/include/io/fprint_nfa.hpp
@@ -55,36 +55,54 @@ namespace grail { namespace io {
return num;
}
+ /// print out either a state's symbolic name or its number if no name
+ /// exists
+ template <typename AlphaT>
+ static char *sprint_state(
+ char *buffer,
+ const fltl::NFA<AlphaT> &nfa,
+ typename fltl::NFA<AlphaT>::state_type state
+ ) throw() {
+ const char *state_name(nfa.get_name(state));
+ if('\0' != state_name[0]) {
+ sprintf(buffer, "@%s", state_name);
+ } else {
+ sprintf(buffer, "%u", state.number());
+ }
+ return buffer;
+ }
+
/// print out a non-deterministic finite automaton (NFA)
template <typename AlphaT>
int fprint(FILE *ff, const fltl::NFA<AlphaT> &nfa) throw() {
+ char state_buffer[1024] = {'\0'};
- typedef fltl::NFA<AlphaT> NFA;
+ FLTL_NFA_USE_TYPES(fltl::NFA<AlphaT>);
int num(0);
if(0 == nfa.num_accept_states()) {
return num;
}
// start state
- num += fprintf(ff, "(START) |- %u\n", nfa.get_start_state().number());
+ state_type state(nfa.get_start_state());
+ num += fprintf(ff, "(START) |- %s\n", sprint_state(state_buffer, nfa, state));
// accept states
- typename NFA::state_type state;
- typename NFA::generator_type accept_states(nfa.search(~state));
+ generator_type accept_states(nfa.search(~state));
for(; accept_states.match_next(); ) {
if(nfa.is_accept_state(state)) {
- num += fprintf(ff, "%u -| (FINAL)\n", state.number());
+ num += fprintf(ff, "%s -| (FINAL)\n", sprint_state(state_buffer, nfa, state));
}
}
// transitions
- typename NFA::transition_type trans;
- typename NFA::generator_type transitions(nfa.search(~trans));
+ transition_type trans;
+ generator_type transitions(nfa.search(~trans));
for(; transitions.match_next(); ) {
- num += fprintf(ff, "%u ", trans.source().number());
+ num += fprintf(ff, "%s ", sprint_state(state_buffer, nfa, trans.source()));
num += fprint_symbol(ff, nfa, trans.read());
- num += fprintf(ff, " %u ", trans.sink().number());
+ num += fprintf(ff, " %s ", sprint_state(state_buffer, nfa, trans.sink()));
num += fprintf(ff, "\n");
}
View
36 grail/include/io/fprint_pda.hpp
@@ -55,36 +55,54 @@ namespace grail { namespace io {
return num;
}
+ /// print out either a state's symbolic name or its number if no name
+ /// exists
+ template <typename AlphaT>
+ static char *sprint_state(
+ char *buffer,
+ const fltl::PDA<AlphaT> &pda,
+ typename fltl::PDA<AlphaT>::state_type state
+ ) throw() {
+ const char *state_name(pda.get_name(state));
+ if('\0' != state_name[0]) {
+ sprintf(buffer, "@%s", state_name);
+ } else {
+ sprintf(buffer, "%u", state.number());
+ }
+ return buffer;
+ }
+
/// print out a pushdown automaton (PDA)
template <typename AlphaT>
int fprint(FILE *ff, const fltl::PDA<AlphaT> &pda) throw() {
+ char state_buffer[1024] = {'\0'};
- typedef fltl::PDA<AlphaT> PDA;
+ FLTL_PDA_USE_TYPES(fltl::PDA<AlphaT>);
int num(0);
if(0 == pda.num_accept_states()) {
return num;
}
// start state
- num += fprintf(ff, "(START) |- %u\n", pda.get_start_state().number());
+ state_type state(pda.get_start_state());
+ num += fprintf(ff, "(START) |- %s\n", sprint_state(state_buffer, pda, state));
// accept states
- typename PDA::state_type state;
- typename PDA::generator_type accept_states(pda.search(~state));
+ generator_type accept_states(pda.search(~state));
for(; accept_states.match_next(); ) {
if(pda.is_accept_state(state)) {
- num += fprintf(ff, "%u -| (FINAL)\n", state.number());
+ num += fprintf(ff, "%s -| (FINAL)\n", sprint_state(state_buffer, pda, state));
}
}
// transitions
- typename PDA::transition_type trans;
- typename PDA::generator_type transitions(pda.search(~trans));
+ transition_type trans;
+ generator_type transitions(pda.search(~trans));
for(; transitions.match_next(); ) {
- num += fprintf(ff, "%u ", trans.source().number());
+ num += fprintf(ff, "%s ", sprint_state(state_buffer, pda, trans.source()));
num += fprint_symbol(ff, pda, trans.read());
- num += fprintf(ff, " %u, ", trans.sink().number());
+ num += fprintf(ff, " %s, ", sprint_state(state_buffer, pda, trans.sink()));
num += fprint_symbol(ff, pda, trans.pop());
num += fprintf(ff, " / ");
num += fprint_symbol(ff, pda, trans.push());
View
228 grail/include/io/fread_nfa.hpp
@@ -36,240 +36,20 @@
#include "grail/include/io/verbose.hpp"
#include "fltl/include/NFA.hpp"
+#include "fltl/include/PDA.hpp"
+#include "fltl/include/helper/UnsafeCast.hpp"
namespace grail { namespace io {
- namespace nfa {
- uint8_t next_state(uint8_t curr_state, pda::token_type input) throw();
-
- enum {
- STATE_START = 0,
- STATE_FINAL = 3,
- STATE_SINK = 6
- };
- }
-
- /// read in a context free grammar from a file
template <typename AlphaT>
bool fread(
FILE *ff,
fltl::NFA<AlphaT> &NFA,
const char * const file_name
) throw() {
-
- if(0 == ff) {
- return false;
- }
-
- io::verbose("Reading NFA from '%s'...\n", file_name);
-
- pda::token_type tt(pda::T_END);
- pda::token_type prev_tt(pda::T_END);
-
- UTF8FileBuffer<pda::BUFFER_SIZE> buffer(ff);
-
- // extra space is given to the scratch space to allow short overruns
- char scratch[pda::SCRATCH_SIZE + 20] = {'\0'};
- char *scratch_end(&(scratch[pda::SCRATCH_SIZE - 1]));
-
- unsigned num_start_states(0);
- unsigned long start_state_val(0);
-
- uint8_t state(nfa::STATE_START);
- uint8_t prev_state(nfa::STATE_START);
-
- unsigned line(0);
- unsigned col(0);
-
- for(;;) {
- prev_tt = tt;
- line = buffer.line();
- col = buffer.column();
- tt = pda::get_token<true>(buffer, scratch, scratch_end, file_name);
-
- if(pda::T_ERROR == tt) {
- return false;
- }
-
- prev_state = state;
- state = nfa::next_state(state, tt);
-
- // looking at the start state
- if(pda::T_STATE == tt && pda::T_START_SET == prev_tt) {
- if(1 == ++num_start_states) {
- start_state_val = strtoul(scratch, 0, 10);
- }
- } else if(pda::T_STACK_SYMBOL == tt) {
- if(0 != strcmp(scratch, "epsilon")) {
- error(file_name, line, col,
- "Expected 'epsilon', but got '%s'",
- scratch
- );
- return false;
- }
- }
-
- switch(state) {
- case nfa::STATE_FINAL:
- goto checked_syntax;
-
- case nfa::STATE_SINK:
- switch(tt) {
- case pda::T_START:
- strcpy(scratch, "(START)");
- break;
- case pda::T_START_SET:
- strcpy(scratch, "|-");
- break;
- case pda::T_FINAL:
- strcpy(scratch, "(FINAL)");
- break;
- case pda::T_FINAL_SET:
- strcpy(scratch, "-|");
- break;
- case pda::T_SLASH:
- strcpy(scratch, "/");
- break;
- case pda::T_COMMA:
- strcpy(scratch, ",");
- break;
- case pda::T_NEW_LINE:
- strcpy(scratch, "\\n");
- break;
- case pda::T_END:
- strcpy(scratch, "<EOF>");
- break;
- case pda::T_ERROR:
- case pda::T_INPUT_SYMBOL:
- case pda::T_STACK_SYMBOL:
- case pda::T_STATE:
- default:
- break;
- }
-
- error(
- file_name, buffer.line(), buffer.column(),
- "Unexpected symbol found with value '%s'. Note: "
- "previous state of parsing automaton was %u.",
- scratch, prev_state
- );
- return false;
-
- case nfa::STATE_START:
- default:
- break;
- }
- }
-
- checked_syntax:
-
- // map unsigned longs to states
- std::map<
- unsigned long,
- typename fltl::NFA<AlphaT>::state_type
- > state_map;
-
- // special case so we don't add in needless epsilon transitions
- if(1 == num_start_states) {
- state_map[start_state_val] = NFA.get_start_state();
- }
-
- buffer.reset();
- state = pda::STATE_START;
-
- typename fltl::NFA<AlphaT>::state_type seen_states[2];
- typename fltl::NFA<AlphaT>::state_type *next_seen_state(
- &(seen_states[0])
- );
- typename fltl::NFA<AlphaT>::symbol_type seen_symbol;
-
- typename fltl::NFA<AlphaT>::alphabet_type sym;
-
- unsigned long state_id;
-
- // re-parse without error checking
- for(tt = pda::T_END;;) {
-
- prev_tt = tt;
- tt = pda::get_token<false>(buffer, scratch, scratch_end, file_name);
-
- state = nfa::next_state(state, tt);
-
- switch(tt) {
-
- case pda::T_INPUT_SYMBOL:
- fltl::NFA<AlphaT>::traits_type::unserialize(scratch, sym);
- seen_symbol = NFA.get_symbol(sym);
- break;
-
- case pda::T_STACK_SYMBOL:
- seen_symbol = NFA.epsilon();
- break;
-
- case pda::T_STATE:
- state_id = strtoul(scratch, 0, 10);
- if(0 == state_map.count(state_id)) {
- state_map[state_id] = NFA.add_state();
- }
- *next_seen_state = state_map[state_id];
- ++next_seen_state;
- break;
-
- case pda::T_ERROR:
- case pda::T_END:
- case pda::T_NEW_LINE:
- case pda::T_COMMA:
- case pda::T_SLASH:
- case pda::T_FINAL_SET:
- case pda::T_FINAL:
- case pda::T_START_SET:
- case pda::T_START:
- default:
- break;
- }
-
- switch(state) {
- case pda::STATE_FINAL:
- goto done;
-
- case pda::STATE_START:
-
- // add a final state
- if(pda::T_FINAL == tt) {
-
- NFA.add_start_state(seen_states[0]);
-
- // add a start state
- } else if(pda::T_START_SET == prev_tt) {
-
- NFA.add_accept_state(seen_states[0]);
-
- // add a transition
- } else if(pda::T_STATE == tt) {
- NFA.add_transition(
- seen_states[0],
- seen_symbol,
- seen_states[1]
- );
- }
-
- next_seen_state = &(seen_states[0]);
- break;
-
- default: break;
- }
- }
-
- done:
-
- io::verbose(" %u states,\n", NFA.num_states());
- io::verbose(" %u accept states,\n", NFA.num_accept_states());
- io::verbose(" %u transitions,\n", NFA.num_transitions());
- io::verbose(" %u symbols.\n", NFA.num_symbols());
-
- return true;
+ fltl::PDA<AlphaT> *PDA(fltl::helper::unsafe_cast<fltl::PDA<AlphaT> *>(&NFA));
+ return fread(ff, *PDA, file_name);
}
-
}}
#endif /* FLTL_FREAD_NFA_HPP_ */
View
95 grail/include/io/fread_pda.hpp
@@ -37,6 +37,8 @@
#include "fltl/include/PDA.hpp"
+#include "grail/include/helper/CStringMap.hpp"
+
#include "grail/include/io/error.hpp"
#include "grail/include/io/fread.hpp"
#include "grail/include/io/UTF8FileBuffer.hpp"
@@ -60,8 +62,9 @@ namespace grail { namespace io {
T_NEW_LINE = 6,
T_INPUT_SYMBOL = 7,
T_STACK_SYMBOL = 8,
- T_STATE = 9,
- T_END = 10,
+ T_STATE_SYMBOL = 9,
+ T_STATE = 10,
+ T_END = 11,
T_ERROR
} token_type;
@@ -353,18 +356,26 @@ namespace grail { namespace io {
// the start symbol
} else if(is_symbol_codepoint(codepoint)) {
+ token_type type(T_STACK_SYMBOL);
+ unsigned scratch_offset(buffer.byte_length());
+
if(LOOK_FOR_ERRORS) {
temp_line = buffer.line();
temp_col = buffer.column();
}
- strcpy(scratch, codepoint);
+ if('@' == ch) {
+ type = T_STATE_SYMBOL;
+ scratch_offset = 0U;
+ } else {
+ strcpy(scratch, codepoint);
+ }
sym_state = detail::find_symbol<
BUFFER_SIZE,
LOOK_FOR_ERRORS
>(
buffer,
- scratch + buffer.byte_length(),
+ scratch + scratch_offset,
scratch_end,
is_symbol_codepoint
);
@@ -397,7 +408,7 @@ namespace grail { namespace io {
}
}
- return T_STACK_SYMBOL;
+ return type;
// unacceptable thing
} else if(LOOK_FOR_ERRORS) {
@@ -436,6 +447,7 @@ namespace grail { namespace io {
// extra space is given to the scratch space to allow short overruns
char scratch[pda::SCRATCH_SIZE + 20] = {'\0'};
+ char start_state_name[pda::SCRATCH_SIZE + 20] = {'\0'};
char *scratch_end(&(scratch[pda::SCRATCH_SIZE - 1]));
unsigned num_start_states(0);
@@ -455,8 +467,10 @@ namespace grail { namespace io {
state = pda::next_state(state, tt);
// looking at the start state
- if(pda::T_STATE == tt && pda::STATE_SEEN_START_SET == prev_state) {
- if(1 == ++num_start_states) {
+ if(pda::STATE_SEEN_START_SET == prev_state
+ && 1 == ++num_start_states) {
+ strcpy(start_state_name, scratch);
+ if(pda::T_STATE == tt) {
start_state_val = strtoul(scratch, 0, 10);
}
}
@@ -494,6 +508,7 @@ namespace grail { namespace io {
case pda::T_INPUT_SYMBOL:
case pda::T_STACK_SYMBOL:
+ case pda::T_STATE_SYMBOL:
case pda::T_STATE:
case pda::T_ERROR:
default:
@@ -512,11 +527,11 @@ namespace grail { namespace io {
checked_syntax:
+ FLTL_PDA_USE_TYPES(fltl::PDA<AlphaT>);
+
// map unsigned longs to states
- std::map<
- unsigned long,
- typename fltl::PDA<AlphaT>::state_type
- > state_map;
+ std::map<unsigned long, state_type> state_map;
+ helper::CStringMap<state_type> named_state_map;
// special case so we don't add in needless epsilon transitions
if(1 == num_start_states) {
@@ -528,16 +543,11 @@ namespace grail { namespace io {
prev_state = pda::STATE_SINK;
uint8_t prev_prev_state(pda::STATE_SINK);
- typename fltl::PDA<AlphaT>::state_type seen_states[2];
- typename fltl::PDA<AlphaT>::state_type *next_seen_state(
- &(seen_states[0])
- );
- typename fltl::PDA<AlphaT>::symbol_type seen_symbols[3];
- typename fltl::PDA<AlphaT>::symbol_type *next_seen_symbol(
- &(seen_symbols[0])
- );
-
- typename fltl::PDA<AlphaT>::alphabet_type sym;
+ state_type seen_states[2];
+ state_type *next_seen_state(&(seen_states[0]));
+ symbol_type seen_symbols[3];
+ symbol_type *next_seen_symbol(&(seen_symbols[0]));
+ alphabet_type sym;
unsigned long state_id;
@@ -552,7 +562,7 @@ namespace grail { namespace io {
switch(tt) {
case pda::T_INPUT_SYMBOL:
- fltl::PDA<AlphaT>::traits_type::unserialize(scratch, sym);
+ traits_type::unserialize(scratch, sym);
*next_seen_symbol = PDA.get_alphabet_symbol(sym);
++next_seen_symbol;
break;
@@ -566,14 +576,47 @@ namespace grail { namespace io {
++next_seen_symbol;
break;
- case pda::T_STATE:
+ case pda::T_STATE: {
+ state_type state;
state_id = strtoul(scratch, 0, 10);
- if(0 == state_map.count(state_id)) {
- state_map[state_id] = PDA.add_state();
+ if(1 == num_start_states && start_state_val == state_id) {
+ state = PDA.get_start_state();
+ } else if(0 == state_map.count(state_id)) {
+ state = PDA.add_state();
+ state_map[state_id] = state;
+ } else {
+ state = state_map[state_id];
}
- *next_seen_state = state_map[state_id];
+
+ *next_seen_state = state;
++next_seen_state;
break;
+ }
+
+ case pda::T_STATE_SYMBOL: {
+ state_type state;
+ bool has_state(false);
+
+ if(1 == num_start_states
+ && 0 == strcmp(scratch, start_state_name)) {
+ state = PDA.get_start_state();
+ has_state = true;
+ }
+
+ if(!named_state_map.contains(scratch)) {
+ if(!has_state) {
+ state = PDA.add_state();
+ }
+ PDA.set_name(state, scratch);
+ named_state_map.set(PDA.get_name(state), state);
+ } else {
+ state = named_state_map.get(scratch);
+ }
+
+ *next_seen_state = state;
+ ++next_seen_state;
+ break;
+ }
case pda::T_ERROR:
case pda::T_END:
View
42 grail/lib/io/fread_pda.cpp
@@ -32,7 +32,12 @@ namespace grail { namespace io { namespace pda {
/// symbol character
bool is_symbol_codepoint(const char * const cp) throw() {
- return *cp < 0 || (0 != isalnum(static_cast<int>(*cp))) || '_' == *cp;
+ return *cp < 0
+ || (0 != isalnum(static_cast<int>(*cp)))
+ || '_' == *cp
+ || ':' == *cp
+ || '$' == *cp
+ || '@' == *cp;
}
bool is_numeric_codepoint(const char * const cp) throw() {
@@ -43,24 +48,25 @@ namespace grail { namespace io { namespace pda {
// ./fa -mdfa "((N*)(((S>Z)|(Z<F)|(Z(\"|I)Z(,(\"|I)?/(\"|I)?)?)))N)*(N*)E"
- static uint8_t trans[13][11] = {
- /*
- (START) |- (FINAL) -| / , \n Z \0
- S > F < / , N " I Z E
+ static uint8_t trans[13][12] = {
+ /* where Z is a state number and @ is a state name, and they
+ * are interchangeable.
+ (START) |- (FINAL) -| / , \n @ Z \0
+ S > F < / , N " I @ Z E
*/
- /* 0 */ {2, 12, 12, 12, 12, 12, 0, 12, 12, 1, 3},
- /* 1 */ {12, 12, 12, 7, 12, 12, 12, 6, 6, 12, 12},
- /* 2 */ {12, 4, 12, 12, 12, 12, 12, 12, 12, 12, 12},
- /* 3 */ {12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}, // final
- /* 4 */ {12, 12, 12, 12, 12, 12, 12, 12, 12, 5, 12},
- /* 5 */ {12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12},
- /* 6 */ {12, 12, 12, 12, 12, 12, 12, 12, 12, 8, 12},
- /* 7 */ {12, 12, 5, 12, 12, 12, 12, 12, 12, 12, 12},
- /* 8 */ {12, 12, 12, 12, 12, 9, 0, 12, 12, 12, 12},
- /* 9 */ {12, 12, 12, 12, 11, 12, 12, 10, 10, 12, 12},
- /* 10 */{12, 12, 12, 12, 11, 12, 12, 12, 12, 12, 12},
- /* 11 */{12, 12, 12, 12, 12, 12, 12, 5, 5, 12, 12},
- /* 12 */{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12},
+ /* 0 */ {2, 12, 12, 12, 12, 12, 0, 12, 12, 1, 1, 3},
+ /* 1 */ {12, 12, 12, 7, 12, 12, 12, 6, 6, 12, 12, 12},
+ /* 2 */ {12, 4, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12},
+ /* 3 */ {12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}, // final
+ /* 4 */ {12, 12, 12, 12, 12, 12, 12, 12, 12, 5, 5, 12},
+ /* 5 */ {12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12},
+ /* 6 */ {12, 12, 12, 12, 12, 12, 12, 12, 12, 8, 8, 12},
+ /* 7 */ {12, 12, 5, 12, 12, 12, 12, 12, 12, 12, 12, 12},
+ /* 8 */ {12, 12, 12, 12, 12, 9, 0, 12, 12, 12, 12, 12},
+ /* 9 */ {12, 12, 12, 12, 11, 12, 12, 10, 10, 12, 12, 12},
+ /* 10 */{12, 12, 12, 12, 11, 12, 12, 12, 12, 12, 12, 12},
+ /* 11 */{12, 12, 12, 12, 12, 12, 12, 5, 5, 12, 12, 12},
+ /* 12 */{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12},
};
return trans[curr_state][input];

0 comments on commit 86c2a96

Please sign in to comment.
Something went wrong with that request. Please try again.