Skip to content

Commit 9bb515e

Browse files
committed
Add missing fallback states with EOF rule.
When EOF rule is used, a final state may be a fallback state even if all outgoing paths from it are accepting, because the possibility of YYFILL failure creates an additional default quasi-transition.
1 parent e6fd899 commit 9bb515e

File tree

6 files changed

+118
-69
lines changed

6 files changed

+118
-69
lines changed

src/adfa/prepare.cc

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -174,24 +174,36 @@ void DFA::prepare(const opt_t *opts)
174174
}
175175

176176
// create default state (if needed)
177-
State * default_state = NULL;
178-
for (State * s = head; s; s = s->next)
179-
{
180-
for (uint32_t i = 0; i < s->go.nSpans; ++i)
181-
{
182-
if (!s->go.span[i].to)
183-
{
184-
if (!default_state)
185-
{
186-
default_state = new State;
187-
defstate = default_state;
177+
State *default_state = NULL;
178+
for (State *s = head; s; s = s->next) {
179+
for (uint32_t i = 0; i < s->go.nSpans; ++i) {
180+
if (!s->go.span[i].to) {
181+
if (!default_state) {
182+
default_state = defstate = new State;
188183
addState(default_state, s);
189184
}
190185
s->go.span[i].to = defstate;
191186
}
192187
}
193188
}
194189

190+
// With EOF rule there is a default quasi-transition on YYFILL failure, so
191+
// default state is needed if there is at least one final state with at
192+
// least one outgoing transition to a non-final state.
193+
if (!default_state && opts->eof != NOEOF) {
194+
bool have_fallback_states = false;
195+
196+
for (State *s = head; s; s = s->next) {
197+
have_fallback_states |= s->fallback;
198+
199+
if (!s->next && have_fallback_states) {
200+
default_state = defstate = new State;
201+
addState(default_state, s);
202+
break;
203+
}
204+
}
205+
}
206+
195207
// bind save actions to fallback states and create accept state (if needed)
196208
if (default_state) {
197209
for (State *s = head; s; s = s->next) {

src/dfa/dead_rules.cc

Lines changed: 53 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ struct tcmd_t;
6767
* them here and save for future use.
6868
*/
6969

70-
7170
// reversed DFA
7271
struct rdfa_t
7372
{
@@ -129,7 +128,6 @@ struct rdfa_t
129128
FORBID_COPY(rdfa_t);
130129
};
131130

132-
133131
static void backprop(const rdfa_t &rdfa, bool *live,
134132
size_t rule, size_t state)
135133
{
@@ -152,7 +150,6 @@ static void backprop(const rdfa_t &rdfa, bool *live,
152150
}
153151
}
154152

155-
156153
static void liveness_analyses(const rdfa_t &rdfa, bool *live)
157154
{
158155
for (size_t i = 0; i < rdfa.nstates; ++i) {
@@ -163,7 +160,6 @@ static void liveness_analyses(const rdfa_t &rdfa, bool *live)
163160
}
164161
}
165162

166-
167163
static void warn_dead_rules(const dfa_t &dfa, size_t defrule,
168164
const std::string &cond, const bool *live, Msg &msg)
169165
{
@@ -190,7 +186,6 @@ static void warn_dead_rules(const dfa_t &dfa, size_t defrule,
190186
}
191187
}
192188

193-
194189
static void warn_sentinel_in_midrule(const dfa_t &dfa, const opt_t *opts
195190
, const std::string &cond, const bool *live, Msg &msg)
196191
{
@@ -237,7 +232,6 @@ static void warn_sentinel_in_midrule(const dfa_t &dfa, const opt_t *opts
237232
delete[] bad;
238233
}
239234

240-
241235
static void remove_dead_final_states(dfa_t &dfa, const bool *fallthru)
242236
{
243237
const size_t
@@ -266,55 +260,78 @@ static void remove_dead_final_states(dfa_t &dfa, const bool *fallthru)
266260
}
267261
}
268262

269-
270263
static void find_fallback_states(dfa_t &dfa, const bool *fallthru)
271264
{
272-
const size_t
273-
nstate = dfa.states.size(),
274-
nsym = dfa.nchars;
265+
const size_t nstate = dfa.states.size();
266+
const size_t nsym = dfa.nchars;
275267

276268
for (size_t i = 0; i < nstate; ++i) {
277269
dfa_state_t *s = dfa.states[i];
278-
279270
s->fallthru = fallthru[i];
271+
if (s->rule == Rule::NONE) continue;
280272

281-
if (s->rule != Rule::NONE) {
282-
for (size_t c = 0; c < nsym; ++c) {
283-
const size_t j = s->arcs[c];
284-
if (j != dfa_t::NIL && fallthru[j]) {
285-
s->fallback = true;
286-
break;
287-
}
273+
// A final state is a fallback state if there are non-accepting paths
274+
// from it (i.e. paths that end with a transition to default state).
275+
for (size_t c = 0; c < nsym; ++c) {
276+
const size_t j = s->arcs[c];
277+
if (j != dfa_t::NIL && fallthru[j]) {
278+
s->fallback = true;
279+
break;
288280
}
289281
}
290282
}
291283
}
292284

285+
static void find_fallback_states_with_eof_rule(dfa_t &dfa)
286+
{
287+
const size_t nstate = dfa.states.size();
288+
const size_t nsym = dfa.nchars;
289+
290+
for (size_t i = 0; i < nstate; ++i) {
291+
dfa_state_t *s = dfa.states[i];
292+
if (s->rule == Rule::NONE) continue;
293+
294+
// With EOF rule, a final state is a fallback state if it has outgoing
295+
// transitions to any non-accepting states (even if all possible paths
296+
// on those transitions are accepting, there is a possibility of YYFILL
297+
// failure on such path, which adds a default quasi-transition).
298+
for (size_t c = 0; c < nsym; ++c) {
299+
const size_t j = s->arcs[c];
300+
if (j != dfa_t::NIL && dfa.states[j]->rule == Rule::NONE) {
301+
s->fallback = true;
302+
break;
303+
}
304+
}
305+
}
306+
}
293307

294308
void cutoff_dead_rules(dfa_t &dfa, const opt_t *opts, size_t defrule
295309
, const std::string &cond, Msg &msg)
296310
{
297-
const rdfa_t rdfa(dfa);
298-
const size_t
299-
ns = rdfa.nstates,
300-
nl = (rdfa.nrules + 1) * ns;
301-
bool *live = new bool[nl],
302-
*fallthru = live + nl - ns;
303-
memset(live, 0, nl * sizeof(bool));
304-
305-
liveness_analyses(rdfa, live);
306-
if (opts->eof == NOEOF) {
307-
// With EOF rule, there is always a possibility that EOF occurs before
308-
// the next matching rule. In essence, sentinel is a special symbol
309-
// which is not matched by any of the rules, so none of the rules can
310-
// be completely shadowed by other rules.
311+
// The case of EOF rule is handled differently, because with EOF rule there
312+
// is always a possibility that it occurs before the next matching rule.
313+
// In essence, sentinel is a special symbol that is not covered by any of
314+
// the rules, so none of them can be completely shadowed by others.
315+
if (opts->eof != NOEOF) {
316+
find_fallback_states_with_eof_rule(dfa);
317+
}
318+
else {
319+
const rdfa_t rdfa(dfa);
320+
const size_t ns = rdfa.nstates;
321+
const size_t nl = (rdfa.nrules + 1) * ns;
322+
bool *live = new bool[nl];
323+
bool *fallthru = live + nl - ns;
324+
memset(live, 0, nl * sizeof(bool));
325+
326+
liveness_analyses(rdfa, live);
327+
311328
warn_dead_rules(dfa, defrule, cond, live, msg);
312329
remove_dead_final_states(dfa, fallthru);
313-
}
314-
warn_sentinel_in_midrule(dfa, opts, cond, live, msg);
315-
find_fallback_states(dfa, fallthru);
330+
warn_sentinel_in_midrule(dfa, opts, cond, live, msg);
331+
find_fallback_states(dfa, fallthru);
316332

317-
delete[] live;
333+
delete[] live;
334+
}
318335
}
319336

320337
} // namespace re2c

test/eof/eof_01.i--eager-skip.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ static bool lex(input_t & in, unsigned int &count)
146146
yy6:
147147
{ continue; }
148148
yy7:
149+
YYMARKER = in.cur;
149150
yyFillLabel2:
150151
yych = *in.cur;
151152
if (yych <= 0x00) {
@@ -158,6 +159,7 @@ static bool lex(input_t & in, unsigned int &count)
158159
}
159160
goto yy25;
160161
yy8:
162+
YYMARKER = in.cur;
161163
yyFillLabel3:
162164
yych = *in.cur;
163165
if (yych <= 0x00) {
@@ -417,7 +419,7 @@ static bool lex(input_t & in, unsigned int &count)
417419
default:
418420
if (in.lim <= in.cur) {
419421
if (in.fill () == 0) goto yyFillLabel13;
420-
goto yy3;
422+
goto yy58;
421423
}
422424
goto yy24;
423425
}
@@ -429,7 +431,7 @@ static bool lex(input_t & in, unsigned int &count)
429431
if (yych <= 0x00) {
430432
if (in.lim <= in.cur) {
431433
if (in.fill () == 0) goto yyFillLabel14;
432-
goto yy3;
434+
goto yy58;
433435
}
434436
goto yy24;
435437
}
@@ -445,7 +447,7 @@ static bool lex(input_t & in, unsigned int &count)
445447
default:
446448
if (in.lim <= in.cur) {
447449
if (in.fill () == 0) goto yyFillLabel15;
448-
goto yy3;
450+
goto yy58;
449451
}
450452
goto yy29;
451453
}
@@ -457,7 +459,7 @@ static bool lex(input_t & in, unsigned int &count)
457459
if (yych <= 0x00) {
458460
if (in.lim <= in.cur) {
459461
if (in.fill () == 0) goto yyFillLabel16;
460-
goto yy3;
462+
goto yy58;
461463
}
462464
goto yy29;
463465
}
@@ -919,6 +921,9 @@ static bool lex(input_t & in, unsigned int &count)
919921
}
920922
goto yy13;
921923
}
924+
yy58:
925+
in.cur = YYMARKER;
926+
goto yy3;
922927
yyeofrule1:
923928
{ return true; }
924929
}

test/eof/eof_01.i.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ static bool lex(input_t & in, unsigned int &count)
146146
yy6:
147147
{ continue; }
148148
yy7:
149-
++in.cur;
149+
YYMARKER = ++in.cur;
150150
yyFillLabel2:
151151
yych = *in.cur;
152152
if (yych <= 0x00) {
@@ -158,7 +158,7 @@ static bool lex(input_t & in, unsigned int &count)
158158
}
159159
goto yy25;
160160
yy8:
161-
++in.cur;
161+
YYMARKER = ++in.cur;
162162
yyFillLabel3:
163163
yych = *in.cur;
164164
if (yych <= 0x00) {
@@ -401,7 +401,7 @@ static bool lex(input_t & in, unsigned int &count)
401401
default:
402402
if (in.lim <= in.cur) {
403403
if (in.fill () == 0) goto yyFillLabel13;
404-
goto yy3;
404+
goto yy58;
405405
}
406406
goto yy24;
407407
}
@@ -415,7 +415,7 @@ static bool lex(input_t & in, unsigned int &count)
415415
if (yych <= 0x00) {
416416
if (in.lim <= in.cur) {
417417
if (in.fill () == 0) goto yyFillLabel14;
418-
goto yy3;
418+
goto yy58;
419419
}
420420
goto yy24;
421421
}
@@ -431,7 +431,7 @@ static bool lex(input_t & in, unsigned int &count)
431431
default:
432432
if (in.lim <= in.cur) {
433433
if (in.fill () == 0) goto yyFillLabel15;
434-
goto yy3;
434+
goto yy58;
435435
}
436436
goto yy29;
437437
}
@@ -445,7 +445,7 @@ static bool lex(input_t & in, unsigned int &count)
445445
if (yych <= 0x00) {
446446
if (in.lim <= in.cur) {
447447
if (in.fill () == 0) goto yyFillLabel16;
448-
goto yy3;
448+
goto yy58;
449449
}
450450
goto yy29;
451451
}
@@ -874,6 +874,9 @@ static bool lex(input_t & in, unsigned int &count)
874874
}
875875
goto yy13;
876876
}
877+
yy58:
878+
in.cur = YYMARKER;
879+
goto yy3;
877880
yyeofrule1:
878881
{ return true; }
879882
}

0 commit comments

Comments
 (0)