Skip to content

Commit 92cbf8f

Browse files
committed
Add generic API for fixed tags.
List of changes: - Generic APIs YYSHIFT, YYSHIFTSTAG, YYSHIFTMTAG have been added. - Generic APIs YYSTAGPD, YYMTAGPD have been removed. They were needed for STADFA operations, which are now expressed using YYSTAGP / YYSHIFTSTAG and YYMTAGP / YYSHIFTMTAG. - Test results for tests that use generic API have been updated. Some variable tags have turned into fixed tags. Tests with fixed-length trailing context that do not use tags are no longer erroneous. All skeleton tests now need to define new API primitives. STADFA tests that use mtags now use YYMTAGP / YYSHIFTMTAG instead of YYMTAGPD. Golang test with `re2c:decorate = 1;` that used tags in a hacky way via pointers has been removed, because fixed tags cannot be handled (and there is a better alternative with `re2c:decorate = 0;`).
1 parent e40d830 commit 92cbf8f

File tree

76 files changed

+2425
-2339
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+2425
-2339
lines changed

bootstrap/src/parse/lex_conf.cc

Lines changed: 1046 additions & 1015 deletions
Large diffs are not rendered by default.

src/codegen/code.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class Scratchbuf {
5959
bool empty() const { return os.str().empty(); }
6060
std::ostringstream &stream() { return os; }
6161
const char *flush();
62+
Scratchbuf& i32(int32_t u) { os << u; return *this; }
6263
Scratchbuf& u32(uint32_t u) { os << u; return *this; }
6364
Scratchbuf& u64(uint64_t u) { os << u; return *this; }
6465
Scratchbuf& str(const std::string &s) { os << s; return *this; }

src/codegen/gen_state.cc

Lines changed: 106 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,7 @@ void gen_settags(Output &output, CodeList *tag_actions, const DFA &dfa, tcid_t t
515515
const tagver_t l = p->lhs, r = p->rhs, *h = p->history;
516516
const std::string le = vartag_expr(l, opts);
517517
const std::string re = vartag_expr(r, opts);
518+
std::string s;
518519

519520
if (tcmd_t::iscopy(p)) {
520521
// copy command
@@ -530,34 +531,54 @@ void gen_settags(Output &output, CodeList *tag_actions, const DFA &dfa, tcid_t t
530531
// history is reversed, so use a statement sublist and prepend
531532
CodeList *actions = code_list(alc);
532533
for (; *h != TAGVER_ZERO; ++h) {
533-
const std::string &action = *h == TAGVER_BOTTOM ? opts->yymtagn
534-
: delayed ? opts->yymtagpd : opts->yymtagp;
534+
const bool negative = *h == TAGVER_BOTTOM;
535+
if (delayed && !negative) {
536+
s = opts->yyshiftmtag;
537+
if (opts->decorate) {
538+
o.str(s).cstr(" (").str(le).cstr(", ").i32(-1).cstr(")");
539+
prepend(actions, code_stmt(alc, o.flush()));
540+
}
541+
else {
542+
strrreplace(s, opts->placeholder + "1", le);
543+
strrreplace(s, opts->placeholder + "2", -1);
544+
prepend(actions, code_text(alc, o.str(s).flush()));
545+
}
546+
}
547+
s = negative ? opts->yymtagn : opts->yymtagp;
535548
if (opts->decorate) {
536-
o.str(action).cstr(" (").str(le).cstr(")");
549+
o.str(s).cstr(" (").str(le).cstr(")");
537550
prepend(actions, code_stmt(alc, o.flush()));
538551
}
539552
else {
540-
std::string s = action;
541553
strrreplace(s, opts->placeholder, le);
542-
o.str(s);
543-
prepend(actions, code_text(alc, o.flush()));
554+
prepend(actions, code_text(alc, o.str(s).flush()));
544555
}
545556
}
546557
append(tag_actions, actions);
547558
}
548559
else if (generic) {
549560
// save command without history; generic API
550-
const std::string &action = *h == TAGVER_BOTTOM ? opts->yystagn
551-
: delayed ? opts->yystagpd : opts->yystagp;
561+
const bool negative = *h == TAGVER_BOTTOM;
562+
s = negative ? opts->yystagn : opts->yystagp;
552563
if (opts->decorate) {
553-
o.str(action).cstr(" (").str(le).cstr(")");
564+
o.str(s).cstr(" (").str(le).cstr(")");
554565
append(tag_actions, code_stmt(alc, o.flush()));
555566
}
556567
else {
557-
std::string s = action;
558568
strrreplace(s, opts->placeholder, le);
559-
o.str(s);
560-
append(tag_actions, code_text(alc, o.flush()));
569+
append(tag_actions, code_text(alc, o.str(s).flush()));
570+
}
571+
if (delayed && !negative) {
572+
s = opts->yyshiftstag;
573+
if (opts->decorate) {
574+
o.str(s).cstr(" (").str(le).cstr(", ").i32(-1).cstr(")");
575+
append(tag_actions, code_stmt(alc, o.flush()));
576+
}
577+
else {
578+
strrreplace(s, opts->placeholder + "1", le);
579+
strrreplace(s, opts->placeholder + "2", -1);
580+
append(tag_actions, code_text(alc, o.str(s).flush()));
581+
}
561582
}
562583
}
563584
else {
@@ -607,11 +628,12 @@ void gen_fintags(Output &output, CodeList *stmts, const DFA &dfa, const Rule &ru
607628
append(stmts, code_stmt(alc, o.flush()));
608629
}
609630
else if (!generic) {
631+
o.str(opts->yycursor).cstr(" = ");
610632
if (dfa.oldstyle_ctxmarker) {
611-
o.str(opts->yycursor).cstr(" = ").str(opts->yyctxmarker);
633+
o.str(opts->yyctxmarker);
612634
}
613635
else {
614-
o.str(opts->yycursor).cstr(" = ").str(expr);
636+
o.str(expr);
615637
}
616638
append(stmts, code_stmt(alc, o.flush()));
617639
}
@@ -644,20 +666,46 @@ void gen_fintags(Output &output, CodeList *stmts, const DFA &dfa, const Rule &ru
644666
const Tag &tag = tags[t];
645667
if (fictive(tag) || !fixed(tag) || !trailing(tag)) continue;
646668

647-
// TODO: add generic API for fixed trailing context and use it.
648-
DASSERT(!generic);
649-
650-
const size_t dist = tag.dist;
669+
const int32_t dist = static_cast<int32_t>(tag.dist);
651670
const bool fixed_on_cursor = tag.base == Tag::RIGHTMOST;
671+
const std::string base = fixed_on_cursor
672+
? opts->yycursor : vartag_expr(fins[tag.base], opts);
652673

653-
if (!fixed_on_cursor) {
654-
o.str(opts->yycursor).cstr(" = ").str(vartag_expr(fins[tag.base], opts));
655-
if (dist > 0) o.cstr(" - ").u64(dist);
674+
if (generic) {
675+
DASSERT(!history(tag));
676+
if (!fixed_on_cursor) {
677+
s = opts->yyrestoretag;
678+
if (opts->decorate) {
679+
o.str(s).cstr(" (").str(base).cstr(")");
680+
append(stmts, code_stmt(alc, o.flush()));
681+
}
682+
else {
683+
strrreplace(s, opts->placeholder, base);
684+
append(stmts, code_text(alc, o.str(s).flush()));
685+
}
686+
}
687+
if (dist > 0) {
688+
s = opts->yyshift;
689+
if (opts->decorate) {
690+
o.str(s).cstr(" (").i32(-dist).cstr(")");
691+
append(stmts, code_stmt(alc, o.flush()));
692+
}
693+
else {
694+
strrreplace(s, opts->placeholder, -dist);
695+
append(stmts, code_text(alc, o.str(s).flush()));
696+
}
697+
}
656698
}
657-
else if (dist > 0) {
658-
o.str(opts->yycursor).cstr(" -= ").u64(dist);
699+
else {
700+
if (!fixed_on_cursor) {
701+
o.str(opts->yycursor).cstr(" = ").str(vartag_expr(fins[tag.base], opts));
702+
if (dist > 0) o.cstr(" - ").i32(dist);
703+
}
704+
else if (dist > 0) {
705+
o.str(opts->yycursor).cstr(" -= ").i32(dist);
706+
}
707+
append(stmts, code_stmt(alc, o.flush()));
659708
}
660-
append(stmts, code_stmt(alc, o.flush()));
661709
}
662710

663711
// fixed tags (except for trailing context)
@@ -667,16 +715,44 @@ void gen_fintags(Output &output, CodeList *stmts, const DFA &dfa, const Rule &ru
667715
// see note [fixed and variable tags]
668716
if (fictive(tag) || !fixed(tag) || trailing(tag)) continue;
669717

670-
const size_t dist = tag.dist;
718+
const int32_t dist = static_cast<int32_t>(tag.dist);
671719
const bool fixed_on_cursor = tag.base == Tag::RIGHTMOST;
672-
expr = fixed_on_cursor ? opts->yycursor : vartag_expr(fins[tag.base], opts);
720+
const std::string fix = tag_expr(tag, true);
721+
const std::string base = fixed_on_cursor
722+
? opts->yycursor : vartag_expr(fins[tag.base], opts);
723+
673724
if (generic) {
674-
// TODO: add generic API primitives for fixed tags and use them.
675-
DASSERT(false);
725+
if (fixed_on_cursor) {
726+
s = history(tag) ? opts->yymtagp : opts->yystagp;
727+
if (opts->decorate) {
728+
o.str(s).cstr(" (").str(fix).cstr(")");
729+
append(stmts, code_stmt(alc, o.flush()));
730+
}
731+
else {
732+
strrreplace(s, opts->placeholder, fix);
733+
append(stmts, code_text(alc, o.str(s).flush()));
734+
}
735+
}
736+
else {
737+
o.str(fix).cstr(" = ").str(base);
738+
append(stmts, code_stmt(alc, o.flush()));
739+
}
740+
if (dist > 0) {
741+
s = history(tag) ? opts->yyshiftmtag : opts->yyshiftstag;
742+
if (opts->decorate) {
743+
o.str(s).cstr(" (").str(fix).cstr(", ").i32(-dist).cstr(")");
744+
append(stmts, code_stmt(alc, o.flush()));
745+
}
746+
else {
747+
strrreplace(s, opts->placeholder + "1", fix);
748+
strrreplace(s, opts->placeholder + "2", -dist);
749+
append(stmts, code_text(alc, o.str(s).flush()));
750+
}
751+
}
676752
}
677753
else {
678-
o.str(tag_expr(tag, true)).cstr(" = ").str(expr);
679-
if (dist > 0) o.cstr(" - ").u64(dist);
754+
o.str(fix).cstr(" = ").str(base);
755+
if (dist > 0) o.cstr(" - ").i32(dist);
680756
append(stmts, code_stmt(alc, o.flush()));
681757
}
682758
}

src/options/opt.cc

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,11 @@ static void fix_mutopt(const conopt_t &glob, const mutopt_t &defaults,
114114
real.yyrestoretag = defaults.yyrestoretag;
115115
real.yystagn = defaults.yystagn;
116116
real.yystagp = defaults.yystagp;
117-
real.yystagpd = defaults.yystagpd;
118117
real.yymtagn = defaults.yymtagn;
119118
real.yymtagp = defaults.yymtagp;
120-
real.yymtagpd = defaults.yymtagpd;
119+
real.yyshift = defaults.yyshift;
120+
real.yyshiftstag = defaults.yyshiftstag;
121+
real.yyshiftmtag = defaults.yyshiftmtag;
121122
real.yylessthan = defaults.yylessthan;
122123
real.dFlag = defaults.dFlag;
123124
real.yydebug = defaults.yydebug;
@@ -201,7 +202,8 @@ static void fix_mutopt(const conopt_t &glob, const mutopt_t &defaults,
201202
real.yyrestoretag = defaults.yyrestoretag;
202203
real.yystagn = defaults.yystagn;
203204
real.yystagp = defaults.yystagp;
204-
real.yystagpd = defaults.yystagpd;
205+
real.yyshift = defaults.yyshift;
206+
real.yyshiftstag = defaults.yyshiftstag;
205207
// for mtags there are no sensible defaults
206208
}
207209
if (!real.dFlag) {
@@ -448,10 +450,11 @@ void Opt::reset_group_api()
448450
reset_yyrestoretag();
449451
reset_yystagn();
450452
reset_yystagp();
451-
reset_yystagpd();
452453
reset_yymtagn();
453454
reset_yymtagp();
454-
reset_yymtagpd();
455+
reset_yyshift();
456+
reset_yyshiftstag();
457+
reset_yyshiftmtag();
455458
reset_yyskip();
456459
reset_yyfilllabel();
457460
reset_yynext();

src/options/opt.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,10 +170,11 @@ const uint32_t NOEOF = ~0u - 1;
170170
MUTOPT (std::string, yylessthan, "YYLESSTHAN") \
171171
MUTOPT (std::string, yystagn, "YYSTAGN") \
172172
MUTOPT (std::string, yystagp, "YYSTAGP") \
173-
MUTOPT (std::string, yystagpd, "YYSTAGPD") \
174173
MUTOPT (std::string, yymtagn, "YYMTAGN") \
175174
MUTOPT (std::string, yymtagp, "YYMTAGP") \
176-
MUTOPT (std::string, yymtagpd, "YYMTAGPD") \
175+
MUTOPT (std::string, yyshift, "YYSHIFT") \
176+
MUTOPT (std::string, yyshiftstag, "YYSHIFTSTAG") \
177+
MUTOPT (std::string, yyshiftmtag, "YYSHIFTMTAG") \
177178
MUTOPT (bool, decorate, true) \
178179
MUTOPT (std::string, placeholder, RE2C_PLACEHOLDER) \
179180
/* #line directives */ \

src/parse/lex_conf.re

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,11 @@ void Scanner::lex_conf(Opt &opts)
127127
"define:YYLESSTHAN" { opts.set_yylessthan (lex_conf_string ()); return; }
128128
"define:YYSTAGN" { opts.set_yystagn (lex_conf_string ()); return; }
129129
"define:YYSTAGP" { opts.set_yystagp (lex_conf_string ()); return; }
130-
"define:YYSTAGPD" { opts.set_yystagpd (lex_conf_string ()); return; }
131130
"define:YYMTAGN" { opts.set_yymtagn (lex_conf_string ()); return; }
132131
"define:YYMTAGP" { opts.set_yymtagp (lex_conf_string ()); return; }
133-
"define:YYMTAGPD" { opts.set_yymtagpd (lex_conf_string ()); return; }
132+
"define:YYSHIFT" { opts.set_yyshift (lex_conf_string ()); return; }
133+
"define:YYSHIFTSTAG" { opts.set_yyshiftstag (lex_conf_string ()); return; }
134+
"define:YYSHIFTMTAG" { opts.set_yyshiftmtag (lex_conf_string ()); return; }
134135
135136
"decorate" { opts.set_decorate (lex_conf_bool()); return; }
136137
"placeholder" { opts.set_placeholder(lex_conf_string()); return; }

src/regexp/fixed_tags.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ namespace {
1818
* This optimization is applied only to tags in top-level concatenation,
1919
* because in other cases the base tag may be NULL, and the calculation of
2020
* the fixed tag value is not as simple as subtracting a fixed offset.
21-
* Furthermore, fixed tags are fobidden with generic API because it cannot
22-
* express fixed offsets. M-tags (with history) also cannot be fixed.
21+
* There are no fixed m-tags (with history).
2322
*
2423
* Another special case is fictive tags (those that exist only to impose
2524
* hierarchical laws of POSIX disambiguation). We treat them as fixed in order
@@ -36,7 +35,7 @@ struct StackItem {
3635
static void find_fixed_tags(RESpec &spec, std::vector<StackItem> &stack, RE *re0)
3736
{
3837
static const uint32_t VARDIST = Tag::VARDIST;
39-
bool toplevel = spec.opts->input_api != INPUT_CUSTOM;
38+
bool toplevel = true;
4039

4140
// base tag, initially the fake "rightmost tag" (the end of RE)
4241
size_t base = Tag::RIGHTMOST;

src/skeleton/generate_code.cc

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -142,13 +142,11 @@ static void emit_skeleton_defines(Output &output, CodeList *code, const DFA &dfa
142142
append(code, code_textraw(alc, "#define YYSTAGN(t) t = NULL"));
143143
append(code, code_textraw(alc, "#define YYMTAGP(t) yymtag(&t, cursor, &yytp)"));
144144
append(code, code_textraw(alc, "#define YYMTAGN(t) yymtag(&t, NULL, &yytp)"));
145+
append(code, code_textraw(alc, "#define YYSHIFTSTAG(t, o) t += o"));
146+
append(code, code_textraw(alc, "#define YYSHIFTMTAG(t, o) yytp.head[t].elem += o"));
145147
append(code, code_textraw(alc, "#define YYRESTORETAG(t) cursor = t"));
146-
if (opts->stadfa) {
147-
append(code, code_textraw(alc, "#define YYSTAGPD(t) t = cursor - 1"));
148-
append(code, code_textraw(alc,
149-
"#define YYMTAGPD(t) yymtag(&t, cursor - 1, &yytp)"));
150-
}
151148
}
149+
append(code, code_textraw(alc, "#define YYSHIFT(o) cursor += o"));
152150
append(code, code_textraw(alc, "#define YYLESSTHAN(n) (limit - cursor) < n"));
153151
append(code, code_textraw(alc, "#define YYFILL(n) { break; }"));
154152
append(code, code_newline(alc));
@@ -222,7 +220,8 @@ static void emit_skeleton_function_action(Output &output, CodeList *code, const
222220

223221
static void emit_skeleton_stags(Output &output, CodeList *code, const DFA &dfa)
224222
{
225-
if (dfa.stagnames.empty()) return;
223+
const opt_t *opts = output.block().opts;
224+
if (dfa.stagvars.empty() && !opts->posix_syntax) return;
226225

227226
code_alc_t &alc = output.allocator;
228227
Scratchbuf &o = output.scratchbuf;
@@ -278,7 +277,7 @@ static void emit_skeleton_stags(Output &output, CodeList *code, const DFA &dfa)
278277

279278
static void emit_skeleton_mtags(Output &output, CodeList *code, const DFA &dfa)
280279
{
281-
if (dfa.mtagnames.empty()) return;
280+
if (dfa.mtagvars.empty()) return;
282281

283282
code_alc_t &alc = output.allocator;
284283
Scratchbuf &o = output.scratchbuf;
@@ -598,16 +597,17 @@ static void emit_skeleton_function_lex(Output &output, CodeList *code, DFA &dfa)
598597
if (dfa.need_accept) {
599598
append(block2, code_stmt(alc, "unsigned int yyaccept = 0"));
600599
}
600+
// autogenerated stag variables
601601
if (!dfa.stagnames.empty()) {
602-
// autogenerated stag variables
603602
text = o.cstr("\n").str(indent(2, opts->indString))
604603
.cstr("const YYCTYPE *@@ = NULL;").flush();
605604
Code *stags = code_tags(alc, text, "", false);
606605
gen_tags(o, opts, stags, dfa.stagnames);
607606
append(block2, stags);
608607
append(block2, code_textraw(alc, ""));
609-
610-
// user-defined stag variables
608+
}
609+
// user-defined stag variables
610+
if (!dfa.stagvars.empty()) {
611611
var1 = dfa.stagvars.begin();
612612
var2 = dfa.stagvars.end();
613613
if (var1 != var2) {
@@ -619,18 +619,19 @@ static void emit_skeleton_function_lex(Output &output, CodeList *code, DFA &dfa)
619619
append(block2, code_stmt(alc, text));
620620
}
621621
}
622+
// autogenerated mtag variables
622623
if (!dfa.mtagnames.empty()) {
623624
append(block2, code_text(alc, "yymtagpool_clear(&yytp);"));
624625

625-
// autogenerated mtag variables
626626
text = o.cstr("\n").str(indent(2, opts->indString))
627627
.cstr("ptrdiff_t @@ = -1;").flush();
628628
Code *mtags = code_tags(alc, text, "", true);
629629
gen_tags(o, opts, mtags, dfa.mtagnames);
630630
append(block2, mtags);
631631
append(block2, code_textraw(alc, ""));
632-
633-
// user-defined mtag variables
632+
}
633+
// user-defined mtag variables
634+
if (!dfa.mtagvars.empty()) {
634635
var1 = dfa.mtagvars.begin();
635636
var2 = dfa.mtagvars.end();
636637
if (var1 != var2) {

0 commit comments

Comments
 (0)