Skip to content

Commit 14c5bea

Browse files
committed
Total rewrite of the code generation subsystem.
Previously the output program was represented as a list of blocks (one block per re2c comment in the source file). Each block consisted of a list of heterogeneous fragments. Some fragments contained text that was generated immediately during the first pass of code generation; other fragments were placeholders for parts of the program whose generation was delayed (such as dispatch on YYFILL labels with the -f option, which cannot be generated on the first pass because the full list of labels is known only at the end of the first pass). Individual fragments used ad-hoc methods to render the generated code, which made rendering rather complex (for example, it was problematic to decide if the 'if' statement should be rendered on a single line or on multiple lines). This commit adds a new AST-like representation of the output program. The top-level program is still a list of output blocks, but each block is now an AST. This decouples code generation from rendering, which simplifies both phases and allows easier customization for different output language backends (such as C or Go).
1 parent faa5d41 commit 14c5bea

File tree

21 files changed

+2999
-2192
lines changed

21 files changed

+2999
-2192
lines changed

bootstrap/src/parse/lex.cc

Lines changed: 422 additions & 413 deletions
Large diffs are not rendered by default.

bootstrap/src/parse/lex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Generated by re2c 1.1.1 on Fri Aug 2 10:49:54 2019 */
1+
/* Generated by re2c 1.3 on Mon Apr 13 20:34:13 2020 */
22

33
#ifndef _RE2C_PARSE_LEX_
44
#define _RE2C_PARSE_LEX_

src/adfa/adfa.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ DFA::DFA
5252
, rules(dfa.rules)
5353
, tags(dfa.tags)
5454
, mtagvers(dfa.mtagvers)
55+
, stagnames()
56+
, stagvars()
57+
, mtagnames()
58+
, mtagvars()
5559
, finvers(dfa.finvers)
5660
, tcpool(dfa.tcpool)
5761
, max_fill (0)
@@ -66,6 +70,9 @@ DFA::DFA
6670
, setup(su)
6771
, eof_action(eof)
6872
, msg(msg)
73+
, used_labels()
74+
, start_label(label_t::first())
75+
, initial_label(label_t::first())
6976
{
7077
const size_t nstates = dfa.states.size();
7178
const size_t nchars = dfa.nchars;

src/adfa/adfa.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ struct DFA
8181
std::valarray<Rule> &rules;
8282
std::vector<Tag> &tags;
8383
std::set<tagver_t> &mtagvers;
84+
std::set<std::string> stagnames;
85+
std::set<std::string> stagvars;
86+
std::set<std::string> mtagnames;
87+
std::set<std::string> mtagvars;
8488
const tagver_t *finvers;
8589
tcpool_t &tcpool;
8690
size_t max_fill;
@@ -96,6 +100,10 @@ struct DFA
96100
const Code *eof_action;
97101
Msg &msg;
98102

103+
std::set<label_t> used_labels;
104+
label_t start_label;
105+
label_t initial_label;
106+
99107
DFA ( const dfa_t &dfa
100108
, const std::vector<size_t> &fill
101109
, size_t def
@@ -111,18 +119,17 @@ struct DFA
111119
~DFA ();
112120
void reorder();
113121
void prepare(const opt_t *opts);
114-
void calc_stats(const opt_t *opts);
115-
void emit (Output &, uint32_t &, bool, bool &);
122+
void calc_stats(OutputBlock &out);
123+
void count_used_labels(const opt_t *opts);
124+
void emit_body(Output &output, uint32_t ind, CodeStmts *program) const;
125+
void emit_dot(Output &output, CodeStmts *program) const;
116126

117127
private:
118128
void addState(State*, State *);
119129
void split (State *);
120130
void findBaseState(const opt_t *opts);
121131
void hoist_tags(const opt_t *opts);
122132
void hoist_tags_and_skip(const opt_t *opts);
123-
void count_used_labels(std::set<label_t> &used, label_t start, label_t initial, const opt_t *opts) const;
124-
void emit_body (Output &, uint32_t &, const std::set<label_t> & used_labels, label_t initial) const;
125-
void emit_dot(Output &o, bool last_cond) const;
126133

127134
FORBID_COPY (DFA);
128135
};

src/adfa/prepare.cc

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "src/adfa/adfa.h"
1010
#include "src/codegen/bitmap.h"
1111
#include "src/codegen/go.h"
12+
#include "src/codegen/emit.h"
1213
#include "src/debug/debug.h"
1314
#include "src/dfa/tcmd.h"
1415
#include "src/msg/msg.h"
@@ -242,14 +243,14 @@ void DFA::prepare(const opt_t *opts)
242243
}
243244
}
244245

245-
void DFA::calc_stats(const opt_t *opts)
246+
void DFA::calc_stats(OutputBlock &out)
246247
{
248+
const opt_t *opts = out.opts;
249+
247250
// calculate 'YYMAXFILL'
248251
max_fill = 0;
249-
for (State * s = head; s; s = s->next)
250-
{
251-
if (max_fill < s->fill)
252-
{
252+
for (State * s = head; s; s = s->next) {
253+
if (max_fill < s->fill) {
253254
max_fill = s->fill;
254255
}
255256
}
@@ -284,6 +285,33 @@ void DFA::calc_stats(const opt_t *opts)
284285
"multiple context markers, use '-t, --tags' "
285286
"option and '/*!stags:re2c ... */' directive");
286287
}
288+
289+
if (!oldstyle_ctxmarker) {
290+
for (size_t i = 0; i < tags.size(); ++i) {
291+
const Tag &tag = tags[i];
292+
if (history(tag)) {
293+
mtagvars.insert(*tag.name);
294+
}
295+
else if (tag.name) {
296+
stagvars.insert(*tag.name);
297+
}
298+
}
299+
for (tagver_t v = 1; v <= maxtagver; ++v) {
300+
const std::string s = vartag_name(v, opts->tags_prefix);
301+
if (mtagvers.find(v) != mtagvers.end()) {
302+
mtagnames.insert(s);
303+
}
304+
else {
305+
stagnames.insert(s);
306+
}
307+
}
308+
out.stags.insert(stagnames.begin(), stagnames.end());
309+
out.mtags.insert(mtagnames.begin(), mtagnames.end());
310+
}
311+
312+
if (!cond.empty()) {
313+
out.types.push_back(cond);
314+
}
287315
}
288316

289317
static bool can_hoist_tags(const State *s, const opt_t *opts)

src/codegen/bitmap.cc

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,26 @@ const bitmap_t *bitmaps_t::find(const Go *go, const State *s) const
4747

4848
bool bitmaps_t::empty() const { return maps.empty(); }
4949

50-
void bitmaps_t::gen(Output &o, uint32_t ind)
50+
CodeStmts *bitmaps_t::gen(Output &output)
5151
{
52-
if (empty() || !used) return;
52+
if (empty() || !used) return NULL;
53+
54+
const opt_t *opts = output.block().opts;
55+
code_alc_t &alc = output.allocator;
56+
Scratchbuf &o = output.scratchbuf;
57+
58+
CodeStmts *stmts = code_stmts(alc);
5359

54-
const opt_t *opts = o.block().opts;
5560
const uint32_t nmap = static_cast<uint32_t>(maps.size());
5661
riter_t b = maps.rbegin(), e = maps.rend();
5762

58-
o.wind(ind).ws("static const unsigned char ")
59-
.wstring(opts->yybm).ws("[] = {");
63+
o.cstr("static const unsigned char ").str(opts->yybm).cstr("[] = {");
64+
append_stmt(stmts, code_stmt_text(alc, o.flush()));
65+
66+
CodeStmts *block = code_stmts(alc);
67+
static const uint32_t TABLE_WIDTH = 8;
6068

61-
for (uint32_t i = 0, t = 1; b != e; i += ncunit, t += 8) {
69+
for (uint32_t i = 0, t = 1; b != e; i += ncunit, t += TABLE_WIDTH) {
6270
memset(buffer, 0, ncunit * sizeof(uint32_t));
6371

6472
for (uint32_t m = 0x80; b != e && m; m >>= 1, ++b) {
@@ -67,25 +75,33 @@ void bitmaps_t::gen(Output &o, uint32_t ind)
6775
doGen(b->go, b->on, buffer, 0, m);
6876
}
6977

70-
if (nmap > 8) {
71-
o.ws("\n").wind(ind + 1).ws("/* table ").wu32(t).ws(" .. ")
72-
.wu32(std::min(nmap, t + 7)).ws(": ").wu32(i).ws(" */");
78+
if (nmap > TABLE_WIDTH) {
79+
o.cstr("/* table ").u32(t).cstr(" .. ").u32(std::min(nmap, t + 7))
80+
.cstr(": ").u32(i).cstr(" */");
81+
append_stmt(block, code_stmt_text(alc, o.flush()));
7382
}
7483

75-
for (uint32_t c = 0; c < ncunit; ++c) {
76-
if (c % 8 == 0) {
77-
o.ws("\n").wind(ind + 1);
84+
for (uint32_t i = 0; i < ncunit / TABLE_WIDTH; ++i) {
85+
for (uint32_t j = 0; j < TABLE_WIDTH; ++j) {
86+
const uint32_t c = buffer[i * TABLE_WIDTH + j];
87+
if (opts->yybmHexTable) {
88+
o.u32_hex(c, opts);
89+
}
90+
else {
91+
o.u32_width(c, 3);
92+
}
93+
o.cstr(", ");
7894
}
79-
if (opts->yybmHexTable) {
80-
o.wu32_hex(buffer[c]);
81-
} else {
82-
o.wu32_width(buffer[c], 3);
83-
}
84-
o.ws(", ");
95+
append_stmt(block, code_stmt_text(alc, o.flush()));
8596
}
8697
}
8798

88-
o.ws("\n").wind(ind).ws("};\n");
99+
append_stmt(stmts, code_block(alc, block, CodeBlock::INDENTED));
100+
101+
o.cstr("};");
102+
append_stmt(stmts, code_stmt_text(alc, o.flush()));
103+
104+
return stmts;
89105
}
90106

91107
void doGen(const Go *g, const State *s, uint32_t *bm, uint32_t f, uint32_t m)

src/codegen/bitmap.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ namespace re2c {
1313
class Output;
1414
struct Go;
1515
struct State;
16+
struct CodeStmts;
1617

1718
struct bitmap_t
1819
{
@@ -40,7 +41,7 @@ class bitmaps_t
4041
void insert(const Go *go, const State *s);
4142
const bitmap_t *find(const Go *go, const State *s) const;
4243
bool empty() const;
43-
void gen(Output &o, uint32_t ind);
44+
CodeStmts *gen(Output &output);
4445
FORBID_COPY(bitmaps_t);
4546
};
4647

src/codegen/emit.h

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,33 @@
11
#ifndef _RE2C_CODE_EMIT_
22
#define _RE2C_CODE_EMIT_
33

4+
#include <vector>
5+
46
#include "src/codegen/output.h"
57
#include "src/adfa/adfa.h"
8+
#include "src/util/smart_ptr.h"
69

710

811
namespace re2c {
912

10-
typedef std::vector<std::string> code_lines_t;
13+
typedef std::vector<smart_ptr<DFA> > dfas_t;
14+
15+
void gen_code(Output &output, dfas_t &dfas);
16+
17+
void emit_action(Output &output, uint32_t ind, const DFA &dfa, const State *s,
18+
CodeStmts *stmts);
19+
20+
void gen_settags(Output &output, CodeStmts *tag_actions, const DFA &dfa, tcid_t tcid,
21+
bool delayed);
22+
23+
void gen_goto(Output &output, CodeStmts *stmts, const State *from, const State *to,
24+
const DFA &dfa, tcid_t tcid, bool skip, bool eof);
1125

12-
void emit_action(Output &o, uint32_t ind, const DFA &dfa, const State *s, const std::set<label_t> &used_labels);
13-
void gen_goto_plain(Output &o, uint32_t ind, const State *from, const State *to, const DFA &dfa, tcid_t tcid, bool skip, bool eof);
14-
void gen_goto_case(Output &o, uint32_t ind, const State *from, const State *to, const DFA &dfa, tcid_t tcid, bool skip, bool eof);
15-
void gen_goto_if(Output &o, uint32_t ind, const State *from, const State *to, const DFA &dfa, tcid_t tcid, bool skip, bool eof);
16-
void gen_settags(code_lines_t &code, const DFA &dfa, tcid_t tcid, const opt_t *opts, bool delayed);
1726
std::string vartag_name(tagver_t ver, const std::string &prefix);
18-
std::string vartag_expr(tagver_t ver, const std::string &prefix, const std::string &expression);
27+
28+
std::string vartag_expr(tagver_t ver, const std::string &prefix,
29+
const std::string &expression);
30+
1931
std::string tag_expr(const Tag &tag, bool lvalue);
2032

2133
inline std::string indent(uint32_t n, const std::string &s)

0 commit comments

Comments
 (0)