-
Notifications
You must be signed in to change notification settings - Fork 0
/
Python3Lexer.stg
128 lines (114 loc) · 3.97 KB
/
Python3Lexer.stg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
header() ::= <<
#include "Python3Parser.h"
#include \<stack>
#include \<regex>
>>
members() ::= <<
// Tokens queued by emit(); nextToken() hands them out from the front.
std::vector\<std::unique_ptr\<antlr4::Token> > tokens;
// Indentation widths pushed by the new-line action when a deeper level starts and popped on dedent.
std::stack\<int> indents;
// Counter adjusted by the openBrace/closeBrace actions; the new-line action skips line breaks while it is positive.
int opened = 0;
// Copy of the most recent default-channel token returned by Lexer::nextToken(); commonToken() reads its line and column.
std::unique_ptr\<antlr4::Token> lastToken = nullptr;
void emit(std::unique_ptr\<antlr4::Token> t) override
{
    // Queue a copy first, because ownership of the original is handed to the
    // base lexer on the next line and t must not be used afterwards.
    auto queuedCopy = cloneToken(t);
    tokens.push_back(std::move(queuedCopy));
    setToken(std::move(t));
}
std::unique_ptr\<antlr4::Token> nextToken() override
{
    // Input is exhausted while indentation levels are still open: queue the
    // closing NEWLINE, one DEDENT per open level, and a fresh EOF.
    const bool inputExhausted = _input->LA(1) == EOF;
    if (inputExhausted && !indents.empty()) {
        // Discard every EOF token that is already waiting in the queue.
        for (auto it = tokens.begin(); it != tokens.end();) {
            if ((*it)->getType() == EOF) {
                it = tokens.erase(it);
            } else {
                ++it;
            }
        }

        // Terminate the current line first.
        emit(commonToken(Python3Parser::NEWLINE, "\n"));

        // Unwind the indentation stack, emitting a DEDENT for each level.
        for (; !indents.empty(); indents.pop()) {
            emit(createDedent());
        }

        // Put the EOF back at the end of the queue.
        emit(commonToken(EOF, "\<EOF>"));
    }

    std::unique_ptr\<antlr4::Token> next = Lexer::nextToken();

    // Remember the last default-channel token; commonToken() reads its position.
    if (next->getChannel() == antlr4::Token::DEFAULT_CHANNEL) {
        lastToken = cloneToken(next);
    }

    // Queued tokens take precedence over the token just produced.
    if (tokens.empty()) {
        return next;
    }
    std::unique_ptr\<antlr4::Token> head = std::move(tokens.front());
    tokens.erase(tokens.begin());
    return head;
}
void reset() override
{
    // Clear the token queue, the indentation stack, the bracket counter and
    // the remembered token, then delegate to Lexer::reset().
    tokens.clear();
    indents = std::stack\<int>();
    opened = 0;
    lastToken.reset();
    Lexer::reset();
}
std::unique_ptr\<antlr4::Token> createDedent() {
    // A DEDENT is a synthetic token with empty text, built by commonToken().
    return commonToken(Python3Parser::DEDENT, "");
}
std::unique_ptr\<antlr4::CommonToken> commonToken(size_t type, const std::string& text) {
    // The stop index is one before the current char index of the lexer; the
    // start index is derived from the text length, or equals stop for empty text.
    const int stop = getCharIndex() - 1;
    int start = stop;
    if (!text.empty()) {
        start = stop - text.size() + 1;
    }

    // Line and column come from the last remembered token, or 0 when there is none.
    const size_t line = lastToken ? lastToken->getLine() : 0;
    const size_t column = lastToken ? lastToken->getCharPositionInLine() : 0;

    return _factory->create({ this, _input }, type, text, DEFAULT_TOKEN_CHANNEL, start, stop, line, column);
}
std::unique_ptr\<antlr4::CommonToken> cloneToken(const std::unique_ptr\<antlr4::Token>& source) {
    // Build a new token through the factory, copying each attribute of the source.
    antlr4::Token* original = source.get();
    return _factory->create(
        { this, _input },
        original->getType(),
        original->getText(),
        original->getChannel(),
        original->getStartIndex(),
        original->getStopIndex(),
        original->getLine(),
        original->getCharPositionInLine());
}
int getIndentationCount(const std::string& spaces) {
    // Returns the indentation width in columns. A tab advances to the next
    // multiple of 8; every other character counts as exactly one column.
    int width = 0;
    for (const char ch : spaces) {
        if (ch == '\t') {
            width = (width / 8 + 1) * 8;
        } else {
            ++width;
        }
    }
    return width;
}
>>
/* Expression that holds when the lexer reports line 1, column 0. */
atStartOfInput() ::= <<
getLine() == 1 && getCharPositionInLine() == 0
>>
/* Action that increments the opened counter of the lexer by one. */
openBrace() ::= <<
++this->opened;
>>
/* Action that decrements the opened counter of the lexer by one. */
closeBrace() ::= <<
--this->opened;
>>
onNewLine() ::= <<
{
    // Action for a matched line break plus any following indentation.
    // Emits NEWLINE and then INDENT or DEDENT tokens according to how the
    // indentation width compares with the top of the indents stack.

    // The patterns never change, so compile them once instead of on every match.
    static const std::regex nonLineBreakRun("[^\r\n\f]+");
    static const std::regex lineBreakRun("[\r\n\f]+");

    // Split the matched text into its line-break characters and the rest.
    const std::string matched = getText();
    std::string newLine = std::regex_replace(matched, nonLineBreakRun, "");
    std::string spaces = std::regex_replace(matched, lineBreakRun, "");

    int next = _input->LA(1);
    int nextnext = _input->LA(2);
    if (opened > 0 || (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) {
        // Inside open brackets, or on a blank or comment-only line that is not
        // the last thing in the input: produce no token at all.
        skip();
    }
    else {
        emit(commonToken(Python3Lexer::NEWLINE, newLine));
        int indent = getIndentationCount(spaces);
        int previous = indents.empty() ? 0 : indents.top();
        if (indent == previous) {
            // Same level: the NEWLINE queued above is the only token.
            skip();
        }
        else if (indent > previous) {
            // Deeper level: record it and emit a single INDENT.
            indents.push(indent);
            emit(commonToken(Python3Lexer::INDENT, spaces));
        }
        else {
            // Shallower level: emit one DEDENT per stack entry wider than the new indent.
            while(!indents.empty() && indents.top() > indent) {
                emit(createDedent());
                indents.pop();
            }
        }
    }
}
>>