-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.c++
405 lines (388 loc) · 13.6 KB
/
lexer.c++
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
/**
*
* Project Ambrosia: Ambrosia library
*
* Written in 2012 by Ruben Van Boxem <vanboxem.ruben@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all copyright and related
* and neighboring rights to this software to the public domain worldwide. This software is
* distributed without any warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication along with this software.
* If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*
* lexer.c++
* Class implementation.
*
**/
// Class include
#include "Ambrosia/lexer.h++"
// Ambrosia includes
#include "Ambrosia/algorithm.h++"
#include "Ambrosia/configuration.h++"
#include "Ambrosia/debug.h++"
#include "Ambrosia/enum_maps.h++"
#include "Ambrosia/Error/error.h++"
#include "Ambrosia/Error/syntax_error.h++"
// C++ includes
#include <functional>
using std::function;
#include <istream>
using std::istream;
#include <set>
using std::set;
#include <stdexcept>
using std::logic_error;
#include <string>
using std::string;
namespace ambrosia
{
namespace lib
{
// Delimiter sets handed to lexer::next_token(): a character contained in the
// active set is returned as a one-character token and also terminates any
// token that is currently being collected.

// General-purpose set.
// NOTE(review): presumably the default argument of next_token() (it is called
// with a single argument in this file) - confirm in lexer.h++.
const set<char> special_characters{
    ',', ':',
    '(', ')',
    '{', '}'
};

// Same as above plus the newline; used by lexer::next_list_token() so that the
// end of a line shows up as a "\n" token.
const set<char> special_characters_newline{
    '\n',
    ',', ':',
    '(', ')',
    '{', '}'
};

// Delimiters used by lexer::test_condition() while reading a conditional.
const set<char> special_characters_conditional{
    '(', ')',
    '!', '+', '|'
};
/**
 * Creates a lexer reading from "stream".
 *
 * The given file name, line number and column number are stored as the
 * lexer's initial state, and the stream is positioned at "stream_position"
 * before any token is read.
 *
 * Throws error when the stream is in a failed state after the seek.
 **/
lexer::lexer(std::istream& stream,
             const string& filename,
             const size_t line_number,
             const size_t column_number,
             std::istream::pos_type stream_position)
: stream(stream),
  filename(filename),
  line_number(line_number),
  column_number(column_number)
{
    // seekg() returns the stream itself; fail() is exactly what operator! tests.
    const bool seek_failed = stream.seekg(stream_position).fail();
    if(seek_failed)
    {
        throw error("Unable to seek to position in file: " + filename);
    }
}
/* Replacement written quickly by DeadMG; disabled for now. TODO: look into this.
bool lexer::next_token(string& token,
const set<char>& special_characters)
{
token.clear();
char c;
auto addchar = [&] { token.append(1, c); };
stream.get(c);
if (contains(special_characters, c)) {
addchar();
return true;
}
// Consume but don't do anything with whitespace
if (std::isspace(c, stream.getloc())) {
return next_token(token, special_characters);
}
switch(c) {
case '"': {
//addchar();
while(stream.get(c) && c != '"') {
// Something about first char being whitespace exception
if (c == '\n') // exception
addchar();
}
if (!stream) // exception
//addchar();
return true;
}
case '\n': {
++line_number;
return next_token(token, special_characters);
}
case '#': {
while(stream.get(c) && c != '\n');
if (!stream) // unterminated comment exception
stream.putback('\n');
return next_token(token, special_characters);
}
}
// Identifier, I *think*
// Hard to extract the original logic..
// So I just went with the traditional [a-zA-Z_] regex.
addchar();
while(stream.get(c) && ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'))
addchar();
stream.putback(c);
return true;
}
*/
/**
 * Reads the next token from the stream into "token".
 *
 * Rules, as implemented below:
 *  - Whitespace in front of a token is skipped; every '\n' read while no token
 *    is in progress increments line_number.
 *  - A character contained in "special_characters" is a token of its own. When
 *    it (or whitespace) follows a token in progress, it is put back into the
 *    stream and the token in progress is returned.
 *  - A '"' at the start of a token opens a quoted string that runs up to the
 *    next unescaped '"'. The quotes themselves are not part of the token.
 *  - Inside quotes, a backslash makes the following character a literal part
 *    of the token (so \" and \\ can be written).
 *  - '#' at the start of a token skips the rest of the line (comment).
 *  - A backslash at the start of a token discards the rest of the line and
 *    counts it as a line (line continuation).
 *
 * The stream position and line number on entry are stored in
 * previous_position / previous_line_number so previous_token() can rewind.
 *
 * Returns true when a non-empty token was extracted, false otherwise. Note that
 * this means an empty quoted string ("") is reported as "no token", and that a
 * quoted string cut off by the end of the stream is returned without an error.
 *
 * Throws syntax_error for a dangling escape at the end of the stream, a newline
 * inside quotes, or whitespace directly after the opening quote.
 **/
bool lexer::next_token(string& token,
                       const set<char>& special_characters)
{
    // TODO: test the *full* hell out of this function
    // FIXME: ugly as hell, alternatives welcome.
    token.clear();
    bool inside_quotes = false;
    char c;
    // backup current position
    previous_position = stream.tellg();
    previous_line_number = line_number;

    while(stream.get(c))
    {
        debug(debug::lexer, "lexer::next_token::line number ", line_number, ", character: \'", output_form(c), "\', token so far: ", output_form(token), "\n");
        if(inside_quotes)
        {
            debug(debug::lexer, "lexer::next_token::Inside quotes.\n");
            if('\\' == c)
            {
                if(!stream.get(c))
                    throw syntax_error("Valid escape character expected after \'\\\'", filename, line_number);
                // An escaped newline is still a newline inside a quoted string.
                if('\n' == c)
                    throw syntax_error("Quoted strings cannot span several lines.", filename, line_number);
                token.append(1, c);
                // The escaped character has been consumed as a literal: it must
                // not be examined again below, otherwise \" would close the
                // string and any other escaped character would be added twice.
                continue;
            }
            if('\"' == c)
                break; // end of token at end of quotes
            else if('\n' == c)
                throw syntax_error("Quoted strings cannot span several lines.", filename, line_number);
            else if(token.empty() && std::isspace(c, stream.getloc()))
                throw syntax_error("Beginning quote must not be followed by a whitespace.", filename, line_number);
            else
                token.append(1, c);
        }
        else
        {
            if(token.empty())
            {
                // Newlines are only counted here: a newline that ends a token in
                // progress is put back and counted when it is read again.
                if('\n' == c)
                    ++line_number;
                if(contains(special_characters, c))
                { // special characters are tokens of their own
                    debug(debug::lexer, "lexer::next_token::Detected special character.\n");
                    token.append(1, c);
                    return true;
                }
                else if('\"' == c)
                {
                    debug(debug::lexer, "lexer::next_token::Quote detected.\n");
                    inside_quotes = true;
                    continue;
                }
                else if(std::isspace(c, stream.getloc()))
                    continue;
                else if('#' == c)
                { // skip over comments
                    debug(debug::lexer, "lexer::next_token::Skipping over comments.\n");
                    string temp;
                    std::getline(stream, temp);
                    // getline() swallowed the newline; hand it back so it is
                    // counted and, if it is a special character, tokenised.
                    stream.putback('\n');
                }
                else if('\\' == c)
                {
                    // Line continuation: drop the remainder of this line,
                    // including its newline, and account for it.
                    string temp;
                    std::getline(stream, temp);
                    ++line_number;
                }
                else
                    token.append(1, c);
            }
            else if(std::isspace(c, stream.getloc()) || contains(special_characters, c))
            { // special characters or whitespace seperate tokens
                debug(debug::lexer, "lexer::next_token::Detected special character or space.\n");
                stream.putback(c);
                break;
            }
            //else if('\"' == c)
            //  throw syntax_error("Beginning quotes must be preceded by a whitespace or a special character.", filename, line_number);
            else
                token.append(1, c);
        }
    }
    if(!token.empty())
        debug(debug::lexer, "lexer::next_token:Token extracted: \'", output_form(token), "\'\n");

    return !token.empty();
}
/**
 * Rewinds the lexer to the state saved at the start of the most recent
 * next_token() call, so that the same token is read again by the next call.
 *
 * Restores both the stream position and the line number.
 **/
void lexer::previous_token()
{
    debug(debug::lexer, "lexer::previous_token::Resetting input stream position.\n");
    // next_token() reads until stream.get() fails when a token ends at the end
    // of the stream, which leaves failbit (and eofbit) set. seekg() does nothing
    // while failbit is set, so the rewind would be silently lost. Clear those
    // two flags first; badbit is kept so a real I/O error is not hidden.
    stream.clear(stream.rdstate() & ~(std::ios_base::failbit | std::ios_base::eofbit));
    stream.seekg(previous_position);
    line_number = previous_line_number;
}
/**
 * Reads the next item of a list into "token".
 *
 * Tokens are read with the newline-aware delimiter set:
 *  - "\n" ends the list,
 *  - "(" starts an inner list conditional, which is evaluated (and may skip the
 *    item that follows it) before reading continues,
 *  - "}" without a matching open brace is a syntax error,
 *  - anything else is a list item.
 *
 * Returns true when "token" holds a list item, false when the list has ended,
 * either at a newline or because the stream ran out of tokens.
 *
 * Throws syntax_error on an unexpected "}".
 **/
bool lexer::next_list_token(const configuration& configuration,
                            string& token)
{
    debug(debug::lexer, "lexer::next_list_token::reading next list item.\n");
    // NOTE(review): nothing in this function increments the counter (a "{" token
    // is returned to the caller as a normal item), so every "}" seen here
    // currently raises the error below.
    size_t curly_braces_count = 0;

    while(next_token(token, special_characters_newline))
    {
        debug(debug::lexer, "lexer::next_list_token::token: ", output_form(token), ".\n");
        if("\n" == token)
        {
            debug(debug::lexer, "lexer::next_list_token::End of list.\n");
            return false; // list has ended
        }
        else if("(" == token)
            process_inner_list_conditional(configuration);
        else if("}" == token)
        {
            if(curly_braces_count == 0)
                throw syntax_error("Unexpected closing curly brace.", filename, line_number);
            --curly_braces_count;
        }
        else // normal list item
            return true;
    }
    if(curly_braces_count != 0)
        throw syntax_error("Unclosed curly braces in list.", filename, line_number);

    // The stream ran out of tokens: there is no item in "token". Reporting true
    // here would make loops of the form while(next_list_token(...)) spin forever
    // on a list that is terminated by the end of the file instead of a newline.
    return false;
}
bool lexer::test_condition(const function<bool(const string&)>& config_contains)
{
bool result = true;
bool empty_conditional = true;
///
// - each set of parenthesis is handled recursively
// - logical AND: +
// - logical OR: |
// - logical NOT: ! TODO!!!!!!
// - two bools: "result" and "current"
// - "result" keeps global result, and is modified by "+"
// - "current" keeps results for "|" and "!"
// - syntax checking for invalid
///
string token;
bool previous_was_operator = false; // only for |!+
bool negate = false;
conditional_operator op = conditional_operator::left_parenthesis;
while(next_token(token, special_characters_conditional))
{
if(token == "(")
{
debug(debug::conditional, "lexer::test_condition::Found opening parenthesis \'(\';\n");
if(previous_was_operator)
{
switch(op)
{
case conditional_operator::not_op:
throw syntax_error("Not operator not implemented yet.", filename, line_number);
case conditional_operator::or_op:
result = result || test_condition(config_contains);
break;
case conditional_operator::and_op:
throw syntax_error( "And operator not implemented yet.",
filename, line_number );
default:
throw std::logic_error("lexer::test_condition:Operator " + conditional_operator_map_inverse.at(op) + " unexpected.");
}
}
else
throw syntax_error("Opening parenthesis must be preceded by a conditional operator.", filename, line_number);
}
else if(token == ")")
{
debug(debug::conditional, "lexer::test_condition:Detected closing parenthesis. Returning ", to_string(result), ".\n");
if(empty_conditional)
throw syntax_error("Empty conditional statement.", filename, line_number);
return result;
}
else if(map_value(conditional_operator_map, token, op))
{
debug(debug::conditional, "lexer::test_condition::Found conditional operator ", token, ".\n");
if(op == conditional_operator::not_op)
negate = !negate;
else if(previous_was_operator)
{
throw syntax_error("Expected config item after conditional operator " + conditional_operator_map_inverse.at(op) + " unexpected.", filename, line_number);
}
else
previous_was_operator = true;
}
else // "token" is a config string
{
debug(debug::conditional, "lexer::test_condition:Testing config string ", token, " with operator ", conditional_operator_map_inverse.at(op), ".\n");
empty_conditional = false;
switch(op)
{
case conditional_operator::left_parenthesis:
result = config_contains(token);
break;
case conditional_operator::or_op:
result = result || config_contains(token);
break;
case conditional_operator::and_op:
result = result && config_contains(token);
break;
default:
throw std::logic_error("lexer::test_condition:Operator " + conditional_operator_map_inverse.at(op) + " not expected." );
}
debug(debug::conditional, "lexer::test_condition:Current condition state is ", to_string(result), ".\n");
}
}
return result;
}
void lexer::process_outer_conditional(const configuration& configuration)
{
// processes target and fill
if(test_condition([&configuration](const string& item) {return contains(configuration.config_strings, item); }))
debug(debug::parser, "lexer::process_outer_conditional::condition returned true, nothing to skip.\n");
else
{
debug(debug::parser, "lexer::process_outer_conditional::conditional returned false, skipping whole target.\n");
string token;
while(next_token(token))
{
if(token == "{")
break;
}
debug(debug::parser, "lexer::process_outer_conditional::Found opening brace of target.\n");
const size_t begin_line_number = line_number; // store line number for error reporting in case of unmatched braces
size_t curly_braces = 1;
while(curly_braces > 0 && next_token(token))
{
if(token == "{")
++curly_braces;
else if(token == "}")
--curly_braces;
debug(debug::parser, "lexer::process_outer_conditional::Curly brace level: ", curly_braces, ".\n");
}
if(curly_braces != 0)
throw syntax_error("Did not find matching curly braces to match here.", filename, begin_line_number);
}
debug(debug::parser, "lexer::process_outer_conditional::Finished with outer conditional.\n");
}
/**
 * Evaluates the conditional attached to a dependency entry against the config
 * strings of "configuration". When it is false, the single token that follows
 * is read and thrown away.
 *
 * Throws syntax_error when there is no token left to skip.
 **/
void lexer::process_dependency_set_conditional(const configuration& configuration)
{
    const auto in_config = [&configuration](const string& item)
    {
        return contains(configuration.config_strings, item);
    };

    const bool condition_holds = test_condition(in_config);
    if(condition_holds)
    {
        debug(debug::parser, "lexer::process_dependency_set_conditional::Condition returned true, nothing to skip.\n");
    }
    else
    {
        string skipped;
        const bool found = next_token(skipped);
        if(!found)
        {
            throw syntax_error("Expected dependency name after dependency type.", filename, line_number);
        }
    }

    debug(debug::parser, "lexer::process_dependency_set_conditional::Finished with dependency set conditional.\n");
}
/**
 * Evaluates a conditional against the config strings of "configuration". When
 * it is false, list items are read and ignored until next_list_token()
 * reports that there are no more.
 **/
void lexer::process_inner_conditional(const configuration& configuration)
{
    debug(debug::parser, "lexer::process_inner_conditional::Using target config:\n", configuration.config_strings, "\n");

    const auto in_config = [&configuration](const string& item)
    {
        return contains(configuration.config_strings, item);
    };

    if(test_condition(in_config))
    {
        debug(debug::parser, "lexer::process_inner_conditional::condition returned true, nothing to skip.\n");
        return;
    }

    debug(debug::parser, "lexer::process_inner_conditional::conditional returned false, skipping all relevant parts.\n");
    string token;
    for(;;)
    {
        if(!next_list_token(configuration, token))
        {
            break;
        }
        // Anything in the list that follows a false conditional is dropped.
        debug(debug::parser, "lexer::process_inner_conditional::ignoring token: \'", token, "\'.\n");
    }
}
/**
 * Evaluates a conditional that appears inside a list against the config
 * strings of "configuration". When it is false, the next list item is read and
 * thrown away.
 *
 * Throws syntax_error when next_list_token() finds no item to skip.
 **/
void lexer::process_inner_list_conditional(const configuration& configuration)
{
    debug(debug::parser, "lexer::process_inner_list_conditional::Using target config:\n", configuration.config_strings, "\n");

    const auto in_config = [&configuration](const string& item)
    {
        return contains(configuration.config_strings, item);
    };

    if(test_condition(in_config))
    {
        debug(debug::parser, "lexer::process_inner_list_conditional::condition returned true, nothing to skip.\n");
        return;
    }

    debug(debug::parser, "lexer::process_inner_list_conditional::conditional returned false, skipping list item.\n");
    string skipped_item;
    const bool item_found = next_list_token(configuration, skipped_item);
    if(!item_found)
    {
        throw syntax_error("A list must not be empty.", filename, line_number);
    }
}
} // namespace lib
} // namespace ambrosia