Permalink
Newer
100644
230 lines (209 sloc)
7.66 KB
1
class RecursiveDescentParser {
2
constructor(tokens) {
3
this.tokens = tokens;
4
this.tokenIndex = 0;
5
}
6
7
// We'll call this helper method to throw a parse error when we encounter
8
// ill-formed input.
9
parseError(message) {
10
let location = this.tokens[this.tokenIndex].location;
11
let err = new Error("Line " + location.line + ", column " + location.column + ": " + message);
18
}
19
20
get currentTokenValue() {
21
return this.tokens[this.tokenIndex].value;
22
}
23
24
expect(tokenType) {
25
if (this.currentToken === tokenType) {
26
let value = this.currentTokenValue;
27
this.tokenIndex++;
28
return value;
29
} else {
30
this.parseError("Unexpected " + this.currentToken + " token; expected " + tokenType);
31
}
32
}
33
34
parseProgram() {
35
let definitions = [];
36
while (this.currentToken !== 'EOF') {
37
definitions.push(this.parseDefinition());
38
}
39
return definitions;
40
}
41
42
parseDefinition() {
43
switch (this.currentToken) {
44
case 'var': {
45
this.tokenIndex++;
46
let name = this.expect('identifier');
47
this.expect('=');
48
let expression = this.parseExpression();
49
if (this.currentToken !== ';') {
50
this.parseError("Unexpected " + this.currentToken + " token; expected infix operator or ';'");
51
}
52
this.tokenIndex++;
53
return {kind: 'variable definition', name: name, body: expression};
54
}
55
56
case 'def': {
57
this.tokenIndex++;
58
let name = this.expect('identifier');
59
let parameters = this.parseParameterList();
60
this.expect('=');
61
let expression = this.parseExpression();
62
if (this.currentToken !== ';') {
63
this.parseError("Unexpected " + this.currentToken + " token; expected infix operator or ';'");
64
}
65
this.tokenIndex++;
66
return {kind: 'function definition', name: name, parameters: parameters, body: expression};
67
}
68
69
default:
70
this.parseError("Unexpected " + this.currentToken + " token; expected 'var', 'def' or end of input");
71
}
72
}
73
74
parseParameterList() {
75
this.expect('(');
76
if (this.currentToken === ')') {
77
// Move the cursor beyond the ')'
78
this.tokenIndex++;
79
return [];
80
} else {
81
let params = [ this.expect('identifier') ];
82
while (this.currentToken !== ')') {
83
if (this.currentToken !== ',') {
84
this.parseError("Unexpected " + this.currentToken + " token; expected ',' or ')'");
85
}
86
this.tokenIndex++;
87
params.push(this.expect('identifier'));
88
}
89
// Move the cursor beyond the ')'
90
this.tokenIndex++;
91
return params;
92
}
93
}
94
96
// Use the shunting yard algorithm to handle infix operators, but recursive descent for anything
97
// else. This allows the simplest possible implementation of shunting yard while removing the
98
// ugliest part of recursive descent parsing (which needs an extra function for each level of
99
// precedence when parsing infix expressions, whereas shunting yard just needs a table of operators).
100
const infixOperators = {
101
'+': {kind: '+', precedence: 1, associativity: 'left'},
102
'-': {kind: '-', precedence: 1, associativity: 'left'},
103
'*': {kind: '*', precedence: 2, associativity: 'left'},
104
'/': {kind: '/', precedence: 2, associativity: 'left'},
105
'%': {kind: '%', precedence: 2, associativity: 'left'},
106
'^': {kind: '^', precedence: 3, associativity: 'right'}
107
};
109
const operatorStack = [];
110
const outputStack = [ this.parsePrefixExpression() ];
112
function popOperator() {
113
let op = operatorStack.pop();
114
let rhs = outputStack.pop();
115
let lhs = outputStack.pop();
116
outputStack.push( { kind: op.kind, lhs: lhs, rhs: rhs } );
119
let op;
120
// While the current token is an infix operator
121
while ((op = infixOperators[this.currentToken])) {
122
while (operatorStack.length > 0 && (operatorStack[operatorStack.length - 1].precedence > op.precedence ||
123
operatorStack[operatorStack.length - 1].precedence == op.precedence && op.associativity == "left")) {
124
popOperator();
125
}
126
operatorStack.push(op);
128
outputStack.push(this.parsePrefixExpression());
129
}
130
while (operatorStack.length > 0) {
131
popOperator();
132
}
133
if (outputStack.length !== 1) {
134
throw "Internal error: outputStack did not end up with exactly one element: " + JSON.stringify(outputStack);
137
}
138
139
parsePrefixExpression() {
140
let negate = false;
141
// Unary + is a no-op, so we simply do nothing and continue with the
142
// next token. Note that we can only do this because the only type we
143
// support are numbers. If we supported types other than numbers, we'd
144
// have to keep the unary + around long enough for the type checker to
145
// produce an error if we try to use + on a string.
146
// For unary - we keep track of a flag telling us whether to negate the
147
// following expression or not (since two -s cancel each other out, we
148
// we only do something if there are an uneven number of -s).
149
while (this.currentToken === '+' || this.currentToken === '-') {
150
if (this.currentToken === '-') {
151
negate = !negate;
152
}
153
this.tokenIndex++;
154
}
155
let expression = this.parsePrimaryExpression();
156
if (negate) {
157
// The unary - is translated as a binary - with 0 as the left operand.
158
return {kind: '-', lhs: {kind: 'numberLiteral', value: 0}, rhs: expression};
159
} else {
160
return expression;
161
}
162
}
163
164
parsePrimaryExpression() {
165
switch(this.currentToken) {
166
case 'number':
167
let value = this.currentTokenValue;
168
this.tokenIndex++;
169
return {kind: 'numberLiteral', value: value};
170
171
case 'identifier':
172
let name = this.currentTokenValue;
173
this.tokenIndex++;
174
if (this.currentToken === '(') {
175
// Identifier followed by a '(' == function call
176
this.tokenIndex++;
177
if (this.currentToken === ')') {
178
// Move the cursor beyond the ')'
179
this.tokenIndex++;
180
return {kind: 'functionCall', name: name, arguments: []};
181
} else {
182
let args = [ this.parseExpression() ];
183
while (this.currentToken !== ')') {
184
if (this.currentToken !== ',') {
185
this.parseError("Unexpected " + this.currentToken + " token; expected ',', ')' or infix operator");
186
}
187
this.tokenIndex++;
188
args.push(this.parseExpression());
189
}
190
// Move the cursor beyond the ')'
191
this.tokenIndex++;
192
return {kind: 'functionCall', name: name, arguments: args};
193
}
194
} else {
195
// Otherwise it's a variable
196
return {kind: 'variable', name: name};
197
}
198
199
case '(':
200
this.tokenIndex++;
201
let expression = this.parseExpression();
202
if (this.currentToken === ')') {
203
this.tokenIndex++;
204
} else {
205
this.parseError("Unexpected " + this.currentToken + " token; expected ')' or infix operator");
206
}
207
return expression;
208
209
default:
210
this.parseError("Unexpected " + this.currentToken + " token; expected expression");
211
}
212
}
213
214
parseExpressionEOF() {
215
let expression = this.parseExpression();
216
if (this.currentToken === 'EOF') {
217
return expression;
218
} else {
219
this.parseError("Unexpected " + this.currentToken + " token; expected infix operator or end of input");
220
}
221
}
222
}
223
224
function parseExpression(tokens) {
225
return new RecursiveDescentParser(tokens).parseExpressionEOF();
226
}
227
228
function parseProgram(tokens) {
229
return new RecursiveDescentParser(tokens).parseProgram();