/
utils.ts
237 lines (220 loc) · 7.72 KB
/
utils.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import type {
Production,
ProductionBody,
RightHandSide,
OneOfList,
SymbolSpan,
LexicalSymbol,
Terminal,
Nonterminal,
ButNotSymbol,
OneOfSymbol,
ArgumentList,
} from 'grammarkdown';
import type { Node as EcmarkdownNode } from 'ecmarkdown';
import { Grammar as GrammarFile, SyntaxKind, skipTrivia, NodeVisitor } from 'grammarkdown';
import * as emd from 'ecmarkdown';
export function getProductions(grammar: GrammarFile) {
let productions: Map<
string,
{ production: Production; rhses: (RightHandSide | OneOfList)[] }
> = new Map();
grammar.rootFiles.forEach(f =>
f.elements.forEach(e => {
if (e.kind !== SyntaxKind.Production) {
// The alternatives supported by Grammarkdown are imports and defines, which ecma-262 does not use.
throw new Error('Grammar contains non-production node ' + JSON.stringify(e));
}
if (typeof e.body === 'undefined') {
throw new Error('production lacks body ' + JSON.stringify(e));
}
if (!productions.has(e.name.text!)) {
productions.set(e.name.text!, { production: e as Production, rhses: [] });
}
productions.get(e.name.text!)!.rhses.push(...productionBodies(e.body));
})
);
return productions;
}
function productionBodies(body: ProductionBody) {
switch (body.kind) {
case SyntaxKind.RightHandSideList:
return body.elements!;
case SyntaxKind.OneOfList:
case SyntaxKind.RightHandSide:
return [body];
default:
// @ts-ignore
throw new Error('unknown production body type ' + body.constructor.name);
}
}
// these "matches" functions are not symmetric:
// the first parameter is permitted to omit flags and _opt nodes present on the second, but not conversely
export function rhsMatches(a: RightHandSide | OneOfList, b: RightHandSide | OneOfList) {
if (a.kind !== b.kind) {
return false;
}
switch (a.kind) {
case SyntaxKind.RightHandSide: {
let aHead = a.head;
let bHead = (b as RightHandSide).head;
if (aHead === undefined || bHead === undefined) {
throw new Error('RHS must have content');
}
if (aHead.symbol.kind === SyntaxKind.EmptyAssertion) {
if (aHead.next !== undefined) {
throw new Error('empty assertions should not have other content');
}
return bHead.symbol.kind === SyntaxKind.EmptyAssertion || canBeEmpty(bHead);
}
return symbolSpanMatches(aHead, bHead);
}
default:
throw new Error('unknown rhs type ' + a.constructor.name);
}
}
function symbolSpanMatches(a: SymbolSpan | undefined, b: SymbolSpan | undefined): boolean {
if (a === undefined) {
return canBeEmpty(b);
}
if (a !== undefined && b !== undefined && symbolMatches(a.symbol, b.symbol)) {
return symbolSpanMatches(a.next, b.next);
}
// sometimes when there is an optional terminal or nonterminal we give distinct implementations for each case, rather than one implementation which represents both
// which means both `a b c` and `a c` must match `a b? c`
// TODO reconsider whether ECMA-262 should have these
if (b !== undefined && canSkipSymbol(b.symbol)) {
return symbolSpanMatches(a, b.next);
}
return false;
}
function canBeEmpty(b: SymbolSpan | undefined): boolean {
return b === undefined || (canSkipSymbol(b.symbol) && canBeEmpty(b.next));
}
function canSkipSymbol(a: LexicalSymbol) {
return (
a.kind === SyntaxKind.NoSymbolHereAssertion ||
a.kind === SyntaxKind.LookaheadAssertion ||
a.kind === SyntaxKind.ProseAssertion ||
(a as Terminal | Nonterminal).questionToken !== undefined
);
}
function symbolMatches(a: LexicalSymbol, b: LexicalSymbol): boolean {
if (a.kind !== b.kind) {
return false;
}
switch (a.kind) {
case SyntaxKind.Terminal:
return a.literal.text === (b as Terminal).literal.text;
case SyntaxKind.Nonterminal:
if (a.argumentList !== undefined) {
if ((b as Nonterminal).argumentList === undefined) {
return false;
}
if (!argumentListMatches(a.argumentList, (b as Nonterminal).argumentList!)) {
return false;
}
}
return a.name.text === (b as Nonterminal).name.text;
case SyntaxKind.ButNotSymbol:
if (a.right === undefined || (b as ButNotSymbol).right === undefined) {
throw new Error('"but not" production cannot be empty');
}
return (
symbolMatches(a.left, (b as ButNotSymbol).left) &&
symbolMatches(a.right, (b as ButNotSymbol).right!)
);
case SyntaxKind.EmptyAssertion:
case SyntaxKind.LookaheadAssertion:
case SyntaxKind.ProseAssertion:
return true;
case SyntaxKind.OneOfSymbol:
if (a.symbols === undefined || (b as OneOfSymbol).symbols === undefined) {
throw new Error('"one of" production cannot be empty');
}
return (
a.symbols.length === (b as OneOfSymbol).symbols!.length &&
a.symbols.every((s, i) => symbolMatches(s, (b as OneOfSymbol).symbols![i]))
);
default:
throw new Error('unknown symbol type ' + a.constructor.name);
}
}
function argumentListMatches(a: ArgumentList, b: ArgumentList) {
if (a.elements === undefined || b.elements === undefined) {
throw new Error('argument lists must have elements');
}
return (
a.elements.length === b.elements.length &&
a.elements.every((ae, i) => {
let be = b.elements![i];
if (ae.operatorToken === undefined || be.operatorToken === undefined) {
throw new Error('arguments must have operators');
}
if (ae.name === undefined || be.name === undefined) {
throw new Error('arguments must have names');
}
return ae.operatorToken.kind === be.operatorToken.kind && ae.name.text === be.name.text;
})
);
}
// this is only for use with single-file grammars
export function getLocationInGrammar(grammar: GrammarFile, pos: number) {
let file = grammar.sourceFiles[0];
let posWithoutWhitespace = skipTrivia(file.text, pos, file.text.length);
let { line: gmdLine, character: gmdCharacter } = file.lineMap.positionAt(posWithoutWhitespace);
// grammarkdown use 0-based line and column, we want 1-based
return { line: gmdLine + 1, column: gmdCharacter + 1 };
}
class CollectNonterminalsFromGrammar extends NodeVisitor {
declare grammar: GrammarFile;
declare results: { name: string; loc: { line: number; column: number } }[];
constructor(grammar: GrammarFile) {
super();
this.grammar = grammar;
this.results = [];
}
visitProduction(node: Production): Production {
this.results.push({
name: node.name.text!,
loc: getLocationInGrammar(this.grammar, node.name.pos),
});
return super.visitProduction(node);
}
visitNonterminal(node: Nonterminal): Nonterminal {
this.results.push({
name: node.name.text!,
loc: getLocationInGrammar(this.grammar, node.name.pos),
});
return super.visitNonterminal(node);
}
}
export function collectNonterminalsFromGrammar(grammar: GrammarFile) {
let visitor = new CollectNonterminalsFromGrammar(grammar);
grammar.rootFiles.forEach(f => {
visitor.visitEach(f.elements);
});
return visitor.results;
}
export function collectNonterminalsFromEmd(
emdNode: EcmarkdownNode | EcmarkdownNode[]
): { name: string; loc: { line: number; column: number } }[] {
if (Array.isArray(emdNode)) {
return emdNode.flatMap(collectNonterminalsFromEmd);
}
let results: ReturnType<typeof collectNonterminalsFromEmd> = [];
emd.visit(emdNode, {
enter(emdNode: EcmarkdownNode) {
if (emdNode.name === 'pipe') {
results.push({
name: emdNode.nonTerminal,
loc: {
line: emdNode.location.start.line,
column: emdNode.location.start.column + 1, // skip the pipe
},
});
}
},
});
return results;
}