Skip to content

Commit

Permalink
Basic regex literal lexer support.
Browse files Browse the repository at this point in the history
  • Loading branch information
TwitchBronBron committed Sep 24, 2021
1 parent 1818c36 commit 4c3c6ca
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 8 deletions.
22 changes: 22 additions & 0 deletions src/lexer/Lexer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1222,4 +1222,26 @@ describe('lexer', () => {
TokenKind.Eof
]);
});

describe('regular expression literals', () => {
    /**
     * Lex every source string on its own and assert that the text of the first
     * token produced for it is the entire input string.
     * @param regexps the source snippets to lex, one regex literal per string
     */
    function testRegex(...regexps: string[]) {
        const results = regexps.map((regexp) => Lexer.scan(regexp).tokens[0].text);
        expect(results).to.eql(regexps);
    }

    it('recognizes regex literals', () => {
        testRegex(
            '/simple/',
            '/SimpleWithValidFlags/imsx',
            '/UnknownFlags/VUI',
            '/with spaces/andflags',
            '/with(parens)and[squarebraces]/',
            '/*()^$@/'
        );
    });
});
});
75 changes: 67 additions & 8 deletions src/lexer/Lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,17 @@ export class Lexer {
}
},
'/': function (this: Lexer) {
switch (this.peek()) {
case '=':
this.advance();
this.addToken(TokenKind.ForwardslashEqual);
break;
default:
this.addToken(TokenKind.Forwardslash);
break;
//try capturing a regex literal. If that doesn't work, fall back to normal handling
if (!this.regexLiteral()) {
switch (this.peek()) {
case '=':
this.advance();
this.addToken(TokenKind.ForwardslashEqual);
break;
default:
this.addToken(TokenKind.Forwardslash);
break;
}
}
},
'\\': function (this: Lexer) {
Expand Down Expand Up @@ -384,6 +387,19 @@ export class Lexer {
this.columnEnd++;
}

/**
 * Saved lexer positions, most recent last. Each entry is a snapshot of
 * `current` and `columnEnd` taken by `pushLookahead`.
 */
private lookaheadStack: Array<{ current: number; columnEnd: number }> = [];

/**
 * Snapshot the current values of `current` and `columnEnd` onto the lookahead stack
 * so they can be restored later.
 */
private pushLookahead() {
    const { current, columnEnd } = this;
    this.lookaheadStack.push({ current, columnEnd });
}
/**
 * Remove the most recently saved snapshot from the lookahead stack and restore
 * `current` and `columnEnd` from it.
 * @throws if there is no saved snapshot to restore (an unbalanced call)
 */
private popLookahead() {
    const snapshot = this.lookaheadStack.pop();
    //`pop()` yields `undefined` for an empty stack; fail with a clear message instead of an opaque destructuring TypeError
    if (!snapshot) {
        throw new Error('popLookahead() was called without a matching pushLookahead()');
    }
    this.current = snapshot.current;
    this.columnEnd = snapshot.columnEnd;
}

/**
* Returns the character at position `current` or a null character if we've reached the end of
* input.
Expand Down Expand Up @@ -835,6 +851,17 @@ export class Lexer {
return candidates.includes(this.source.charAt(this.current));
}

/**
 * Consume the current character, but only when it is one of the candidates.
 * @param candidates the characters that are allowed to be consumed
 * @returns `true` if the character matched and `advance()` was called, `false` otherwise
 */
private advanceIf(...candidates: string[]) {
    const isMatch = this.check(...candidates);
    if (isMatch) {
        this.advance();
    }
    return isMatch;
}

/**
* Check the previous character
*/
Expand Down Expand Up @@ -927,6 +954,38 @@ export class Lexer {
}
}

/**
 * Try to capture a regex literal token (for example `/ab+c/gi`) starting at the current position.
 *
 * This is lookahead lexing which might technically belong in the parser,
 * but it's easy enough to do here in the lexer.
 *
 * The lexer position is saved before scanning. If no closing `/` is found before the next
 * newline (or the end of the source), the saved position is restored so nothing is consumed.
 * A backslash escapes the character that follows it, so `\/` does not terminate the literal.
 *
 * NOTE(review): presumably the opening `/` has already been consumed by the caller — confirm.
 *
 * @returns `true` if a `RegexLiteral` token was added, `false` if no regex literal was found
 */
private regexLiteral() {
    const flagChar = /[a-z]/i;
    this.pushLookahead();

    //every iteration consumes at least one character (or exits), so this loop always terminates
    while (this.current < this.source.length) {

        //a backslash escapes the next character, so skip over both
        if (this.advanceIf('\\')) {
            //never swallow a newline or run past the end of the source as part of an escape
            if (this.current < this.source.length && !this.check('\n')) {
                this.advance();
            }
            continue;
        }

        //if we reached the end of the regex, consume any flags
        if (this.advanceIf('/')) {
            //consume all flag-like chars (let the parser validate the actual values)
            while (flagChar.test(this.peek())) {
                this.advance();
            }
            //the saved position is no longer needed; discard it so the lookahead stack stays balanced
            this.lookaheadStack.pop();
            //finalize the regex literal and EXIT
            this.addToken(TokenKind.RegexLiteral);
            return true;
        }

        //if we found a newline, there's a syntax error with this regex (or it's not a regex), so quit
        if (this.check('\n')) {
            break;
        }

        this.advance();
    }

    //not a regex literal: rewind to where we started
    this.popLookahead();
    return false;
}

/**
* Creates a `Token` and adds it to the `tokens` array.
* @param kind the type of token to produce.
Expand Down
1 change: 1 addition & 0 deletions src/lexer/TokenKind.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ export enum TokenKind {
DoubleLiteral = 'DoubleLiteral',
LongIntegerLiteral = 'LongIntegerLiteral',
EscapedCharCodeLiteral = 'EscapedCharCodeLiteral', //this is used to capture things like `\n`, `\r\n` in template strings
RegexLiteral = 'RegexLiteral',

//types
Void = 'Void',
Expand Down

0 comments on commit 4c3c6ca

Please sign in to comment.