Skip to content

Commit

Permalink
fix(lexer): optimized WS skipping and comment scanning
Browse files Browse the repository at this point in the history
  • Loading branch information
KFlash committed May 31, 2019
1 parent de7d970 commit 9f85539
Show file tree
Hide file tree
Showing 12 changed files with 249 additions and 110 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "meriyah",
"version": "0.2.0",
"version": "0.2.1",
"description": "A 100% compliant, self-hosted javascript parser with high focus on both performance and stability",
"main": "dist/meriyah.umd.js",
"module": "dist/meriyah.esm.js",
Expand Down
42 changes: 26 additions & 16 deletions src/lexer/comments.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { nextCodePoint, CharTypes, CharFlags } from './';
import { nextCodePoint, CharTypes, CharFlags, ScannerState, Seek } from './';
import { Chars } from '../chars';
import { Token } from '../token';
import { ParserState, Flags } from '../common';
Expand All @@ -22,7 +22,7 @@ export function skipHashBang(parser: ParserState): void {
if (index < parser.end && parser.source.charCodeAt(index) === Chars.Exclamation) {
parser.index = index + 1;
parser.currentCodePoint = parser.source.charCodeAt(parser.index);
skipSingleLineComment(parser);
skipSingleLineComment(parser, ScannerState.None);
} else {
report(parser, Errors.IllegalCaracter, '#');
}
Expand All @@ -34,44 +34,54 @@ export function skipHashBang(parser: ParserState): void {
*
* @param parser Parser object
*/
// Skips the remainder of a single-line comment, starting at parser.index.
// This span is shown as a diff: the two signature lines below are two versions of
// the same function (presumably the one-argument form returning Token is the removed
// line and the form threading ScannerState through is its replacement).
export function skipSingleLineComment(parser: ParserState): Token {
export function skipSingleLineComment(parser: ParserState, state: ScannerState): ScannerState {
while (parser.index < parser.end) {
// A line terminator ends the comment: either a code point flagged LineTerminator in
// CharTypes, or one equal to Chars.LineSeparator up to its lowest bit
// (presumably U+2028 / U+2029 - confirm against Chars).
if (
CharTypes[parser.currentCodePoint] & CharFlags.LineTerminator ||
(parser.currentCodePoint ^ Chars.LineSeparator) <= 1
) {
// One version just leaves the loop with the terminator unconsumed.
break;
// The other version consumes the terminator here: flags the newline on the parser,
// resets the column, bumps the line count and advances by one code unit.
parser.flags |= Flags.NewLine;
parser.column = 0;
parser.line++;
parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
// NOTE(review): `state` is returned unchanged - neither ScannerState.NewLine nor
// ScannerState.LastIsCR is set, unlike the newline handling in scanSingleToken and
// skipMultiLineComment. A caller testing `state & ScannerState.NewLine` will not see
// this line break, and the LF of a CR LF pair looks like it would be counted as a
// second line by the caller - confirm whether that is intended.
return state;
}
nextCodePoint(parser);
}
// Reached the end of input without meeting a line terminator (again, two versions of
// the final return are shown).
return Token.WhiteSpace;
return state;
}

/**
* Skips multiline comment
*
* @param parser Parser object
*/
export function skipMultiLineComment(parser: ParserState): any {
export function skipMultiLineComment(parser: ParserState, state: ScannerState): any {
while (parser.index < parser.end) {
while (CharTypes[parser.currentCodePoint] & CharFlags.Asterisk) {
if (nextCodePoint(parser) === Chars.Slash) {
nextCodePoint(parser);
return Token.WhiteSpace;
return state;
}
}

// ES 2020 11.3 Line Terminators
if (
CharTypes[parser.currentCodePoint] & CharFlags.LineTerminator ||
(parser.currentCodePoint ^ Chars.LineSeparator) <= 1
) {
if (
CharTypes[parser.currentCodePoint] & CharFlags.CarriageReturn &&
CharTypes[parser.source.charCodeAt(parser.index + 1)] & CharFlags.LineFeed
) {
parser.index++;
if (CharTypes[parser.currentCodePoint] & CharFlags.LineTerminator) {
if (CharTypes[parser.currentCodePoint] & CharFlags.CarriageReturn) {
state |= ScannerState.NewLine | ScannerState.LastIsCR;
parser.column = 0;
parser.line++;
} else {
if (state & ScannerState.LastIsCR) {
parser.column = 0;
parser.line++;
}
state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
}
parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
parser.flags |= Flags.NewLine;
} else if ((parser.currentCodePoint ^ Chars.LineSeparator) <= 1) {
state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
parser.column = 0;
parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
parser.line++;
Expand Down
12 changes: 12 additions & 0 deletions src/lexer/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,18 @@ import { ParserState } from '../common';
import { unicodeLookup } from '../unicode';
import { report, Errors } from '../errors';

/**
 * Three-valued seek mode.
 *
 * Members keep their declaration-order numbering (0, 1, 2), spelled out
 * explicitly so the values are visible at a glance.
 *
 * NOTE(review): presumably describes whether a seek stayed on the same line or
 * crossed a line break - confirm against the call sites.
 */
export const enum Seek {
  None = 0,
  SameLine = 1,
  NewLine = 2
}
/**
 * Bit flags carried through the scanner while whitespace and comments are skipped.
 *
 * Each member occupies its own bit, so flags can be combined with `|` and
 * cleared with `& ~flag`.
 */
export const enum ScannerState {
  /** No flags set. */
  None = 0b000,
  /** OR'ed in by the scanner's newline paths when a line terminator is consumed. */
  NewLine = 0b001,
  /** NOTE(review): presumably "still on the same line" - confirm against the call sites. */
  SameLine = 0b010,
  /** Set when a carriage return is consumed; cleared again on the next line feed or line/paragraph separator. */
  LastIsCR = 0b100
}

/**
* Advances this lexer's current index.
* @param parser The parser instance
Expand Down
10 changes: 9 additions & 1 deletion src/lexer/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
export { scanSingleToken, nextToken } from './scan';
export { skipMultiLineComment, skipSingleLineComment, skipHashBang } from './comments';
export { nextCodePoint, consumeMultiUnitCodePoint, isExoticECMAScriptWhitespace, fromCodePoint, toHex } from './common';
export {
nextCodePoint,
consumeMultiUnitCodePoint,
isExoticECMAScriptWhitespace,
fromCodePoint,
toHex,
ScannerState,
Seek
} from './common';
export { CharTypes, CharFlags, isIdentifierStart, isIdentifierPart } from './charClassifier';
export { scanIdentifier, scanPrivateName, scanUnicodeEscapeValue } from './identifier';
export { scanString } from './string';
Expand Down
54 changes: 30 additions & 24 deletions src/lexer/scan.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { skipSingleLineComment, skipMultiLineComment } from './comments';
import { skipSingleLineComment, skipMultiLineComment, ScannerState, Seek } from './';
import { CharTypes, CharFlags } from './charClassifier';
import { Chars } from '../chars';
import { Token } from '../token';
Expand Down Expand Up @@ -27,11 +27,11 @@ import {
* StringLiteral: 34, 39: '"', `'`
* NumericLiteral: 48, 49..57: '0'..'9'
* WhiteSpace: 9, 11, 12, 32: '\t', '\v', '\f', ' '
* LineTerminator: 10, 13: '\n', '\r'
* LineFeed: 10: '\n'
* CarriageReturn: 13: '\r'
* Template: 96: '`'
*/

export const OneCharToken = [
export const TokenLookup = [
/* 0 - Null */ Token.Illegal,
/* 1 - Start of Heading */ Token.Illegal,
/* 2 - Start of Text */ Token.Illegal,
Expand All @@ -42,10 +42,10 @@ export const OneCharToken = [
/* 7 - Bell */ Token.Illegal,
/* 8 - Backspace */ Token.Illegal,
/* 9 - Horizontal Tab */ Token.WhiteSpace,
/* 10 - Line Feed */ Token.LineTerminator,
/* 10 - Line Feed */ Token.LineFeed,
/* 11 - Vertical Tab */ Token.WhiteSpace,
/* 12 - Form Feed */ Token.WhiteSpace,
/* 13 - Carriage Return */ Token.LineTerminator,
/* 13 - Carriage Return */ Token.CarriageReturn,
/* 14 - Shift Out */ Token.Illegal,
/* 15 - Shift In */ Token.Illegal,
/* 16 - Data Line Escape */ Token.Illegal,
Expand Down Expand Up @@ -169,15 +169,15 @@ export function nextToken(parser: ParserState, context: Context): void {
}

export function scanSingleToken(parser: ParserState, context: Context): Token {
let isStartOfLine = parser.index === 0;

let state = ScannerState.None;
const isStartOfLine = parser.index === 0;
while (parser.index < parser.end) {
parser.tokenIndex = parser.index;

const first = parser.currentCodePoint;

if (first <= 0x7e) {
const token = OneCharToken[first];
const token = TokenLookup[first];

switch (token) {
// Look for an unambiguous single-char token
Expand All @@ -201,19 +201,26 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
case Token.WhiteSpace:
nextCodePoint(parser);
break;
// Line terminators
case Token.LineTerminator:

case Token.CarriageReturn:
parser.flags |= Flags.NewLine;
if (
CharTypes[first] & CharFlags.CarriageReturn &&
CharTypes[parser.source.charCodeAt(parser.index + 1)] & CharFlags.LineFeed
) {
parser.index++;
}

state |= ScannerState.NewLine | ScannerState.LastIsCR;

parser.column = 0;
parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
parser.line++;
break;
case Token.LineFeed:
parser.flags |= Flags.NewLine;

if ((state & ScannerState.LastIsCR) === 0) {
parser.column = 0;
parser.line++;
}
state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
break;
// Look for an identifier.
case Token.Identifier:
return scanIdentifier(parser, context);
Expand Down Expand Up @@ -298,11 +305,11 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
nextCodePoint(parser);
if (
(context & Context.Module) === 0 &&
(isStartOfLine || parser.flags & Flags.NewLine) &&
(state & ScannerState.NewLine || isStartOfLine) &&
parser.currentCodePoint === Chars.GreaterThan
) {
if ((context & Context.OptionsWebCompat) === 0) report(parser, Errors.HtmlCommentInWebCompat);
skipSingleLineComment(parser);
state = skipSingleLineComment(parser, state);
continue;
}

Expand All @@ -323,12 +330,12 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
const ch = parser.currentCodePoint;
if (ch === Chars.Slash) {
nextCodePoint(parser);
skipSingleLineComment(parser);
state = skipSingleLineComment(parser, state);
continue;
} else if (ch === Chars.Asterisk) {
nextCodePoint(parser);
skipMultiLineComment(parser);
break;
state = skipMultiLineComment(parser, state);
continue;
} else if (context & Context.AllowRegExp) {
return scanRegularExpression(parser, context);
} else if (ch === Chars.EqualSign) {
Expand Down Expand Up @@ -366,7 +373,7 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
parser.source.charCodeAt(parser.index + 1) === Chars.Hyphen &&
parser.source.charCodeAt(parser.index + 2) === Chars.Hyphen
) {
skipSingleLineComment(parser);
state = skipSingleLineComment(parser, state);
continue;
}

Expand Down Expand Up @@ -485,6 +492,7 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
} else {
if ((first ^ Chars.LineSeparator) <= 1) {
parser.flags |= Flags.NewLine;
state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
parser.column = 0;
parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
parser.line++;
Expand All @@ -501,8 +509,6 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
// Invalid ASCII code point/unit
report(parser, Errors.IllegalCaracter, fromCodePoint(first));
}

isStartOfLine = false;
}
return Token.EOF;
}
9 changes: 1 addition & 8 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3781,14 +3781,7 @@ function parseRestOrSpreadElement(
parser.assignable = AssignmentKind.IsAssignable;
destructible |= parser.token === Token.AwaitKeyword ? DestructuringKind.Await : 0;

argument = parsePrimaryExpressionExtended(
parser,
context,
type,
/* inNewExpression */ 0,
/* assignable */ 1,
tokenIndex
);
argument = parsePrimaryExpressionExtended(parser, context, type, 0, 1, tokenIndex);

const { token } = parser;

Expand Down
5 changes: 3 additions & 2 deletions src/token.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,12 @@ export const enum Token {
BigIntLiteral = 122,
WhiteSpace = 124,
Illegal = 129,
LineTerminator = 130,
CarriageReturn = 130,
PrivateField = 131,
Template = 132,
Decorator = 133,
Target = 134 | IsIdentifier
Target = 134 | IsIdentifier,
LineFeed = 135,
}

export const KeywordDescTable = [
Expand Down
36 changes: 18 additions & 18 deletions test/lexer/skiphashbang.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,59 +96,59 @@ describe('Lexer - skiphashbang', () => {
pass('skips a shebang+LF before a lone hash', {
source: '#!/foo/bar/baz -abc\n# foo',
hasNext: true,
newLine: false,
newLine: true,
value: '',
index: 19,
index: 20,
line: 2,
column: 17
column: 0
});

pass('skips a shebang+LF in an otherwise empty source', {
source: '#!/foo/bar/baz -abc\n',
newLine: false,
newLine: true,
hasNext: false,
value: '',
index: 19,
index: 20,
line: 2,
column: 17
column: 0
});

pass('skips a shebang+LF before an identifier', {
source: '#!/foo/bar/baz -abc\nfoo',
newLine: false,
newLine: true,
hasNext: false,
value: '',
index: 19,
index: 20,
line: 2,
column: 17
column: 0
});

pass('skips a shebang+LF before a lone exclamation', {
source: '#!/foo/bar/baz -abc\n! foo',
newLine: false,
newLine: true,
hasNext: false,
value: '',
index: 19,
index: 20,
line: 2,
column: 17
column: 0
});

pass('skips a shebang+CR in an otherwise empty source', {
source: '#!/foo/bar/baz -abc\r',
newLine: false,
newLine: true,
hasNext: false,
value: '',
index: 19,
index: 20,
line: 2,
column: 17
column: 0
});
pass('skips a BOM+shebang+LF in an otherwise empty source', {
source: '\uFFEF#!/foo/bar/baz -abc\n',
newLine: false,
newLine: true,
hasNext: false,
value: '',
index: 20,
index: 21,
line: 2,
column: 17
column: 0
});
});
2 changes: 1 addition & 1 deletion test/lexer/whitespace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ describe('Lexer - Whitespace', () => {
value: '',
index: 26,
line: 2,
column: 5
column: 25
});

pass('skips multiline comments with Windows newlines', {
Expand Down
4 changes: 4 additions & 0 deletions test/parser/expressions/group.ts
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,10 @@ describe('Expressions - Group', () => {
['({a} += 0);', Context.None],
['({a,,} = 0)', Context.None],
['({,a,} = 0)', Context.None],
['({a, ...b, c} = {})', Context.None],
['({a = 5})', Context.None],
['({ ...{a} } = {})', Context.None],
['({b, c, d, ...{a} } = {})', Context.None],
['({a,,a} = 0)', Context.None],
['({function} = 0)', Context.None],
['({a:function} = 0)', Context.None],
Expand Down

0 comments on commit 9f85539

Please sign in to comment.