Skip to content

Commit

Permalink
fix(parser): performance improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
KFlash committed Jun 26, 2019
1 parent 3b0ccc0 commit 62c2d6f
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 142 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "meriyah",
"version": "0.6.0",
"version": "0.6.1",
"description": "A 100% compliant, self-hosted javascript parser with high focus on both performance and stability",
"main": "dist/meriyah.umd.js",
"module": "dist/meriyah.esm.js",
Expand Down
12 changes: 7 additions & 5 deletions src/lexer/comments.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { nextCodePoint, CharTypes, CharFlags, LexerState, advanceNewline, consumeLineFeed } from './';
import { nextCP, CharTypes, CharFlags, LexerState, advanceNewline, consumeLineFeed } from './';
import { Chars } from '../chars';
import { ParserState } from '../common';
import { report, Errors } from '../errors';
Expand All @@ -9,6 +9,8 @@ import { report, Errors } from '../errors';
* @param parser Parser object
*/
export function skipHashBang(parser: ParserState): void {
// HashbangComment ::
// #! SingleLineCommentChars
let index = parser.index;
if (index === parser.end) return;
if (parser.nextCP === Chars.ByteOrderMark) {
Expand Down Expand Up @@ -41,7 +43,7 @@ export function skipSingleLineComment(parser: ParserState, state: LexerState): L
advanceNewline(parser);
return state;
}
nextCodePoint(parser);
nextCP(parser);
}
return state;
}
Expand All @@ -55,8 +57,8 @@ export function skipSingleLineComment(parser: ParserState, state: LexerState): L
export function skipMultiLineComment(parser: ParserState, state: LexerState): LexerState | void {
while (parser.index < parser.end) {
while (parser.nextCP === Chars.Asterisk) {
if (nextCodePoint(parser) === Chars.Slash) {
nextCodePoint(parser);
if (nextCP(parser) === Chars.Slash) {
nextCP(parser);
return state;
}
}
Expand All @@ -71,7 +73,7 @@ export function skipMultiLineComment(parser: ParserState, state: LexerState): Le
state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
advanceNewline(parser);
} else {
nextCodePoint(parser);
nextCP(parser);
}
}

Expand Down
6 changes: 5 additions & 1 deletion src/lexer/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export const enum LexerState {
* Advances this lexer's current index.
* @param parser The parser instance
*/
export function nextCodePoint(parser: ParserState): number {
export function nextCP(parser: ParserState): number {
  // Step both position trackers forward by exactly one UTF-16 code unit.
  parser.column += 1;
  parser.index += 1;
  // Cache the code unit now under the cursor on the parser and hand it back;
  // `charCodeAt` yields NaN once the index is past the end of the source.
  const code = parser.source.charCodeAt(parser.index);
  parser.nextCP = code;
  return code;
}
Expand All @@ -34,6 +34,10 @@ export function consumeMultiUnitCodePoint(parser: ParserState, hi: number): bool
return true;
}

/**
 * Captures a slice of the source text as the parser's raw token text.
 *
 * Copies `parser.source` from `start` (inclusive) up to the current
 * `parser.index` (exclusive) into `parser.tokenRaw`.
 *
 * @param parser The parser instance
 * @param start Index in the source where the raw text begins
 */
export function storeRaw(parser: ParserState, start: number): void {
  parser.tokenRaw = parser.source.slice(start, parser.index);
}

/**
* Use to consume a line feed instead of `advanceNewline`.
*/
Expand Down
14 changes: 7 additions & 7 deletions src/lexer/identifier.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { ParserState, Context } from '../common';
import { Token, descKeywordTable } from '../token';
import { Chars } from '../chars';
import { nextCodePoint, consumeMultiUnitCodePoint, fromCodePoint, toHex } from './';
import { nextCP, consumeMultiUnitCodePoint, fromCodePoint, toHex } from './';
import { CharTypes, CharFlags, isIdentifierPart } from './charClassifier';
import { report, Errors } from '../errors';
import { unicodeLookup } from '../unicode';
Expand All @@ -14,10 +14,10 @@ import { unicodeLookup } from '../unicode';
*/
export function scanIdentifier(parser: ParserState, context: Context): Token {
const canBeKeyword = CharTypes[parser.nextCP] & CharFlags.KeywordCandidate;
while ((CharTypes[nextCodePoint(parser)] & CharFlags.IdentifierPart) !== 0) {}
while ((CharTypes[nextCP(parser)] & CharFlags.IdentifierPart) !== 0) {}
parser.tokenValue = parser.source.slice(parser.tokenIndex, parser.index);
const hasEscape = CharTypes[parser.nextCP] & CharFlags.BackSlash;
if (!hasEscape && parser.nextCP < 0x7e) {
if ((parser.nextCP & ~0x7f) === 0 && !hasEscape) {
return descKeywordTable[parser.tokenValue] || Token.Identifier;
}

Expand Down Expand Up @@ -62,7 +62,7 @@ export function scanIdentifierSlowCase(
parser.tokenValue += fromCodePoint(code);
start = parser.index;
} else if (isIdentifierPart(parser.nextCP) || consumeMultiUnitCodePoint(parser, parser.nextCP)) {
nextCodePoint(parser);
nextCP(parser);
} else {
break;
}
Expand Down Expand Up @@ -98,7 +98,7 @@ export function scanIdentifierSlowCase(
* @param parser Parser object
*/
export function scanPrivateName(parser: ParserState): Token {
nextCodePoint(parser); // consumes '#'
nextCP(parser); // consumes '#'
if (
(CharTypes[parser.nextCP] & CharFlags.Decimal) !== 0 ||
((CharTypes[parser.nextCP] & CharFlags.IdentifierStart) === 0 &&
Expand Down Expand Up @@ -134,7 +134,7 @@ export function scanUnicodeEscapeValue(parser: ParserState): number {
let codePoint = 0;
// First handle a delimited Unicode escape, e.g. \u{1F4A9}
if (parser.nextCP === Chars.LeftBrace) {
while (CharTypes[nextCodePoint(parser)] & CharFlags.Hex) {
while (CharTypes[nextCP(parser)] & CharFlags.Hex) {
codePoint = (codePoint << 4) | toHex(parser.nextCP);
// Check this early to avoid `code` wrapping to a negative on overflow (which is
// reserved for abnormal conditions).
Expand All @@ -147,7 +147,7 @@ export function scanUnicodeEscapeValue(parser: ParserState): number {
if (codePoint < 1 || (parser.nextCP as number) !== Chars.RightBrace) {
report(parser, Errors.InvalidHexEscapeSequence);
}
nextCodePoint(parser); // consumes '}'
nextCP(parser); // consumes '}'
return codePoint;
}

Expand Down
3 changes: 2 additions & 1 deletion src/lexer/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
export { scanSingleToken, nextToken } from './scan';
export { skipMultiLineComment, skipSingleLineComment, skipHashBang } from './comments';
export {
nextCodePoint,
nextCP,
consumeMultiUnitCodePoint,
isExoticECMAScriptWhitespace,
fromCodePoint,
toHex,
storeRaw,
consumeLineFeed,
advanceNewline,
LexerState
Expand Down
62 changes: 33 additions & 29 deletions src/lexer/numeric.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { ParserState, Context, Flags } from '../common';
import { Token } from '../token';
import { nextCodePoint, toHex, CharTypes, CharFlags, isIdentifierStart } from './';
import { nextCP, toHex, CharTypes, CharFlags, isIdentifierStart, storeRaw } from './';
import { Chars } from '../chars';
import { report, Errors } from '../errors';

Expand All @@ -18,16 +18,16 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
let value: number | string = 0;

if (isFloat) {
while (CharTypes[nextCodePoint(parser)] & CharFlags.Decimal) {}
while (CharTypes[nextCP(parser)] & CharFlags.Decimal) {}
} else {
if (parser.nextCP === Chars.Zero) {
nextCodePoint(parser);
nextCP(parser);

// Hex
if ((parser.nextCP | 32) === Chars.LowerX) {
kind = NumberKind.Hex;
let digits = 0;
while (CharTypes[nextCodePoint(parser)] & CharFlags.Hex) {
while (CharTypes[nextCP(parser)] & CharFlags.Hex) {
value = value * 0x10 + toHex(parser.nextCP);
digits++;
}
Expand All @@ -36,15 +36,15 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
} else if ((parser.nextCP | 32) === Chars.LowerO) {
kind = NumberKind.Octal;
let digits = 0;
while (CharTypes[nextCodePoint(parser)] & CharFlags.Octal) {
while (CharTypes[nextCP(parser)] & CharFlags.Octal) {
value = value * 8 + (parser.nextCP - Chars.Zero);
digits++;
}
if (digits < 1) report(parser, Errors.ExpectedNumberInRadix, `${8}`);
} else if ((parser.nextCP | 32) === Chars.LowerB) {
kind = NumberKind.Binary;
let digits = 0;
while (CharTypes[nextCodePoint(parser)] & CharFlags.Binary) {
while (CharTypes[nextCP(parser)] & CharFlags.Binary) {
value = value * 2 + (parser.nextCP - Chars.Zero);
digits++;
}
Expand All @@ -60,7 +60,7 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
break;
}
value = value * 8 + (parser.nextCP - Chars.Zero);
} while (CharTypes[nextCodePoint(parser)] & CharFlags.Decimal);
} while (CharTypes[nextCP(parser)] & CharFlags.Decimal);
} else if (CharTypes[parser.nextCP] & CharFlags.ImplicitOctalDigits) {
if (context & Context.Strict) report(parser, Errors.StrictOctalEscape);
else parser.flags = Flags.Octals;
Expand All @@ -73,7 +73,7 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
if (isFloat) {
// scan subsequent decimal digits
let digit = 9;
while (digit >= 0 && CharTypes[nextCodePoint(parser)] & CharFlags.Decimal) {
while (digit >= 0 && CharTypes[nextCP(parser)] & CharFlags.Decimal) {
value = 10 * value + (parser.nextCP - Chars.Zero);
--digit;
}
Expand All @@ -86,15 +86,15 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
}

while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
nextCodePoint(parser);
nextCP(parser);
}

// Scan any decimal dot and fractional component
if (parser.nextCP === Chars.Period) {
isFloat = 1;
nextCodePoint(parser); // consumes '.'
nextCP(parser); // consumes '.'
while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
nextCodePoint(parser);
nextCP(parser);
}
}
}
Expand All @@ -108,24 +108,24 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
) {
if (isFloat) report(parser, Errors.InvalidBigInt);
isBigInt = 1;
nextCodePoint(parser);
nextCP(parser);
// Scan any exponential notation
} else if ((parser.nextCP | 32) === Chars.LowerE) {
if ((kind & (NumberKind.Decimal | NumberKind.DecimalWithLeadingZero)) === 0) {
report(parser, Errors.MissingExponent);
}

nextCodePoint(parser);
nextCP(parser);

// '-', '+'
if (CharTypes[parser.nextCP] & CharFlags.Exponent) {
nextCodePoint(parser);
nextCP(parser);
}

let exponentDigits = 0;
// Consume exponential digits
while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
nextCodePoint(parser);
nextCP(parser);
exponentDigits++;
}
// Exponential notation must contain at least one digit
Expand All @@ -136,20 +136,24 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1

// The source character immediately following a numeric literal must
// not be an identifier start or a decimal digit
if (CharTypes[parser.nextCP] & CharFlags.Decimal || isIdentifierStart(parser.nextCP)) {
if ((parser.index < parser.end && CharTypes[parser.nextCP] & CharFlags.Decimal) || isIdentifierStart(parser.nextCP)) {
report(parser, Errors.IDStartAfterNumber);
}
if (context & Context.OptionsRaw) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);
parser.tokenValue =
kind & (NumberKind.ImplicitOctal | NumberKind.Binary | NumberKind.Hex | NumberKind.Octal)
? value
: kind & NumberKind.DecimalWithLeadingZero
? parseFloat(parser.source.slice(parser.tokenIndex, parser.index))
: isBigInt
? parseInt(parser.source.slice(parser.tokenIndex, parser.index), 0xa)
: +parser.source.slice(parser.tokenIndex, parser.index);

if (context & Context.OptionsRaw || isBigInt) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);

return isBigInt ? Token.BigIntLiteral : Token.NumericLiteral;

if (kind & (NumberKind.ImplicitOctal | NumberKind.Binary | NumberKind.Hex | NumberKind.Octal)) {
parser.tokenValue = value;
} else {
const raw = parser.source.slice(parser.tokenIndex, parser.index);
parser.tokenValue =
kind & NumberKind.DecimalWithLeadingZero ? parseFloat(raw) : isBigInt ? parseInt(raw, 0xa) : +raw;
}

if (isBigInt) {
storeRaw(parser, parser.tokenIndex);
return Token.BigIntLiteral;
}

if (context & Context.OptionsRaw) storeRaw(parser, parser.tokenIndex);

return Token.NumericLiteral;
}
9 changes: 4 additions & 5 deletions src/lexer/regexp.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Chars } from '../chars';
import { Context, ParserState } from '../common';
import { Token } from '../token';
import { nextCodePoint, isIdentifierPart } from './';
import { nextCP, isIdentifierPart } from './';
import { report, Errors } from '../errors';

/**
Expand All @@ -23,7 +23,7 @@ export function scanRegularExpression(parser: ParserState, context: Context): To

loop: while (true) {
const ch = parser.nextCP;
nextCodePoint(parser);
nextCP(parser);

if (preparseState & RegexState.Escape) {
preparseState &= ~RegexState.Escape;
Expand Down Expand Up @@ -71,7 +71,7 @@ export function scanRegularExpression(parser: ParserState, context: Context): To

const { index: flagStart } = parser;

loop: while (parser.index < parser.source.length) {
loop: while (isIdentifierPart(parser.nextCP)) {
switch (parser.nextCP) {
case Chars.LowerG:
if (mask & RegexFlags.Global) report(parser, Errors.DuplicateRegExpFlag, 'g');
Expand Down Expand Up @@ -104,11 +104,10 @@ export function scanRegularExpression(parser: ParserState, context: Context): To
break;

default:
if (!isIdentifierPart(parser.nextCP)) break loop;
report(parser, Errors.UnexpectedTokenRegExpFlag);
}

nextCodePoint(parser);
nextCP(parser);
}

const flags = parser.source.slice(flagStart, parser.index);
Expand Down
Loading

0 comments on commit 62c2d6f

Please sign in to comment.