fix(parser): performance improvements

meriyah · Jun 26, 2019 · 62c2d6f
1 parent 3b0ccc0
commit 62c2d6f
Show file tree

Hide file tree

Showing 11 changed files with 149 additions and 142 deletions.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "meriyah",
-  "version": "0.6.0",
+  "version": "0.6.1",
   "description": "A 100% compliant, self-hosted javascript parser with high focus on both performance and stability",
   "main": "dist/meriyah.umd.js",
   "module": "dist/meriyah.esm.js",

diff --git a/src/lexer/comments.ts b/src/lexer/comments.ts
@@ -1,4 +1,4 @@
-import { nextCodePoint, CharTypes, CharFlags, LexerState, advanceNewline, consumeLineFeed } from './';
+import { nextCP, CharTypes, CharFlags, LexerState, advanceNewline, consumeLineFeed } from './';
 import { Chars } from '../chars';
 import { ParserState } from '../common';
 import { report, Errors } from '../errors';
@@ -9,6 +9,8 @@ import { report, Errors } from '../errors';
  * @param parser  Parser object
  */
 export function skipHashBang(parser: ParserState): void {
+  // HashbangComment ::
+  //   #!  SingleLineCommentChars
   let index = parser.index;
   if (index === parser.end) return;
   if (parser.nextCP === Chars.ByteOrderMark) {
@@ -41,7 +43,7 @@ export function skipSingleLineComment(parser: ParserState, state: LexerState): L
       advanceNewline(parser);
       return state;
     }
-    nextCodePoint(parser);
+    nextCP(parser);
   }
   return state;
 }
@@ -55,8 +57,8 @@ export function skipSingleLineComment(parser: ParserState, state: LexerState): L
 export function skipMultiLineComment(parser: ParserState, state: LexerState): LexerState | void {
   while (parser.index < parser.end) {
     while (parser.nextCP === Chars.Asterisk) {
-      if (nextCodePoint(parser) === Chars.Slash) {
-        nextCodePoint(parser);
+      if (nextCP(parser) === Chars.Slash) {
+        nextCP(parser);
         return state;
       }
     }
@@ -71,7 +73,7 @@ export function skipMultiLineComment(parser: ParserState, state: LexerState): Le
       state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
       advanceNewline(parser);
     } else {
-      nextCodePoint(parser);
+      nextCP(parser);
     }
   }
 

diff --git a/src/lexer/common.ts b/src/lexer/common.ts
@@ -14,7 +14,7 @@ export const enum LexerState {
  * Advances this lexer's current index.
  * @param parser The parser instance
  */
-export function nextCodePoint(parser: ParserState): number {
+export function nextCP(parser: ParserState): number {
   parser.column++;
   return (parser.nextCP = parser.source.charCodeAt(++parser.index));
 }
@@ -34,6 +34,10 @@ export function consumeMultiUnitCodePoint(parser: ParserState, hi: number): bool
   return true;
 }
 
+export function storeRaw(parser: ParserState, start: number) {
+  parser.tokenRaw = parser.source.slice(start, parser.index);
+}
+
 /**
  * Use to consume a line feed instead of `advanceNewline`.
  */

diff --git a/src/lexer/identifier.ts b/src/lexer/identifier.ts
@@ -1,7 +1,7 @@
 import { ParserState, Context } from '../common';
 import { Token, descKeywordTable } from '../token';
 import { Chars } from '../chars';
-import { nextCodePoint, consumeMultiUnitCodePoint, fromCodePoint, toHex } from './';
+import { nextCP, consumeMultiUnitCodePoint, fromCodePoint, toHex } from './';
 import { CharTypes, CharFlags, isIdentifierPart } from './charClassifier';
 import { report, Errors } from '../errors';
 import { unicodeLookup } from '../unicode';
@@ -14,10 +14,10 @@ import { unicodeLookup } from '../unicode';
  */
 export function scanIdentifier(parser: ParserState, context: Context): Token {
   const canBeKeyword = CharTypes[parser.nextCP] & CharFlags.KeywordCandidate;
-  while ((CharTypes[nextCodePoint(parser)] & CharFlags.IdentifierPart) !== 0) {}
+  while ((CharTypes[nextCP(parser)] & CharFlags.IdentifierPart) !== 0) {}
   parser.tokenValue = parser.source.slice(parser.tokenIndex, parser.index);
   const hasEscape = CharTypes[parser.nextCP] & CharFlags.BackSlash;
-  if (!hasEscape && parser.nextCP < 0x7e) {
+  if ((parser.nextCP & ~0x7f) === 0 && !hasEscape) {
     return descKeywordTable[parser.tokenValue] || Token.Identifier;
   }
 
@@ -62,7 +62,7 @@ export function scanIdentifierSlowCase(
       parser.tokenValue += fromCodePoint(code);
       start = parser.index;
     } else if (isIdentifierPart(parser.nextCP) || consumeMultiUnitCodePoint(parser, parser.nextCP)) {
-      nextCodePoint(parser);
+      nextCP(parser);
     } else {
       break;
     }
@@ -98,7 +98,7 @@ export function scanIdentifierSlowCase(
  * @param parser  Parser object
  */
 export function scanPrivateName(parser: ParserState): Token {
-  nextCodePoint(parser); // consumes '#'
+  nextCP(parser); // consumes '#'
   if (
     (CharTypes[parser.nextCP] & CharFlags.Decimal) !== 0 ||
     ((CharTypes[parser.nextCP] & CharFlags.IdentifierStart) === 0 &&
@@ -134,7 +134,7 @@ export function scanUnicodeEscapeValue(parser: ParserState): number {
   let codePoint = 0;
   // First handle a delimited Unicode escape, e.g. \u{1F4A9}
   if (parser.nextCP === Chars.LeftBrace) {
-    while (CharTypes[nextCodePoint(parser)] & CharFlags.Hex) {
+    while (CharTypes[nextCP(parser)] & CharFlags.Hex) {
       codePoint = (codePoint << 4) | toHex(parser.nextCP);
       // Check this early to avoid `code` wrapping to a negative on overflow (which is
       // reserved for abnormal conditions).
@@ -147,7 +147,7 @@ export function scanUnicodeEscapeValue(parser: ParserState): number {
     if (codePoint < 1 || (parser.nextCP as number) !== Chars.RightBrace) {
       report(parser, Errors.InvalidHexEscapeSequence);
     }
-    nextCodePoint(parser); // consumes '}'
+    nextCP(parser); // consumes '}'
     return codePoint;
   }
 

diff --git a/src/lexer/index.ts b/src/lexer/index.ts
@@ -1,11 +1,12 @@
 export { scanSingleToken, nextToken } from './scan';
 export { skipMultiLineComment, skipSingleLineComment, skipHashBang } from './comments';
 export {
-  nextCodePoint,
+  nextCP,
   consumeMultiUnitCodePoint,
   isExoticECMAScriptWhitespace,
   fromCodePoint,
   toHex,
+  storeRaw,
   consumeLineFeed,
   advanceNewline,
   LexerState

diff --git a/src/lexer/numeric.ts b/src/lexer/numeric.ts
@@ -1,6 +1,6 @@
 import { ParserState, Context, Flags } from '../common';
 import { Token } from '../token';
-import { nextCodePoint, toHex, CharTypes, CharFlags, isIdentifierStart } from './';
+import { nextCP, toHex, CharTypes, CharFlags, isIdentifierStart, storeRaw } from './';
 import { Chars } from '../chars';
 import { report, Errors } from '../errors';
 
@@ -18,16 +18,16 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
   let value: number | string = 0;
 
   if (isFloat) {
-    while (CharTypes[nextCodePoint(parser)] & CharFlags.Decimal) {}
+    while (CharTypes[nextCP(parser)] & CharFlags.Decimal) {}
   } else {
     if (parser.nextCP === Chars.Zero) {
-      nextCodePoint(parser);
+      nextCP(parser);
 
       // Hex
       if ((parser.nextCP | 32) === Chars.LowerX) {
         kind = NumberKind.Hex;
         let digits = 0;
-        while (CharTypes[nextCodePoint(parser)] & CharFlags.Hex) {
+        while (CharTypes[nextCP(parser)] & CharFlags.Hex) {
           value = value * 0x10 + toHex(parser.nextCP);
           digits++;
         }
@@ -36,15 +36,15 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
       } else if ((parser.nextCP | 32) === Chars.LowerO) {
         kind = NumberKind.Octal;
         let digits = 0;
-        while (CharTypes[nextCodePoint(parser)] & CharFlags.Octal) {
+        while (CharTypes[nextCP(parser)] & CharFlags.Octal) {
           value = value * 8 + (parser.nextCP - Chars.Zero);
           digits++;
         }
         if (digits < 1) report(parser, Errors.ExpectedNumberInRadix, `${8}`);
       } else if ((parser.nextCP | 32) === Chars.LowerB) {
         kind = NumberKind.Binary;
         let digits = 0;
-        while (CharTypes[nextCodePoint(parser)] & CharFlags.Binary) {
+        while (CharTypes[nextCP(parser)] & CharFlags.Binary) {
           value = value * 2 + (parser.nextCP - Chars.Zero);
           digits++;
         }
@@ -60,7 +60,7 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
             break;
           }
           value = value * 8 + (parser.nextCP - Chars.Zero);
-        } while (CharTypes[nextCodePoint(parser)] & CharFlags.Decimal);
+        } while (CharTypes[nextCP(parser)] & CharFlags.Decimal);
       } else if (CharTypes[parser.nextCP] & CharFlags.ImplicitOctalDigits) {
         if (context & Context.Strict) report(parser, Errors.StrictOctalEscape);
         else parser.flags = Flags.Octals;
@@ -73,7 +73,7 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
       if (isFloat) {
         // scan subsequent decimal digits
         let digit = 9;
-        while (digit >= 0 && CharTypes[nextCodePoint(parser)] & CharFlags.Decimal) {
+        while (digit >= 0 && CharTypes[nextCP(parser)] & CharFlags.Decimal) {
           value = 10 * value + (parser.nextCP - Chars.Zero);
           --digit;
         }
@@ -86,15 +86,15 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
       }
 
       while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
-        nextCodePoint(parser);
+        nextCP(parser);
       }
 
       // Scan any decimal dot and fractional component
       if (parser.nextCP === Chars.Period) {
         isFloat = 1;
-        nextCodePoint(parser); // consumes '.'
+        nextCP(parser); // consumes '.'
         while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
-          nextCodePoint(parser);
+          nextCP(parser);
         }
       }
     }
@@ -108,24 +108,24 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
   ) {
     if (isFloat) report(parser, Errors.InvalidBigInt);
     isBigInt = 1;
-    nextCodePoint(parser);
+    nextCP(parser);
     // Scan any exponential notation
   } else if ((parser.nextCP | 32) === Chars.LowerE) {
     if ((kind & (NumberKind.Decimal | NumberKind.DecimalWithLeadingZero)) === 0) {
       report(parser, Errors.MissingExponent);
     }
 
-    nextCodePoint(parser);
+    nextCP(parser);
 
     // '-', '+'
     if (CharTypes[parser.nextCP] & CharFlags.Exponent) {
-      nextCodePoint(parser);
+      nextCP(parser);
     }
 
     let exponentDigits = 0;
     // Consume exponential digits
     while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
-      nextCodePoint(parser);
+      nextCP(parser);
       exponentDigits++;
     }
     // Exponential notation must contain at least one digit
@@ -136,20 +136,24 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
 
   // The source character immediately following a numeric literal must
   // not be an identifier start or a decimal digit
-  if (CharTypes[parser.nextCP] & CharFlags.Decimal || isIdentifierStart(parser.nextCP)) {
+  if ((parser.index < parser.end && CharTypes[parser.nextCP] & CharFlags.Decimal) || isIdentifierStart(parser.nextCP)) {
     report(parser, Errors.IDStartAfterNumber);
   }
-  if (context & Context.OptionsRaw) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);
-  parser.tokenValue =
-    kind & (NumberKind.ImplicitOctal | NumberKind.Binary | NumberKind.Hex | NumberKind.Octal)
-      ? value
-      : kind & NumberKind.DecimalWithLeadingZero
-      ? parseFloat(parser.source.slice(parser.tokenIndex, parser.index))
-      : isBigInt
-      ? parseInt(parser.source.slice(parser.tokenIndex, parser.index), 0xa)
-      : +parser.source.slice(parser.tokenIndex, parser.index);
-
-  if (context & Context.OptionsRaw || isBigInt) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);
-
-  return isBigInt ? Token.BigIntLiteral : Token.NumericLiteral;
+
+  if (kind & (NumberKind.ImplicitOctal | NumberKind.Binary | NumberKind.Hex | NumberKind.Octal)) {
+    parser.tokenValue = value;
+  } else {
+    const raw = parser.source.slice(parser.tokenIndex, parser.index);
+    parser.tokenValue =
+      kind & NumberKind.DecimalWithLeadingZero ? parseFloat(raw) : isBigInt ? parseInt(raw, 0xa) : +raw;
+  }
+
+  if (isBigInt) {
+    storeRaw(parser, parser.tokenIndex);
+    return Token.BigIntLiteral;
+  }
+
+  if (context & Context.OptionsRaw) storeRaw(parser, parser.tokenIndex);
+
+  return Token.NumericLiteral;
 }
diff --git a/src/lexer/regexp.ts b/src/lexer/regexp.ts
@@ -1,7 +1,7 @@
 import { Chars } from '../chars';
 import { Context, ParserState } from '../common';
 import { Token } from '../token';
-import { nextCodePoint, isIdentifierPart } from './';
+import { nextCP, isIdentifierPart } from './';
 import { report, Errors } from '../errors';
 
 /**
@@ -23,7 +23,7 @@ export function scanRegularExpression(parser: ParserState, context: Context): To
 
   loop: while (true) {
     const ch = parser.nextCP;
-    nextCodePoint(parser);
+    nextCP(parser);
 
     if (preparseState & RegexState.Escape) {
       preparseState &= ~RegexState.Escape;
@@ -71,7 +71,7 @@ export function scanRegularExpression(parser: ParserState, context: Context): To
 
   const { index: flagStart } = parser;
 
-  loop: while (parser.index < parser.source.length) {
+  loop: while (isIdentifierPart(parser.nextCP)) {
     switch (parser.nextCP) {
       case Chars.LowerG:
         if (mask & RegexFlags.Global) report(parser, Errors.DuplicateRegExpFlag, 'g');
@@ -104,11 +104,10 @@ export function scanRegularExpression(parser: ParserState, context: Context): To
         break;
 
       default:
-        if (!isIdentifierPart(parser.nextCP)) break loop;
         report(parser, Errors.UnexpectedTokenRegExpFlag);
     }
 
-    nextCodePoint(parser);
+    nextCP(parser);
   }
 
   const flags = parser.source.slice(flagStart, parser.index);