feat(parser): Distinguish Identifier from IdentifierPattern

See ESTree issue 196.
meriyah · KFlash · Jun 29, 2019 · 68da76b
1 parent e359262
commit 68da76b
Show file tree

Hide file tree

Showing 12 changed files with 179 additions and 126 deletions.
diff --git a/README.md b/README.md
@@ -112,6 +112,7 @@ The second argument allows you to specify various options:
 | ----------- | ------------------------------------------------------------ |
 | `directives`      | Enable [directive prologue](https://github.com/danez/estree/blob/directive/es5.md#directive) to each literal node |
 | `globalReturn`    | Allow `return` in the global scope |
+| `identifierPattern` | Distinguish Identifier from IdentifierPattern |
 | `impliedStrict`   | Enable strict mode (*initial enforcement*) |
 | `lexical`         | Enable lexical binding and scope tracking |
 | `loc`         | Enable line/column location information to each node |

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "meriyah",
-  "version": "1.2.2",
+  "version": "1.2.3",
   "description": "A 100% compliant, self-hosted javascript parser with high focus on both performance and stability",
   "main": "dist/meriyah.umd.js",
   "module": "dist/meriyah.esm.js",

diff --git a/src/common.ts b/src/common.ts
@@ -36,6 +36,7 @@ export const enum Context {
   AllowNewTarget        = 1 << 26,
   DisallowIn            = 1 << 27,
   InClass               = 1 << 28,
+  OptionsIdentifierPattern = 1 << 29,
 }
 
 export const enum PropertyKind {

diff --git a/src/errors.ts b/src/errors.ts
@@ -169,7 +169,7 @@ export const errorMessages: {
   [Errors.StrictOctalEscape]: 'Octal escape sequences are not allowed in strict mode',
   [Errors.TemplateOctalLiteral]: 'Octal escape sequences are not allowed in template strings',
   [Errors.InvalidPrivateName]: 'Unexpected token `#`',
-  [Errors.InvalidUnicodeEscapeSequence]: 'Invalid Unicode escape sequence',
+  [Errors.InvalidUnicodeEscapeSequence]: 'Illegal Unicode escape sequence',
   [Errors.InvalidCodePoint]: 'Invalid code point %0',
   [Errors.InvalidHexEscapeSequence]: 'Invalid hexadecimal escape sequence',
   [Errors.StrictOctalLiteral]: 'Octal literals are not allowed in strict mode',

diff --git a/src/lexer/comments.ts b/src/lexer/comments.ts
@@ -1,4 +1,4 @@
-import { nextCP, CharTypes, CharFlags, LexerState, advanceNewline, consumeLineFeed } from './';
+import { nextCP, CharTypes, CharFlags, LexerState, scanNewLine, consumeLineFeed } from './';
 import { Chars } from '../chars';
 import { ParserState } from '../common';
 import { report, Errors } from '../errors';
@@ -40,7 +40,7 @@ export function skipSingleLineComment(parser: ParserState, state: LexerState): L
   while (parser.index < parser.end) {
     if (CharTypes[parser.nextCP] & CharFlags.LineTerminator || (parser.nextCP ^ Chars.LineSeparator) <= 1) {
       state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
-      advanceNewline(parser);
+      scanNewLine(parser);
       return state;
     }
     nextCP(parser);
@@ -65,13 +65,13 @@ export function skipMultiLineComment(parser: ParserState, state: LexerState): Le
 
     if (parser.nextCP === Chars.CarriageReturn) {
       state |= LexerState.NewLine | LexerState.LastIsCR;
-      advanceNewline(parser);
+      scanNewLine(parser);
     } else if (parser.nextCP === Chars.LineFeed) {
       consumeLineFeed(parser, (state & LexerState.LastIsCR) !== 0);
       state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
     } else if ((parser.nextCP ^ Chars.LineSeparator) <= 1) {
       state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
-      advanceNewline(parser);
+      scanNewLine(parser);
     } else {
       nextCP(parser);
     }

diff --git a/src/lexer/common.ts b/src/lexer/common.ts
@@ -35,7 +35,7 @@ export function consumeMultiUnitCodePoint(parser: ParserState, hi: number): bool
 }
 
 /**
- * Use to consume a line feed instead of `advanceNewline`.
+ * Use to consume a line feed instead of `scanNewLine`.
  */
 export function consumeLineFeed(parser: ParserState, lastIsCR: boolean) {
   parser.nextCP = parser.source.charCodeAt(++parser.index);
@@ -46,7 +46,7 @@ export function consumeLineFeed(parser: ParserState, lastIsCR: boolean) {
   }
 }
 
-export function advanceNewline(parser: ParserState) {
+export function scanNewLine(parser: ParserState) {
   parser.flags |= Flags.NewLine;
   parser.nextCP = parser.source.charCodeAt(++parser.index);
   parser.column = 0;

diff --git a/src/lexer/identifier.ts b/src/lexer/identifier.ts
@@ -16,12 +16,11 @@ export function scanIdentifier(parser: ParserState, context: Context): Token {
   const canBeKeyword = CharTypes[parser.nextCP] & CharFlags.KeywordCandidate;
   while ((CharTypes[nextCP(parser)] & CharFlags.IdentifierPart) !== 0) {}
   parser.tokenValue = parser.source.slice(parser.tokenIndex, parser.index);
-  const hasEscape = CharTypes[parser.nextCP] & CharFlags.BackSlash;
-  if (!hasEscape && parser.nextCP < 0x7e) {
+  if ((CharTypes[parser.nextCP] & CharFlags.BackSlash) === 0 && parser.nextCP < 0x7e) {
     return descKeywordTable[parser.tokenValue] || Token.Identifier;
   }
-
-  return scanIdentifierSlowCase(parser, context, hasEscape, canBeKeyword);
+  // Slow path that has to deal with multi unit encoding
+  return scanIdentifierSlowCase(parser, context, 0, canBeKeyword);
 }
 
 /**
@@ -48,7 +47,7 @@ export function scanUnicodeIdentifier(parser: ParserState, context: Context): To
 export function scanIdentifierSlowCase(
   parser: ParserState,
   context: Context,
-  hasEscape: number,
+  hasEscape: 0 | 1,
   canBeKeyword: number
 ): Token {
   let start = parser.index;
@@ -132,16 +131,13 @@ export function scanIdentifierUnicodeEscape(parser: ParserState): number | void
  */
 export function scanUnicodeEscapeValue(parser: ParserState): number {
   let codePoint = 0;
+  let char = parser.nextCP;
   // First handle a delimited Unicode escape, e.g. \u{1F4A9}
-  if (parser.nextCP === Chars.LeftBrace) {
+  if (char === Chars.LeftBrace) {
     const startPos = parser.index;
     while (CharTypes[nextCP(parser)] & CharFlags.Hex) {
       codePoint = (codePoint << 4) | toHex(parser.nextCP);
-      // Check this early to avoid `code` wrapping to a negative on overflow (which is
-      // reserved for abnormal conditions).
-      if (codePoint > Chars.NonBMPMax) {
-        report(parser, Errors.UnicodeOverflow);
-      }
+      if (codePoint > Chars.NonBMPMax) report(parser, Errors.UnicodeOverflow);
     }
 
     // At least 4 characters have to be read
@@ -152,7 +148,7 @@ export function scanUnicodeEscapeValue(parser: ParserState): number {
     return codePoint;
   }
 
-  if ((CharTypes[parser.nextCP] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence); // first one is mandatory
+  if ((CharTypes[char] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence); // first one is mandatory
 
   const c2 = parser.source.charCodeAt(parser.index + 1);
   if ((CharTypes[c2] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence);
@@ -161,7 +157,7 @@ export function scanUnicodeEscapeValue(parser: ParserState): number {
   const c4 = parser.source.charCodeAt(parser.index + 3);
   if ((CharTypes[c4] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence);
 
-  codePoint = (toHex(parser.nextCP) << 12) | (toHex(c2) << 8) | (toHex(c3) << 4) | toHex(c4);
+  codePoint = (toHex(char) << 12) | (toHex(c2) << 8) | (toHex(c3) << 4) | toHex(c4);
 
   parser.nextCP = parser.source.charCodeAt((parser.index += 4));
 

diff --git a/src/lexer/index.ts b/src/lexer/index.ts
@@ -7,7 +7,7 @@ export {
   fromCodePoint,
   toHex,
   consumeLineFeed,
-  advanceNewline,
+  scanNewLine,
   LexerState
 } from './common';
 export { CharTypes, CharFlags, isIdentifierStart, isIdentifierPart } from './charClassifier';

diff --git a/src/lexer/numeric.ts b/src/lexer/numeric.ts
@@ -100,20 +100,19 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
         // Octal integer literals are not permitted in strict mode code
         if (context & Context.Strict) report(parser, Errors.StrictOctalEscape);
         kind = NumberKind.ImplicitOctal;
-        do {
-          if (CharTypes[parser.nextCP] & CharFlags.ImplicitOctalDigits) {
+        while (CharTypes[char] & CharFlags.Decimal) {
+          if (CharTypes[char] & CharFlags.ImplicitOctalDigits) {
             kind = NumberKind.DecimalWithLeadingZero;
             atStart = false;
             break;
           }
           value = value * 8 + (parser.nextCP - Chars.Zero);
-        } while (CharTypes[nextCP(parser)] & CharFlags.Decimal);
-        char = parser.nextCP;
+          char = nextCP(parser);
+        }
       } else if (CharTypes[char] & CharFlags.ImplicitOctalDigits) {
         if (context & Context.Strict) report(parser, Errors.StrictOctalEscape);
         else parser.flags = Flags.Octals;
         kind = NumberKind.DecimalWithLeadingZero;
-        char = parser.nextCP;
       } else if (char === Chars.Underscore) {
         report(parser, Errors.Unexpected);
       }

diff --git a/src/lexer/scan.ts b/src/lexer/scan.ts
@@ -19,7 +19,7 @@ import {
   scanPrivateName,
   fromCodePoint,
   consumeLineFeed,
-  advanceNewline
+  scanNewLine
 } from './';
 
 /*
@@ -218,7 +218,7 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
 
         case Token.CarriageReturn:
           state |= LexerState.NewLine | LexerState.LastIsCR;
-          advanceNewline(parser);
+          scanNewLine(parser);
           break;
 
         case Token.LineFeed:
@@ -268,7 +268,9 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
         // `*`, `**`, `*=`, `**=`
         case Token.Multiply: {
           nextCP(parser);
+
           if (parser.index >= parser.end) return Token.Multiply;
+
           const next = parser.nextCP;
 
           if (next === Chars.EqualSign) {
@@ -277,9 +279,11 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
           }
 
           if (next !== Chars.Asterisk) return Token.Multiply;
+
+          if (nextCP(parser) !== Chars.EqualSign) return Token.Exponentiate;
+
           nextCP(parser);
-          if (parser.nextCP !== Chars.EqualSign) return Token.Exponentiate;
-          nextCP(parser);
+
           return Token.ExponentiateAssign;
         }
 
@@ -361,24 +365,19 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
 
         // `<`, `<=`, `<<`, `<<=`, `</`, `<!--`
         case Token.LessThan:
-          nextCP(parser);
-          if (parser.index >= parser.end) return Token.LessThan;
-
-          switch (parser.nextCP) {
-            case Chars.LessThan:
-              nextCP(parser);
-              if ((parser.nextCP as number) === Chars.EqualSign) {
+          let next = nextCP(parser);
+          if (parser.index < parser.end) {
+            if (next === Chars.LessThan) {
+              if (parser.index < parser.end && nextCP(parser) === Chars.EqualSign) {
                 nextCP(parser);
                 return Token.ShiftLeftAssign;
               } else {
                 return Token.ShiftLeft;
               }
-
-            case Chars.EqualSign:
+            } else if (next === Chars.EqualSign) {
               nextCP(parser);
               return Token.LessThanOrEqual;
-
-            case Chars.Exclamation:
+            } else if (next === Chars.Exclamation) {
               // Treat HTML begin-comment as comment-till-end-of-line.
               if (
                 (context & Context.Module) === 0 &&
@@ -388,21 +387,17 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
                 state = skipSingleLineComment(parser, state);
                 continue;
               }
-
-            default:
-              // ignore
-              return Token.LessThan;
+            }
           }
-
+          return Token.LessThan;
         // `=`, `==`, `===`, `=>`
         case Token.Assign: {
           nextCP(parser);
           if (parser.index >= parser.end) return Token.Assign;
           const next = parser.nextCP;
 
           if (next === Chars.EqualSign) {
-            nextCP(parser);
-            if (parser.nextCP === Chars.EqualSign) {
+            if (nextCP(parser) === Chars.EqualSign) {
               nextCP(parser);
               return Token.StrictEqual;
             } else {
@@ -436,7 +431,9 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
         // `>`, `>=`, `>>`, `>>>`, `>>=`, `>>>=`
         case Token.GreaterThan: {
           nextCP(parser);
+
           if (parser.index >= parser.end) return Token.GreaterThan;
+
           const next = parser.nextCP;
 
           if (next === Chars.EqualSign) {
@@ -445,14 +442,14 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
           }
 
           if (next !== Chars.GreaterThan) return Token.GreaterThan;
+
           nextCP(parser);
 
           if (parser.index < parser.end) {
             const next = parser.nextCP;
 
             if (next === Chars.GreaterThan) {
-              nextCP(parser);
-              if (parser.nextCP === Chars.EqualSign) {
+              if (nextCP(parser) === Chars.EqualSign) {
                 nextCP(parser);
                 return Token.LogicalShiftRightAssign;
               } else {
@@ -503,7 +500,7 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
     } else {
       if ((first ^ Chars.LineSeparator) <= 1) {
         state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
-        advanceNewline(parser);
+        scanNewLine(parser);
         continue;
       }