fix(lexer): optimized WS skipping and comment scanning

meriyah · May 31, 2019 · 9f85539
1 parent de7d970
commit 9f85539
Show file tree

Hide file tree

Showing 12 changed files with 249 additions and 110 deletions.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "meriyah",
-  "version": "0.2.0",
+  "version": "0.2.1",
   "description": "A 100% compliant, self-hosted javascript parser with high focus on both performance and stability",
   "main": "dist/meriyah.umd.js",
   "module": "dist/meriyah.esm.js",

diff --git a/src/lexer/comments.ts b/src/lexer/comments.ts
@@ -1,4 +1,4 @@
-import { nextCodePoint, CharTypes, CharFlags } from './';
+import { nextCodePoint, CharTypes, CharFlags, ScannerState, Seek } from './';
 import { Chars } from '../chars';
 import { Token } from '../token';
 import { ParserState, Flags } from '../common';
@@ -22,7 +22,7 @@ export function skipHashBang(parser: ParserState): void {
     if (index < parser.end && parser.source.charCodeAt(index) === Chars.Exclamation) {
       parser.index = index + 1;
       parser.currentCodePoint = parser.source.charCodeAt(parser.index);
-      skipSingleLineComment(parser);
+      skipSingleLineComment(parser, ScannerState.None);
     } else {
       report(parser, Errors.IllegalCaracter, '#');
     }
@@ -34,44 +34,54 @@ export function skipHashBang(parser: ParserState): void {
  *
  * @param parser  Parser object
  */
-export function skipSingleLineComment(parser: ParserState): Token {
+export function skipSingleLineComment(parser: ParserState, state: ScannerState): ScannerState {
   while (parser.index < parser.end) {
     if (
       CharTypes[parser.currentCodePoint] & CharFlags.LineTerminator ||
       (parser.currentCodePoint ^ Chars.LineSeparator) <= 1
     ) {
-      break;
+      parser.flags |= Flags.NewLine;
+      parser.column = 0;
+      parser.line++;
+      parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
+      return state;
     }
     nextCodePoint(parser);
   }
-  return Token.WhiteSpace;
+  return state;
 }
 
 /**
  * Skips multiline comment
  *
  * @param parser  Parser object
  */
-export function skipMultiLineComment(parser: ParserState): any {
+export function skipMultiLineComment(parser: ParserState, state: ScannerState): any {
   while (parser.index < parser.end) {
     while (CharTypes[parser.currentCodePoint] & CharFlags.Asterisk) {
       if (nextCodePoint(parser) === Chars.Slash) {
         nextCodePoint(parser);
-        return Token.WhiteSpace;
+        return state;
       }
     }
 
     // ES 2020 11.3 Line Terminators
-    if (
-      CharTypes[parser.currentCodePoint] & CharFlags.LineTerminator ||
-      (parser.currentCodePoint ^ Chars.LineSeparator) <= 1
-    ) {
-      if (
-        CharTypes[parser.currentCodePoint] & CharFlags.CarriageReturn &&
-        CharTypes[parser.source.charCodeAt(parser.index + 1)] & CharFlags.LineFeed
-      ) {
-        parser.index++;
+    if (CharTypes[parser.currentCodePoint] & CharFlags.LineTerminator) {
+      if (CharTypes[parser.currentCodePoint] & CharFlags.CarriageReturn) {
+        state |= ScannerState.NewLine | ScannerState.LastIsCR;
+        parser.column = 0;
+        parser.line++;
+      } else {
+        if (state & ScannerState.LastIsCR) {
+          parser.column = 0;
+          parser.line++;
+        }
+        state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
       }
+      parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
+      parser.flags |= Flags.NewLine;
+    } else if ((parser.currentCodePoint ^ Chars.LineSeparator) <= 1) {
+      state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
       parser.column = 0;
       parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
       parser.line++;

diff --git a/src/lexer/common.ts b/src/lexer/common.ts
@@ -3,6 +3,18 @@ import { ParserState } from '../common';
 import { unicodeLookup } from '../unicode';
 import { report, Errors } from '../errors';
 
+export const enum Seek {
+  None,
+  SameLine,
+  NewLine
+}
+export const enum ScannerState {
+  None = 0,
+  NewLine = 1 << 0,
+  SameLine = 1 << 1,
+  LastIsCR = 1 << 2
+}
+
 /**
  * Advances this lexer's current index.
  * @param parser The parser instance

diff --git a/src/lexer/index.ts b/src/lexer/index.ts
@@ -1,6 +1,14 @@
 export { scanSingleToken, nextToken } from './scan';
 export { skipMultiLineComment, skipSingleLineComment, skipHashBang } from './comments';
-export { nextCodePoint, consumeMultiUnitCodePoint, isExoticECMAScriptWhitespace, fromCodePoint, toHex } from './common';
+export {
+  nextCodePoint,
+  consumeMultiUnitCodePoint,
+  isExoticECMAScriptWhitespace,
+  fromCodePoint,
+  toHex,
+  ScannerState,
+  Seek
+} from './common';
 export { CharTypes, CharFlags, isIdentifierStart, isIdentifierPart } from './charClassifier';
 export { scanIdentifier, scanPrivateName, scanUnicodeEscapeValue } from './identifier';
 export { scanString } from './string';

diff --git a/src/lexer/scan.ts b/src/lexer/scan.ts
@@ -1,4 +1,4 @@
-import { skipSingleLineComment, skipMultiLineComment } from './comments';
+import { skipSingleLineComment, skipMultiLineComment, ScannerState, Seek } from './';
 import { CharTypes, CharFlags } from './charClassifier';
 import { Chars } from '../chars';
 import { Token } from '../token';
@@ -27,11 +27,11 @@ import {
  * StringLiteral:    34, 39: '"', `'`
  * NumericLiteral:   48, 49..57: '0'..'9'
  * WhiteSpace:       9, 11, 12, 32: '\t', '\v', '\f', ' '
- * LineTerminator:   10, 13: '\n', '\r'
+ * LineFeed, CarriageReturn: 10, 13: '\n', '\r'
  * Template:         96: '`'
  */
 
-export const OneCharToken = [
+export const TokenLookup = [
   /*   0 - Null               */ Token.Illegal,
   /*   1 - Start of Heading   */ Token.Illegal,
   /*   2 - Start of Text      */ Token.Illegal,
@@ -42,10 +42,10 @@ export const OneCharToken = [
   /*   7 - Bell               */ Token.Illegal,
   /*   8 - Backspace          */ Token.Illegal,
   /*   9 - Horizontal Tab     */ Token.WhiteSpace,
-  /*  10 - Line Feed          */ Token.LineTerminator,
+  /*  10 - Line Feed          */ Token.LineFeed,
   /*  11 - Vertical Tab       */ Token.WhiteSpace,
   /*  12 - Form Feed          */ Token.WhiteSpace,
-  /*  13 - Carriage Return    */ Token.LineTerminator,
+  /*  13 - Carriage Return    */ Token.CarriageReturn,
   /*  14 - Shift Out          */ Token.Illegal,
   /*  15 - Shift In           */ Token.Illegal,
   /*  16 - Data Line Escape   */ Token.Illegal,
@@ -169,15 +169,15 @@ export function nextToken(parser: ParserState, context: Context): void {
 }
 
 export function scanSingleToken(parser: ParserState, context: Context): Token {
-  let isStartOfLine = parser.index === 0;
-
+  let state = ScannerState.None;
+  const isStartOfLine = parser.index === 0;
   while (parser.index < parser.end) {
     parser.tokenIndex = parser.index;
 
     const first = parser.currentCodePoint;
 
     if (first <= 0x7e) {
-      const token = OneCharToken[first];
+      const token = TokenLookup[first];
 
       switch (token) {
         // Look for an unambiguous single-char token
@@ -201,19 +201,26 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
         case Token.WhiteSpace:
           nextCodePoint(parser);
           break;
-        // Line terminators
-        case Token.LineTerminator:
+
+        case Token.CarriageReturn:
           parser.flags |= Flags.NewLine;
-          if (
-            CharTypes[first] & CharFlags.CarriageReturn &&
-            CharTypes[parser.source.charCodeAt(parser.index + 1)] & CharFlags.LineFeed
-          ) {
-            parser.index++;
-          }
+
+          state |= ScannerState.NewLine | ScannerState.LastIsCR;
+
           parser.column = 0;
           parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
           parser.line++;
           break;
+        case Token.LineFeed:
+          parser.flags |= Flags.NewLine;
+
+          if ((state & ScannerState.LastIsCR) === 0) {
+            parser.column = 0;
+            parser.line++;
+          }
+          state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
+          parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
+          break;
         // Look for an identifier.
         case Token.Identifier:
           return scanIdentifier(parser, context);
@@ -298,11 +305,11 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
             nextCodePoint(parser);
             if (
               (context & Context.Module) === 0 &&
-              (isStartOfLine || parser.flags & Flags.NewLine) &&
+              (state & ScannerState.NewLine || isStartOfLine) &&
               parser.currentCodePoint === Chars.GreaterThan
             ) {
               if ((context & Context.OptionsWebCompat) === 0) report(parser, Errors.HtmlCommentInWebCompat);
-              skipSingleLineComment(parser);
+              state = skipSingleLineComment(parser, state);
               continue;
             }
 
@@ -323,12 +330,12 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
             const ch = parser.currentCodePoint;
             if (ch === Chars.Slash) {
               nextCodePoint(parser);
-              skipSingleLineComment(parser);
+              state = skipSingleLineComment(parser, state);
               continue;
             } else if (ch === Chars.Asterisk) {
               nextCodePoint(parser);
-              skipMultiLineComment(parser);
-              break;
+              state = skipMultiLineComment(parser, state);
+              continue;
             } else if (context & Context.AllowRegExp) {
               return scanRegularExpression(parser, context);
             } else if (ch === Chars.EqualSign) {
@@ -366,7 +373,7 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
                 parser.source.charCodeAt(parser.index + 1) === Chars.Hyphen &&
                 parser.source.charCodeAt(parser.index + 2) === Chars.Hyphen
               ) {
-                skipSingleLineComment(parser);
+                state = skipSingleLineComment(parser, state);
                 continue;
               }
 
@@ -485,6 +492,7 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
     } else {
       if ((first ^ Chars.LineSeparator) <= 1) {
         parser.flags |= Flags.NewLine;
+        state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
         parser.column = 0;
         parser.currentCodePoint = parser.source.charCodeAt(++parser.index);
         parser.line++;
@@ -501,8 +509,6 @@ export function scanSingleToken(parser: ParserState, context: Context): Token {
       // Invalid ASCII code point/unit
       report(parser, Errors.IllegalCaracter, fromCodePoint(first));
     }
-
-    isStartOfLine = false;
   }
   return Token.EOF;
 }
diff --git a/src/parser.ts b/src/parser.ts
@@ -3781,14 +3781,7 @@ function parseRestOrSpreadElement(
     parser.assignable = AssignmentKind.IsAssignable;
     destructible |= parser.token === Token.AwaitKeyword ? DestructuringKind.Await : 0;
 
-    argument = parsePrimaryExpressionExtended(
-      parser,
-      context,
-      type,
-      /* inNewExpression */ 0,
-      /* assignable */ 1,
-      tokenIndex
-    );
+    argument = parsePrimaryExpressionExtended(parser, context, type, 0, 1, tokenIndex);
 
     const { token } = parser;
 

diff --git a/src/token.ts b/src/token.ts
@@ -178,11 +178,12 @@ export const enum Token {
   BigIntLiteral  = 122,
   WhiteSpace           = 124,
   Illegal  = 129,
-  LineTerminator  = 130,
+  CarriageReturn  = 130,
   PrivateField  = 131,
   Template = 132,
   Decorator = 133,
-  Target = 134 | IsIdentifier
+  Target = 134 | IsIdentifier,
+  LineFeed  = 135,
 }
 
 export const KeywordDescTable = [

diff --git a/test/lexer/skiphashbang.ts b/test/lexer/skiphashbang.ts
@@ -96,59 +96,59 @@ describe('Lexer - skiphashbang', () => {
   pass('skips a shebang+LF before a lone hash', {
     source: '#!/foo/bar/baz -abc\n# foo',
     hasNext: true,
-    newLine: false,
+    newLine: true,
     value: '',
-    index: 19,
+    index: 20,
     line: 2,
-    column: 17
+    column: 0
   });
 
   pass('skips a shebang+LF in an otherwise empty source', {
     source: '#!/foo/bar/baz -abc\n',
-    newLine: false,
+    newLine: true,
     hasNext: false,
     value: '',
-    index: 19,
+    index: 20,
     line: 2,
-    column: 17
+    column: 0
   });
 
   pass('skips a shebang+LF before an identifier', {
     source: '#!/foo/bar/baz -abc\nfoo',
-    newLine: false,
+    newLine: true,
     hasNext: false,
     value: '',
-    index: 19,
+    index: 20,
     line: 2,
-    column: 17
+    column: 0
   });
 
   pass('skips a shebang+LF before a lone exclamation', {
     source: '#!/foo/bar/baz -abc\n! foo',
-    newLine: false,
+    newLine: true,
     hasNext: false,
     value: '',
-    index: 19,
+    index: 20,
     line: 2,
-    column: 17
+    column: 0
   });
 
   pass('skips a shebang+CR in an otherwise empty source', {
     source: '#!/foo/bar/baz -abc\r',
-    newLine: false,
+    newLine: true,
     hasNext: false,
     value: '',
-    index: 19,
+    index: 20,
     line: 2,
-    column: 17
+    column: 0
   });
   pass('skips a BOM+shebang+LF in an otherwise empty source', {
     source: '\uFFEF#!/foo/bar/baz -abc\n',
-    newLine: false,
+    newLine: true,
     hasNext: false,
     value: '',
-    index: 20,
+    index: 21,
     line: 2,
-    column: 17
+    column: 0
   });
 });
diff --git a/test/lexer/whitespace.ts b/test/lexer/whitespace.ts
@@ -342,7 +342,7 @@ describe('Lexer - Whitespace', () => {
     value: '',
     index: 26,
     line: 2,
-    column: 5
+    column: 25
   });
 
   pass('skips multiline comments with Windows newlines', {

diff --git a/test/parser/expressions/group.ts b/test/parser/expressions/group.ts
@@ -324,6 +324,10 @@ describe('Expressions - Group', () => {
     ['({a} += 0);', Context.None],
     ['({a,,} = 0)', Context.None],
     ['({,a,} = 0)', Context.None],
+    ['({a, ...b, c} = {})', Context.None],
+    ['({a = 5})', Context.None],
+    ['({ ...{a} } = {})', Context.None],
+    ['({b, c, d, ...{a} } = {})', Context.None],
     ['({a,,a} = 0)', Context.None],
     ['({function} = 0)', Context.None],
     ['({a:function} = 0)', Context.None],