fix(lexer): performance tweaks

meriyah · Jun 30, 2019 · 109fdbb · 109fdbb
1 parent b380d62
commit 109fdbb
Show file tree

Hide file tree

Showing 7 changed files with 29 additions and 92 deletions.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "meriyah",
-  "version": "1.2.3",
+  "version": "1.2.4",
   "description": "A 100% compliant, self-hosted javascript parser with high focus on both performance and stability",
   "main": "dist/meriyah.umd.js",
   "module": "dist/meriyah.esm.js",

diff --git a/src/lexer/charClassifier.ts b/src/lexer/charClassifier.ts
@@ -146,11 +146,11 @@ export const CharTypes = [
   CharFlags.IdentifierStart | CharFlags.IdentifierPart | CharFlags.KeywordCandidate /* 0x78 x */,
   CharFlags.IdentifierStart | CharFlags.IdentifierPart | CharFlags.KeywordCandidate /* 0x79 y */,
   CharFlags.IdentifierStart | CharFlags.IdentifierPart | CharFlags.KeywordCandidate /* 0x7A z */,
-  CharFlags.None /* 0x7B   */,
-  CharFlags.None /* 0x7C   */,
-  CharFlags.None /* 0x7D   */,
-  CharFlags.None /* 0x7E   */,
-  CharFlags.None /* 0x7F   */
+  CharFlags.None /* 0x7B */,
+  CharFlags.None /* 0x7C */,
+  CharFlags.None /* 0x7D */,
+  CharFlags.None /* 0x7E */,
+  CharFlags.None /* 0x7F */
 ];
 
 export function isIdentifierStart(code: number): number {

diff --git a/src/lexer/comments.ts b/src/lexer/comments.ts
@@ -1,6 +1,6 @@
 import { nextCP, CharTypes, CharFlags, LexerState, scanNewLine, consumeLineFeed } from './';
 import { Chars } from '../chars';
-import { ParserState } from '../common';
+import { ParserState, Context } from '../common';
 import { report, Errors } from '../errors';
 
 /**
@@ -11,22 +11,8 @@ import { report, Errors } from '../errors';
 export function skipHashBang(parser: ParserState): void {
   // HashbangComment ::
   //   #!  SingleLineCommentChars
-  let index = parser.index;
-  if (index === parser.end) return;
-  if (parser.nextCP === Chars.ByteOrderMark) {
-    parser.index = ++index;
-    parser.nextCP = parser.source.charCodeAt(parser.index);
-  }
-
-  if (index < parser.end && parser.nextCP === Chars.Hash) {
-    index++;
-    if (index < parser.end && parser.source.charCodeAt(index) === Chars.Exclamation) {
-      parser.index = index + 1;
-      parser.nextCP = parser.source.charCodeAt(parser.index);
-      skipSingleLineComment(parser, LexerState.None);
-    } else {
-      report(parser, Errors.IllegalCaracter, '#');
-    }
+  if (parser.nextCP === Chars.Hash && parser.source.charCodeAt(parser.index + 1) === Chars.Exclamation) {
+    skipSingleLineComment(parser, LexerState.None);
   }
 }
 

diff --git a/src/lexer/identifier.ts b/src/lexer/identifier.ts
@@ -2,7 +2,7 @@ import { ParserState, Context } from '../common';
 import { Token, descKeywordTable } from '../token';
 import { Chars } from '../chars';
 import { nextCP, consumeMultiUnitCodePoint, fromCodePoint, toHex } from './';
-import { CharTypes, CharFlags, isIdentifierPart } from './charClassifier';
+import { CharTypes, CharFlags, isIdentifierPart, isIdentifierStart } from './charClassifier';
 import { report, reportAt, Errors } from '../errors';
 import { unicodeLookup } from '../unicode';
 
@@ -97,15 +97,7 @@ export function scanIdentifierSlowCase(
  * @param parser  Parser object
  */
 export function scanPrivateName(parser: ParserState): Token {
-  nextCP(parser); // consumes '#'
-  if (
-    (CharTypes[parser.nextCP] & CharFlags.Decimal) !== 0 ||
-    ((CharTypes[parser.nextCP] & CharFlags.IdentifierStart) === 0 &&
-      ((unicodeLookup[(parser.nextCP >>> 5) + 0] >>> parser.nextCP) & 31 & 1) === 0)
-  ) {
-    report(parser, Errors.MissingPrivateName);
-  }
-
+  if (!isIdentifierStart(nextCP(parser))) report(parser, Errors.MissingPrivateName);
   return Token.PrivateField;
 }
 
@@ -116,7 +108,7 @@ export function scanPrivateName(parser: ParserState): Token {
  */
 export function scanIdentifierUnicodeEscape(parser: ParserState): number | void {
   // Check for Unicode escape of the form '\uXXXX'
-  // and return code point value if valid Unicode escape is found. Otherwise return -1.
+  // and return code point value if valid Unicode escape is found.
   if (parser.index + 5 < parser.end && parser.source.charCodeAt(parser.index + 1) === Chars.LowerU) {
     parser.nextCP = parser.source.charCodeAt((parser.index += 2));
     return scanUnicodeEscapeValue(parser);
@@ -150,14 +142,14 @@ export function scanUnicodeEscapeValue(parser: ParserState): number {
 
   if ((CharTypes[char] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence); // first one is mandatory
 
-  const c2 = parser.source.charCodeAt(parser.index + 1);
-  if ((CharTypes[c2] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence);
-  const c3 = parser.source.charCodeAt(parser.index + 2);
-  if ((CharTypes[c3] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence);
-  const c4 = parser.source.charCodeAt(parser.index + 3);
-  if ((CharTypes[c4] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence);
+  const char2 = parser.source.charCodeAt(parser.index + 1);
+  if ((CharTypes[char2] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence);
+  const char3 = parser.source.charCodeAt(parser.index + 2);
+  if ((CharTypes[char3] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence);
+  const char4 = parser.source.charCodeAt(parser.index + 3);
+  if ((CharTypes[char4] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence);
 
-  codePoint = (toHex(char) << 12) | (toHex(c2) << 8) | (toHex(c3) << 4) | toHex(c4);
+  codePoint = (toHex(char) << 12) | (toHex(char2) << 8) | (toHex(char3) << 4) | toHex(char4);
 
   parser.nextCP = parser.source.charCodeAt((parser.index += 4));
 

diff --git a/src/parser.ts b/src/parser.ts
@@ -218,7 +218,7 @@ export function parseSource(source: string, options: Options | void, context: Co
   const parser = create(source, sourceFile);
 
   // See: https://github.com/tc39/proposal-hashbang
-  skipHashBang(parser);
+  if (context & Context.OptionsNext) skipHashBang(parser);
 
   const scope: any = context & Context.OptionsLexical ? initblockScope() : {};
 

diff --git a/test/lexer/skiphashbang.ts b/test/lexer/skiphashbang.ts
@@ -1,13 +1,13 @@
 import * as t from 'assert';
 import { Flags, Context } from '../../src/common';
 import { create } from '../../src/parser';
-import { scanSingleToken, skipHashBang } from '../../src/lexer';
+import { skipHashBang } from '../../src/lexer';
 
 describe('Lexer - skiphashbang', () => {
   function pass(name: string, opts: any) {
     it(name, () => {
       const state = create(opts.source);
-      const token = skipHashBang(state);
+      skipHashBang(state);
       t.deepEqual(
         {
           value: state.tokenValue,
@@ -61,38 +61,6 @@ describe('Lexer - skiphashbang', () => {
     column: 0
   });
 
-  pass('skips a BOM in an otherwise empty source', {
-    source: '\uFFEF',
-    newLine: false,
-    hasNext: false,
-    value: '',
-    index: 1,
-    line: 1,
-    column: 0
-  });
-
-  pass('skips a BOM before an identifier', {
-    source: '\uFFEFfoo',
-    newLine: false,
-    hasNext: false,
-    value: '',
-    index: 1,
-    line: 1,
-    column: 0
-  });
-
-  fail('skips a BOM and fails before a lone hash', '\uFFEF# foo');
-
-  pass('skips a BOM before a lone exclamation', {
-    source: '\uFFEF! foo',
-    newLine: false,
-    hasNext: false,
-    value: '',
-    index: 1,
-    line: 1,
-    column: 0
-  });
-
   pass('skips a shebang+LF before a lone hash', {
     source: '#!/foo/bar/baz -abc\n# foo',
     hasNext: true,
@@ -142,13 +110,4 @@ describe('Lexer - skiphashbang', () => {
     line: 2,
     column: 0
   });
-  pass('skips a BOM+shebang+LF in an otherwise empty source', {
-    source: '\uFFEF#!/foo/bar/baz -abc\n',
-    newLine: true,
-    hasNext: false,
-    value: '',
-    index: 21,
-    line: 2,
-    column: 0
-  });
 });
diff --git a/test/parser/miscellaneous/hashbang.ts b/test/parser/miscellaneous/hashbang.ts
@@ -2,7 +2,7 @@ import { Context } from '../../../src/common';
 import * as t from 'assert';
 import { parseSource } from '../../../src/parser';
 
-describe('Miscellaneous - Failure', () => {
+describe('Miscellaneous - Hashbang', () => {
   for (const arg of [
     '/**/ #!\n',
     '//---\n #!\n',
@@ -25,17 +25,17 @@ describe('Miscellaneous - Failure', () => {
   ]) {
     it(`${arg}`, () => {
       t.throws(() => {
-        parseSource(`${arg}`, undefined, Context.OptionsWebCompat);
+        parseSource(`${arg}`, undefined, Context.OptionsWebCompat | Context.OptionsNext);
       });
     });
     it(`${arg}`, () => {
       t.throws(() => {
-        parseSource(`${arg}`, undefined, Context.None);
+        parseSource(`${arg}`, undefined, Context.OptionsNext);
       });
     });
     it(`${arg}`, () => {
       t.throws(() => {
-        parseSource(`${arg}`, undefined, Context.Strict | Context.Module);
+        parseSource(`${arg}`, undefined, Context.Strict | Context.Module | Context.OptionsNext);
       });
     });
   }
@@ -49,17 +49,17 @@ describe('Miscellaneous - Failure', () => {
   ]) {
     it(`${arg}`, () => {
       t.doesNotThrow(() => {
-        parseSource(`${arg}`, undefined, Context.OptionsWebCompat);
+        parseSource(`${arg}`, undefined, Context.OptionsWebCompat | Context.OptionsNext);
       });
     });
     it(`${arg}`, () => {
       t.doesNotThrow(() => {
-        parseSource(`${arg}`, undefined, Context.None);
+        parseSource(`${arg}`, undefined, Context.OptionsNext);
       });
     });
     it(`${arg}`, () => {
       t.doesNotThrow(() => {
-        parseSource(`${arg}`, undefined, Context.Strict | Context.Module);
+        parseSource(`${arg}`, undefined, Context.Strict | Context.Module | Context.OptionsNext);
       });
     });
   }