fix(lexer): fixed potential issue with BOM

meriyah · Jun 30, 2019 · b380d62 · b380d62
1 parent 6ec8310
commit b380d62
Show file tree

Hide file tree

Showing 5 changed files with 10 additions and 13 deletions.
diff --git a/README.md b/README.md
@@ -7,6 +7,8 @@
     <a href="https://lgtm.com/projects/g/meriyah/meriyah/context:javascript"><img src="https://img.shields.io/lgtm/grade/javascript/g/meriyah/meriyah.svg?logo=lgtm&logoWidth=18" alt="GitHub license" /></a>
     <a href="https://lgtm.com/projects/g/meriyah/meriyah/alerts"><img src="https://img.shields.io/lgtm/alerts/g/meriyah/meriyah.svg?logo=lgtm&logoWidth=18" alt="Total alerts" /></a>
     <a href="https://circleci.com/gh/meriyah/meriyah"><img src="https://circleci.com/gh/meriyah/meriyah.svg?style=svg" alt="Circle" /></a>
+    <a href="https://github.com/meriyah/meriyah/blob/master/LICENSE.md"><img src="https://img.shields.io/github/license/meriyah/meriyah.svg" alt="GitHub license" /></a>
+
 </p>
 
 <br>
@@ -136,7 +138,6 @@ This will return when serialized in json:
     ]
 }
 ```
-
 ## ECMAScript compatibility
 
 Meriyah is 100% ECMA spec compatible, but you have to enable several [options](https://github.com/meriyah/meriyah#options) to make sure your code parses with 100% ECMA spec compatibility. This is done because Meriyah's main focus is on performance, and each option you enable will have an impact on its performance.

diff --git a/src/lexer/charClassifier.ts b/src/lexer/charClassifier.ts
@@ -5,9 +5,7 @@ export const enum CharFlags {
   None = 0,
   IdentifierStart       = 1 << 0,
   IdentifierPart        = 1 << 1,
-  WhiteSpace            = 1 << 2, // ECMA-262 11.2 White Space
   KeywordCandidate      = 1 << 6,
-  Asterisk              = 1 << 7,
   LineTerminator        = 1 << 9, // ECMA-262 11.3 Line Terminators
   Decimal               = 1 << 10,
   Octal                 = 1 << 11,
@@ -34,10 +32,10 @@ export const CharTypes = [
   CharFlags.None /* 0x06   */,
   CharFlags.None /* 0x07   */,
   CharFlags.None /* 0x08   */,
-  CharFlags.WhiteSpace /* 0x09   */,
+  CharFlags.None /* 0x09   */,
   CharFlags.LineTerminator | CharFlags.CarriageReturn /* 0x0A   */,
-  CharFlags.WhiteSpace /* 0x0B   */,
-  CharFlags.WhiteSpace /* 0x0C   */,
+  CharFlags.None /* 0x0B   */,
+  CharFlags.None /* 0x0C   */,
   CharFlags.LineTerminator | CharFlags.LineFeed /* 0x0D   */,
   CharFlags.None /* 0x0E   */,
   CharFlags.None /* 0x0F   */,
@@ -57,7 +55,7 @@ export const CharTypes = [
   CharFlags.None /* 0x1D   */,
   CharFlags.None /* 0x1E   */,
   CharFlags.None /* 0x1F   */,
-  CharFlags.WhiteSpace /* 0x20   */,
+  CharFlags.None /* 0x20   */,
   CharFlags.None /* 0x21 ! */,
   CharFlags.None /* 0x22   */,
   CharFlags.None /* 0x23 # */,
@@ -67,7 +65,7 @@ export const CharTypes = [
   CharFlags.None /* 0x27   */,
   CharFlags.None /* 0x28   */,
   CharFlags.None /* 0x29   */,
-  CharFlags.Asterisk /* 0x2A   */,
+  CharFlags.None /* 0x2A   */,
   CharFlags.Exponent /* 0x2B   */,
   CharFlags.None /* 0x2C   */,
   CharFlags.Exponent /* 0x2D   */,

diff --git a/src/lexer/comments.ts b/src/lexer/comments.ts
@@ -15,7 +15,7 @@ export function skipHashBang(parser: ParserState): void {
   if (index === parser.end) return;
   if (parser.nextCP === Chars.ByteOrderMark) {
     parser.index = ++index;
-    parser.nextCP = parser.source.charCodeAt(index);
+    parser.nextCP = parser.source.charCodeAt(parser.index);
   }
 
   if (index < parser.end && parser.nextCP === Chars.Hash) {

diff --git a/src/lexer/common.ts b/src/lexer/common.ts
@@ -57,8 +57,8 @@ export function scanNewLine(parser: ParserState) {
 export function isExoticECMAScriptWhitespace(code: number): boolean {
   /**
    * There are 25 white space characters we need to correctly class.
-   * Lucky for us that we have already classified the lower ASCII range (127) white space, so
-   * what we have to do now is to validate against the remaining
+   * The white space characters in the lower ASCII range (127) have already been classified, so
+   * all that is needed is to validate against the remaining
    * 15 Unicode category "Zs" ("Space_Separator") chars.
    *
    * - 0x1680

diff --git a/test/lexer/charClassifier.ts b/test/lexer/charClassifier.ts
@@ -4,10 +4,8 @@ import { CharFlags, CharTypes } from '../../src/lexer/charClassifier';
 
 describe('Lexer - charClassifier', () => {
   const tokens: [Context, number][] = [
-    [CharFlags.WhiteSpace, 9],
     [CharFlags.LineTerminator, 10],
     [CharFlags.LineTerminator, 10],
-    [CharFlags.WhiteSpace, 32],
     [CharFlags.IdentifierStart | CharFlags.IdentifierPart, 36],
     [CharFlags.IdentifierPart | CharFlags.Decimal, 48],
     [CharFlags.IdentifierPart | CharFlags.Decimal, 55],