feat(lexer): implement numeric literal scanning

meriyah · Jun 28, 2019 · 8ba7461 · 8ba7461
1 parent 6d894e5
commit 8ba7461
Show file tree

Hide file tree

Showing 7 changed files with 227 additions and 77 deletions.
diff --git a/README.md b/README.md
@@ -25,7 +25,8 @@ A 100% compliant, self-hosted javascript parser with high focus on both performa
 
 * [Decorators](https://github.com/tc39/proposal-decorators)
 * [Class Public Instance Fields & Private Instance Fields](https://github.com/tc39/proposal-class-fields)
-* [Hashbang Grammar](https://github.com/tc39/proposal-hashbang)
+* [Hashbang grammar](https://github.com/tc39/proposal-hashbang)
+* [Numeric separators](https://github.com/tc39/proposal-numeric-separator)
 * [Private methods](https://github.com/tc39/proposal-private-methods)
 * [Static class fields and private static methods](https://github.com/tc39/proposal-static-class-features/)
 
@@ -128,6 +129,6 @@ Meriyah is 100% ECMA spec compatible, but you have to enable several [options](h
 
 Also note that support for additional ECMAScript features for Web Browsers (*annexB*) isn't enabled by default as in other parsers, but you can instead parse with and without web compatibility.
 
-This is done because AnnexB is an extension of the language, and also beaucse all the `Test262 suite` tests has no web compability. 
+This is done because AnnexB is an extension of the language, and also because all the `Test262 suite` tests have no web compatibility.
 
 Lexical binding and scope tracking has to be enabled with the `lexical` option.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "meriyah",
-  "version": "1.0.2",
+  "version": "1.1.0",
   "description": "A 100% compliant, self-hosted javascript parser with high focus on both performance and stability",
   "main": "dist/meriyah.umd.js",
   "module": "dist/meriyah.esm.js",

diff --git a/src/errors.ts b/src/errors.ts
@@ -155,7 +155,9 @@ export const enum Errors {
   UndeclaredExportedBinding,
   UnexpectedPrivateField,
   DuplicateLetConstBinding,
-  CantAssignToValidRHS
+  CantAssignToValidRHS,
+  ContinuousNumericSeparator,
+  TrailingNumericSeparator
 }
 
 /*@internal*/
@@ -322,7 +324,9 @@ export const errorMessages: {
   [Errors.DuplicateExportBinding]: "Cannot export a duplicate name '%0'",
   [Errors.DuplicateLetConstBinding]: 'Duplicate %0 for-binding',
   [Errors.UndeclaredExportedBinding]: "Exported binding '%0' needs to refer to a top-level declared variable",
-  [Errors.UnexpectedPrivateField]: 'Unexpected private field'
+  [Errors.UnexpectedPrivateField]: 'Unexpected private field',
+  [Errors.TrailingNumericSeparator]: 'Numeric separators are not allowed at the end of numeric literals',
+  [Errors.ContinuousNumericSeparator]: 'Only one underscore is allowed as numeric separator'
 };
 
 export class ParseError extends SyntaxError {

diff --git a/src/lexer/numeric.ts b/src/lexer/numeric.ts
@@ -13,143 +13,244 @@ export const enum NumberKind {
   DecimalWithLeadingZero = 1 << 5
 }
 
+export const enum SeparatorState {
+  None = 0,
+  Allowed = 1 << 0,
+  Previous = 1 << 1
+}
+
+export function scanDigits(parser: ParserState, char: number): string {
+  let seenSeparator = false;
+  let start = parser.index;
+  let ret = '';
+  while (CharTypes[char] & (CharFlags.Decimal | CharFlags.Underscore)) {
+    if (char === Chars.Underscore) {
+      const preUnderscoreIndex = parser.index;
+      char = nextCP(parser);
+      if (char === Chars.Underscore) report(parser, Errors.ContinuousNumericSeparator);
+      seenSeparator = true;
+      ret += parser.source.substring(start, preUnderscoreIndex);
+      start = parser.index;
+      continue;
+    }
+    seenSeparator = false;
+    char = nextCP(parser);
+  }
+  if (seenSeparator) {
+    report(parser, Errors.TrailingNumericSeparator);
+  }
+
+  return ret + parser.source.substring(start, parser.index);
+}
+
 export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1): Token {
   let kind: NumberKind = NumberKind.Decimal;
-  let value: number | string = 0;
+  let char = parser.nextCP;
+  let value: any = 0;
+  let digit = 9;
+  let atStart = !isFloat;
+  let state = SeparatorState.None;
 
   if (isFloat) {
-    while (CharTypes[nextCP(parser)] & CharFlags.Decimal) {}
+    if (char === Chars.Underscore) report(parser, Errors.Unexpected);
+    value += '.' + scanDigits(parser, char);
+    char = parser.nextCP;
   } else {
-    if (parser.nextCP === Chars.Zero) {
-      nextCP(parser);
+    if (char === Chars.Zero) {
+      char = nextCP(parser);
 
       // Hex
-      if ((parser.nextCP | 32) === Chars.LowerX) {
+      if ((char | 32) === Chars.LowerX) {
         kind = NumberKind.Hex;
         let digits = 0;
-        while (CharTypes[nextCP(parser)] & CharFlags.Hex) {
-          value = value * 0x10 + toHex(parser.nextCP);
+
+        char = nextCP(parser);
+        while (CharTypes[char] & (CharFlags.Hex | CharFlags.Underscore)) {
+          if (char === Chars.Underscore) {
+            //  let seenSeparator = 1;
+            if (state & SeparatorState.Allowed) {
+              state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Allowed;
+            } else if (state & SeparatorState.Previous) {
+              report(parser, Errors.ContinuousNumericSeparator);
+            } else {
+              report(parser, Errors.ContinuousNumericSeparator);
+            }
+            char = nextCP(parser);
+            continue;
+          }
+          state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Previous;
+          value = value * 0x10 + toHex(char);
           digits++;
+          char = nextCP(parser);
         }
+
+        if (parser.source.charCodeAt(parser.index - 1) === Chars.Underscore)
+          report(parser, Errors.TrailingNumericSeparator);
+
+        char = parser.nextCP;
         if (digits < 1) report(parser, Errors.MissingHexDigits);
         // Octal
-      } else if ((parser.nextCP | 32) === Chars.LowerO) {
+      } else if ((char | 32) === Chars.LowerO) {
         kind = NumberKind.Octal;
         let digits = 0;
-        while (CharTypes[nextCP(parser)] & CharFlags.Octal) {
-          value = value * 8 + (parser.nextCP - Chars.Zero);
+
+        char = nextCP(parser);
+        while (CharTypes[char] & (CharFlags.Octal | CharFlags.Underscore)) {
+          if (char === Chars.Underscore) {
+            //  let seenSeparator = 1;
+            if (state & SeparatorState.Allowed) {
+              state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Allowed;
+            } else if (state & SeparatorState.Previous) {
+              report(parser, Errors.ContinuousNumericSeparator);
+            } else {
+              report(parser, Errors.ContinuousNumericSeparator);
+            }
+            char = nextCP(parser);
+            continue;
+          }
+          state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Previous;
+          value = value * 8 + (char - Chars.Zero);
           digits++;
+          char = nextCP(parser);
         }
+
+        if (parser.source.charCodeAt(parser.index - 1) === Chars.Underscore)
+          report(parser, Errors.TrailingNumericSeparator);
+
         if (digits < 1) report(parser, Errors.ExpectedNumberInRadix, `${8}`);
-      } else if ((parser.nextCP | 32) === Chars.LowerB) {
+      } else if ((char | 32) === Chars.LowerB) {
         kind = NumberKind.Binary;
         let digits = 0;
-        while (CharTypes[nextCP(parser)] & CharFlags.Binary) {
-          value = value * 2 + (parser.nextCP - Chars.Zero);
+        char = nextCP(parser);
+        while (CharTypes[char] & (CharFlags.Binary | CharFlags.Underscore)) {
+          if (char === Chars.Underscore) {
+            if (state & SeparatorState.Allowed) {
+              state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Allowed;
+            } else if (state & SeparatorState.Previous) {
+              report(parser, Errors.ContinuousNumericSeparator);
+            } else {
+              report(parser, Errors.ContinuousNumericSeparator);
+            }
+            char = nextCP(parser);
+            continue;
+          }
+          state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Previous;
+          value = value * 2 + (char - Chars.Zero);
           digits++;
+          char = nextCP(parser);
         }
+        if (parser.source.charCodeAt(parser.index - 1) === Chars.Underscore)
+          report(parser, Errors.TrailingNumericSeparator);
         if (digits < 1) report(parser, Errors.ExpectedNumberInRadix, `${2}`);
-      } else if (CharTypes[parser.nextCP] & CharFlags.Octal) {
+      } else if (CharTypes[char] & CharFlags.Octal) {
         // Octal integer literals are not permitted in strict mode code
         if (context & Context.Strict) report(parser, Errors.StrictOctalEscape);
         kind = NumberKind.ImplicitOctal;
         do {
           if (CharTypes[parser.nextCP] & CharFlags.ImplicitOctalDigits) {
             kind = NumberKind.DecimalWithLeadingZero;
-            isFloat = 0;
+            atStart = false;
             break;
           }
           value = value * 8 + (parser.nextCP - Chars.Zero);
         } while (CharTypes[nextCP(parser)] & CharFlags.Decimal);
-      } else if (CharTypes[parser.nextCP] & CharFlags.ImplicitOctalDigits) {
+        char = parser.nextCP;
+      } else if (CharTypes[char] & CharFlags.ImplicitOctalDigits) {
         if (context & Context.Strict) report(parser, Errors.StrictOctalEscape);
         else parser.flags = Flags.Octals;
         kind = NumberKind.DecimalWithLeadingZero;
+        char = parser.nextCP;
+      } else if (char === Chars.Underscore) {
+        report(parser, Errors.Unexpected);
       }
     }
 
     // Parse decimal digits and allow trailing fractional part
     if (kind & (NumberKind.Decimal | NumberKind.DecimalWithLeadingZero)) {
-      if (isFloat) {
-        // scan subsequent decimal digits
-        let digit = 9;
-        while (digit >= 0 && CharTypes[nextCP(parser)] & CharFlags.Decimal) {
-          value = 10 * value + (parser.nextCP - Chars.Zero);
+      let seenSeparator = 0;
+
+      if (atStart) {
+        while (digit >= 0 && CharTypes[char] & (CharFlags.Decimal | CharFlags.Underscore)) {
+          if (char === Chars.Underscore) {
+            char = nextCP(parser);
+            if (char === Chars.Underscore) report(parser, Errors.ContinuousNumericSeparator);
+            seenSeparator = 1;
+            continue;
+          }
+          seenSeparator = 0;
+          value = 10 * value + (char - Chars.Zero);
+          char = nextCP(parser);
           --digit;
         }
+        if (seenSeparator) {
+          report(parser, Errors.TrailingNumericSeparator);
+        }
 
-        if (digit >= 0 && !isIdentifierStart(parser.nextCP) && parser.nextCP !== Chars.Period) {
-          if (context & Context.OptionsRaw) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);
+        if (digit >= 0 && !isIdentifierStart(char) && char !== Chars.Period && char !== Chars.Underscore) {
+          // Most numbers are pure decimal integers without fractional component
+          // or exponential notation.  Handle that with optimized code.
           parser.tokenValue = value;
+          if (context & Context.OptionsRaw) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);
           return Token.NumericLiteral;
         }
       }
 
-      while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
-        nextCP(parser);
-      }
+      value += scanDigits(parser, char);
 
-      // Scan any decimal dot and fractional component
-      if (parser.nextCP === Chars.Period) {
+      char = parser.nextCP;
+
+      // Consume any decimal dot and fractional component.
+      if (char === Chars.Period) {
+        char = nextCP(parser);
+        if ((char as number) === Chars.Underscore) report(parser, Errors.Unexpected);
         isFloat = 1;
-        nextCP(parser); // consumes '.'
-        while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
-          nextCP(parser);
-        }
+        value += '.' + scanDigits(parser, char);
+        char = parser.nextCP;
       }
     }
   }
+  const end = parser.index;
 
   let isBigInt: 0 | 1 = 0;
 
-  if (
-    parser.nextCP === Chars.LowerN &&
-    (kind & (NumberKind.Decimal | NumberKind.Binary | NumberKind.Octal | NumberKind.Hex)) !== 0
-  ) {
+  if (char === Chars.LowerN) {
     if (isFloat) report(parser, Errors.InvalidBigInt);
     isBigInt = 1;
-    nextCP(parser);
-    // Scan any exponential notation
-  } else if ((parser.nextCP | 32) === Chars.LowerE) {
-    if ((kind & (NumberKind.Decimal | NumberKind.DecimalWithLeadingZero)) === 0) {
-      report(parser, Errors.MissingExponent);
-    }
+    char = nextCP(parser);
+  } else {
+    // Consume any exponential notation.
+    if ((parser.nextCP | 32) === Chars.LowerE) {
+      char = nextCP(parser);
+      // '-', '+'
+      if (CharTypes[char] & CharFlags.Exponent) {
+        char = nextCP(parser);
+      }
 
-    nextCP(parser);
+      const preNumericPart = parser.index;
 
-    // '-', '+'
-    if (CharTypes[parser.nextCP] & CharFlags.Exponent) {
-      nextCP(parser);
-    }
+      // Exponential notation must contain at least one digit
+      if ((CharTypes[char] & CharFlags.Decimal) < 1) report(parser, Errors.MissingExponent);
 
-    let exponentDigits = 0;
-    // Consume exponential digits
-    while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
-      nextCP(parser);
-      exponentDigits++;
-    }
-    // Exponential notation must contain at least one digit
-    if (exponentDigits < 1) {
-      report(parser, Errors.MissingExponent);
+      // Consume exponential digits
+      value += parser.source.substring(end, preNumericPart) + scanDigits(parser, char);
+
+      char = parser.nextCP;
     }
   }
 
   // The source character immediately following a numeric literal must
   // not be an identifier start or a decimal digit
-  if (CharTypes[parser.nextCP] & CharFlags.Decimal || isIdentifierStart(parser.nextCP)) {
+  if (CharTypes[char] & CharFlags.Decimal || isIdentifierStart(char)) {
     report(parser, Errors.IDStartAfterNumber);
   }
-  if (context & Context.OptionsRaw) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);
-  parser.tokenValue =
-    kind & (NumberKind.ImplicitOctal | NumberKind.Binary | NumberKind.Hex | NumberKind.Octal)
-      ? value
-      : kind & NumberKind.DecimalWithLeadingZero
-      ? parseFloat(parser.source.slice(parser.tokenIndex, parser.index))
-      : isBigInt
-      ? parseInt(parser.source.slice(parser.tokenIndex, parser.index), 0xa)
-      : +parser.source.slice(parser.tokenIndex, parser.index);
 
   if (context & Context.OptionsRaw || isBigInt) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);
 
-  return isBigInt ? Token.BigIntLiteral : Token.NumericLiteral;
+  if (kind & NumberKind.DecimalWithLeadingZero)
+    parser.tokenValue = parseFloat(parser.source.slice(parser.tokenIndex, parser.index));
+  else parser.tokenValue = parseFloat(value);
+
+  if (isBigInt) return Token.BigIntLiteral;
+
+  return Token.NumericLiteral;
 }
diff --git a/src/meriyah.ts b/src/meriyah.ts
@@ -23,4 +23,4 @@ export function parse(source: string, options: Options | void): ESTree.Program {
   return parseSource(source, options, Context.None);
 }
 
-export const version = '1.0.0';
+export const version = '1.1.0';