Skip to content

Commit

Permalink
feat(lexer): implement numeric literal scanning
Browse files Browse the repository at this point in the history
  • Loading branch information
KFlash committed Jun 28, 2019
1 parent 6d894e5 commit 8ba7461
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 77 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ A 100% compliant, self-hosted javascript parser with high focus on both performa

* [Decorators](https://github.com/tc39/proposal-decorators)
* [Class Public Instance Fields & Private Instance Fields](https://github.com/tc39/proposal-class-fields)
* [Hashbang Grammar](https://github.com/tc39/proposal-hashbang)
* [Hashbang grammar](https://github.com/tc39/proposal-hashbang)
* [Numeric separators](https://github.com/tc39/proposal-numeric-separator)
* [Private methods](https://github.com/tc39/proposal-private-methods)
* [Static class fields and private static methods](https://github.com/tc39/proposal-static-class-features/)

Expand Down Expand Up @@ -128,6 +129,6 @@ Meriyah is 100% ECMA spec compatible, but you have to enable several [options](h

Also note that support for additional ECMAScript features for Web Browsers (*annexB*) isn't enabled by default as in other parsers, but you can instead parse with and without web compatibility.

This is done because AnnexB is an extension of the language, and also because all the `Test262 suite` tests have no web compatibility.
This is done because AnnexB is an extension of the language, and also because all the `Test262 suite` tests have no web compatibility.

Lexical binding and scope tracking has to be enabled with the `lexical` option.
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "meriyah",
"version": "1.0.2",
"version": "1.1.0",
"description": "A 100% compliant, self-hosted javascript parser with high focus on both performance and stability",
"main": "dist/meriyah.umd.js",
"module": "dist/meriyah.esm.js",
Expand Down
8 changes: 6 additions & 2 deletions src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ export const enum Errors {
UndeclaredExportedBinding,
UnexpectedPrivateField,
DuplicateLetConstBinding,
CantAssignToValidRHS
CantAssignToValidRHS,
ContinuousNumericSeparator,
TrailingNumericSeparator
}

/*@internal*/
Expand Down Expand Up @@ -322,7 +324,9 @@ export const errorMessages: {
[Errors.DuplicateExportBinding]: "Cannot export a duplicate name '%0'",
[Errors.DuplicateLetConstBinding]: 'Duplicate %0 for-binding',
[Errors.UndeclaredExportedBinding]: "Exported binding '%0' needs to refer to a top-level declared variable",
[Errors.UnexpectedPrivateField]: 'Unexpected private field'
[Errors.UnexpectedPrivateField]: 'Unexpected private field',
[Errors.TrailingNumericSeparator]: 'Numeric separators are not allowed at the end of numeric literals',
[Errors.ContinuousNumericSeparator]: 'Only one underscore is allowed as numeric separator'
};

export class ParseError extends SyntaxError {
Expand Down
235 changes: 168 additions & 67 deletions src/lexer/numeric.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,143 +13,244 @@ export const enum NumberKind {
DecimalWithLeadingZero = 1 << 5
}

/**
 * Bit flags describing where the scanner stands relative to numeric
 * separators (`_`) while consuming the digits of a prefixed literal.
 *
 * As used by the radix loops in `scanNumber`: consuming a digit sets
 * `Allowed` and clears `Previous`; consuming a separator sets `Previous`
 * and clears `Allowed`.
 */
export const enum SeparatorState {
  /** Initial state: nothing consumed yet, so a separator is not permitted. */
  None = 0,
  /** The last consumed character was a digit; one separator may follow. */
  Allowed = 1,
  /** The last consumed character was a separator. */
  Previous = Allowed << 1
}

/**
 * Consumes a run of decimal digits and numeric separators (`_`) and returns
 * the digits with every separator stripped out.
 *
 * Reports `Errors.ContinuousNumericSeparator` when a separator is directly
 * followed by another separator, and `Errors.TrailingNumericSeparator` when
 * the run ends on a separator.
 *
 * @param parser - Lexer state; advanced through `nextCP`, and `parser.source`
 *   is sliced between `parser.index` positions to collect the digits.
 * @param char - Code point to start from (presumably the one at
 *   `parser.index` — confirm against callers).
 * @returns The consumed digits without separators; an empty string when
 *   `char` is neither a decimal digit nor a separator.
 */
export function scanDigits(parser: ParserState, char: number): string {
  let collected = '';
  let chunkStart = parser.index;
  let endsOnSeparator = false;

  while (CharTypes[char] & (CharFlags.Decimal | CharFlags.Underscore)) {
    if (char !== Chars.Underscore) {
      // Plain digit: it stays part of the current chunk, just move on.
      endsOnSeparator = false;
      char = nextCP(parser);
      continue;
    }

    // Separator: remember where it sits, step over it, then flush the
    // digits gathered so far and start a new chunk right after it.
    const separatorIndex = parser.index;
    char = nextCP(parser);
    if (char === Chars.Underscore) report(parser, Errors.ContinuousNumericSeparator);
    endsOnSeparator = true;
    collected += parser.source.substring(chunkStart, separatorIndex);
    chunkStart = parser.index;
  }

  if (endsOnSeparator) report(parser, Errors.TrailingNumericSeparator);

  return collected + parser.source.substring(chunkStart, parser.index);
}

/**
 * Scans a numeric literal (decimal, hex `0x`, octal `0o`, binary `0b`,
 * legacy leading-zero forms, optional fraction, exponent and BigInt `n`
 * suffix) including numeric separators, and stores the result on `parser`.
 *
 * NOTE(review): this span appears to be a rendered commit diff without +/-
 * markers, so removed and added lines are interleaved (e.g. `value` is
 * declared twice, and several `if` headers appear once in a
 * `parser.nextCP` form and once in a `char` form). It is not compilable as
 * shown; confirm the real function body against the repository.
 *
 * @param parser - Lexer state. Read here: `nextCP`, `index`, `source`,
 *   `tokenIndex`. Written here: `tokenValue`, `tokenRaw`, `flags`.
 * @param context - Bit flags; `Context.Strict` and `Context.OptionsRaw` are
 *   the ones consulted in this function.
 * @param isFloat - 1 when scanning starts in the fractional part
 *   (presumably the caller has already seen a leading `.` — confirm).
 * @returns `Token.BigIntLiteral` or `Token.NumericLiteral`.
 */
export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1): Token {
let kind: NumberKind = NumberKind.Decimal;
let value: number | string = 0;
let char = parser.nextCP;
let value: any = 0;
// Countdown that bounds the integer fast path below to ten digits.
let digit = 9;
let atStart = !isFloat;
let state = SeparatorState.None;

if (isFloat) {
while (CharTypes[nextCP(parser)] & CharFlags.Decimal) {}
// A separator may not directly start the fractional digits.
if (char === Chars.Underscore) report(parser, Errors.Unexpected);
value += '.' + scanDigits(parser, char);
char = parser.nextCP;
} else {
if (parser.nextCP === Chars.Zero) {
nextCP(parser);
if (char === Chars.Zero) {
char = nextCP(parser);

// Hex
// `| 32` folds ASCII upper case onto lower case, so 'X' matches too.
if ((parser.nextCP | 32) === Chars.LowerX) {
if ((char | 32) === Chars.LowerX) {
kind = NumberKind.Hex;
let digits = 0;
while (CharTypes[nextCP(parser)] & CharFlags.Hex) {
value = value * 0x10 + toHex(parser.nextCP);

char = nextCP(parser);
while (CharTypes[char] & (CharFlags.Hex | CharFlags.Underscore)) {
if (char === Chars.Underscore) {
// let seenSeparator = 1;
if (state & SeparatorState.Allowed) {
// Separator accepted: set `Previous`, clear `Allowed`.
state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Allowed;
} else if (state & SeparatorState.Previous) {
report(parser, Errors.ContinuousNumericSeparator);
} else {
// NOTE(review): a separator before any digit (state `None`) reports the
// same error as a doubled separator — confirm this message is intended.
report(parser, Errors.ContinuousNumericSeparator);
}
char = nextCP(parser);
continue;
}
// Digit consumed: set `Allowed`, clear `Previous`.
state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Previous;
value = value * 0x10 + toHex(char);
digits++;
char = nextCP(parser);
}

// The last consumed character must not be a separator.
if (parser.source.charCodeAt(parser.index - 1) === Chars.Underscore)
report(parser, Errors.TrailingNumericSeparator);

char = parser.nextCP;
if (digits < 1) report(parser, Errors.MissingHexDigits);
// Octal
} else if ((parser.nextCP | 32) === Chars.LowerO) {
} else if ((char | 32) === Chars.LowerO) {
kind = NumberKind.Octal;
let digits = 0;
while (CharTypes[nextCP(parser)] & CharFlags.Octal) {
value = value * 8 + (parser.nextCP - Chars.Zero);

char = nextCP(parser);
while (CharTypes[char] & (CharFlags.Octal | CharFlags.Underscore)) {
if (char === Chars.Underscore) {
// let seenSeparator = 1;
if (state & SeparatorState.Allowed) {
// Separator accepted: set `Previous`, clear `Allowed`.
state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Allowed;
} else if (state & SeparatorState.Previous) {
report(parser, Errors.ContinuousNumericSeparator);
} else {
report(parser, Errors.ContinuousNumericSeparator);
}
char = nextCP(parser);
continue;
}
// Digit consumed: set `Allowed`, clear `Previous`.
state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Previous;
value = value * 8 + (char - Chars.Zero);
digits++;
char = nextCP(parser);
}

// The last consumed character must not be a separator.
if (parser.source.charCodeAt(parser.index - 1) === Chars.Underscore)
report(parser, Errors.TrailingNumericSeparator);

if (digits < 1) report(parser, Errors.ExpectedNumberInRadix, `${8}`);
// Binary
} else if ((parser.nextCP | 32) === Chars.LowerB) {
} else if ((char | 32) === Chars.LowerB) {
kind = NumberKind.Binary;
let digits = 0;
while (CharTypes[nextCP(parser)] & CharFlags.Binary) {
value = value * 2 + (parser.nextCP - Chars.Zero);
char = nextCP(parser);
while (CharTypes[char] & (CharFlags.Binary | CharFlags.Underscore)) {
if (char === Chars.Underscore) {
if (state & SeparatorState.Allowed) {
// Separator accepted: set `Previous`, clear `Allowed`.
state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Allowed;
} else if (state & SeparatorState.Previous) {
report(parser, Errors.ContinuousNumericSeparator);
} else {
report(parser, Errors.ContinuousNumericSeparator);
}
char = nextCP(parser);
continue;
}
// Digit consumed: set `Allowed`, clear `Previous`.
state = (state | SeparatorState.Allowed | SeparatorState.Previous) ^ SeparatorState.Previous;
value = value * 2 + (char - Chars.Zero);
digits++;
char = nextCP(parser);
}
// The last consumed character must not be a separator.
if (parser.source.charCodeAt(parser.index - 1) === Chars.Underscore)
report(parser, Errors.TrailingNumericSeparator);
if (digits < 1) report(parser, Errors.ExpectedNumberInRadix, `${2}`);
} else if (CharTypes[parser.nextCP] & CharFlags.Octal) {
} else if (CharTypes[char] & CharFlags.Octal) {
// Octal integer literals are not permitted in strict mode code
if (context & Context.Strict) report(parser, Errors.StrictOctalEscape);
kind = NumberKind.ImplicitOctal;
do {
// A digit flagged `ImplicitOctalDigits` switches the literal to the
// decimal-with-leading-zero kind and stops the octal accumulation.
if (CharTypes[parser.nextCP] & CharFlags.ImplicitOctalDigits) {
kind = NumberKind.DecimalWithLeadingZero;
isFloat = 0;
atStart = false;
break;
}
value = value * 8 + (parser.nextCP - Chars.Zero);
} while (CharTypes[nextCP(parser)] & CharFlags.Decimal);
} else if (CharTypes[parser.nextCP] & CharFlags.ImplicitOctalDigits) {
char = parser.nextCP;
} else if (CharTypes[char] & CharFlags.ImplicitOctalDigits) {
if (context & Context.Strict) report(parser, Errors.StrictOctalEscape);
else parser.flags = Flags.Octals;
kind = NumberKind.DecimalWithLeadingZero;
char = parser.nextCP;
} else if (char === Chars.Underscore) {
// A separator directly after the leading zero is rejected.
report(parser, Errors.Unexpected);
}
}

// Parse decimal digits and allow trailing fractional part
if (kind & (NumberKind.Decimal | NumberKind.DecimalWithLeadingZero)) {
if (isFloat) {
// scan subsequent decimal digits
let digit = 9;
while (digit >= 0 && CharTypes[nextCP(parser)] & CharFlags.Decimal) {
value = 10 * value + (parser.nextCP - Chars.Zero);
let seenSeparator = 0;

if (atStart) {
// Fast path: accumulate the value numerically while the digit budget lasts.
// Separators are skipped and do not consume the budget.
while (digit >= 0 && CharTypes[char] & (CharFlags.Decimal | CharFlags.Underscore)) {
if (char === Chars.Underscore) {
char = nextCP(parser);
if (char === Chars.Underscore) report(parser, Errors.ContinuousNumericSeparator);
seenSeparator = 1;
continue;
}
seenSeparator = 0;
value = 10 * value + (char - Chars.Zero);
char = nextCP(parser);
--digit;
}
if (seenSeparator) {
report(parser, Errors.TrailingNumericSeparator);
}

if (digit >= 0 && !isIdentifierStart(parser.nextCP) && parser.nextCP !== Chars.Period) {
if (context & Context.OptionsRaw) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);
if (digit >= 0 && !isIdentifierStart(char) && char !== Chars.Period && char !== Chars.Underscore) {
// Most numbers are pure decimal integers without fractional component
// or exponential notation. Handle that with optimized code.
parser.tokenValue = value;
if (context & Context.OptionsRaw) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);
return Token.NumericLiteral;
}
}

while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
nextCP(parser);
}
// From here on `value` is built by string concatenation of the scanned digits.
value += scanDigits(parser, char);

// Scan any decimal dot and fractional component
if (parser.nextCP === Chars.Period) {
char = parser.nextCP;

// Consume any decimal dot and fractional component.
if (char === Chars.Period) {
char = nextCP(parser);
// A separator may not directly follow the decimal point.
if ((char as number) === Chars.Underscore) report(parser, Errors.Unexpected);
isFloat = 1;
nextCP(parser); // consumes '.'
while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
nextCP(parser);
}
value += '.' + scanDigits(parser, char);
char = parser.nextCP;
}
}
}
// Index just past the mantissa; used below to slice out the exponent marker and sign.
const end = parser.index;

let isBigInt: 0 | 1 = 0;

if (
parser.nextCP === Chars.LowerN &&
(kind & (NumberKind.Decimal | NumberKind.Binary | NumberKind.Octal | NumberKind.Hex)) !== 0
) {
// BigInt suffix `n`; reported as invalid when a fractional part was scanned.
if (char === Chars.LowerN) {
if (isFloat) report(parser, Errors.InvalidBigInt);
isBigInt = 1;
nextCP(parser);
// Scan any exponential notation
} else if ((parser.nextCP | 32) === Chars.LowerE) {
if ((kind & (NumberKind.Decimal | NumberKind.DecimalWithLeadingZero)) === 0) {
report(parser, Errors.MissingExponent);
}
char = nextCP(parser);
} else {
// Consume any exponential notation.
if ((parser.nextCP | 32) === Chars.LowerE) {
char = nextCP(parser);
// '-', '+'
if (CharTypes[char] & CharFlags.Exponent) {
char = nextCP(parser);
}

nextCP(parser);
const preNumericPart = parser.index;

// '-', '+'
if (CharTypes[parser.nextCP] & CharFlags.Exponent) {
nextCP(parser);
}
// Exponential notation must contain at least one digit
if ((CharTypes[char] & CharFlags.Decimal) < 1) report(parser, Errors.MissingExponent);

let exponentDigits = 0;
// Consume exponential digits
while (CharTypes[parser.nextCP] & CharFlags.Decimal) {
nextCP(parser);
exponentDigits++;
}
// Exponential notation must contain at least one digit
if (exponentDigits < 1) {
report(parser, Errors.MissingExponent);
// Consume exponential digits
value += parser.source.substring(end, preNumericPart) + scanDigits(parser, char);

char = parser.nextCP;
}
}

// The source character immediately following a numeric literal must
// not be an identifier start or a decimal digit
if (CharTypes[parser.nextCP] & CharFlags.Decimal || isIdentifierStart(parser.nextCP)) {
if (CharTypes[char] & CharFlags.Decimal || isIdentifierStart(char)) {
report(parser, Errors.IDStartAfterNumber);
}
if (context & Context.OptionsRaw) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);
parser.tokenValue =
kind & (NumberKind.ImplicitOctal | NumberKind.Binary | NumberKind.Hex | NumberKind.Octal)
? value
: kind & NumberKind.DecimalWithLeadingZero
? parseFloat(parser.source.slice(parser.tokenIndex, parser.index))
: isBigInt
? parseInt(parser.source.slice(parser.tokenIndex, parser.index), 0xa)
: +parser.source.slice(parser.tokenIndex, parser.index);

if (context & Context.OptionsRaw || isBigInt) parser.tokenRaw = parser.source.slice(parser.tokenIndex, parser.index);

return isBigInt ? Token.BigIntLiteral : Token.NumericLiteral;
// Leading-zero decimals are re-parsed from the raw source slice; everything
// else goes through `parseFloat` on the accumulated `value`.
if (kind & NumberKind.DecimalWithLeadingZero)
parser.tokenValue = parseFloat(parser.source.slice(parser.tokenIndex, parser.index));
else parser.tokenValue = parseFloat(value);

if (isBigInt) return Token.BigIntLiteral;

return Token.NumericLiteral;
}
2 changes: 1 addition & 1 deletion src/meriyah.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ export function parse(source: string, options: Options | void): ESTree.Program {
return parseSource(source, options, Context.None);
}

export const version = '1.0.0';
export const version = '1.1.0';
Loading

0 comments on commit 8ba7461

Please sign in to comment.