Skip to content

Commit

Permalink
feat(parser): Distinguish Identifier from IdentifierPattern
Browse files Browse the repository at this point in the history
See ESTree issue 196.
  • Loading branch information
KFlash committed Jun 29, 2019
1 parent e359262 commit 68da76b
Show file tree
Hide file tree
Showing 12 changed files with 179 additions and 126 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ The second argument allows you to specify various options:
| ----------- | ------------------------------------------------------------ |
| `directives` | Add the [directive prologue](https://github.com/danez/estree/blob/directive/es5.md#directive) to each literal node |
| `globalReturn` | Allow `return` in the global scope |
| `identifierPattern` | Distinguish Identifier from IdentifierPattern |
| `impliedStrict` | Enable strict mode (*initial enforcement*) |
| `lexical` | Enable lexical binding and scope tracking |
| `loc` | Add line/column location information to each node |
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "meriyah",
"version": "1.2.2",
"version": "1.2.3",
"description": "A 100% compliant, self-hosted javascript parser with high focus on both performance and stability",
"main": "dist/meriyah.umd.js",
"module": "dist/meriyah.esm.js",
Expand Down
1 change: 1 addition & 0 deletions src/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export const enum Context {
AllowNewTarget = 1 << 26,
DisallowIn = 1 << 27,
InClass = 1 << 28,
OptionsIdentifierPattern = 1 << 29,
}

export const enum PropertyKind {
Expand Down
2 changes: 1 addition & 1 deletion src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ export const errorMessages: {
[Errors.StrictOctalEscape]: 'Octal escape sequences are not allowed in strict mode',
[Errors.TemplateOctalLiteral]: 'Octal escape sequences are not allowed in template strings',
[Errors.InvalidPrivateName]: 'Unexpected token `#`',
[Errors.InvalidUnicodeEscapeSequence]: 'Invalid Unicode escape sequence',
[Errors.InvalidUnicodeEscapeSequence]: 'Illegal Unicode escape sequence',
[Errors.InvalidCodePoint]: 'Invalid code point %0',
[Errors.InvalidHexEscapeSequence]: 'Invalid hexadecimal escape sequence',
[Errors.StrictOctalLiteral]: 'Octal literals are not allowed in strict mode',
Expand Down
8 changes: 4 additions & 4 deletions src/lexer/comments.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { nextCP, CharTypes, CharFlags, LexerState, advanceNewline, consumeLineFeed } from './';
import { nextCP, CharTypes, CharFlags, LexerState, scanNewLine, consumeLineFeed } from './';
import { Chars } from '../chars';
import { ParserState } from '../common';
import { report, Errors } from '../errors';
Expand Down Expand Up @@ -40,7 +40,7 @@ export function skipSingleLineComment(parser: ParserState, state: LexerState): L
while (parser.index < parser.end) {
if (CharTypes[parser.nextCP] & CharFlags.LineTerminator || (parser.nextCP ^ Chars.LineSeparator) <= 1) {
state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
advanceNewline(parser);
scanNewLine(parser);
return state;
}
nextCP(parser);
Expand All @@ -65,13 +65,13 @@ export function skipMultiLineComment(parser: ParserState, state: LexerState): Le

if (parser.nextCP === Chars.CarriageReturn) {
state |= LexerState.NewLine | LexerState.LastIsCR;
advanceNewline(parser);
scanNewLine(parser);
} else if (parser.nextCP === Chars.LineFeed) {
consumeLineFeed(parser, (state & LexerState.LastIsCR) !== 0);
state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
} else if ((parser.nextCP ^ Chars.LineSeparator) <= 1) {
state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
advanceNewline(parser);
scanNewLine(parser);
} else {
nextCP(parser);
}
Expand Down
4 changes: 2 additions & 2 deletions src/lexer/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export function consumeMultiUnitCodePoint(parser: ParserState, hi: number): bool
}

/**
* Use to consume a line feed instead of `advanceNewline`.
* Use to consume a line feed instead of `scanNewLine`.
*/
export function consumeLineFeed(parser: ParserState, lastIsCR: boolean) {
parser.nextCP = parser.source.charCodeAt(++parser.index);
Expand All @@ -46,7 +46,7 @@ export function consumeLineFeed(parser: ParserState, lastIsCR: boolean) {
}
}

export function advanceNewline(parser: ParserState) {
export function scanNewLine(parser: ParserState) {
parser.flags |= Flags.NewLine;
parser.nextCP = parser.source.charCodeAt(++parser.index);
parser.column = 0;
Expand Down
22 changes: 9 additions & 13 deletions src/lexer/identifier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ export function scanIdentifier(parser: ParserState, context: Context): Token {
const canBeKeyword = CharTypes[parser.nextCP] & CharFlags.KeywordCandidate;
while ((CharTypes[nextCP(parser)] & CharFlags.IdentifierPart) !== 0) {}
parser.tokenValue = parser.source.slice(parser.tokenIndex, parser.index);
const hasEscape = CharTypes[parser.nextCP] & CharFlags.BackSlash;
if (!hasEscape && parser.nextCP < 0x7e) {
if ((CharTypes[parser.nextCP] & CharFlags.BackSlash) === 0 && parser.nextCP < 0x7e) {
return descKeywordTable[parser.tokenValue] || Token.Identifier;
}

return scanIdentifierSlowCase(parser, context, hasEscape, canBeKeyword);
// Slow path that has to deal with multi unit encoding
return scanIdentifierSlowCase(parser, context, 0, canBeKeyword);
}

/**
Expand All @@ -48,7 +47,7 @@ export function scanUnicodeIdentifier(parser: ParserState, context: Context): To
export function scanIdentifierSlowCase(
parser: ParserState,
context: Context,
hasEscape: number,
hasEscape: 0 | 1,
canBeKeyword: number
): Token {
let start = parser.index;
Expand Down Expand Up @@ -132,16 +131,13 @@ export function scanIdentifierUnicodeEscape(parser: ParserState): number | void
*/
export function scanUnicodeEscapeValue(parser: ParserState): number {
let codePoint = 0;
let char = parser.nextCP;
// First handle a delimited Unicode escape, e.g. \u{1F4A9}
if (parser.nextCP === Chars.LeftBrace) {
if (char === Chars.LeftBrace) {
const startPos = parser.index;
while (CharTypes[nextCP(parser)] & CharFlags.Hex) {
codePoint = (codePoint << 4) | toHex(parser.nextCP);
// Check this early to avoid `code` wrapping to a negative on overflow (which is
// reserved for abnormal conditions).
if (codePoint > Chars.NonBMPMax) {
report(parser, Errors.UnicodeOverflow);
}
if (codePoint > Chars.NonBMPMax) report(parser, Errors.UnicodeOverflow);
}

// At least 4 characters have to be read
Expand All @@ -152,7 +148,7 @@ export function scanUnicodeEscapeValue(parser: ParserState): number {
return codePoint;
}

if ((CharTypes[parser.nextCP] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence); // first one is mandatory
if ((CharTypes[char] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence); // first one is mandatory

const c2 = parser.source.charCodeAt(parser.index + 1);
if ((CharTypes[c2] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence);
Expand All @@ -161,7 +157,7 @@ export function scanUnicodeEscapeValue(parser: ParserState): number {
const c4 = parser.source.charCodeAt(parser.index + 3);
if ((CharTypes[c4] & CharFlags.Hex) === 0) report(parser, Errors.InvalidHexEscapeSequence);

codePoint = (toHex(parser.nextCP) << 12) | (toHex(c2) << 8) | (toHex(c3) << 4) | toHex(c4);
codePoint = (toHex(char) << 12) | (toHex(c2) << 8) | (toHex(c3) << 4) | toHex(c4);

parser.nextCP = parser.source.charCodeAt((parser.index += 4));

Expand Down
2 changes: 1 addition & 1 deletion src/lexer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export {
fromCodePoint,
toHex,
consumeLineFeed,
advanceNewline,
scanNewLine,
LexerState
} from './common';
export { CharTypes, CharFlags, isIdentifierStart, isIdentifierPart } from './charClassifier';
Expand Down
9 changes: 4 additions & 5 deletions src/lexer/numeric.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,20 +100,19 @@ export function scanNumber(parser: ParserState, context: Context, isFloat: 0 | 1
// Octal integer literals are not permitted in strict mode code
if (context & Context.Strict) report(parser, Errors.StrictOctalEscape);
kind = NumberKind.ImplicitOctal;
do {
if (CharTypes[parser.nextCP] & CharFlags.ImplicitOctalDigits) {
while (CharTypes[char] & CharFlags.Decimal) {
if (CharTypes[char] & CharFlags.ImplicitOctalDigits) {
kind = NumberKind.DecimalWithLeadingZero;
atStart = false;
break;
}
value = value * 8 + (parser.nextCP - Chars.Zero);
} while (CharTypes[nextCP(parser)] & CharFlags.Decimal);
char = parser.nextCP;
char = nextCP(parser);
}
} else if (CharTypes[char] & CharFlags.ImplicitOctalDigits) {
if (context & Context.Strict) report(parser, Errors.StrictOctalEscape);
else parser.flags = Flags.Octals;
kind = NumberKind.DecimalWithLeadingZero;
char = parser.nextCP;
} else if (char === Chars.Underscore) {
report(parser, Errors.Unexpected);
}
Expand Down
47 changes: 22 additions & 25 deletions src/lexer/scan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {
scanPrivateName,
fromCodePoint,
consumeLineFeed,
advanceNewline
scanNewLine
} from './';

/*
Expand Down Expand Up @@ -218,7 +218,7 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le

case Token.CarriageReturn:
state |= LexerState.NewLine | LexerState.LastIsCR;
advanceNewline(parser);
scanNewLine(parser);
break;

case Token.LineFeed:
Expand Down Expand Up @@ -268,7 +268,9 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
// `*`, `**`, `*=`, `**=`
case Token.Multiply: {
nextCP(parser);

if (parser.index >= parser.end) return Token.Multiply;

const next = parser.nextCP;

if (next === Chars.EqualSign) {
Expand All @@ -277,9 +279,11 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
}

if (next !== Chars.Asterisk) return Token.Multiply;

if (nextCP(parser) !== Chars.EqualSign) return Token.Exponentiate;

nextCP(parser);
if (parser.nextCP !== Chars.EqualSign) return Token.Exponentiate;
nextCP(parser);

return Token.ExponentiateAssign;
}

Expand Down Expand Up @@ -361,24 +365,19 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le

// `<`, `<=`, `<<`, `<<=`, `</`, `<!--`
case Token.LessThan:
nextCP(parser);
if (parser.index >= parser.end) return Token.LessThan;

switch (parser.nextCP) {
case Chars.LessThan:
nextCP(parser);
if ((parser.nextCP as number) === Chars.EqualSign) {
let next = nextCP(parser);
if (parser.index < parser.end) {
if (next === Chars.LessThan) {
if (parser.index < parser.end && nextCP(parser) === Chars.EqualSign) {
nextCP(parser);
return Token.ShiftLeftAssign;
} else {
return Token.ShiftLeft;
}

case Chars.EqualSign:
} else if (next === Chars.EqualSign) {
nextCP(parser);
return Token.LessThanOrEqual;

case Chars.Exclamation:
} else if (next === Chars.Exclamation) {
// Treat HTML begin-comment as comment-till-end-of-line.
if (
(context & Context.Module) === 0 &&
Expand All @@ -388,21 +387,17 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
state = skipSingleLineComment(parser, state);
continue;
}

default:
// ignore
return Token.LessThan;
}
}

return Token.LessThan;
// `=`, `==`, `===`, `=>`
case Token.Assign: {
nextCP(parser);
if (parser.index >= parser.end) return Token.Assign;
const next = parser.nextCP;

if (next === Chars.EqualSign) {
nextCP(parser);
if (parser.nextCP === Chars.EqualSign) {
if (nextCP(parser) === Chars.EqualSign) {
nextCP(parser);
return Token.StrictEqual;
} else {
Expand Down Expand Up @@ -436,7 +431,9 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
// `>`, `>=`, `>>`, `>>>`, `>>=`, `>>>=`
case Token.GreaterThan: {
nextCP(parser);

if (parser.index >= parser.end) return Token.GreaterThan;

const next = parser.nextCP;

if (next === Chars.EqualSign) {
Expand All @@ -445,14 +442,14 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
}

if (next !== Chars.GreaterThan) return Token.GreaterThan;

nextCP(parser);

if (parser.index < parser.end) {
const next = parser.nextCP;

if (next === Chars.GreaterThan) {
nextCP(parser);
if (parser.nextCP === Chars.EqualSign) {
if (nextCP(parser) === Chars.EqualSign) {
nextCP(parser);
return Token.LogicalShiftRightAssign;
} else {
Expand Down Expand Up @@ -503,7 +500,7 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le
} else {
if ((first ^ Chars.LineSeparator) <= 1) {
state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
advanceNewline(parser);
scanNewLine(parser);
continue;
}

Expand Down
Loading

8 comments on commit 68da76b

@KFlash
Copy link
Contributor Author

@KFlash KFlash commented on 68da76b Jun 29, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@aladdin-add You are the ESLint expert here :) See issue estree/estree#196.
I added an optional boolean property to the Identifier AST node for this. Does that sound correct to you?

You can try it in the REPL

@aladdin-add
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we wait until it has been settled in that thread?

@aladdin-add
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean it has not been accepted in estree. 😄

@KFlash
Copy link
Contributor Author

@KFlash KFlash commented on 68da76b Jun 29, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, it's not accepted yet, which is why I added it as an option, so that it's possible to extend ESTree. It's more or less the same way Acorn does it with its preserveParens option. I adopted that option, btw.

@adit-hotstar
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should also add the pattern property to MemberExpression nodes.

@KFlash
Copy link
Contributor Author

@KFlash KFlash commented on 68da76b Aug 29, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why MemberExpression nodes? Btw, do you have time to do a PR on this? I'm kind of stuck with something else ATM.

@adit-hotstar
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why MemberExpression nodes?

A MemberExpression node can also be a pattern. For example consider the following program.

const foo = {};

[ foo.bar, foo.baz ] = [ 10, 20 ];

console.log(foo); // { bar: 10, baz: 20 }

Hence, you'd also need to distinguish MemberExpression values from patterns.

Do you have time to do a PR on this?

Can't say for certain but I might be able to do it over the weekend.

@KFlash
Copy link
Contributor Author

@KFlash KFlash commented on 68da76b Aug 29, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right. It seems to be a slip-up on my side :) I also tried it in the REPL, and the pattern property is missing.

Can't say for certain but I might be able to do it over the weekend.

No rush. :)

Please sign in to comment.