fix(scanner): dedupe some scanner code and tweaked bit masks
KFlash committed Jun 22, 2019
1 parent 79514cd commit 1e9d1b1
Showing 7 changed files with 115 additions and 57 deletions.
24 changes: 9 additions & 15 deletions src/lexer/comments.ts
@@ -1,4 +1,4 @@
import { nextCodePoint, CharTypes, CharFlags, ScannerState } from './';
import { nextCodePoint, CharTypes, CharFlags, ScannerState, consumeLineFeed, advanceNewline } from './';
import { Chars } from '../chars';
import { ParserState, Flags } from '../common';
import { report, Errors } from '../errors';
@@ -36,10 +36,8 @@ export function skipHashBang(parser: ParserState): void {
export function skipSingleLineComment(parser: ParserState, state: ScannerState): ScannerState {
while (parser.index < parser.end) {
if (CharTypes[parser.nextCP] & CharFlags.LineTerminator || (parser.nextCP ^ Chars.LineSeparator) <= 1) {
parser.flags |= Flags.NewLine;
parser.column = 0;
parser.line++;
parser.nextCP = parser.source.charCodeAt(++parser.index);
state = (state | ScannerState.LastIsCR | ScannerState.NewLine) ^ ScannerState.LastIsCR;
advanceNewline(parser);
return state;
}
nextCodePoint(parser);
@@ -65,23 +63,19 @@ export function skipMultiLineComment(parser: ParserState, state: ScannerState):
if (CharTypes[parser.nextCP] & CharFlags.LineTerminator) {
if (CharTypes[parser.nextCP] & CharFlags.CarriageReturn) {
state |= ScannerState.NewLine | ScannerState.LastIsCR;
parser.column = 0;
parser.line++;
advanceNewline(parser);
} else {
if (state & ScannerState.LastIsCR) {
parser.column = 0;
parser.line++;
}
state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
state = (state | ScannerState.LastIsCR | ScannerState.NewLine) ^ ScannerState.LastIsCR;
parser.nextCP = parser.source.charCodeAt(++parser.index);
parser.flags |= Flags.NewLine;
}
parser.nextCP = parser.source.charCodeAt(++parser.index);
parser.flags |= Flags.NewLine;
} else if ((parser.nextCP ^ Chars.LineSeparator) <= 1) {
state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
parser.column = 0;
parser.nextCP = parser.source.charCodeAt(++parser.index);
parser.line++;
parser.flags |= Flags.NewLine;
state = (state | ScannerState.LastIsCR | ScannerState.NewLine) ^ ScannerState.LastIsCR;
advanceNewline(parser);
} else {
nextCodePoint(parser);
}
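
The hunks above (and similar ones later in scan.ts) replace `(state & ~ScannerState.LastIsCR) | ScannerState.NewLine` with `(state | ScannerState.LastIsCR | ScannerState.NewLine) ^ ScannerState.LastIsCR`. A minimal sketch of why the two forms are interchangeable, using illustrative flag values rather than the real `ScannerState` members:

// Illustrative bit flags; the real ScannerState enum lives in src/lexer/common.ts
// and its concrete values may differ.
const NewLine = 1 << 0;
const LastIsCR = 1 << 1;

// Old form: clear LastIsCR, then set NewLine.
const oldForm = (state: number): number => (state & ~LastIsCR) | NewLine;

// New form: set both bits, then XOR LastIsCR back off. The OR guarantees the
// LastIsCR bit is set, so the XOR always clears it and leaves every other bit
// untouched, which makes the result identical to the old form for any input.
const newForm = (state: number): number => (state | LastIsCR | NewLine) ^ LastIsCR;

for (let state = 0; state < 8; state++) {
  console.assert(oldForm(state) === newForm(state), `mismatch for state ${state}`);
}
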
21 changes: 20 additions & 1 deletion src/lexer/common.ts
@@ -1,5 +1,5 @@
import { Chars } from '../chars';
import { ParserState } from '../common';
import { ParserState, Flags } from '../common';
import { unicodeLookup } from '../unicode';
import { report, Errors } from '../errors';

@@ -34,6 +34,25 @@ export function consumeMultiUnitCodePoint(parser: ParserState, hi: number): bool
return true;
}

/**
* Use this instead of `advanceNewline` to consume a line feed when the previous character may have been a carriage return, so a CRLF pair only advances the line count once.
*/
export function consumeLineFeed(parser: ParserState, lastIsCR: boolean) {
parser.nextCP = parser.source.charCodeAt(++parser.index);
parser.flags |= Flags.NewLine;
if (!lastIsCR) {
parser.column = 0;
parser.line++;
}
}

export function advanceNewline(parser: ParserState) {
parser.flags |= Flags.NewLine;
parser.nextCP = parser.source.charCodeAt(++parser.index);
parser.column = 0;
parser.line++;
}

// ECMA-262 11.2 White Space
export function isExoticECMAScriptWhitespace(code: number): boolean {
/**
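
As a rough usage sketch of the two new helpers: `advanceNewline` handles a lone line terminator, while `consumeLineFeed` handles the LF of a possible CRLF pair so that the pair only bumps `line` once. The `MiniState` shape and `NewLineFlag` value below are illustrative stand-ins for the real `ParserState` and `Flags.NewLine`:

// Stripped-down stand-in for ParserState; the real interface lives in src/common.ts.
interface MiniState {
  source: string;
  index: number;
  line: number;
  column: number;
  nextCP: number;
  flags: number;
}

const NewLineFlag = 1 << 0; // stand-in for Flags.NewLine

function advanceNewline(parser: MiniState): void {
  parser.flags |= NewLineFlag;
  parser.nextCP = parser.source.charCodeAt(++parser.index);
  parser.column = 0;
  parser.line++;
}

function consumeLineFeed(parser: MiniState, lastIsCR: boolean): void {
  parser.nextCP = parser.source.charCodeAt(++parser.index);
  parser.flags |= NewLineFlag;
  if (!lastIsCR) {
    // Only count the LF as a new line when it does not follow a CR.
    parser.column = 0;
    parser.line++;
  }
}

// Scanning "a\r\nb": the CR goes through advanceNewline and the LF through
// consumeLineFeed with lastIsCR = true, so line ends up at 2 rather than 3.
const p: MiniState = { source: 'a\r\nb', index: 1, line: 1, column: 1, nextCP: 0x0d, flags: 0 };
advanceNewline(p);
consumeLineFeed(p, /* lastIsCR */ true);
console.log(p.line); // 2
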
69 changes: 51 additions & 18 deletions src/lexer/identifier.ts
@@ -6,31 +6,49 @@ import { CharTypes, CharFlags, isIdentifierPart } from './charClassifier';
import { report, Errors } from '../errors';
import { unicodeLookup } from '../unicode';

/**
* Scans identifier
*
* @param parser Parser object
* @param context Context masks
*/
export function scanIdentifier(parser: ParserState, context: Context): Token {
let hasEscape: 0 | 1 = 0;
let canBeKeyword: number = CharTypes[parser.nextCP] & CharFlags.KeywordCandidate;
parser.tokenValue = '';
if (parser.nextCP <= 0x7e) {
if ((CharTypes[parser.nextCP] & CharFlags.BackSlash) === 0) {
while ((CharTypes[nextCodePoint(parser)] & CharFlags.IdentifierPart) !== 0) {}
parser.tokenValue = parser.source.slice(parser.tokenIndex, parser.index);
if (parser.nextCP > 0x7e) return scanIdentifierSlowCase(parser, context, hasEscape, canBeKeyword);

if ((CharTypes[parser.nextCP] & CharFlags.BackSlash) === 0) {
return descKeywordTable[parser.tokenValue] || Token.Identifier;
}
} else {
hasEscape = 1;
const code = scanIdentifierUnicodeEscape(parser);
if (!isIdentifierPart(code)) report(parser, Errors.InvalidUnicodeEscapeSequence);
canBeKeyword = CharTypes[code] & CharFlags.KeywordCandidate;
parser.tokenValue += fromCodePoint(code);
}
while ((CharTypes[nextCodePoint(parser)] & CharFlags.IdentifierPart) !== 0) {}
parser.tokenValue = parser.source.slice(parser.tokenIndex, parser.index);
if (parser.nextCP > 0x7e) return scanIdentifierSlowCase(parser, context, hasEscape, canBeKeyword);

if ((CharTypes[parser.nextCP] & CharFlags.BackSlash) === 0) {
return descKeywordTable[parser.tokenValue] || Token.Identifier;
}

return scanIdentifierSlowCase(parser, context, hasEscape, canBeKeyword);
}

/**
* Scans unicode identifier
*
* @param parser Parser object
* @param context Context masks
*/
export function scanUnicodeIdentifier(parser: ParserState, context: Context): Token {
parser.tokenValue = '';
const cookedChar = scanIdentifierUnicodeEscape(parser) as number;
if (!isIdentifierPart(cookedChar)) report(parser, Errors.InvalidUnicodeEscapeSequence);
parser.tokenValue += fromCodePoint(cookedChar);
return scanIdentifierSlowCase(parser, context, 1, CharTypes[cookedChar] & CharFlags.KeywordCandidate);
}

/**
* Scans identifier slow case
*
* @param parser Parser object
* @param context Context masks
* @param hasEscape
* @param canBeKeyword
*/
export function scanIdentifierSlowCase(
parser: ParserState,
context: Context,
@@ -42,7 +60,7 @@ export function scanIdentifierSlowCase(
if (CharTypes[parser.nextCP] & CharFlags.BackSlash) {
parser.tokenValue += parser.source.slice(start, parser.index);
hasEscape = 1;
const code = scanIdentifierUnicodeEscape(parser);
const code = scanIdentifierUnicodeEscape(parser) as number;
if (!isIdentifierPart(code)) report(parser, Errors.InvalidUnicodeEscapeSequence);
canBeKeyword = canBeKeyword && CharTypes[code] & CharFlags.KeywordCandidate;
parser.tokenValue += fromCodePoint(code);
@@ -78,6 +96,11 @@ export function scanIdentifierSlowCase(
return Token.Identifier;
}

/**
* Scans private name
*
* @param parser Parser object
*/
export function scanPrivateName(parser: ParserState): Token {
nextCodePoint(parser); // consumes '#'
if (
@@ -91,7 +114,12 @@ export function scanPrivateName(parser: ParserState): Token {
return Token.PrivateField;
}

export function scanIdentifierUnicodeEscape(parser: ParserState): any {
/**
* Scans an identifier unicode escape
*
* @param parser Parser object
*/
export function scanIdentifierUnicodeEscape(parser: ParserState): number | void {
// Check for Unicode escape of the form '\uXXXX'
// and return code point value if valid Unicode escape is found. Otherwise return -1.
if (parser.index + 5 < parser.end && parser.source.charCodeAt(parser.index + 1) === Chars.LowerU) {
@@ -101,6 +129,11 @@ export function scanIdentifierUnicodeEscape(parser: ParserState): any {
report(parser, Errors.InvalidUnicodeEscapeSequence);
}

/**
* Scans unicode escape value
*
* @param parser Parser object
*/
export function scanUnicodeEscapeValue(parser: ParserState): number {
let codePoint = 0;
// First handle a delimited Unicode escape, e.g. \u{1F4A9}
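
In the restructured `scanIdentifier`, the ASCII fast path slices the raw source text and then decides keyword versus plain identifier with a single table lookup (`descKeywordTable[parser.tokenValue] || Token.Identifier`). A toy version of that final lookup, with made-up table contents and token values:

// Made-up token values and table entries, only to show the shape of the lookup.
const IdentifierToken = 0;
const descKeywordTable: Record<string, number> = { let: 1, const: 2, function: 3 };

function classifyWord(word: string): number {
  // Any word not present in the keyword table falls back to a plain identifier.
  return descKeywordTable[word] || IdentifierToken;
}

console.log(classifyWord('const')); // 2 (keyword token)
console.log(classifyWord('count')); // 0 (plain identifier)
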
10 changes: 9 additions & 1 deletion src/lexer/index.ts
@@ -6,10 +6,18 @@ export {
isExoticECMAScriptWhitespace,
fromCodePoint,
toHex,
consumeLineFeed,
advanceNewline,
ScannerState
} from './common';
export { CharTypes, CharFlags, isIdentifierStart, isIdentifierPart } from './charClassifier';
export { scanIdentifier, scanPrivateName, scanUnicodeEscapeValue } from './identifier';
export {
scanIdentifier,
scanIdentifierSlowCase,
scanUnicodeIdentifier,
scanPrivateName,
scanUnicodeEscapeValue
} from './identifier';
export { scanString } from './string';
export { scanNumber } from './numeric';
export { scanTemplate, scanTemplateTail } from './template';
43 changes: 22 additions & 21 deletions src/lexer/scan.ts
@@ -14,8 +14,12 @@ import {
scanNumber,
scanString,
scanIdentifier,
scanUnicodeIdentifier,
scanIdentifierSlowCase,
scanPrivateName,
fromCodePoint
fromCodePoint,
consumeLineFeed,
advanceNewline
} from './';

/*
@@ -125,7 +129,7 @@ export const TokenLookup = [
/* 89 - Y */ Token.Identifier,
/* 90 - Z */ Token.Identifier,
/* 91 - [ */ Token.LeftBracket,
/* 92 - \ */ Token.Identifier,
/* 92 - \ */ Token.EscapedIdentifier,
/* 93 - ] */ Token.RightBracket,
/* 94 - ^ */ Token.BitwiseXor,
/* 95 - _ */ Token.Identifier,
@@ -210,35 +214,34 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Sc
break;

case Token.CarriageReturn:
parser.flags |= Flags.NewLine;

state |= ScannerState.NewLine | ScannerState.LastIsCR;

parser.column = 0;
parser.nextCP = parser.source.charCodeAt(++parser.index);
parser.line++;
advanceNewline(parser);
break;

case Token.LineFeed:
consumeLineFeed(parser, (state & ScannerState.LastIsCR) !== 0);
state = (state | ScannerState.LastIsCR | ScannerState.NewLine) ^ ScannerState.LastIsCR;
parser.flags |= Flags.NewLine;

if ((state & ScannerState.LastIsCR) === 0) {
parser.column = 0;
parser.line++;
}
state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
parser.nextCP = parser.source.charCodeAt(++parser.index);
break;

// Look for an identifier.
case Token.Identifier:
return scanIdentifier(parser, context);

// Look for a decimal number.
case Token.NumericLiteral:
return scanNumber(parser, context, false);

// Look for a string or a template string.
case Token.StringLiteral:
return scanString(parser, context) as Token;

case Token.Template:
return scanTemplate(parser, context) as Token;

case Token.EscapedIdentifier:
return scanUnicodeIdentifier(parser, context);

// `#`
case Token.PrivateField:
return scanPrivateName(parser);
@@ -498,16 +501,14 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Sc
}
} else {
if ((first ^ Chars.LineSeparator) <= 1) {
parser.flags |= Flags.NewLine;
state = (state & ~ScannerState.LastIsCR) | ScannerState.NewLine;
parser.column = 0;
parser.nextCP = parser.source.charCodeAt(++parser.index);
parser.line++;
state = (state | ScannerState.LastIsCR | ScannerState.NewLine) ^ ScannerState.LastIsCR;
advanceNewline(parser);
continue;
}

if (isIDStart(first) || consumeMultiUnitCodePoint(parser, first)) {
return scanIdentifier(parser, context);
parser.tokenValue = '';
return scanIdentifierSlowCase(parser, context, /* hasEscape */ 0, /* canBeKeyword */ 0);
}

if (isExoticECMAScriptWhitespace(first)) {
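
The `TokenLookup` change above gives the backslash (char code 92) its own `Token.EscapedIdentifier` entry, so `scanSingleToken` can route identifiers that start with a `\uXXXX` escape straight to `scanUnicodeIdentifier` instead of the general identifier scanner. A toy version of that table-driven dispatch, with illustrative token kinds:

// Illustrative token kinds; the real values come from src/token.ts.
const enum Tok { Unknown, Identifier, EscapedIdentifier }

const lookup: Tok[] = new Array(128).fill(Tok.Unknown);
for (let cp = 0x41; cp <= 0x5a; cp++) lookup[cp] = Tok.Identifier; // A-Z
for (let cp = 0x61; cp <= 0x7a; cp++) lookup[cp] = Tok.Identifier; // a-z
lookup[0x5c] = Tok.EscapedIdentifier; // '\' now has a dedicated entry

function firstTokenKind(source: string): Tok {
  const cp = source.charCodeAt(0);
  return cp < 128 ? lookup[cp] : Tok.Unknown;
}

console.log(firstTokenKind('abc'));       // Identifier: take the fast scanIdentifier path
console.log(firstTokenKind('\\u0061bc')); // EscapedIdentifier: go to scanUnicodeIdentifier
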
4 changes: 3 additions & 1 deletion src/parser.ts
@@ -5690,7 +5690,9 @@ export function parseFormalParametersOrFormalList(
isComplex = 1;
}

if (context & Context.OptionsLexical) declareName(parser, context, scope, parser.tokenValue, type, 0, 0);
if (context & Context.OptionsLexical && (parser.token & Token.IsIdentifier) === Token.IsIdentifier) {
declareName(parser, context, scope, parser.tokenValue, type, 0, 0);
}

left = parseAndClassifyIdentifier(parser, context, type, tokenIndex);
} else {
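
The parser.ts hunk now calls `declareName` only when the current token carries the `IsIdentifier` bit, tested with `(parser.token & Token.IsIdentifier) === Token.IsIdentifier`. A small sketch of that flag-membership test, using illustrative token encodings rather than the real `Token` values:

// Illustrative encoding: low bits hold a token id, a high bit marks identifier-like tokens.
const IsIdentifier = 1 << 12;
const IdentifierTok = 1 | IsIdentifier;
const LeftBracketTok = 2; // e.g. a token that starts a destructuring pattern

const isIdentifierToken = (token: number): boolean =>
  (token & IsIdentifier) === IsIdentifier;

console.log(isIdentifierToken(IdentifierTok));  // true: declareName runs
console.log(isIdentifierToken(LeftBracketTok)); // false: declareName is skipped
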
1 change: 1 addition & 0 deletions src/token.ts
@@ -184,6 +184,7 @@ export const enum Token {
Decorator = 133,
Target = 134 | IsIdentifier,
LineFeed = 135,
EscapedIdentifier = 136,
}

export const KeywordDescTable = [
