diff --git a/src/unicode/index.ts b/src/unicode/index.ts index 7f3b41f..8b59034 100644 --- a/src/unicode/index.ts +++ b/src/unicode/index.ts @@ -119,3 +119,15 @@ export function digitToInt(code: number): number { } return code - DigitZero } + +export function isLeadSurrogate(code: number): boolean { + return code >= 0xd800 && code <= 0xdbff +} + +export function isTrailSurrogate(code: number): boolean { + return code >= 0xdc00 && code <= 0xdfff +} + +export function combineSurrogatePair(lead: number, trail: number): number { + return (lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000 +} diff --git a/src/validator.ts b/src/validator.ts index 4daf6b0..f9a4b10 100644 --- a/src/validator.ts +++ b/src/validator.ts @@ -20,16 +20,6 @@ import { FullStop, GreaterThanSign, HyphenMinus, - isDecimalDigit, - isHexDigit, - isIdContinue, - isIdStart, - isLatinLetter, - isLineTerminator, - isOctalDigit, - isValidLoneUnicodeProperty, - isValidUnicodeProperty, - isValidUnicode, LatinCapitalLetterB, LatinCapitalLetterD, LatinCapitalLetterP, @@ -70,6 +60,19 @@ import { VerticalLine, ZeroWidthJoiner, ZeroWidthNonJoiner, + combineSurrogatePair, + isDecimalDigit, + isHexDigit, + isIdContinue, + isIdStart, + isLatinLetter, + isLeadSurrogate, + isLineTerminator, + isOctalDigit, + isTrailSurrogate, + isValidLoneUnicodeProperty, + isValidUnicodeProperty, + isValidUnicode, } from "./unicode" function isSyntaxCharacter(cp: number): boolean { @@ -1861,18 +1864,31 @@ export class RegExpValidator { * UnicodeIDStart * `$` * `_` - * `\` RegExpUnicodeEscapeSequence[?U] + * `\` RegExpUnicodeEscapeSequence[+U] + * [~U] UnicodeLeadSurrogate UnicodeTrailSurrogate * ``` * @returns `true` if it ate the next characters successfully. */ private eatRegExpIdentifierStart(): boolean { const start = this.index + const forceUFlag = !this._uFlag && this.ecmaVersion >= 2020 let cp = this.currentCodePoint this.advance() - if (cp === ReverseSolidus && this.eatRegExpUnicodeEscapeSequence()) { + if ( + cp === ReverseSolidus && + this.eatRegExpUnicodeEscapeSequence(forceUFlag) + ) { cp = this._lastIntValue + } else if ( + forceUFlag && + isLeadSurrogate(cp) && + isTrailSurrogate(this.currentCodePoint) + ) { + cp = combineSurrogatePair(cp, this.currentCodePoint) + this.advance() } + if (isRegExpIdentifierStart(cp)) { this._lastIntValue = cp return true @@ -1893,7 +1909,8 @@ export class RegExpValidator { * UnicodeIDContinue * `$` * `_` - * `\` RegExpUnicodeEscapeSequence[?U] + * `\` RegExpUnicodeEscapeSequence[+U] + * [~U] UnicodeLeadSurrogate UnicodeTrailSurrogate * * * ``` @@ -1901,12 +1918,24 @@ export class RegExpValidator { */ private eatRegExpIdentifierPart(): boolean { const start = this.index + const forceUFlag = !this._uFlag && this.ecmaVersion >= 2020 let cp = this.currentCodePoint this.advance() - if (cp === ReverseSolidus && this.eatRegExpUnicodeEscapeSequence()) { + if ( + cp === ReverseSolidus && + this.eatRegExpUnicodeEscapeSequence(forceUFlag) + ) { cp = this._lastIntValue + } else if ( + forceUFlag && + isLeadSurrogate(cp) && + isTrailSurrogate(this.currentCodePoint) + ) { + cp = combineSurrogatePair(cp, this.currentCodePoint) + this.advance() } + if (isRegExpIdentifierPart(cp)) { this._lastIntValue = cp return true @@ -2027,19 +2056,19 @@ export class RegExpValidator { * ``` * @returns `true` if it ate the next characters successfully. */ - private eatRegExpUnicodeEscapeSequence(): boolean { + private eatRegExpUnicodeEscapeSequence(forceUFlag = false): boolean { const start = this.index + const uFlag = forceUFlag || this._uFlag if (this.eat(LatinSmallLetterU)) { if ( - (this._uFlag && this.eatRegExpUnicodeSurrogatePairEscape()) || + (uFlag && this.eatRegExpUnicodeSurrogatePairEscape()) || this.eatFixedHexDigits(4) || - (this._uFlag && this.eatRegExpUnicodeCodePointEscape()) + (uFlag && this.eatRegExpUnicodeCodePointEscape()) ) { return true } - - if (this.strict || this._uFlag) { + if (this.strict || uFlag) { this.raise("Invalid unicode escape") } this.rewind(start) @@ -2062,16 +2091,14 @@ export class RegExpValidator { if (this.eatFixedHexDigits(4)) { const lead = this._lastIntValue if ( - lead >= 0xd800 && - lead <= 0xdbff && + isLeadSurrogate(lead) && this.eat(ReverseSolidus) && this.eat(LatinSmallLetterU) && this.eatFixedHexDigits(4) ) { const trail = this._lastIntValue - if (trail >= 0xdc00 && trail <= 0xdfff) { - this._lastIntValue = - (lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000 + if (isTrailSurrogate(trail)) { + this._lastIntValue = combineSurrogatePair(lead, trail) return true } } diff --git a/test/fixtures/parser/literal/unicode-group-names-invalid.json b/test/fixtures/parser/literal/unicode-group-names-invalid.json new file mode 100644 index 0000000..ba8547f --- /dev/null +++ b/test/fixtures/parser/literal/unicode-group-names-invalid.json @@ -0,0 +1,44 @@ +{ + "options": { + "ecmaVersion": 2020, + "strict": false + }, + "patterns": { + "/(?<\\ud83d\\ude80>.)/": { + "error": { + "message": "Invalid regular expression: /(?<\\ud83d\\ude80>.)/: Invalid capture group name", + "index": 4 + } + }, + "/(?<\\ud83d\\ude80>.)/u": { + "error": { + "message": "Invalid regular expression: /(?<\\ud83d\\ude80>.)/u: Invalid capture group name", + "index": 4 + } + }, + "/(?<\\u{1f680}>.)/": { + "error": { + "message": "Invalid regular expression: /(?<\\u{1f680}>.)/: Invalid capture group name", + "index": 4 + } + }, + "/(?<\\u{1f680}>.)/u": { + "error": { + "message": "Invalid regular expression: /(?<\\u{1f680}>.)/u: Invalid capture group name", + "index": 4 + } + }, + "/(?<🚀>.)/": { + "error": { + "message": "Invalid regular expression: /(?<🚀>.)/: Invalid capture group name", + "index": 4 + } + }, + "/(?<🚀>.)/u": { + "error": { + "message": "Invalid regular expression: /(?<🚀>.)/u: Invalid capture group name", + "index": 4 + } + } + } +} \ No newline at end of file diff --git a/test/fixtures/parser/literal/unicode-group-names-valid.json b/test/fixtures/parser/literal/unicode-group-names-valid.json new file mode 100644 index 0000000..2d0edb7 --- /dev/null +++ b/test/fixtures/parser/literal/unicode-group-names-valid.json @@ -0,0 +1,416 @@ +{ + "options": { + "ecmaVersion": 2020, + "strict": false + }, + "patterns": { + "/(?<\\ud835\\udc9c>.)/": { + "ast": { + "type": "RegExpLiteral", + "parent": null, + "start": 0, + "end": 20, + "raw": "/(?<\\ud835\\udc9c>.)/", + "pattern": { + "type": "Pattern", + "parent": "♻️..", + "start": 1, + "end": 19, + "raw": "(?<\\ud835\\udc9c>.)", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 1, + "end": 19, + "raw": "(?<\\ud835\\udc9c>.)", + "elements": [ + { + "type": "CapturingGroup", + "parent": "♻️../..", + "start": 1, + "end": 19, + "raw": "(?<\\ud835\\udc9c>.)", + "name": "𝒜", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 17, + "end": 18, + "raw": ".", + "elements": [ + { + "type": "CharacterSet", + "parent": "♻️../..", + "start": 17, + "end": 18, + "raw": ".", + "kind": "any" + } + ] + } + ], + "references": [] + } + ] + } + ] + }, + "flags": { + "type": "Flags", + "parent": "♻️..", + "start": 20, + "end": 20, + "raw": "", + "global": false, + "ignoreCase": false, + "multiline": false, + "unicode": false, + "sticky": false, + "dotAll": false + } + } + }, + "/(?<\\ud835\\udc9c>.)/u": { + "ast": { + "type": "RegExpLiteral", + "parent": null, + "start": 0, + "end": 21, + "raw": "/(?<\\ud835\\udc9c>.)/u", + "pattern": { + "type": "Pattern", + "parent": "♻️..", + "start": 1, + "end": 19, + "raw": "(?<\\ud835\\udc9c>.)", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 1, + "end": 19, + "raw": "(?<\\ud835\\udc9c>.)", + "elements": [ + { + "type": "CapturingGroup", + "parent": "♻️../..", + "start": 1, + "end": 19, + "raw": "(?<\\ud835\\udc9c>.)", + "name": "𝒜", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 17, + "end": 18, + "raw": ".", + "elements": [ + { + "type": "CharacterSet", + "parent": "♻️../..", + "start": 17, + "end": 18, + "raw": ".", + "kind": "any" + } + ] + } + ], + "references": [] + } + ] + } + ] + }, + "flags": { + "type": "Flags", + "parent": "♻️..", + "start": 20, + "end": 21, + "raw": "u", + "global": false, + "ignoreCase": false, + "multiline": false, + "unicode": true, + "sticky": false, + "dotAll": false + } + } + }, + "/(?<\\u{1d49c}>.)/": { + "ast": { + "type": "RegExpLiteral", + "parent": null, + "start": 0, + "end": 17, + "raw": "/(?<\\u{1d49c}>.)/", + "pattern": { + "type": "Pattern", + "parent": "♻️..", + "start": 1, + "end": 16, + "raw": "(?<\\u{1d49c}>.)", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 1, + "end": 16, + "raw": "(?<\\u{1d49c}>.)", + "elements": [ + { + "type": "CapturingGroup", + "parent": "♻️../..", + "start": 1, + "end": 16, + "raw": "(?<\\u{1d49c}>.)", + "name": "𝒜", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 14, + "end": 15, + "raw": ".", + "elements": [ + { + "type": "CharacterSet", + "parent": "♻️../..", + "start": 14, + "end": 15, + "raw": ".", + "kind": "any" + } + ] + } + ], + "references": [] + } + ] + } + ] + }, + "flags": { + "type": "Flags", + "parent": "♻️..", + "start": 17, + "end": 17, + "raw": "", + "global": false, + "ignoreCase": false, + "multiline": false, + "unicode": false, + "sticky": false, + "dotAll": false + } + } + }, + "/(?<\\u{1d49c}>.)/u": { + "ast": { + "type": "RegExpLiteral", + "parent": null, + "start": 0, + "end": 18, + "raw": "/(?<\\u{1d49c}>.)/u", + "pattern": { + "type": "Pattern", + "parent": "♻️..", + "start": 1, + "end": 16, + "raw": "(?<\\u{1d49c}>.)", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 1, + "end": 16, + "raw": "(?<\\u{1d49c}>.)", + "elements": [ + { + "type": "CapturingGroup", + "parent": "♻️../..", + "start": 1, + "end": 16, + "raw": "(?<\\u{1d49c}>.)", + "name": "𝒜", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 14, + "end": 15, + "raw": ".", + "elements": [ + { + "type": "CharacterSet", + "parent": "♻️../..", + "start": 14, + "end": 15, + "raw": ".", + "kind": "any" + } + ] + } + ], + "references": [] + } + ] + } + ] + }, + "flags": { + "type": "Flags", + "parent": "♻️..", + "start": 17, + "end": 18, + "raw": "u", + "global": false, + "ignoreCase": false, + "multiline": false, + "unicode": true, + "sticky": false, + "dotAll": false + } + } + }, + "/(?<𝒜>.)/": { + "ast": { + "type": "RegExpLiteral", + "parent": null, + "start": 0, + "end": 10, + "raw": "/(?<𝒜>.)/", + "pattern": { + "type": "Pattern", + "parent": "♻️..", + "start": 1, + "end": 9, + "raw": "(?<𝒜>.)", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 1, + "end": 9, + "raw": "(?<𝒜>.)", + "elements": [ + { + "type": "CapturingGroup", + "parent": "♻️../..", + "start": 1, + "end": 9, + "raw": "(?<𝒜>.)", + "name": "𝒜", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 7, + "end": 8, + "raw": ".", + "elements": [ + { + "type": "CharacterSet", + "parent": "♻️../..", + "start": 7, + "end": 8, + "raw": ".", + "kind": "any" + } + ] + } + ], + "references": [] + } + ] + } + ] + }, + "flags": { + "type": "Flags", + "parent": "♻️..", + "start": 10, + "end": 10, + "raw": "", + "global": false, + "ignoreCase": false, + "multiline": false, + "unicode": false, + "sticky": false, + "dotAll": false + } + } + }, + "/(?<𝒜>.)/u": { + "ast": { + "type": "RegExpLiteral", + "parent": null, + "start": 0, + "end": 11, + "raw": "/(?<𝒜>.)/u", + "pattern": { + "type": "Pattern", + "parent": "♻️..", + "start": 1, + "end": 9, + "raw": "(?<𝒜>.)", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 1, + "end": 9, + "raw": "(?<𝒜>.)", + "elements": [ + { + "type": "CapturingGroup", + "parent": "♻️../..", + "start": 1, + "end": 9, + "raw": "(?<𝒜>.)", + "name": "𝒜", + "alternatives": [ + { + "type": "Alternative", + "parent": "♻️../..", + "start": 7, + "end": 8, + "raw": ".", + "elements": [ + { + "type": "CharacterSet", + "parent": "♻️../..", + "start": 7, + "end": 8, + "raw": ".", + "kind": "any" + } + ] + } + ], + "references": [] + } + ] + } + ] + }, + "flags": { + "type": "Flags", + "parent": "♻️..", + "start": 10, + "end": 11, + "raw": "u", + "global": false, + "ignoreCase": false, + "multiline": false, + "unicode": true, + "sticky": false, + "dotAll": false + } + } + } + } +} \ No newline at end of file