Skip to content

Commit 2ea1509

Browse files
Copilot and jakebailey authored
Fix scanEscapeSequence for multi-byte UTF-8 characters after backslash (#3911)
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: jakebailey <5341706+jakebailey@users.noreply.github.com>
1 parent 728eeb7 commit 2ea1509

9 files changed

Lines changed: 71 additions & 38 deletions

internal/scanner/scanner.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1796,12 +1796,23 @@ func (s *Scanner) scanEscapeSequence(flags EscapeSequenceScanningFlags) string {
17961796
}
17971797
fallthrough
17981798
case '\n':
1799-
// case CharacterCodes.lineSeparator !!!
1800-
// case CharacterCodes.paragraphSeparator !!!
18011799
return ""
18021800
default:
1801+
// ch was read as a single byte; for multi-byte UTF-8 characters,
1802+
// we need to decode the full rune and advance past all its bytes.
1803+
if ch >= utf8.RuneSelf {
1804+
s.pos-- // back up past the single-byte advance
1805+
var size int
1806+
ch, size = utf8.DecodeRuneInString(s.text[s.pos:])
1807+
s.pos += size
1808+
s.containsNonASCII = true
1809+
}
1810+
// LineContinuation: a backslash followed by a line terminator is "the empty code unit sequence".
1811+
if ch == '\u2028' || ch == '\u2029' {
1812+
return ""
1813+
}
18031814
if flags&EscapeSequenceScanningFlagsAnyUnicodeMode != 0 || flags&EscapeSequenceScanningFlagsRegularExpression != 0 && flags&EscapeSequenceScanningFlagsAnnexB == 0 && IsIdentifierPart(ch) {
1804-
s.errorAt(diagnostics.This_character_cannot_be_escaped_in_a_regular_expression, s.pos-2, 2)
1815+
s.errorAt(diagnostics.This_character_cannot_be_escaped_in_a_regular_expression, start, s.pos-start)
18051816
}
18061817
return string(ch)
18071818
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
//// [tests/cases/compiler/backslashBeforeNonSpecialChar.ts] ////
2+
3+
//// [backslashBeforeNonSpecialChar.ts]
4+
const enum Currency {
5+
Euro = "\€",
6+
}
7+
8+
const currency = Currency.Euro;
9+
10+
11+
//// [backslashBeforeNonSpecialChar.js]
12+
"use strict";
13+
const currency = "\u20AC" /* Currency.Euro */;
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//// [tests/cases/compiler/backslashBeforeNonSpecialChar.ts] ////
2+
3+
=== backslashBeforeNonSpecialChar.ts ===
4+
const enum Currency {
5+
>Currency : Symbol(Currency, Decl(backslashBeforeNonSpecialChar.ts, 0, 0))
6+
7+
Euro = "\€",
8+
>Euro : Symbol(Currency.Euro, Decl(backslashBeforeNonSpecialChar.ts, 0, 21))
9+
}
10+
11+
const currency = Currency.Euro;
12+
>currency : Symbol(currency, Decl(backslashBeforeNonSpecialChar.ts, 4, 5))
13+
>Currency.Euro : Symbol(Currency.Euro, Decl(backslashBeforeNonSpecialChar.ts, 0, 21))
14+
>Currency : Symbol(Currency, Decl(backslashBeforeNonSpecialChar.ts, 0, 0))
15+
>Euro : Symbol(Currency.Euro, Decl(backslashBeforeNonSpecialChar.ts, 0, 21))
16+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//// [tests/cases/compiler/backslashBeforeNonSpecialChar.ts] ////
2+
3+
=== backslashBeforeNonSpecialChar.ts ===
4+
const enum Currency {
5+
>Currency : Currency
6+
7+
Euro = "\€",
8+
>Euro : Currency.Euro
9+
>"\€" : "€"
10+
}
11+
12+
const currency = Currency.Euro;
13+
>currency : Currency.Euro
14+
>Currency.Euro : Currency
15+
>Currency : typeof Currency
16+
>Euro : Currency
17+

testdata/baselines/reference/submodule/compiler/sourceMap-LineBreaks(target=es2015).types

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,15 @@ line 2";
4949

5050
var stringLiteralWithLineSeparator = "line 1\
5151
>stringLiteralWithLineSeparator : string
52-
>"line 1\
line 2" : "line 1�line 2"
52+
>"line 1\
line 2" : "line 1line 2"
5353

5454
line 2";
5555
var stringLiteralWithParagraphSeparator = "line 1\
5656
>stringLiteralWithParagraphSeparator : string
57-
>"line 1\
line 2" : "line 1�line 2"
57+
>"line 1\
line 2" : "line 1line 2"
5858

5959
line 2";
6060
var stringLiteralWithNextLine = "line 1\…line 2";
6161
>stringLiteralWithNextLine : string
62-
>"line 1\…line 2" : "line 1Â�line 2"
62+
>"line 1\…line 2" : "line 1\u0085line 2"
6363

testdata/baselines/reference/submodule/compiler/sourceMap-LineBreaks(target=es2015).types.diff

Lines changed: 0 additions & 20 deletions
This file was deleted.

testdata/baselines/reference/submodule/conformance/allowUnescapedParagraphAndLineSeparatorsInStringLiteral.types

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ STRING_CONTENT
7474

7575
STRING_CONTENT",
7676
`\
77-
>`\
` : "�"
77+
>`\
` : ""
7878

7979
`,
8080
'

testdata/baselines/reference/submodule/conformance/allowUnescapedParagraphAndLineSeparatorsInStringLiteral.types.diff

Lines changed: 0 additions & 11 deletions
This file was deleted.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// @target: esnext
2+
3+
const enum Currency {
4+
Euro = "\€",
5+
}
6+
7+
const currency = Currency.Euro;

0 commit comments

Comments
 (0)