Skip to content

Commit e2fad6d

Browse files
committed
Handle Unicode characters in fix-it replacement strings.
Patch by Sukolsak Sakshuwong! llvm-svn: 183535
1 parent 6d0004c commit e2fad6d

File tree

2 files changed

+37
-19
lines changed

2 files changed

+37
-19
lines changed

clang/lib/Frontend/TextDiagnostic.cpp

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -336,13 +336,10 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
336336
if (MaxColumns <= Columns)
337337
return;
338338

339-
// no special characters allowed in CaretLine or FixItInsertionLine
339+
// No special characters are allowed in CaretLine.
340340
assert(CaretLine.end() ==
341341
std::find_if(CaretLine.begin(), CaretLine.end(),
342342
char_out_of_range(' ','~')));
343-
assert(FixItInsertionLine.end() ==
344-
std::find_if(FixItInsertionLine.begin(), FixItInsertionLine.end(),
345-
char_out_of_range(' ','~')));
346343

347344
// Find the slice that we need to display the full caret line
348345
// correctly.
@@ -370,8 +367,15 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
370367
if (!isWhitespace(FixItInsertionLine[FixItEnd - 1]))
371368
break;
372369

373-
CaretStart = std::min(FixItStart, CaretStart);
374-
CaretEnd = std::max(FixItEnd, CaretEnd);
370+
// We can safely use the byte offset FixItStart as the column offset
371+
// because the characters up until FixItStart are all ASCII whitespace
372+
// characters.
373+
unsigned FixItStartCol = FixItStart;
374+
unsigned FixItEndCol
375+
= llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd));
376+
377+
CaretStart = std::min(FixItStartCol, CaretStart);
378+
CaretEnd = std::max(FixItEndCol, CaretEnd);
375379
}
376380

377381
// CaretEnd may have been set at the middle of a character
@@ -1023,24 +1027,18 @@ static std::string buildFixItInsertionLine(unsigned LineNo,
10231027
if (HintCol < PrevHintEndCol)
10241028
HintCol = PrevHintEndCol + 1;
10251029

1026-
// FIXME: This function handles multibyte characters in the source, but
1027-
// not in the fixits. This assertion is intended to catch unintended
1028-
// use of multibyte characters in fixits. If we decide to do this, we'll
1029-
// have to track separate byte widths for the source and fixit lines.
1030-
assert((size_t)llvm::sys::locale::columnWidth(I->CodeToInsert) ==
1031-
I->CodeToInsert.size());
1032-
1033-
// This relies on one byte per column in our fixit hints.
10341030
// This should NOT use HintByteOffset, because the source might have
10351031
// Unicode characters in earlier columns.
1036-
unsigned LastColumnModified = HintCol + I->CodeToInsert.size();
1037-
if (LastColumnModified > FixItInsertionLine.size())
1038-
FixItInsertionLine.resize(LastColumnModified, ' ');
1032+
unsigned NewFixItLineSize = FixItInsertionLine.size() +
1033+
(HintCol - PrevHintEndCol) + I->CodeToInsert.size();
1034+
if (NewFixItLineSize > FixItInsertionLine.size())
1035+
FixItInsertionLine.resize(NewFixItLineSize, ' ');
10391036

10401037
std::copy(I->CodeToInsert.begin(), I->CodeToInsert.end(),
1041-
FixItInsertionLine.begin() + HintCol);
1038+
FixItInsertionLine.end() - I->CodeToInsert.size());
10421039

1043-
PrevHintEndCol = LastColumnModified;
1040+
PrevHintEndCol =
1041+
HintCol + llvm::sys::locale::columnWidth(I->CodeToInsert);
10441042
} else {
10451043
FixItInsertionLine.clear();
10461044
break;

clang/test/FixIt/fixit-unicode.c

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,3 +34,23 @@ void test2() {
3434

3535
// CHECK-MACHINE: fix-it:"{{.*}}fixit-unicode.c":{[[@LINE-9]]:16-[[@LINE-9]]:18}:"%ld"
3636
}
37+
38+
void test3() {
39+
int กssss = 42;
40+
int a = กsss; // expected-error{{use of undeclared identifier 'กsss'; did you mean 'กssss'?}}
41+
// CHECK: {{^ \^}}
42+
// CHECK: {{^ [^ ]+ssss}}
43+
// CHECK-MACHINE: fix-it:"{{.*}}":{[[@LINE-3]]:11-[[@LINE-3]]:17}:"\340\270\201ssss"
44+
45+
int ssกss = 42;
46+
int b = ssกs; // expected-error{{use of undeclared identifier 'ssกs'; did you mean 'ssกss'?}}
47+
// CHECK: {{^ \^}}
48+
// CHECK: {{^ ss.+ss}}
49+
// CHECK-MACHINE: fix-it:"{{.*}}":{[[@LINE-3]]:11-[[@LINE-3]]:17}:"ss\340\270\201ss"
50+
51+
int sssssssssก = 42;
52+
int c = sssssssss; // expected-error{{use of undeclared identifier 'sssssssss'; did you mean 'sssssssssก'?}}
53+
// CHECK: {{^ \^}}
54+
// CHECK: {{^ sssssssss.+}}
55+
// CHECK-MACHINE: fix-it:"{{.*}}":{[[@LINE-3]]:11-[[@LINE-3]]:20}:"sssssssss\340\270\201"
56+
}

0 commit comments

Comments
 (0)