diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp index 7cdbbbd5dd7faa..ef8dc2a864a2a1 100644 --- a/clang/lib/Format/BreakableToken.cpp +++ b/clang/lib/Format/BreakableToken.cpp @@ -771,6 +771,20 @@ BreakableLineCommentSection::BreakableLineCommentSection( OriginalPrefix[i] = IndentPrefix; const unsigned SpacesInPrefix = llvm::count(IndentPrefix, ' '); + // This lambda also considers multibyte characters that are not handled in + // functions like isPunctuation provided by CharInfo. + const auto NoSpaceBeforeFirstCommentChar = [&]() { + assert(Lines[i].size() > IndentPrefix.size()); + const char FirstCommentChar = Lines[i][IndentPrefix.size()]; + const unsigned FirstCharByteSize = + encoding::getCodePointNumBytes(FirstCommentChar, Encoding); + return encoding::columnWidth( + Lines[i].substr(IndentPrefix.size(), FirstCharByteSize), + Encoding) == 1 && + (FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) || + isHorizontalWhitespace(FirstCommentChar)); + }; + // On the first line of the comment section we calculate how many spaces // are to be added or removed, all lines after that just get only the // change and we will not look at the maximum anymore. 
Additionally to the @@ -780,7 +794,7 @@ BreakableLineCommentSection::BreakableLineCommentSection( OriginalPrefix[i - 1].rtrim(Blanks)) { if (SpacesInPrefix < Style.SpacesInLineCommentPrefix.Minimum && Lines[i].size() > IndentPrefix.size() && - isAlphanumeric(Lines[i][IndentPrefix.size()])) { + !NoSpaceBeforeFirstCommentChar()) { FirstLineSpaceChange = Style.SpacesInLineCommentPrefix.Minimum - SpacesInPrefix; } else if (SpacesInPrefix > Style.SpacesInLineCommentPrefix.Maximum) { @@ -804,7 +818,7 @@ BreakableLineCommentSection::BreakableLineCommentSection( const auto FirstNonSpace = Lines[i][IndentPrefix.size()]; const auto AllowsSpaceChange = SpacesInPrefix != 0 || - (isAlphanumeric(FirstNonSpace) || + (!NoSpaceBeforeFirstCommentChar() || (FirstNonSpace == '}' && FirstLineSpaceChange != 0)); if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) { diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp index b487440a06a3b9..282bc46cd04824 100644 --- a/clang/unittests/Format/FormatTestComments.cpp +++ b/clang/unittests/Format/FormatTestComments.cpp @@ -3322,6 +3322,18 @@ TEST_F(FormatTestComments, SpaceAtLineCommentBegin) { "\n" "/// Free Doxygen with 3 spaces\n" "\n" + "//🐉 A nice dragon\n" + "\n" + "//\t abccba\n" + "\n" + "//\\t deffed\n" + "\n" + "// 🐉 Another nice dragon\n" + "\n" + "// \t Three leading spaces following tab\n" + "\n" + "// \\t Three leading spaces following backslash\n" + "\n" "/// A Doxygen Comment with a nested list:\n" "/// - Foo\n" "/// - Bar\n" @@ -3381,6 +3393,18 @@ TEST_F(FormatTestComments, SpaceAtLineCommentBegin) { "\n" "/// Free Doxygen with 3 spaces\n" "\n" + "// 🐉 A nice dragon\n" + "\n" + "//\t abccba\n" + "\n" + "//\\t deffed\n" + "\n" + "// 🐉 Another nice dragon\n" + "\n" + "// \t Three leading spaces following tab\n" + "\n" + "// \\t Three leading spaces following backslash\n" + "\n" "/// A Doxygen Comment with a nested list:\n" "/// - Foo\n" "/// - Bar\n" @@ -3442,6 +3466,18 @@ 
TEST_F(FormatTestComments, SpaceAtLineCommentBegin) { "\n" "///Free Doxygen with 3 spaces\n" "\n" + "//🐉 A nice dragon\n" + "\n" + "//\t abccba\n" + "\n" + "//\\t deffed\n" + "\n" + "//🐉 Another nice dragon\n" + "\n" + "//\t Three leading spaces following tab\n" + "\n" + "//\\t Three leading spaces following backslash\n" + "\n" "///A Doxygen Comment with a nested list:\n" "///- Foo\n" "///- Bar\n" @@ -3503,6 +3539,18 @@ TEST_F(FormatTestComments, SpaceAtLineCommentBegin) { "\n" "/// Free Doxygen with 3 spaces\n" "\n" + "// 🐉 A nice dragon\n" + "\n" + "//\t abccba\n" + "\n" + "//\\t deffed\n" + "\n" + "// 🐉 Another nice dragon\n" + "\n" + "// \t Three leading spaces following tab\n" + "\n" + "// \\t Three leading spaces following backslash\n" + "\n" "/// A Doxygen Comment with a nested list:\n" "/// - Foo\n" "/// - Bar\n" @@ -3809,6 +3857,18 @@ TEST_F(FormatTestComments, SpaceAtLineCommentBegin) { "\n" "/// Free Doxygen with 3 spaces\n" "\n" + "// 🐉 A nice dragon\n" + "\n" + "//\t abccba\n" + "\n" + "//\\t deffed\n" + "\n" + "// 🐉 Another nice dragon\n" + "\n" + "// \t Three leading spaces following tab\n" + "\n" + "// \\t Three leading spaces following backslash\n" + "\n" "/// A Doxygen Comment with a nested list:\n" "/// - Foo\n" "/// - Bar\n" @@ -3870,6 +3930,18 @@ TEST_F(FormatTestComments, SpaceAtLineCommentBegin) { "\n" "///Free Doxygen with 3 spaces\n" "\n" + "//🐉 A nice dragon\n" + "\n" + "//\t abccba\n" + "\n" + "//\\t deffed\n" + "\n" + "//🐉 Another nice dragon\n" + "\n" + "//\t Three leading spaces following tab\n" + "\n" + "//\\t Three leading spaces following backslash\n" + "\n" "///A Doxygen Comment with a nested list:\n" "///- Foo\n" "///- Bar\n" @@ -3931,6 +4003,18 @@ TEST_F(FormatTestComments, SpaceAtLineCommentBegin) { "\n" "/// Free Doxygen with 3 spaces\n" "\n" + "// 🐉 A nice dragon\n" + "\n" + "//\t abccba\n" + "\n" + "//\\t deffed\n" + "\n" + "// 🐉 Another nice dragon\n" + "\n" + "// \t Three leading spaces following tab\n" + "\n" + "// \\t 
Three leading spaces following backslash\n" + "\n" "/// A Doxygen Comment with a nested list:\n" "/// - Foo\n" "/// - Bar\n"