diff --git a/lldb/include/lldb/Utility/AnsiTerminal.h b/lldb/include/lldb/Utility/AnsiTerminal.h
index 153602cc08b09..dd2eafef1f228 100644
--- a/lldb/include/lldb/Utility/AnsiTerminal.h
+++ b/lldb/include/lldb/Utility/AnsiTerminal.h
@@ -98,6 +98,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Locale.h"
+#include "llvm/Support/Unicode.h"
 
 #include "lldb/Utility/Stream.h"
 
@@ -264,13 +265,16 @@ inline std::string TrimAndPad(llvm::StringRef str, size_t visible_length,
     // Repeatedly trim the string until it its valid unicode and fits.
     llvm::StringRef trimmed = left;
     while (!trimmed.empty()) {
-      // This relies on columnWidth returning -2 for invalid/partial unicode
-      // characters, which after conversion to size_t will be larger than the
-      // visible width.
-      column_width = llvm::sys::locale::columnWidth(trimmed);
-      if (result_visibile_length + column_width <= visible_length) {
+      int trimmed_width = llvm::sys::locale::columnWidth(trimmed);
+      if (
+          // If we have only part of a Unicode character, keep trimming.
+          trimmed_width !=
+              llvm::sys::unicode::ColumnWidthErrors::ErrorInvalidUTF8 &&
+          // If the trimmed string fits, take it.
+          result_visibile_length + static_cast<size_t>(trimmed_width) <=
+              visible_length) {
         result.append(trimmed);
-        result_visibile_length += column_width;
+        result_visibile_length += static_cast<size_t>(trimmed_width);
         break;
       }
       trimmed = trimmed.drop_back();
diff --git a/lldb/unittests/Utility/AnsiTerminalTest.cpp b/lldb/unittests/Utility/AnsiTerminalTest.cpp
index 28fa32461ad5f..8924c083a656b 100644
--- a/lldb/unittests/Utility/AnsiTerminalTest.cpp
+++ b/lldb/unittests/Utility/AnsiTerminalTest.cpp
@@ -116,6 +116,18 @@ TEST(AnsiTerminal, TrimAndPad) {
   EXPECT_EQ(" ❤️", ansi::TrimAndPad(" ❤️", 5));
   EXPECT_EQ("12❤️4❤️", ansi::TrimAndPad("12❤️4❤️", 5));
   EXPECT_EQ("12❤️45", ansi::TrimAndPad("12❤️45❤️", 5));
+
+  // This string previously triggered a bug in handling incomplete Unicode
+  // characters, when we had already accumulated some previous parts of the
+  // string.
+  const char *quick = "The \x1B[0mquick\x1B[0m 💨\x1B[0m";
+  EXPECT_EQ(ansi::TrimAndPad(quick, 0), "");
+  EXPECT_EQ(ansi::TrimAndPad(quick, 9), "The \x1B[0mquick\x1B[0m");
+  EXPECT_EQ(ansi::TrimAndPad(quick, 10), "The \x1B[0mquick\x1B[0m ");
+  // The emoji is 2 columns, so 11 is not quite enough.
+  EXPECT_EQ(ansi::TrimAndPad(quick, 11, '_'), "The \x1B[0mquick\x1B[0m _");
+  // 12 exactly enough to include the emoji and proceeding ANSI code.
+  EXPECT_EQ(ansi::TrimAndPad(quick, 12), quick);
 }
 
 static void TestLines(const std::string &input, int indent,