Skip to content

Commit

Permalink
[lldb] Print embedded nuls in char arrays (PR44649)
Browse files Browse the repository at this point in the history
When we know the bounds of the array, print any embedded nuls instead of
treating them as terminators. An exception to this rule is made for the
nul character at the very end of the string. We don't print that, as
otherwise 99% of the strings would end in \0. This way the strings
usually come out the same as how the user typed them into the compiler
(char foo[] = "with\0nuls"). It also matches how they come out in gdb.

This resolves a FIXME left from D111399, and leaves another FIXME for dealing
with nul characters in "escape-non-printables=false" mode. In this mode the
characters cause the entire summary string to be terminated prematurely.

Differential Revision: https://reviews.llvm.org/D111634
  • Loading branch information
labath committed Oct 14, 2021
1 parent 6e1308b commit ca0ce99
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 13 deletions.
11 changes: 7 additions & 4 deletions lldb/source/Core/ValueObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -849,8 +849,10 @@ bool ValueObject::SetData(DataExtractor &data, Status &error) {

// Copies the text accumulated in `source` into a freshly allocated
// DataBufferHeap and stores that buffer in `destination`. Always returns true.
//
// NOTE(review): this listing comes from a diff view whose +/- markers were
// lost. The first allocation/memcpy pair below looks like the pre-change
// (removed) code and the StringRef-based lines look like its replacement --
// confirm against the real file before relying on it.
static bool CopyStringDataToBufferSP(const StreamString &source,
lldb::DataBufferSP &destination) {
// Apparent old form: buffer is one byte larger than the stream contents, so
// the copied bytes are followed by one extra byte (presumably a zero fill --
// verify the DataBufferHeap constructor's second argument).
destination = std::make_shared<DataBufferHeap>(source.GetSize() + 1, 0);
memcpy(destination->GetBytes(), source.GetString().data(), source.GetSize());
// Apparent new form: view the stream contents and strip a single trailing nul
// byte if one is present. The explicit length of 1 makes the "\0" literal a
// one-character suffix instead of an empty string.
llvm::StringRef src = source.GetString();
src.consume_back(llvm::StringRef("\0", 1));
// The buffer is sized to exactly the remaining bytes; no extra terminator
// byte is reserved, and any nul bytes inside `src` are copied as data.
destination = std::make_shared<DataBufferHeap>(src.size(), 0);
memcpy(destination->GetBytes(), src.data(), src.size());
return true;
}

Expand Down Expand Up @@ -912,8 +914,8 @@ ValueObject::ReadPointedString(lldb::DataBufferSP &buffer_sp, Status &error,
CopyStringDataToBufferSP(s, buffer_sp);
return {0, was_capped};
}
buffer_sp = std::make_shared<DataBufferHeap>(cstr_len, 0);
memcpy(buffer_sp->GetBytes(), cstr, cstr_len);
s << llvm::StringRef(cstr, cstr_len);
CopyStringDataToBufferSP(s, buffer_sp);
return {cstr_len, was_capped};
} else {
s << "<invalid address>";
Expand Down Expand Up @@ -1196,6 +1198,7 @@ bool ValueObject::DumpPrintableRepresentation(
options.SetQuote('"');
options.SetSourceSize(buffer_sp->GetByteSize());
options.SetIsTruncated(read_string.second);
options.SetBinaryZeroIsTerminator(custom_format != eFormatVectorOfChar);
formatters::StringPrinter::ReadBufferAndDumpToStream<
lldb_private::formatters::StringPrinter::StringElementType::ASCII>(
options);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ def test(self):

# Different character arrays.
# FIXME: Passing a 'const char *' will ignore any given format,
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("character array", "cstring"))
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("c-string", "cstring"))
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("character array", "cstring"))
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("c-string", "cstring"))
self.assertIn(' = " \\e\\a\\b\\f\\n\\r\\t\\vaA09" " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n',
self.getFormatted("c-string", "(char *)cstring"))
self.assertIn('=\n', self.getFormatted("c-string", "(__UINT64_TYPE__)0"))
Expand Down Expand Up @@ -132,10 +132,10 @@ def test(self):
self.assertIn('= 0x2007080c0a0d090b415a617a30391b00\n', self.getFormatted("OSType", string_expr))

# bytes
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("bytes", "cstring"))
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes", "cstring"))

# bytes with ASCII
self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("bytes with ASCII", "cstring"))
self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes with ASCII", "cstring"))

# unicode16
self.assertIn('= U+5678 U+1234\n', self.getFormatted("unicode16", "0x12345678"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ struct A {

int main (int argc, char const *argv[])
{
A a, b;
A a, b, c;
// Deliberately write past the end of data to test that the formatter stops
// at the end of array.
memcpy(a.data, "FOOBAR", 7);
memcpy(b.data, "FO\0BAR", 7);
memcpy(c.data, "F\0O\0AR", 7);
std::string stdstring("Hello\t\tWorld\nI am here\t\tto say hello\n"); //%self.addTearDownHook(lambda x: x.runCmd("setting set escape-non-printables true"))
const char* constcharstar = stdstring.c_str();
std::string longstring(
Expand All @@ -33,13 +34,15 @@ int main (int argc, char const *argv[])
return 0; //% if self.TraceOn(): self.runCmd('frame variable')
//% self.expect_var_path('stdstring', summary='"Hello\\t\\tWorld\\nI am here\\t\\tto say hello\\n"')
//% self.expect_var_path('constcharstar', summary='"Hello\\t\\tWorld\\nI am here\\t\\tto say hello\\n"')
//% self.expect_var_path("a.data", summary='"FOOB"')
//% self.expect_var_path("b.data", summary=r'"FO\0B"')
//% self.expect_var_path("c.data", summary=r'"F\0O"')
//%
//% self.runCmd("setting set escape-non-printables false")
//% self.expect_var_path('stdstring', summary='"Hello\t\tWorld\nI am here\t\tto say hello\n"')
//% self.expect_var_path('constcharstar', summary='"Hello\t\tWorld\nI am here\t\tto say hello\n"')
//% self.assertTrue(self.frame().FindVariable('longstring').GetSummary().endswith('"...'))
//% self.assertTrue(self.frame().FindVariable('longconstcharstar').GetSummary().endswith('"...'))
//% self.expect_var_path("a.data", summary='"FOOB"')
// FIXME: Should this be "FO\0B" instead?
//% self.expect_var_path("b.data", summary='"FO"')
// FIXME: make "b.data" and "c.data" work sanely
}

2 changes: 1 addition & 1 deletion lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_const_value.s
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
## Variables specified using string forms. This behavior is purely speculative -- I
## don't know of any compiler that would represent character strings this way.
# CHECK: (char [7]) string = "string"
# CHECK: (char [7]) strp = "strp"
# CHECK: (char [7]) strp = "strp\0\0"
## Bogus attribute form. Let's make sure we don't crash at least.
# CHECK: (char [7]) ref4 = <empty constant data>
## A variable of pointer type.
Expand Down

0 comments on commit ca0ce99

Please sign in to comment.