From 10331224f2edb9923d9b9d94fdf2a28386e370be Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Tue, 19 Aug 2025 18:04:01 +0100 Subject: [PATCH 1/5] clean up string printing --- lldb/source/Core/DumpDataExtractor.cpp | 62 ++++++++++++++++++++------ 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp index 5a395fd3457fd..7c6c38a06a6ae 100644 --- a/lldb/source/Core/DumpDataExtractor.cpp +++ b/lldb/source/Core/DumpDataExtractor.cpp @@ -215,6 +215,53 @@ static void DumpCharacter(Stream &s, const char c) { s.Printf("\\x%2.2x", c); } +/// Dump the character to a stream using OCaml string literal format. +/// This matches OCaml's unsafe_escape function from bytes.ml in the standard library. +static void DumpEscapedCharacterOCaml(Stream &s, const char c) { + switch (c) { + case '"': + s.Printf("\\\""); + return; + case '\\': + s.Printf("\\\\"); + return; + case '\n': + s.Printf("\\n"); + return; + case '\t': + s.Printf("\\t"); + return; + case '\r': + s.Printf("\\r"); + return; + case '\b': + s.Printf("\\b"); + return; + default: + break; + } + + // Handle printable ASCII range ' ' to '~' (32 to 126) + if (c >= ' ' && c <= '~') { + s.PutChar(c); + return; + } + + // Use OCaml's 3-digit decimal escape format for non-printable characters + // This matches the logic in bytes.ml unsafe_escape function + unsigned char a = (unsigned char)c; + s.Printf("\\%03d", a); +} + +/// Dump `string_length` bytes of `data` as an OCaml string literal with proper escaping and quotes. +static void DumpStringOCaml(Stream *s, const char *data, uint64_t string_length) { + s->Printf("\""); + for (uint64_t i = 0; i < string_length; ++i) { + DumpEscapedCharacterOCaml(*s, data[i]); + } + s->Printf("\""); +} + /// Dump a floating point type. 
template void DumpFloatingPoint(std::ostringstream &ss, FloatT f) { @@ -574,19 +621,8 @@ static offset_t FormatOCamlValue(const DataExtractor &DE, Stream *s, if (error.Fail() || bytes_read < string_length) { s->Printf("@"); } else { - const char *c_str = (const char *)&str.front(); - if (strlen(c_str) == string_length) { - /* String does not contain NUL characters */ - s->Printf("\"%s\"", c_str); - } else { - s->Printf("\""); - DataExtractor cstr_data(&str.front(), str.size(), - process->GetByteOrder(), 8); - DumpDataExtractor(cstr_data, s, 0, lldb::eFormatChar, 1, - string_length, UINT32_MAX, - LLDB_INVALID_ADDRESS, 0, 0); - s->Printf("\""); - } + const char *data = (const char *)&str.front(); + DumpStringOCaml(s, data, string_length); print_default = false; } } From e61c2f5feca0c21f0c90be53267b7a52505e696a Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Tue, 19 Aug 2025 18:04:38 +0100 Subject: [PATCH 2/5] float trailing zero --- lldb/source/Core/DumpDataExtractor.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp index 7c6c38a06a6ae..e56b35325d4e6 100644 --- a/lldb/source/Core/DumpDataExtractor.cpp +++ b/lldb/source/Core/DumpDataExtractor.cpp @@ -436,10 +436,10 @@ void PrintAPIntAsFloat(Stream *s, llvm::APInt apint, s->AsRawOstream() << prefix; s->AsRawOstream() << sv; // OCaml Specific: - // Following OCaml conventions, print the trailing "." to + // Following OCaml conventions, print the trailing ".0" to // identify that the integer is in fact a float, but don't - // print any trailing zeros. - bool print_trailing_dot = true; + // print any trailing zeros beyond that. + bool print_trailing_dot_zero = true; for (char c : sv) { switch (c) { case '-': @@ -456,14 +456,14 @@ void PrintAPIntAsFloat(Stream *s, llvm::APInt apint, continue; default: // if we find something that is not a number such as 'e' or 'E' or '.' 
- // there is no need to print the trailing ".". - print_trailing_dot = false; + // there is no need to print the trailing ".0". + print_trailing_dot_zero = false; } break; // we found something that is not a number, so we will not print - // the trailing "." + // the trailing ".0" } - if (print_trailing_dot){ - s->AsRawOstream() << "."; + if (print_trailing_dot_zero){ + s->AsRawOstream() << ".0"; } s->AsRawOstream() << suffix; From 8b5b1f967c92d63650c721b01e812e47e6b8b06b Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Tue, 19 Aug 2025 18:08:01 +0100 Subject: [PATCH 3/5] unboxed float and int printing --- lldb/source/Core/DumpDataExtractor.cpp | 37 +++++++++++++++++++++----- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp index e56b35325d4e6..a3eac136ef035 100644 --- a/lldb/source/Core/DumpDataExtractor.cpp +++ b/lldb/source/Core/DumpDataExtractor.cpp @@ -433,9 +433,17 @@ void PrintAPIntAsFloat(Stream *s, llvm::APInt apint, else apfloat.toPossiblyShortString(sv); - s->AsRawOstream() << prefix; - s->AsRawOstream() << sv; // OCaml Specific: + // Handle negative sign placement for OCaml format + if (sv.size() > 0 && sv[0] == '-') { + s->AsRawOstream() << "-"; + s->AsRawOstream() << prefix; + s->AsRawOstream() << llvm::StringRef(sv.data() + 1, sv.size() - 1); + } else { + s->AsRawOstream() << prefix; + s->AsRawOstream() << sv; + } + // Following OCaml conventions, print the trailing ".0" to // identify that the integer is in fact a float, but don't // print any trailing zeros beyond that. 
@@ -963,11 +971,26 @@ lldb::offset_t lldb_private::DumpDataExtractor( case eFormatEnum: // Print enum value as a signed integer when we don't get // the enum type case eFormatDecimal: - if (item_byte_size <= 8) - s->Printf("%" PRId64, - DE.GetMaxS64Bitfield(&offset, item_byte_size, item_bit_size, - item_bit_offset)); - else { + if (item_byte_size <= 8) { + int64_t value = DE.GetMaxS64Bitfield(&offset, item_byte_size, item_bit_size, + item_bit_offset); + // CR sspies: This special-cases OCaml values. Consider wrapping + // it in a guard. + // Format as unboxed OCaml integer + std::string suffix = ""; + if (item_byte_size == 4) suffix = "l"; + else if (item_byte_size == 8) suffix = "L"; + else if (item_byte_size == 2) suffix = ""; // int16 - no specific suffix + else if (item_byte_size == 1) suffix = ""; // int8 - no specific suffix + // TODO: add an "n" suffix for nativeint when it's pointer-sized (could be 4 or 8 bytes) + + + if (value < 0) { + s->Printf("-#%" PRId64 "%s", -value, suffix.c_str()); + } else { + s->Printf("#%" PRId64 "%s", value, suffix.c_str()); + } + } else { const bool is_signed = true; const unsigned radix = 10; offset = DumpAPInt(s, DE, offset, item_byte_size, is_signed, radix); From c79c553434f9c83af8239c80602e945803c5cf30 Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Tue, 19 Aug 2025 18:08:10 +0100 Subject: [PATCH 4/5] nit --- lldb/source/Core/DumpDataExtractor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp index a3eac136ef035..9b4ab20266301 100644 --- a/lldb/source/Core/DumpDataExtractor.cpp +++ b/lldb/source/Core/DumpDataExtractor.cpp @@ -725,7 +725,7 @@ static offset_t FormatOCamlValue(const DataExtractor &DE, Stream *s, if (int_size == 32) s->Printf("%ld", (long)i); else - s->Printf("%lld", (int64_t)i); + s->Printf("%" PRIi64, (int64_t)i); s->Printf("%s", suffix.c_str()); print_default = false; } From 
0727707abfc18b33cc6c662aa63064c956d3e50f Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Wed, 20 Aug 2025 18:26:00 +0100 Subject: [PATCH 5/5] printing of small signed integers --- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 3cbc56e5f43a3..ca34beeae9dfd 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -5264,6 +5264,11 @@ lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) { return lldb::eFormatBoolean; case clang::BuiltinType::Char_S: case clang::BuiltinType::SChar: + // For OCaml, treat signed 8-bit integers as decimal numbers (like int16, int32, int64) + // rather than characters to ensure consistent SIMD vector display + if (isLanguageOCaml()) + return lldb::eFormatDecimal; + return lldb::eFormatChar; case clang::BuiltinType::WChar_S: case clang::BuiltinType::Char_U: case clang::BuiltinType::UChar: