Skip to content

Commit

Permalink
[lldb] Support custom LLVM formatting for variables (#81196)
Browse files Browse the repository at this point in the history
Adds support for applying LLVM formatting to variables.

The reason for this is to support cases such as the following.

Let's say you have two separate bytes that you want to print as a
combined hex value. Consider the following summary string:

```
${var.byte1%x}${var.byte2%x}
```

The output of this will be: `0x120x34`. That is, a `0x` prefix is
unconditionally applied to each byte. This is unlike printf formatting
where you must include the `0x` yourself.

Currently, there's no way to do this with summary strings; instead, you
need a summary provider written in Python or C++.

This change introduces formatting support using LLVM's formatter system.
This allows users to achieve the desired custom formatting using:

```
${var.byte1:x-}${var.byte2:x-}
```

Here, each variable is suffixed with `:x-`. This is passed to the LLVM
formatter as `{0:x-}`. For integer values, `x` declares the output as
hex, and `-` declares that no `0x` prefix is to be used. Further, one
could write:

```
${var.byte1:x-2}${var.byte2:x-2}
```

Where the added `2` results in these bytes being written with a minimum
of 2 digits.

An alternative considered was to add a new format specifier that would
print hex values without the `0x` prefix. That approach was not taken
because lldb's builtin hex format, in addition to forcing a `0x` prefix,
also forces leading zeros. Using LLVM format strings instead gives the
user full control over formatting.
  • Loading branch information
kastiglione committed Apr 30, 2024
1 parent 40083cf commit 7a8d15e
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 10 deletions.
9 changes: 9 additions & 0 deletions lldb/docs/use/variable.rst
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,15 @@ summary strings, regardless of the format they have applied to their types. To
do that, you can use %format inside an expression path, as in ${var.x->x%u},
which would display the value of x as an unsigned integer.

Additionally, custom output can be achieved by using an LLVM format string,
introduced by the ``:`` marker. To illustrate, compare ``${var.byte%x}`` and
``${var.byte:x-}``. The former uses lldb's builtin hex formatting (``x``),
which unconditionally inserts a ``0x`` prefix and zero-pads the value to
match the size of the type. The latter uses ``llvm::formatv`` formatting
(``:x-``) and prints only the hex digits, with no ``0x`` prefix and no
padding. A minimum number of digits can be appended, as in
``${var.byte:x-2}``. LLVM format strings currently apply to integer values
only. This raw control is useful when composing multiple pieces into a
larger whole.

You can also use some other special format markers, not available for formats
themselves, but which carry a special meaning when used in this context:

Expand Down
70 changes: 60 additions & 10 deletions lldb/source/Core/FormatEntity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"

#include <cctype>
Expand Down Expand Up @@ -658,6 +659,38 @@ static char ConvertValueObjectStyleToChar(
return '\0';
}

// Style options accepted by llvm's format_provider<T> for integral types (see
// the table in llvm/Support/FormatProviders.h): an optional style letter --
// `x`, `x-` or `x+` for hex, `n`, or `d`, matched case-insensitively --
// followed by an optional minimum number of digits. The empty string is the
// provider's default style, so it is accepted as well; callers that split an
// entry on ':' get an empty string when no LLVM format was given.
//
// The pattern is anchored so that the *entire* option string has to be valid;
// without anchors any string that merely contains one of the letters would be
// accepted. Digits are spelled `[0-9]` because llvm::Regex implements POSIX
// extended syntax, where `\d` is not a digit class but a literal `d`.
static llvm::Regex LLVMFormatPattern{"^(x[-+]?|n|d)?[0-9]*$",
                                     llvm::Regex::IgnoreCase};

/// Formats the integer value of \p valobj with llvm::formatv, using \p options
/// as the style of the replacement (i.e. the format string "{0:<options>}"),
/// and writes the resulting text to \p s.
///
/// \return
///     true if text was written to \p s. false -- with nothing written -- if
///     \p valobj is not an integer type, if \p options is not matched by
///     LLVMFormatPattern, or if the value could not be read.
static bool DumpValueWithLLVMFormat(Stream &s, llvm::StringRef options,
                                    ValueObject &valobj) {
  const auto type_flags = valobj.GetTypeInfo();
  const bool is_integer = (type_flags & eTypeIsInteger) != 0;
  if (!is_integer || !LLVMFormatPattern.match(options))
    return false;

  const std::string spec = ("{0:" + options + "}").str();

  std::string text;
  bool read_ok = false;
  if (type_flags & eTypeIsSigned) {
    int64_t value = valobj.GetValueAsSigned(0, &read_ok);
    if (read_ok)
      text = llvm::formatv(spec.data(), value);
  } else {
    uint64_t value = valobj.GetValueAsUnsigned(0, &read_ok);
    if (read_ok)
      text = llvm::formatv(spec.data(), value);
  }

  // No text means the value was not formatted; report failure so the caller
  // can decide what to do instead.
  if (text.empty())
    return false;

  s.Write(text.data(), text.size());
  return true;
}

static bool DumpValue(Stream &s, const SymbolContext *sc,
const ExecutionContext *exe_ctx,
const FormatEntity::Entry &entry, ValueObject *valobj) {
Expand Down Expand Up @@ -728,9 +761,12 @@ static bool DumpValue(Stream &s, const SymbolContext *sc,
return RunScriptFormatKeyword(s, sc, exe_ctx, valobj, entry.string.c_str());
}

llvm::StringRef subpath(entry.string);
auto split = llvm::StringRef(entry.string).split(':');
auto subpath = split.first;
auto llvm_format = split.second;

// simplest case ${var}, just print valobj's value
if (entry.string.empty()) {
if (subpath.empty()) {
if (entry.printf_format.empty() && entry.fmt == eFormatDefault &&
entry.number == ValueObject::eValueObjectRepresentationStyleValue)
was_plain_var = true;
Expand All @@ -739,22 +775,19 @@ static bool DumpValue(Stream &s, const SymbolContext *sc,
target = valobj;
} else // this is ${var.something} or multiple .something nested
{
if (entry.string[0] == '[')
if (subpath[0] == '[')
was_var_indexed = true;
ScanBracketedRange(subpath, close_bracket_index,
var_name_final_if_array_range, index_lower,
index_higher);

Status error;

const std::string &expr_path = entry.string;

LLDB_LOGF(log, "[Debugger::FormatPrompt] symbol to expand: %s",
expr_path.c_str());
LLDB_LOG(log, "[Debugger::FormatPrompt] symbol to expand: {0}", subpath);

target =
valobj
->GetValueForExpressionPath(expr_path.c_str(), &reason_to_stop,
->GetValueForExpressionPath(subpath, &reason_to_stop,
&final_value_type, options, &what_next)
.get();

Expand Down Expand Up @@ -883,8 +916,18 @@ static bool DumpValue(Stream &s, const SymbolContext *sc,
}

if (!is_array_range) {
LLDB_LOGF(log,
"[Debugger::FormatPrompt] dumping ordinary printable output");
if (!llvm_format.empty()) {
if (DumpValueWithLLVMFormat(s, llvm_format, *target)) {
LLDB_LOGF(log, "dumping using llvm format");
return true;
} else {
LLDB_LOG(
log,
"empty output using llvm format '{0}' - with type info flags {1}",
entry.printf_format, target->GetTypeInfo());
}
}
LLDB_LOGF(log, "dumping ordinary printable output");
return target->DumpPrintableRepresentation(s, val_obj_display,
custom_format);
} else {
Expand Down Expand Up @@ -2227,6 +2270,13 @@ static Status ParseInternal(llvm::StringRef &format, Entry &parent_entry,
if (error.Fail())
return error;

auto [_, llvm_format] = llvm::StringRef(entry.string).split(':');
if (!LLVMFormatPattern.match(llvm_format)) {
error.SetErrorStringWithFormat("invalid llvm format: '%s'",
llvm_format.data());
return error;
}

if (verify_is_thread_id) {
if (entry.type != Entry::Type::ThreadID &&
entry.type != Entry::Type::ThreadProtocolID) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# The test program consists of a single C source file.
C_SOURCES := main.c
# Everything else comes from the included Makefile.rules (not shown here).
include Makefile.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import lldb
from lldbsuite.test.lldbtest import *
import lldbsuite.test.lldbutil as lldbutil


class TestCase(TestBase):
    """Checks LLVM-style (``:``) format options inside summary strings."""

    def _build_and_stop(self):
        """Build the inferior and run it to the marker line in main.c."""
        self.build()
        source_file = lldb.SBFileSpec("main.c")
        lldbutil.run_to_source_breakpoint(self, "break here", source_file)

    def test_raw_bytes(self):
        """Two bytes printed with ``:x-2`` join into one unprefixed hex value."""
        self._build_and_stop()
        add_summary = (
            "type summary add -s '${var.ubyte:x-2}${var.sbyte:x-2}!' Bytes"
        )
        self.runCmd(add_summary)
        self.expect("v bytes", substrs=[" = 3001!"])

    def test_bad_format(self):
        """An unsupported LLVM format option is reported as an error."""
        self._build_and_stop()
        add_summary = "type summary add -s '${var.ubyte:y}!' Bytes"
        self.expect(add_summary, error=True, substrs=["invalid llvm format"])
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include <stdint.h>
#include <stdio.h>

/* Two single-byte fields, one unsigned and one signed. */
struct Bytes {
/* Unsigned byte. */
uint8_t ubyte;
/* Signed byte. */
int8_t sbyte;
};

/* Sets up a Bytes value holding 0x30 and 0x01, then prints a single line. */
int main(void) {
  struct Bytes bytes;
  bytes.ubyte = 0x30;
  bytes.sbyte = 0x01;
  /* The program itself never reads `bytes`; the cast marks it as used. */
  (void)bytes;
  printf("break here\n");
  return 0;
}

0 comments on commit 7a8d15e

Please sign in to comment.