diff --git a/lldb/include/lldb/API/SBInstruction.h b/lldb/include/lldb/API/SBInstruction.h index 755e3b4a47c9b..05e7087f2e679 100644 --- a/lldb/include/lldb/API/SBInstruction.h +++ b/lldb/include/lldb/API/SBInstruction.h @@ -11,6 +11,7 @@ #include "lldb/API/SBData.h" #include "lldb/API/SBDefines.h" +#include "lldb/API/SBStructuredData.h" #include @@ -73,6 +74,23 @@ class LLDB_API SBInstruction { bool TestEmulation(lldb::SBStream &output_stream, const char *test_file); + /// Get variable annotations for this instruction as structured data. + /// Returns an array of dictionaries, each containing: + /// - "variable_name": string name of the variable + /// - "location_description": string description of where variable is stored + /// ("RDI", "R15", "undef", etc.) + /// - "is_live": boolean indicates if variable is live at this instruction + /// - "start_address": unsigned integer address where this annotation becomes + /// valid + /// - "end_address": unsigned integer address where this annotation becomes + /// invalid + /// - "register_kind": unsigned integer indicating the register numbering + /// scheme + /// - "decl_file": string path to the file where variable is declared + /// - "decl_line": unsigned integer line number where variable is declared + /// - "type_name": string type name of the variable + lldb::SBStructuredData GetVariableAnnotations(lldb::SBTarget target); + protected: friend class SBInstructionList; diff --git a/lldb/include/lldb/API/SBStructuredData.h b/lldb/include/lldb/API/SBStructuredData.h index dfd8ec0e180ce..75fb16b795a5a 100644 --- a/lldb/include/lldb/API/SBStructuredData.h +++ b/lldb/include/lldb/API/SBStructuredData.h @@ -153,6 +153,7 @@ class SBStructuredData { friend class SBBreakpointLocation; friend class SBBreakpointName; friend class SBTrace; + friend class SBInstruction; friend class lldb_private::python::SWIGBridge; friend class lldb_private::lua::SWIGBridge; friend class SBCommandInterpreter; diff --git a/lldb/include/lldb/Core/Disassembler.h b/lldb/include/lldb/Core/Disassembler.h index db186dd33d774..97e392008adfd 100644 --- a/lldb/include/lldb/Core/Disassembler.h +++ b/lldb/include/lldb/Core/Disassembler.h @@ -566,6 +566,21 @@ class Disassembler : public std::enable_shared_from_this, const Disassembler &operator=(const Disassembler &) = delete; }; +/// Structured data for a single variable annotation. +struct VariableAnnotation { + std::string variable_name; + std::string location_description; // e.g., "r15", "undef", "const_0" + lldb::addr_t start_address; // Where this annotation starts being valid + lldb::addr_t end_address; // Where this annotation ends being valid + bool is_live; // Whether variable is live at this instruction + lldb::RegisterKind + register_kind; // Register numbering scheme for location interpretation + std::optional + decl_file; // Source file where variable was declared + std::optional decl_line; // Line number where variable was declared + std::optional type_name; // Variable's type name +}; + /// Tracks live variable annotations across instructions and produces /// per-instruction "events" like `name = RDI` or `name = `. class VariableAnnotator { @@ -574,16 +589,39 @@ class VariableAnnotator { std::string name; /// Last printed location (empty means ). std::string last_loc; + /// Address range where this variable state is valid. + lldb::addr_t start_address; + lldb::addr_t end_address; + /// Register numbering scheme for location interpretation. + lldb::RegisterKind register_kind; + + std::optional decl_file; + std::optional decl_line; + std::optional type_name; }; // Live state from the previous instruction, keyed by Variable::GetID(). llvm::DenseMap Live_; + static constexpr const char *kUndefLocation = "undef"; + public: /// Compute annotation strings for a single instruction and update `Live_`. /// Returns only the events that should be printed *at this instruction*. std::vector annotate(Instruction &inst, Target &target, const lldb::ModuleSP &module_sp); + + /// Compute structured annotation data for a single instruction and update + /// `Live_`. Returns structured data for all variables relevant at this + /// instruction. + std::vector + AnnotateStructured(Instruction &inst, Target &target, + const lldb::ModuleSP &module_sp); + +private: + VariableAnnotation createAnnotation( + const VarState &var_state, bool is_live, + const std::optional &location_desc = std::nullopt); }; } // namespace lldb_private diff --git a/lldb/source/API/SBInstruction.cpp b/lldb/source/API/SBInstruction.cpp index 6755089af39a4..8ce7281a99872 100644 --- a/lldb/source/API/SBInstruction.cpp +++ b/lldb/source/API/SBInstruction.cpp @@ -10,10 +10,11 @@ #include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBAddress.h" -#include "lldb/API/SBFrame.h" #include "lldb/API/SBFile.h" +#include "lldb/API/SBFrame.h" #include "lldb/API/SBStream.h" +#include "lldb/API/SBStructuredData.h" #include "lldb/API/SBTarget.h" #include "lldb/Core/Disassembler.h" #include "lldb/Core/EmulateInstruction.h" @@ -26,6 +27,7 @@ #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/DataBufferHeap.h" #include "lldb/Utility/DataExtractor.h" +#include "lldb/Utility/StructuredData.h" #include @@ -163,7 +165,8 @@ const char *SBInstruction::GetComment(SBTarget target) { return ConstString(inst_sp->GetComment(&exe_ctx)).GetCString(); } -lldb::InstructionControlFlowKind SBInstruction::GetControlFlowKind(lldb::SBTarget target) { +lldb::InstructionControlFlowKind +SBInstruction::GetControlFlowKind(lldb::SBTarget target) { LLDB_INSTRUMENT_VA(this, target); lldb::InstructionSP inst_sp(GetOpaque()); @@ -347,3 +350,67 @@ bool SBInstruction::TestEmulation(lldb::SBStream &output_stream, return inst_sp->TestEmulation(output_stream.ref(), test_file); return false; } + +lldb::SBStructuredData +SBInstruction::GetVariableAnnotations(lldb::SBTarget target) { + LLDB_INSTRUMENT_VA(this, target); + + SBStructuredData result; + + if (!m_opaque_sp || !m_opaque_sp->IsValid() || !target.IsValid()) { + return result; + } + + lldb::InstructionSP inst_sp = m_opaque_sp->GetSP(); + lldb::TargetSP target_sp = target.GetSP(); + + if (!inst_sp || !target_sp) { + return result; + } + + const Address &addr = inst_sp->GetAddress(); + ModuleSP module_sp = addr.GetModule(); + + if (!module_sp) { + return result; + } + + VariableAnnotator annotator; + std::vector annotations = + annotator.annotateStructured(*inst_sp, *target_sp, module_sp); + + auto array_sp = std::make_shared(); + + for (const auto &ann : annotations) { + auto dict_sp = std::make_shared(); + + dict_sp->AddStringItem("variable_name", ann.variable_name); + dict_sp->AddStringItem("location_description", ann.location_description); + dict_sp->AddBooleanItem("is_live", ann.is_live); + dict_sp->AddItem( + "start_address", + std::make_shared(ann.start_address)); + dict_sp->AddItem( + "end_address", + std::make_shared(ann.end_address)); + dict_sp->AddItem( + "register_kind", + std::make_shared(ann.register_kind)); + if (ann.decl_file.has_value()) { + dict_sp->AddStringItem("decl_file", *ann.decl_file); + } + if (ann.decl_line.has_value()) { + dict_sp->AddItem( + "decl_line", + std::make_shared(*ann.decl_line)); + } + if (ann.type_name.has_value()) { + dict_sp->AddStringItem("type_name", *ann.type_name); + } + + array_sp->AddItem(dict_sp); + } + + result.m_impl_up->SetObjectSP(array_sp); + return result; +} diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp index f2ed1f7395346..9786823676275 100644 --- a/lldb/source/Core/Disassembler.cpp +++ b/lldb/source/Core/Disassembler.cpp @@ -302,14 +302,39 @@ bool Disassembler::ElideMixedSourceAndDisassemblyLine( std::vector VariableAnnotator::annotate(Instruction &inst, Target &target, const lldb::ModuleSP &module_sp) { + auto structured_annotations = annotateStructured(inst, target, module_sp); + std::vector events; + events.reserve(structured_annotations.size()); + + for (const auto &annotation : structured_annotations) { + std::string display_string; + display_string = + llvm::formatv( + "{0} = {1}", annotation.variable_name, + annotation.location_description == VariableAnnotator::kUndefLocation + ? llvm::formatv("<{0}>", VariableAnnotator::kUndefLocation) + .str() + : annotation.location_description) + .str(); + events.push_back(display_string); + } + + return events; +} + +std::vector +VariableAnnotator::annotateStructured(Instruction &inst, Target &target, + const lldb::ModuleSP &module_sp) { + std::vector annotations; - // If we lost module context, everything becomes . + // If we lost module context, mark all live variables as undefined. if (!module_sp) { - for (const auto &KV : Live_) - events.emplace_back(llvm::formatv("{0} = ", KV.second.name).str()); + for (const auto &KV : Live_) { + annotations.push_back(createAnnotation(KV.second, false, kUndefLocation)); + } Live_.clear(); - return events; + return annotations; } // Resolve function/block at this *file* address. @@ -319,10 +344,11 @@ VariableAnnotator::annotate(Instruction &inst, Target &target, if (!module_sp->ResolveSymbolContextForAddress(iaddr, mask, sc) || !sc.function) { // No function context: everything dies here. - for (const auto &KV : Live_) - events.emplace_back(llvm::formatv("{0} = ", KV.second.name).str()); + for (const auto &KV : Live_) { + annotations.push_back(createAnnotation(KV.second, false, kUndefLocation)); + } Live_.clear(); - return events; + return annotations; } // Collect in-scope variables for this instruction into Current. @@ -376,8 +402,35 @@ VariableAnnotator::annotate(Instruction &inst, Target &target, if (loc.empty()) continue; - Current.try_emplace(v->GetID(), - VarState{std::string(name), std::string(loc)}); + lldb::addr_t start_addr = inst.GetAddress().GetFileAddress(); + lldb::addr_t end_addr = LLDB_INVALID_ADDRESS; + if (entry.file_range.has_value()) { + start_addr = entry.file_range->GetBaseAddress().GetFileAddress(); + end_addr = start_addr + entry.file_range->GetByteSize(); + } + + std::optional decl_file; + std::optional decl_line; + std::optional type_name; + + const Declaration &decl = v->GetDeclaration(); + if (decl.GetFile()) { + decl_file = decl.GetFile().GetFilename().AsCString(); + if (decl.GetLine() > 0) { + decl_line = decl.GetLine(); + } + } + + if (Type *type = v->GetType()) { + if (const char *type_str = type->GetName().AsCString()) { + type_name = type_str; + } + } + + Current.try_emplace( + v->GetID(), VarState{std::string(name), std::string(loc), start_addr, + end_addr, entry.expr->GetRegisterKind(), decl_file, + decl_line, type_name}); } // Diff Live_ → Current. @@ -387,24 +440,41 @@ VariableAnnotator::annotate(Instruction &inst, Target &target, auto it = Live_.find(KV.first); if (it == Live_.end()) { // Newly live. - events.emplace_back( - llvm::formatv("{0} = {1}", KV.second.name, KV.second.last_loc).str()); + annotations.push_back(createAnnotation(KV.second, true)); } else if (it->second.last_loc != KV.second.last_loc) { // Location changed. - events.emplace_back( - llvm::formatv("{0} = {1}", KV.second.name, KV.second.last_loc).str()); + annotations.push_back(createAnnotation(KV.second, true)); } } - // 2) Ends: anything that was live but is not in Current becomes . + // 2) Ends: anything that was live but is not in Current becomes + // . for (const auto &KV : Live_) { - if (!Current.count(KV.first)) - events.emplace_back(llvm::formatv("{0} = ", KV.second.name).str()); + if (!Current.count(KV.first)) { + annotations.push_back(createAnnotation(KV.second, false, kUndefLocation)); + } } // Commit new state. Live_ = std::move(Current); - return events; + return annotations; +} + +VariableAnnotation VariableAnnotator::createAnnotation( + const VarState &var_state, bool is_live, + const std::optional &location_desc) { + VariableAnnotation annotation; + annotation.variable_name = var_state.name; + annotation.location_description = + location_desc.has_value() ? *location_desc : var_state.last_loc; + annotation.start_address = var_state.start_address; + annotation.end_address = var_state.end_address; + annotation.is_live = is_live; + annotation.register_kind = var_state.register_kind; + annotation.decl_file = var_state.decl_file; + annotation.decl_line = var_state.decl_line; + annotation.type_name = var_state.type_name; + return annotation; } void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, diff --git a/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py b/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py index f107efbddddeb..eccae3a3ac36c 100644 --- a/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py +++ b/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py @@ -116,3 +116,174 @@ def test_seed_reg_const_undef(self): print(out) self.assertRegex(out, r"\b(i|argc)\s*=\s*(DW_OP_reg\d+\b|R[A-Z0-9]+)") self.assertNotIn("", out) + + @no_debug_info_test + @skipIf(archs=no_match(["x86_64"])) + def test_structured_annotations_api(self): + """Test GetVariableAnnotations() API returns structured data""" + obj = self._build_obj("d_original_example.o") + target = self._create_target(obj) + + main_symbols = target.FindSymbols("main") + self.assertTrue(main_symbols.IsValid() and main_symbols.GetSize() > 0, + "Could not find 'main' symbol") + + main_symbol = main_symbols.GetContextAtIndex(0).GetSymbol() + start_addr = main_symbol.GetStartAddress() + self.assertTrue(start_addr.IsValid(), "Invalid start address for main") + + instructions = target.ReadInstructions(start_addr, 16) + self.assertGreater(instructions.GetSize(), 0, "No instructions read") + + print(f"\nTesting GetVariableAnnotations() API on {instructions.GetSize()} instructions") + + # Track what we find. + found_annotations = False + found_variables = set() + + # Track variable locations to detect changes (for selective printing) + prev_locations = {} + + # Test each instruction + for i in range(instructions.GetSize()): + inst = instructions.GetInstructionAtIndex(i) + self.assertTrue(inst.IsValid(), f"Invalid instruction at index {i}") + + annotations = inst.GetVariableAnnotations(target) + + self.assertIsInstance(annotations, lldb.SBStructuredData, + "GetVariableAnnotations should return SBStructuredData") + + if annotations.GetSize() > 0: + found_annotations = True + + # Track current locations and detect changes + current_locations = {} + should_print = False + + # Validate each annotation + for j in range(annotations.GetSize()): + ann = annotations.GetItemAtIndex(j) + self.assertTrue(ann.IsValid(), + f"Invalid annotation at index {j}") + + self.assertEqual(ann.GetType(), lldb.eStructuredDataTypeDictionary, + "Each annotation should be a dictionary") + + var_name_obj = ann.GetValueForKey("variable_name") + self.assertTrue(var_name_obj.IsValid(), + "Missing 'variable_name' field") + + location_obj = ann.GetValueForKey("location_description") + self.assertTrue(location_obj.IsValid(), + "Missing 'location_description' field") + + is_live_obj = ann.GetValueForKey("is_live") + self.assertTrue(is_live_obj.IsValid(), + "Missing 'is_live' field") + + start_addr_obj = ann.GetValueForKey("start_address") + self.assertTrue(start_addr_obj.IsValid(), + "Missing 'start_address' field") + + end_addr_obj = ann.GetValueForKey("end_address") + self.assertTrue(end_addr_obj.IsValid(), + "Missing 'end_address' field") + + register_kind_obj = ann.GetValueForKey("register_kind") + self.assertTrue(register_kind_obj.IsValid(), + "Missing 'register_kind' field") + + # Extract and validate values + var_name = var_name_obj.GetStringValue(1024) + location = location_obj.GetStringValue(1024) + is_live = is_live_obj.GetBooleanValue() + start_addr = start_addr_obj.GetUnsignedIntegerValue() + end_addr = end_addr_obj.GetUnsignedIntegerValue() + register_kind = register_kind_obj.GetUnsignedIntegerValue() + + # Validate types and values + self.assertIsInstance(var_name, str, "variable_name should be string") + self.assertGreater(len(var_name), 0, "variable_name should not be empty") + + self.assertIsInstance(location, str, "location_description should be string") + self.assertGreater(len(location), 0, "location_description should not be empty") + + self.assertIsInstance(is_live, bool, "is_live should be boolean") + + self.assertIsInstance(start_addr, int, "start_address should be integer") + self.assertIsInstance(end_addr, int, "end_address should be integer") + self.assertGreater(end_addr, start_addr, + "end_address should be greater than start_address") + + self.assertIsInstance(register_kind, int, "register_kind should be integer") + + # Check for expected variables in this function + self.assertIn(var_name, ["argc", "argv", "i"], + f"Unexpected variable name: {var_name}") + + found_variables.add(var_name) + + # Track current location + current_locations[var_name] = location + + # Detect if this is a new variable or location changed + if var_name not in prev_locations or prev_locations[var_name] != location: + should_print = True + + # Check optional fields (may or may not be present) + decl_file_obj = ann.GetValueForKey("decl_file") + if decl_file_obj.IsValid(): + decl_file = decl_file_obj.GetStringValue(1024) + self.assertIsInstance(decl_file, str) + self.assertIn("d_original_example.c", decl_file, + f"Expected source file d_original_example.c in {decl_file}") + + decl_line_obj = ann.GetValueForKey("decl_line") + if decl_line_obj.IsValid(): + decl_line = decl_line_obj.GetUnsignedIntegerValue() + self.assertIsInstance(decl_line, int) + + # Validate declaration line matches the source code (according to d_original_example.c) + if var_name == "argc": + self.assertEqual(decl_line, 3, "argc should be declared on line 3") + elif var_name == "argv": + self.assertEqual(decl_line, 3, "argv should be declared on line 3") + elif var_name == "i": + self.assertEqual(decl_line, 4, "i should be declared on line 4") + + type_name_obj = ann.GetValueForKey("type_name") + if type_name_obj.IsValid(): + type_name = type_name_obj.GetStringValue(1024) + self.assertIsInstance(type_name, str) + + # Validate declaration line matches the source code (according to d_original_example.c) + if var_name == "argc": + self.assertEqual(type_name, "int", "argc should be type 'int'") + elif var_name == "argv": + self.assertEqual(type_name, "char **", "argv should be type 'char **'") + elif var_name == "i": + self.assertEqual(type_name, "int", "i should be type 'int'") + + # Only print if something happened (location changed or variable appeared/disappeared) + if should_print or len(current_locations) != len(prev_locations): + print(f"\nInstruction {i} at {inst.GetAddress()}: {annotations.GetSize()} annotations") + for var_name, location in current_locations.items(): + change_marker = " <- CHANGED" if var_name in prev_locations and prev_locations[var_name] != location else "" + new_marker = " <- NEW" if var_name not in prev_locations else "" + print(f" {var_name} = {location}{change_marker}{new_marker}") + # Check for disappeared variables + for var_name in prev_locations: + if var_name not in current_locations: + print(f" {var_name} <- GONE") + + # Update tracking + prev_locations = current_locations.copy() + + self.assertTrue(found_annotations, + "Should find at least one instruction with variable annotations") + + self.assertGreater(len(found_variables), 0, + "Should find at least one variable") + + print(f"\nTest complete. Found variables: {found_variables}")