From aa79be4d2e7fe01a3227c2f17ba0d4df76e2ef02 Mon Sep 17 00:00:00 2001 From: swananan Date: Mon, 4 May 2026 18:12:27 +0800 Subject: [PATCH] refactor: materialize dwarf read plans Introduce an explicit DWARF materialization contract so compiler print and expression lowering consume planned direct values, user reads, unavailable states, and address origins instead of rediscovering those semantics from raw locations. Move link-time address classification into ghostscope-dwarf, preserve pre-dereference arithmetic when rebasing static bases, and reject split piece locations instead of silently reading the first piece. Refs #148. --- ghostscope-compiler/src/ebpf/codegen.rs | 383 +++++++----------- ghostscope-compiler/src/ebpf/dwarf_bridge.rs | 345 +++++++++------- ghostscope-compiler/src/ebpf/expression.rs | 33 +- ghostscope-dwarf/src/analyzer/mod.rs | 1 + ghostscope-dwarf/src/analyzer/plan_pc.rs | 22 +- ghostscope-dwarf/src/lib.rs | 13 +- .../src/semantics/variable_plan.rs | 362 ++++++++++++++++- 7 files changed, 736 insertions(+), 423 deletions(-) diff --git a/ghostscope-compiler/src/ebpf/codegen.rs b/ghostscope-compiler/src/ebpf/codegen.rs index 59933111..a4722d13 100644 --- a/ghostscope-compiler/src/ebpf/codegen.rs +++ b/ghostscope-compiler/src/ebpf/codegen.rs @@ -29,7 +29,7 @@ struct PrintVarRuntimeMeta { #[derive(Debug, Clone)] enum ComplexArgSource<'ctx> { RuntimeRead { - location: ghostscope_dwarf::VariableLocation, + address: ghostscope_dwarf::PlannedAddress, dwarf_type: ghostscope_dwarf::TypeInfo, module_for_offsets: Option, }, @@ -48,7 +48,7 @@ enum ComplexArgSource<'ctx> { bytes: Vec, }, AddressValue { - location: ghostscope_dwarf::VariableLocation, + address: ghostscope_dwarf::PlannedAddress, module_for_offsets: Option, }, // Newly added: a value computed in LLVM at runtime (e.g., expression result) @@ -172,6 +172,99 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } + fn complex_arg_from_dwarf_read_plan( + &mut self, + plan: ghostscope_dwarf::VariableReadPlan, + display_name: Option, + ) -> Result> { + let pc_address = self.get_compile_time_context()?.pc_address; + let materialized = self.variable_read_plan_to_materialization(plan, pc_address)?; + let display_name = display_name.unwrap_or_else(|| materialized.name.clone()); + + match &materialized.materialization { + ghostscope_dwarf::VariableMaterialization::Unavailable { + availability: ghostscope_dwarf::Availability::OptimizedOut, + } => { + let optimized_type = ghostscope_dwarf::TypeInfo::OptimizedOut { + name: materialized.name.clone(), + }; + Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(optimized_type), + access_path: Vec::new(), + data_len: 0, + source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, + }) + } + ghostscope_dwarf::VariableMaterialization::Unavailable { availability } => { + Err(Self::dwarf_expression_unavailable_error( + &materialized.name, + availability, + pc_address, + )) + } + ghostscope_dwarf::VariableMaterialization::UserMemoryRead { address } => { + let dwarf_type = materialized.dwarf_type.clone().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + let data_len = Self::compute_read_size_for_type(&dwarf_type); + if data_len == 0 { + return Err(CodeGenError::TypeSizeNotAvailable(display_name)); + } + let module_hint = self.take_module_hint(); + Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(dwarf_type.clone()), + access_path: Vec::new(), + data_len, + source: ComplexArgSource::RuntimeRead { + address: address.clone(), + dwarf_type, + module_for_offsets: module_hint, + }, + }) + } + ghostscope_dwarf::VariableMaterialization::DirectValue { .. } => { + let value = + self.variable_materialization_to_llvm_value(&materialized, pc_address, None)?; + let dwarf_type = materialized.dwarf_type.clone().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + let value = match value { + BasicValueEnum::IntValue(value) => value, + BasicValueEnum::PointerValue(value) => self + .builder + .build_ptr_to_int(value, self.context.i64_type(), "direct_ptr_to_i64") + .map_err(|e| CodeGenError::Builder(e.to_string()))?, + _ => { + return Err(CodeGenError::DwarfError(format!( + "direct DWARF value '{}' did not lower to an integer", + materialized.name + ))) + } + }; + let data_len = Self::compute_read_size_for_type(&dwarf_type).clamp(1, 8); + Ok(ComplexArg { + var_name_index: self.trace_context.add_variable_name(display_name), + type_index: self.trace_context.add_type(dwarf_type), + access_path: Vec::new(), + data_len, + source: ComplexArgSource::ComputedInt { value, byte_len: data_len }, + }) + } + ghostscope_dwarf::VariableMaterialization::Composite { .. } => Err( + CodeGenError::DwarfError(format!( + "DWARF variable '{}' is split across pieces; piece reconstruction is not implemented", + materialized.name + )), + ), + } + } + /// Unified expression resolver: returns a ComplexArg carrying /// a consistent var_name_index/type_index/access_path/data_len/source /// with strict priority: script variables -> DWARF (locals/params/globals). @@ -324,13 +417,33 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { let var = self .query_dwarf_for_complex_expr(inner)? .ok_or_else(|| CodeGenError::VariableNotFound(format!("{inner:?}")))?; - let inner_ty = var.dwarf_type.as_ref().ok_or_else(|| { + let pc_address = self.get_compile_time_context()?.pc_address; + let materialized = self.variable_read_plan_to_materialization(var, pc_address)?; + let inner_ty = materialized.dwarf_type.as_ref().ok_or_else(|| { CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) })?; let ptr_ty = ghostscope_dwarf::TypeInfo::PointerType { target_type: Box::new(inner_ty.clone()), size: 8, }; + let address = match materialized.materialization { + ghostscope_dwarf::VariableMaterialization::UserMemoryRead { address } => { + address + } + ghostscope_dwarf::VariableMaterialization::Unavailable { availability } => { + return Err(Self::dwarf_expression_unavailable_error( + &materialized.name, + &availability, + pc_address, + )) + } + _ => { + return Err(CodeGenError::DwarfError(format!( + "cannot take address of value-backed DWARF expression '{}'", + materialized.name + ))) + } + }; let module_hint = self.take_module_hint(); Ok(ComplexArg { var_name_index: self @@ -340,7 +453,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len: 8, source: ComplexArgSource::AddressValue { - location: var.location.clone(), + address, module_for_offsets: module_hint, }, }) @@ -351,208 +464,21 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { | E::ArrayAccess(_, _) | E::PointerDeref(_) | E::ChainAccess(_)) => { - if let Some(plan) = self.query_dwarf_for_complex_expr_plan(expr)? { - let pc_address = self.get_compile_time_context()?.pc_address; - let (var_name, dwarf_type, location) = - self.variable_read_plan_to_runtime_read_parts(plan, pc_address)?; - let display_name = if matches!(expr, E::PointerDeref(_)) { - self.expr_to_name(expr) - } else { - var_name - }; - if matches!(location, ghostscope_dwarf::VariableLocation::OptimizedOut) { - return Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(display_name), - type_index: self.trace_context.add_type(dwarf_type), - access_path: Vec::new(), - data_len: 0, - source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, - }); - } - let data_len = Self::compute_read_size_for_type(&dwarf_type); - if data_len == 0 { - return Err(CodeGenError::TypeSizeNotAvailable(display_name)); - } - let module_hint = self.take_module_hint(); - return Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(display_name), - type_index: self.trace_context.add_type(dwarf_type.clone()), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::RuntimeRead { - location, - dwarf_type, - module_for_offsets: module_hint, - }, - }); - } - - let var = self - .query_dwarf_for_complex_expr(expr)? + let plan = self + .query_dwarf_for_complex_expr_plan(expr)? .ok_or_else(|| CodeGenError::VariableNotFound(format!("{expr:?}")))?; - if var.availability == ghostscope_dwarf::Availability::OptimizedOut { - let ti = ghostscope_protocol::type_info::TypeInfo::OptimizedOut { - name: var.name.clone(), - }; - return Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(var.name.clone()), - type_index: self.trace_context.add_type(ti), - access_path: Vec::new(), - data_len: 0, - source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, - }); - } - let dwarf_type = var.dwarf_type.as_ref().ok_or_else(|| { - CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) - })?; - let data_len = Self::compute_read_size_for_type(dwarf_type); - if data_len == 0 { - return Err(CodeGenError::TypeSizeNotAvailable(var.name)); - } - // Previously clamped to 1993 bytes; now use full DWARF size (transport clamps per event size) - // data_len unchanged - let module_hint = self.take_module_hint(); - Ok(ComplexArg { - var_name_index: self.trace_context.add_variable_name(var.name.clone()), - type_index: self.trace_context.add_type(dwarf_type.clone()), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::RuntimeRead { - location: var.location.clone(), - dwarf_type: dwarf_type.clone(), - module_for_offsets: module_hint, - }, - }) + let display_name = if matches!(expr, E::PointerDeref(_)) { + Some(self.expr_to_name(expr)) + } else { + None + }; + self.complex_arg_from_dwarf_read_plan(plan, display_name) } // 6) Variable not in script scope → DWARF variable or computed fast-path for simple scalars E::Variable(name) => { if let Some(v) = self.query_dwarf_for_variable(name)? { - if let Some(ref t) = v.dwarf_type { - // If DWARF reports optimized-out at this PC, emit OptimizedOut type with no data - if v.availability == ghostscope_dwarf::Availability::OptimizedOut { - let ti = ghostscope_protocol::type_info::TypeInfo::OptimizedOut { - name: v.name.clone(), - }; - return Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(v.name.clone()), - type_index: self.trace_context.add_type(ti), - access_path: Vec::new(), - data_len: 0, - source: ComplexArgSource::ImmediateBytes { bytes: Vec::new() }, - }); - } - let is_link_addr = - matches!(v.location, ghostscope_dwarf::VariableLocation::Address(_)); - if Self::is_simple_typeinfo(t) && !is_link_addr { - // Prefer computed value to avoid runtime reads - let compiled = self.compile_expr(expr)?; - match compiled { - BasicValueEnum::IntValue(iv) => { - // Respect DWARF pointer types to keep pointer formatting - let (kind, byte_len) = if matches!( - t, - ghostscope_dwarf::TypeInfo::PointerType { .. } - ) { - (TypeKind::Pointer, 8) - } else { - let bitw = iv.get_type().get_bit_width(); - if bitw == 1 { - (TypeKind::Bool, 1) - } else if bitw <= 8 { - (TypeKind::I8, 1) - } else if bitw <= 16 { - (TypeKind::I16, 2) - } else if bitw <= 32 { - (TypeKind::I32, 4) - } else { - (TypeKind::I64, 8) - } - }; - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self.add_synthesized_type_index_for_kind(kind), - access_path: Vec::new(), - data_len: byte_len, - source: ComplexArgSource::ComputedInt { - value: iv, - byte_len, - }, - }) - } - BasicValueEnum::PointerValue(pv) => { - // Pointer register-backed → cast to i64 with pointer typeindex - let iv = self - .builder - .build_ptr_to_int(pv, self.context.i64_type(), "ptr_to_i64") - .map_err(|e| CodeGenError::Builder(e.to_string()))?; - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(self.expr_to_name(expr)), - type_index: self - .add_synthesized_type_index_for_kind(TypeKind::Pointer), - access_path: Vec::new(), - data_len: 8, - source: ComplexArgSource::ComputedInt { - value: iv, - byte_len: 8, - }, - }) - } - _ => { - // Fall back to runtime read path - let data_len = Self::compute_read_size_for_type(t); - if data_len == 0 { - return Err(CodeGenError::TypeSizeNotAvailable(v.name)); - } - let module_hint = self.take_module_hint(); - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(v.name.clone()), - type_index: self.trace_context.add_type(t.clone()), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::RuntimeRead { - location: v.location.clone(), - dwarf_type: t.clone(), - module_for_offsets: module_hint, - }, - }) - } - } - } else { - // Complex types or link-time addresses: use RuntimeRead - // (globals/statics need memory read; not an address print unless AddressOf) - let data_len = Self::compute_read_size_for_type(t); - if data_len == 0 { - return Err(CodeGenError::TypeSizeNotAvailable(v.name)); - } - let module_hint = self.take_module_hint(); - Ok(ComplexArg { - var_name_index: self - .trace_context - .add_variable_name(v.name.clone()), - type_index: self.trace_context.add_type(t.clone()), - access_path: Vec::new(), - data_len, - source: ComplexArgSource::RuntimeRead { - location: v.location.clone(), - dwarf_type: t.clone(), - module_for_offsets: module_hint, - }, - }) - } - } else { - Err(CodeGenError::DwarfError( - "Variable has no DWARF type information".to_string(), - )) - } + self.complex_arg_from_dwarf_read_plan(v, None) } else { Err(CodeGenError::VariableNotInScope(name.clone())) } @@ -611,6 +537,13 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { let index = sign * int_side; let (location, elem_ty) = self.compute_pointed_location_with_index(ptr_side, index)?; + let address = ghostscope_dwarf::PlannedAddress::from_location(location) + .ok_or_else(|| { + CodeGenError::DwarfError( + "pointer arithmetic did not produce an address-backed plan" + .to_string(), + ) + })?; let data_len = Self::compute_read_size_for_type(&elem_ty); let module_hint = self.take_module_hint(); if data_len == 0 { @@ -627,7 +560,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len: 8, source: ComplexArgSource::AddressValue { - location, + address, module_for_offsets: module_hint, }, }); @@ -640,7 +573,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path: Vec::new(), data_len, source: ComplexArgSource::RuntimeRead { - location, + address, dwarf_type: elem_ty, module_for_offsets: module_hint, }, @@ -732,7 +665,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { Ok(1) } ComplexArgSource::RuntimeRead { - location, + address, ref dwarf_type, module_for_offsets, } => { @@ -744,7 +677,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { }; self.generate_print_complex_variable_runtime( meta, - &location, + &address, dwarf_type, module_for_offsets.as_deref(), )?; @@ -1104,28 +1037,6 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { // Already accumulated; EndInstruction will send the whole event Ok(()) } - /// Determine if a TypeInfo qualifies as a "simple variable" for PrintVariableIndex - /// Simple: base types (bool/int/float/char), enums (with base type 1/2/4/8), pointers; - /// Complex: arrays, structs, unions, functions - fn is_simple_typeinfo(t: &ghostscope_dwarf::TypeInfo) -> bool { - use ghostscope_dwarf::TypeInfo as TI; - match t { - TI::BaseType { size, .. } => matches!(*size, 1 | 2 | 4 | 8), - TI::EnumType { base_type, .. } => { - let sz = base_type.size(); - matches!(sz, 1 | 2 | 4 | 8) - } - TI::PointerType { .. } => true, - TI::TypedefType { - underlying_type, .. - } - | TI::QualifiedType { - underlying_type, .. - } => Self::is_simple_typeinfo(underlying_type), - _ => false, - } - } - fn is_char_byte_typeinfo(t: &ghostscope_dwarf::TypeInfo) -> bool { use ghostscope_dwarf::TypeInfo as TI; match t { @@ -3302,7 +3213,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } ComplexArgSource::RuntimeRead { - location, + address, dwarf_type, module_for_offsets, } => { @@ -3315,9 +3226,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { .build_bit_cast(var_data_ptr, ptr_type, "dst_ptr") .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; let size_val = i32_type.const_int(a.data_len as u64, false); - // Compute source address; if link-time address, apply ASLR offsets via map - let src_addr = self.variable_location_to_address_with_hint( - location, + let src_addr = self.planned_address_to_llvm_address( + address, Some(apl_ptr), module_for_offsets.as_deref(), )?; @@ -3479,12 +3389,11 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { self.builder.position_at_end(cont2_block); } ComplexArgSource::AddressValue { - location, + address, module_for_offsets, } => { - // Compute address (apply ASLR if link-time address) and store as 8 bytes - let addr = self.variable_location_to_address_with_hint( - location, + let addr = self.planned_address_to_llvm_address( + address, Some(apl_ptr), module_for_offsets.as_deref(), )?; @@ -4389,7 +4298,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { fn generate_print_complex_variable_runtime( &mut self, meta: PrintVarRuntimeMeta, - location: &ghostscope_dwarf::VariableLocation, + address: &ghostscope_dwarf::PlannedAddress, dwarf_type: &ghostscope_dwarf::TypeInfo, module_hint: Option<&str>, ) -> Result<()> { @@ -4399,7 +4308,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { access_path = %meta.access_path, type_size = dwarf_type.size(), data_len_limit = meta.data_len_limit, - location = ?location, + address = ?address, "generate_print_complex_variable_runtime: begin" ); // Compute sizes first, then reserve instruction region directly in accumulation buffer @@ -4716,7 +4625,7 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { // Compute source address with ASLR-aware helper, honoring module hint // Prefer a previously recorded module path for offsets; fall back handled in helper let src_addr = - self.variable_location_to_address_with_hint(location, Some(status_ptr), module_hint)?; + self.planned_address_to_llvm_address(address, Some(status_ptr), module_hint)?; tracing::trace!(src_addr = %{src_addr}, "generate_print_complex_variable_runtime: computed src_addr"); // Setup common types and casts diff --git a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs index dac7623c..d9e950fd 100644 --- a/ghostscope-compiler/src/ebpf/dwarf_bridge.rs +++ b/ghostscope-compiler/src/ebpf/dwarf_bridge.rs @@ -5,8 +5,9 @@ use super::context::{CodeGenError, EbpfContext, Result}; use ghostscope_dwarf::{ - AddressExpr, Availability, ComputeStep, EntryValueCase, MemoryAccessSize, SectionType, - TypeInfo, VariableAccessPath, VariableAccessSegment, VariableLocation, VariableReadPlan, + AddressExpr, AddressOrigin, Availability, ComputeStep, EntryValueCase, MemoryAccessSize, + PlannedAddress, SectionType, TypeInfo, VariableAccessPath, VariableAccessSegment, + VariableLocation, VariableMaterializationPlan, VariableReadPlan, }; use ghostscope_process::module_probe; use inkwell::values::{BasicValueEnum, IntValue, PointerValue}; @@ -135,17 +136,9 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { var_name, pieces.len() ); - if let Some(first_piece) = pieces.first() { - self.variable_location_to_llvm_value( - &first_piece.location, - dwarf_type, - var_name, - pc_address, - status_ptr, - ) - } else { - Ok(self.context.i64_type().const_zero().into()) - } + Err(CodeGenError::DwarfError(format!( + "DWARF variable '{var_name}' is split across pieces; piece reconstruction is not implemented" + ))) } VariableLocation::FrameBaseRelative { .. } => Err(CodeGenError::DwarfError( "Frame-base-relative variable plan requires resolved frame base".to_string(), @@ -163,51 +156,99 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { status_ptr: Option>, module_hint: Option<&str>, ) -> Result> { - // Policy note: - // - Link-time addresses (DW_OP_addr or constant-foldable address expressions) are - // always rebased using per-module section offsets (ASLR) to get a runtime address. - // - Runtime-derived addresses (register/stack-relative or computed via dereference) - // are used as-is and are NOT rebased. - // The caller signals which path we are on by providing the semantic location shape. + let Some(address) = PlannedAddress::from_location(location.clone()) else { + return match location { + VariableLocation::OptimizedOut => { + let pc_address = self + .current_compile_time_context + .as_ref() + .map(|ctx| ctx.pc_address) + .unwrap_or(0); + Err(Self::dwarf_expression_unavailable_error( + "DWARF address expression", + &Availability::OptimizedOut, + pc_address, + )) + } + _ => Err(CodeGenError::NotImplemented( + "Unable to compute address from variable location".to_string(), + )), + }; + }; + + self.planned_address_to_llvm_address(&address, status_ptr, module_hint) + } + + pub fn planned_address_to_llvm_address( + &mut self, + address: &PlannedAddress, + status_ptr: Option>, + module_hint: Option<&str>, + ) -> Result> { let pt_regs_ptr = self.get_pt_regs_parameter()?; self.store_offsets_found_const(true)?; - match location { - VariableLocation::OptimizedOut => { - let pc_address = self - .current_compile_time_context - .as_ref() - .map(|ctx| ctx.pc_address) - .unwrap_or(0); - Err(Self::dwarf_expression_unavailable_error( - "DWARF address expression", - &Availability::OptimizedOut, - pc_address, - )) + match address.origin { + AddressOrigin::LinkTime => { + let link_addr = address.constant_link_time_address().ok_or_else(|| { + CodeGenError::DwarfError( + "read plan marked address as link-time without a constant address" + .to_string(), + ) + })?; + self.runtime_address_from_link_time_address(link_addr, status_ptr, module_hint) } - VariableLocation::Address(expr) => self.address_steps_to_address_with_hint( - &expr.steps, - pt_regs_ptr, - status_ptr, - module_hint, - ), - VariableLocation::AbsoluteAddressValue(expr) => self - .address_steps_to_address_with_hint( - &expr.steps, - pt_regs_ptr, + AddressOrigin::LinkTimeBase => { + let (link_addr, tail_steps) = + address.link_time_base_and_runtime_tail().ok_or_else(|| { + CodeGenError::DwarfError( + "read plan marked address as link-time-base without a base address" + .to_string(), + ) + })?; + let runtime_base = self.runtime_address_from_link_time_address( + link_addr, status_ptr, module_hint, - ), + )?; + let value = self.generate_compute_steps( + tail_steps, + pt_regs_ptr, + None, + status_ptr, + Some(runtime_base), + )?; + match value { + BasicValueEnum::IntValue(value) => Ok(value), + _ => Err(CodeGenError::LLVMError( + "Computed address did not produce integer".to_string(), + )), + } + } + AddressOrigin::RuntimeDerived | AddressOrigin::Unknown => { + self.planned_address_without_rebase(address, pt_regs_ptr, status_ptr) + } + } + } + + fn planned_address_without_rebase( + &mut self, + address: &PlannedAddress, + pt_regs_ptr: PointerValue<'ctx>, + status_ptr: Option>, + ) -> Result> { + match &address.location { + VariableLocation::Address(expr) | VariableLocation::AbsoluteAddressValue(expr) => { + self.address_steps_to_unrebased_address(&expr.steps, pt_regs_ptr, status_ptr) + } VariableLocation::RegisterAddress { dwarf_reg, offset } => { let reg_val = self.load_register_value(*dwarf_reg, pt_regs_ptr)?; if let BasicValueEnum::IntValue(reg_i) = reg_val { if *offset != 0 { let ofs_val = self.context.i64_type().const_int(*offset as u64, true); - let sum = self - .builder + self.builder .build_int_add(reg_i, ofs_val, "addr_with_offset") - .map_err(|e| CodeGenError::LLVMError(e.to_string()))?; - Ok(sum) + .map_err(|e| CodeGenError::LLVMError(e.to_string())) } else { Ok(reg_i) } @@ -218,96 +259,29 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } VariableLocation::ComputedAddress(steps) => { - self.address_steps_to_address_with_hint(steps, pt_regs_ptr, status_ptr, module_hint) + self.address_steps_to_unrebased_address(steps, pt_regs_ptr, status_ptr) } _ => Err(CodeGenError::NotImplemented( - "Unable to compute address from variable location".to_string(), + "Unable to compute address from planned address".to_string(), )), } } - fn address_steps_to_address_with_hint( + fn address_steps_to_unrebased_address( &mut self, steps: &[ComputeStep], pt_regs_ptr: PointerValue<'ctx>, status_ptr: Option>, - module_hint: Option<&str>, ) -> Result> { - if let Some(link_addr) = Self::fold_constant_address_steps(steps) { - return self.runtime_address_from_link_time_address(link_addr, status_ptr, module_hint); - } - - // If a static base is dereferenced before any register dependency, rebase - // the base first and then execute the remaining runtime expression. - if let Some(ComputeStep::PushConstant(base_const)) = steps.first() { - let mut saw_reg = false; - let mut saw_deref = false; - for step in &steps[1..] { - match step { - ComputeStep::LoadRegister(_) => { - saw_reg = true; - break; - } - ComputeStep::Dereference { .. } => { - saw_deref = true; - break; - } - _ => {} - } - } - - if saw_deref && !saw_reg { - let rt = self.runtime_address_from_link_time_address( - *base_const as u64, - status_ptr, - module_hint, - )?; - let val = self.generate_compute_steps( - &steps[1..], - pt_regs_ptr, - None, - status_ptr, - Some(rt), - )?; - return match val { - BasicValueEnum::IntValue(value) => Ok(value), - _ => Err(CodeGenError::LLVMError( - "Computed location did not produce integer".to_string(), - )), - }; - } - } - let val = self.generate_compute_steps(steps, pt_regs_ptr, None, status_ptr, None)?; match val { BasicValueEnum::IntValue(value) => Ok(value), _ => Err(CodeGenError::LLVMError( - "Computed location did not produce integer".to_string(), + "Computed address did not produce integer".to_string(), )), } } - fn fold_constant_address_steps(steps: &[ComputeStep]) -> Option { - let mut const_stack: Vec = Vec::new(); - for step in steps { - match step { - ComputeStep::PushConstant(value) => const_stack.push(*value), - ComputeStep::Add => { - let b = const_stack.pop()?; - let a = const_stack.pop()?; - const_stack.push(a.saturating_add(b)); - } - _ => return None, - } - } - - if const_stack.len() == 1 && const_stack[0] >= 0 { - Some(const_stack[0] as u64) - } else { - None - } - } - fn runtime_address_from_link_time_address( &mut self, link_addr: u64, @@ -447,33 +421,74 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { Ok(extended.into()) } - pub(super) fn variable_read_plan_to_runtime_read_parts( + pub(super) fn variable_read_plan_to_materialization( &self, plan: VariableReadPlan, pc_address: u64, - ) -> Result<(String, TypeInfo, VariableLocation)> { - let lowering = plan.bpf_lowering_plan(&self.compile_options.runtime_capabilities); - if !lowering.availability.is_available() - && lowering.availability != Availability::OptimizedOut + ) -> Result { + let materialization = plan.materialization_plan(&self.compile_options.runtime_capabilities); + if !materialization.availability.is_available() + && materialization.availability != Availability::OptimizedOut { return Err(Self::dwarf_expression_unavailable_error( - &plan.name, - &lowering.availability, + &materialization.name, + &materialization.availability, pc_address, )); } - let dwarf_type = if lowering.availability == Availability::OptimizedOut { - TypeInfo::OptimizedOut { - name: plan.name.clone(), - } - } else { - plan.dwarf_type.clone().ok_or_else(|| { + if materialization.availability != Availability::OptimizedOut { + materialization.dwarf_type.as_ref().ok_or_else(|| { CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) - })? - }; + })?; + } + + Ok(materialization) + } - Ok((plan.name, dwarf_type, plan.location)) + pub fn variable_materialization_to_llvm_value( + &mut self, + materialization: &VariableMaterializationPlan, + pc_address: u64, + status_ptr: Option>, + ) -> Result> { + match &materialization.materialization { + ghostscope_dwarf::VariableMaterialization::DirectValue { location, .. } => { + let dwarf_type = materialization.dwarf_type.as_ref().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + self.variable_location_to_llvm_value( + location, + dwarf_type, + &materialization.name, + pc_address, + status_ptr, + ) + } + ghostscope_dwarf::VariableMaterialization::UserMemoryRead { address } => { + let dwarf_type = materialization.dwarf_type.as_ref().ok_or_else(|| { + CodeGenError::DwarfError( + "Expression has no DWARF type information".to_string(), + ) + })?; + self.generate_memory_location_from_planned_address(address, dwarf_type, status_ptr) + } + ghostscope_dwarf::VariableMaterialization::Unavailable { availability } => { + Err(Self::dwarf_expression_unavailable_error( + &materialization.name, + availability, + pc_address, + )) + } + ghostscope_dwarf::VariableMaterialization::Composite { .. } => { + Err(CodeGenError::DwarfError(format!( + "DWARF variable '{}' is split across pieces; piece reconstruction is not implemented", + materialization.name + ))) + } + } } /// Generate LLVM IR for memory-backed variable locations. @@ -482,6 +497,20 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { location: &VariableLocation, dwarf_type: &TypeInfo, status_ptr: Option>, + ) -> Result> { + let address = PlannedAddress::from_location(location.clone()).ok_or_else(|| { + CodeGenError::DwarfError( + "Variable location cannot be materialized as an address".into(), + ) + })?; + self.generate_memory_location_from_planned_address(&address, dwarf_type, status_ptr) + } + + fn generate_memory_location_from_planned_address( + &mut self, + address: &PlannedAddress, + dwarf_type: &TypeInfo, + status_ptr: Option>, ) -> Result> { let module_hint = self.current_resolved_var_module_path.clone(); let runtime_status_ptr = if self.condition_context_active { @@ -489,8 +518,8 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } else { status_ptr }; - let addr = self.variable_location_to_address_with_hint( - location, + let addr = self.planned_address_to_llvm_address( + address, runtime_status_ptr, module_hint.as_deref(), )?; @@ -1814,6 +1843,32 @@ mod tests { assert!(err.to_string().contains("0x1234")); } + #[test] + fn piece_locations_are_rejected_instead_of_using_first_piece() { + let llctx = LlvmContext::create(); + let opts = crate::CompileOptions::default(); + let mut ctx = EbpfContext::new(&llctx, "piece_value", Some(0), &opts).expect("ctx"); + ctx.create_basic_ebpf_function("f").expect("fn"); + + let ty = ghostscope_protocol::TypeInfo::BaseType { + name: "int".to_string(), + size: 4, + encoding: ghostscope_dwarf::constants::DW_ATE_signed.0 as u16, + }; + let location = VariableLocation::Pieces(vec![ghostscope_dwarf::PieceLocation { + bit_offset: 0, + bit_size: 32, + location: Box::new(VariableLocation::RegisterValue { dwarf_reg: 0 }), + }]); + + let err = ctx + .variable_location_to_llvm_value(&location, &ty, "split", 0x1234, None) + .expect_err("split pieces should not silently use the first piece"); + + assert!(matches!(err, CodeGenError::DwarfError(_))); + assert!(err.to_string().contains("split across pieces")); + } + #[test] fn unavailable_error_formats_structured_dwarf_reason() { let err = EbpfContext::dwarf_expression_unavailable_error( @@ -1859,6 +1914,7 @@ mod tests { let plan = VariableReadPlan { name: "x".to_string(), type_name: "int".to_string(), + access_path: VariableAccessPath::default(), dwarf_type: Some(dwarf_type), declaration: None, type_id: None, @@ -1873,7 +1929,7 @@ mod tests { }; let err = ctx - .variable_read_plan_to_runtime_read_parts(plan, 0x1234) + .variable_read_plan_to_materialization(plan, 0x1234) .expect_err("zero stack capability should reject the read plan"); assert!(matches!(err, CodeGenError::VariableUnavailable(_))); @@ -1893,6 +1949,7 @@ mod tests { let plan = VariableReadPlan { name: "x".to_string(), type_name: "int".to_string(), + access_path: VariableAccessPath::default(), dwarf_type: Some(dwarf_type), declaration: None, type_id: None, @@ -1906,16 +1963,16 @@ mod tests { provenance: Provenance::DirectDie, }; - let (_, marker_type, location) = ctx - .variable_read_plan_to_runtime_read_parts(plan, 0x1234) + let materialized = ctx + .variable_read_plan_to_materialization(plan, 0x1234) .expect("optimized-out runtime metadata should remain printable"); - assert_eq!(location, VariableLocation::OptimizedOut); - assert_eq!( - marker_type, - TypeInfo::OptimizedOut { - name: "x".to_string() + assert_eq!(materialized.availability, Availability::OptimizedOut); + assert!(matches!( + materialized.materialization, + ghostscope_dwarf::VariableMaterialization::Unavailable { + availability: Availability::OptimizedOut } - ); + )); } #[test] diff --git a/ghostscope-compiler/src/ebpf/expression.rs b/ghostscope-compiler/src/ebpf/expression.rs index 82ed9fbc..93ab74e4 100644 --- a/ghostscope-compiler/src/ebpf/expression.rs +++ b/ghostscope-compiler/src/ebpf/expression.rs @@ -7,7 +7,6 @@ use crate::script::{BinaryOp, Expr}; use aya_ebpf_bindings::bindings::bpf_func_id::BPF_FUNC_probe_read_user; use ghostscope_dwarf::{ AmbiguityReason, Availability, RuntimeRequirement, TypeInfo as DwarfType, UnsupportedReason, - VariableReadPlan, }; use inkwell::values::{BasicValueEnum, IntValue}; use inkwell::AddressSpace; @@ -2189,24 +2188,18 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } }; - let dwarf_type = variable_plan.dwarf_type.as_ref().ok_or_else(|| { + let materialized = + self.variable_read_plan_to_materialization(variable_plan, compile_context.pc_address)?; + let dwarf_type = materialized.dwarf_type.as_ref().ok_or_else(|| { CodeGenError::DwarfError("Expression has no DWARF type information".to_string()) })?; - Self::ensure_dwarf_value_available(&variable_plan, compile_context.pc_address)?; debug!( "compile_dwarf_expression: Found DWARF info for expression '{}' with type: {:?}", - variable_plan.name, dwarf_type + materialized.name, dwarf_type ); - // Use the unified evaluation logic to generate LLVM IR - self.variable_location_to_llvm_value( - &variable_plan.location, - dwarf_type, - &variable_plan.name, - compile_context.pc_address, - None, - ) + self.variable_materialization_to_llvm_value(&materialized, compile_context.pc_address, None) } pub(crate) fn dwarf_expression_unavailable_error( @@ -2287,22 +2280,6 @@ impl<'ctx, 'dw> EbpfContext<'ctx, 'dw> { } } - pub(crate) fn ensure_dwarf_value_available( - variable: &VariableReadPlan, - pc_address: u64, - ) -> Result<()> { - let availability = variable.availability.clone(); - if availability.is_available() { - Ok(()) - } else { - Err(Self::dwarf_expression_unavailable_error( - &variable.name, - &availability, - pc_address, - )) - } - } - /// Helper: Convert expression to string for debugging fn expr_to_debug_string(expr: &crate::script::Expr) -> String { use crate::script::Expr; diff --git a/ghostscope-dwarf/src/analyzer/mod.rs b/ghostscope-dwarf/src/analyzer/mod.rs index 3af5ab40..9d51678c 100644 --- a/ghostscope-dwarf/src/analyzer/mod.rs +++ b/ghostscope-dwarf/src/analyzer/mod.rs @@ -1068,6 +1068,7 @@ mod tests { VariableReadPlan { name: name.to_string(), type_name: "int".to_string(), + access_path: crate::VariableAccessPath::default(), dwarf_type: Some(crate::TypeInfo::BaseType { name: "int".to_string(), size: 4, diff --git a/ghostscope-dwarf/src/analyzer/plan_pc.rs b/ghostscope-dwarf/src/analyzer/plan_pc.rs index b99c536f..ba5517d6 100644 --- a/ghostscope-dwarf/src/analyzer/plan_pc.rs +++ b/ghostscope-dwarf/src/analyzer/plan_pc.rs @@ -2,7 +2,7 @@ use super::DwarfAnalyzer; use crate::{ core::{ModuleAddress, Provenance, Result}, semantics::{ - AddressSpaceInfo, PcContext, PcLineInfo, PlanError, VariableAccessPath, + AddressSpaceInfo, PcContext, PcLineInfo, PcRange, PlanError, VariableAccessPath, VariableAccessSegment, VariableReadPlan, VisibleVariable, VisibleVariablesResult, }, }; @@ -135,6 +135,15 @@ impl DwarfAnalyzer { VariableReadPlan::from_visible_variable(variable.visible_variable(), provenance) } + fn attach_pc_context(ctx: &PcContext, mut plan: VariableReadPlan) -> VariableReadPlan { + plan.pc_range = Some(PcRange { + start: ctx.normalized_pc, + end: ctx.normalized_pc, + }); + plan.inline_context = ctx.inline_chain.last().and_then(|frame| frame.context); + plan + } + pub(super) fn plan_access_path_with_type_completion( &self, module_path: &Path, @@ -178,7 +187,10 @@ impl DwarfAnalyzer { ) .map(|variable| { variable.map(|variable| { - VariableReadPlan::from_visible_variable(variable, Provenance::DirectDie) + Self::attach_pc_context( + ctx, + VariableReadPlan::from_visible_variable(variable, Provenance::DirectDie), + ) }) }) } @@ -293,9 +305,9 @@ impl DwarfAnalyzer { match matches.as_slice() { [] => Ok(None), - [variable] => Ok(Some(VariableReadPlan::from_visible_variable( - variable.clone(), - Provenance::DirectDie, + [variable] => Ok(Some(Self::attach_pc_context( + ctx, + VariableReadPlan::from_visible_variable(variable.clone(), Provenance::DirectDie), ))), _ => Err(anyhow::anyhow!( "Ambiguous VariableId {:?} at PC 0x{:x}: {} visible matches", diff --git a/ghostscope-dwarf/src/lib.rs b/ghostscope-dwarf/src/lib.rs index 9f2698b6..4b6bbd57 100644 --- a/ghostscope-dwarf/src/lib.rs +++ b/ghostscope-dwarf/src/lib.rs @@ -28,18 +28,19 @@ pub use analyzer::{ pub use core::{ AddressExpr, AmbiguityReason, Availability, CallerFrameRecovery, CfaResult, ComputeStep, CuId, DieRef, DwarfError, EntryValueCase, FunctionId, FunctionInfo, GlobalVariableInfo, HelperMode, - InlineContextId, MemoryAccessSize, ModuleAddress, ModuleId, Provenance, Result, + InlineContextId, MemoryAccessSize, ModuleAddress, ModuleId, PieceLocation, Provenance, Result, RuntimeCapabilities, RuntimeRequirement, ScopeId, SectionType, SourceLocation, TargetArch, TypeId, UnsupportedReason, VariableId, VariableInfo, VariableLocation, VerifierRisk, }; // Re-export semantic contract types. pub use semantics::{ - AddressSpaceInfo, CfaRulePlan, CompactUnwindRow, CompactUnwindStats, CompactUnwindTable, - InlineFrame, PcContext, PcLineInfo, PcRange, RegisterRecoveryPlan, UnwindDiagnostic, - UnwindDiagnosticKind, VariableAccessPath, VariableAccessSegment, VariableLoweringKind, - VariableLoweringPlan, VariablePlan, VariableQueryDiagnostic, VariableReadPlan, VisibleVariable, - VisibleVariablesResult, + AddressOrigin, AddressSpaceInfo, CfaRulePlan, CompactUnwindRow, CompactUnwindStats, + CompactUnwindTable, InlineFrame, PcContext, PcLineInfo, PcRange, PlannedAddress, + RegisterRecoveryPlan, UnwindDiagnostic, UnwindDiagnosticKind, VariableAccessPath, + VariableAccessSegment, VariableLoweringKind, VariableLoweringPlan, VariableMaterialization, + VariableMaterializationPlan, VariablePlan, VariableQueryDiagnostic, VariableReadPlan, + VisibleVariable, VisibleVariablesResult, }; // Re-export type definitions from protocol (avoiding circular dependencies) diff --git a/ghostscope-dwarf/src/semantics/variable_plan.rs b/ghostscope-dwarf/src/semantics/variable_plan.rs index 5a2405ad..9080c194 100644 --- a/ghostscope-dwarf/src/semantics/variable_plan.rs +++ b/ghostscope-dwarf/src/semantics/variable_plan.rs @@ -2,8 +2,8 @@ use crate::core::{ AddressExpr, Availability, ComputeStep, DieRef, HelperMode, InlineContextId, MemoryAccessSize, - Provenance, Result, RuntimeCapabilities, RuntimeRequirement, TypeId, UnsupportedReason, - VariableId, VariableLocation, VerifierRisk, + PieceLocation, Provenance, Result, RuntimeCapabilities, RuntimeRequirement, TypeId, + UnsupportedReason, VariableId, VariableLocation, VerifierRisk, }; use crate::semantics::PcRange; use crate::TypeInfo; @@ -59,11 +59,54 @@ pub struct VariableLoweringPlan { pub verifier_risk: VerifierRisk, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AddressOrigin { + LinkTime, + LinkTimeBase, + RuntimeDerived, + Unknown, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct PlannedAddress { + pub location: VariableLocation, + pub origin: AddressOrigin, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum VariableMaterialization { + DirectValue { + location: VariableLocation, + address_origin: Option, + }, + UserMemoryRead { + address: PlannedAddress, + }, + Composite { + pieces: Vec, + }, + Unavailable { + availability: Availability, + }, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct VariableMaterializationPlan { + pub name: String, + pub type_name: String, + pub access_path: VariableAccessPath, + pub dwarf_type: Option, + pub availability: Availability, + pub lowering: VariableLoweringPlan, + pub materialization: VariableMaterialization, +} + /// Owned, PC-sensitive variable read plan before runtime-specific lowering. #[derive(Debug, Clone, PartialEq)] pub struct VariableReadPlan { pub name: String, pub type_name: String, + pub access_path: VariableAccessPath, pub dwarf_type: Option, pub declaration: Option, pub type_id: Option, @@ -77,7 +120,7 @@ pub struct VariableReadPlan { pub provenance: Provenance, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct VariableAccessPath { pub segments: Vec, } @@ -165,6 +208,7 @@ impl VariableReadPlan { Self { name: variable.name, type_name: variable.type_name, + access_path: VariableAccessPath::default(), dwarf_type: variable.dwarf_type, declaration: variable.declaration, type_id: variable.type_id, @@ -233,12 +277,72 @@ impl VariableReadPlan { } } + pub fn materialization_plan( + &self, + capabilities: &RuntimeCapabilities, + ) -> VariableMaterializationPlan { + let lowering = self.bpf_lowering_plan(capabilities); + let materialization = if !lowering.availability.is_available() { + VariableMaterialization::Unavailable { + availability: lowering.availability.clone(), + } + } else { + match lowering.kind { + VariableLoweringKind::DirectValue => VariableMaterialization::DirectValue { + address_origin: direct_value_address_origin(&self.location), + location: self.location.clone(), + }, + VariableLoweringKind::UserMemoryRead => { + match PlannedAddress::from_location(self.location.clone()) { + Some(address) => VariableMaterialization::UserMemoryRead { address }, + None => VariableMaterialization::Unavailable { + availability: Availability::Unsupported( + UnsupportedReason::AddressClass { + detail: format!( + "location {} cannot be materialized as an address", + self.location + ), + }, + ), + }, + } + } + VariableLoweringKind::Composite => match &self.location { + VariableLocation::Pieces(pieces) => VariableMaterialization::Composite { + pieces: pieces.clone(), + }, + _ => VariableMaterialization::Unavailable { + availability: Availability::Unsupported( + UnsupportedReason::ExpressionShape { + detail: "composite lowering without piece locations".to_string(), + }, + ), + }, + }, + VariableLoweringKind::Unavailable => VariableMaterialization::Unavailable { + availability: lowering.availability.clone(), + }, + } + }; + + VariableMaterializationPlan { + name: self.name.clone(), + type_name: self.type_name.clone(), + access_path: self.access_path.clone(), + dwarf_type: self.dwarf_type.clone(), + availability: lowering.availability.clone(), + lowering, + materialization, + } + } + pub fn plan_access_path(&self, path: &VariableAccessPath) -> Result { let mut plan = self.clone(); for segment in &path.segments { plan = plan.plan_access_segment(segment)?; } + plan.access_path.segments.extend(path.segments.clone()); plan.name.push_str(&path.suffix()); Ok(plan) } @@ -346,6 +450,54 @@ impl VariableReadPlan { } } +impl PlannedAddress { + pub fn from_location(location: VariableLocation) -> Option { + let origin = match &location { + VariableLocation::Address(expr) | VariableLocation::AbsoluteAddressValue(expr) => { + address_origin_for_steps(&expr.steps) + } + VariableLocation::RegisterAddress { .. } + | VariableLocation::FrameBaseRelative { .. } => AddressOrigin::RuntimeDerived, + VariableLocation::ComputedAddress(steps) => address_origin_for_steps(steps), + VariableLocation::RegisterValue { .. } + | VariableLocation::ComputedValue(_) + | VariableLocation::ImplicitValue(_) + | VariableLocation::Pieces(_) + | VariableLocation::OptimizedOut + | VariableLocation::Unknown => return None, + }; + + Some(Self { location, origin }) + } + + pub fn constant_link_time_address(&self) -> Option { + match (&self.origin, &self.location) { + (AddressOrigin::LinkTime, VariableLocation::Address(expr)) + | (AddressOrigin::LinkTime, VariableLocation::AbsoluteAddressValue(expr)) => { + fold_constant_steps(&expr.steps) + } + (AddressOrigin::LinkTime, VariableLocation::ComputedAddress(steps)) => { + fold_constant_steps(steps) + } + _ => None, + } + } + + pub fn link_time_base_and_runtime_tail(&self) -> Option<(u64, &[ComputeStep])> { + if self.origin != AddressOrigin::LinkTimeBase { + return None; + } + + match &self.location { + VariableLocation::Address(expr) | VariableLocation::AbsoluteAddressValue(expr) => { + link_time_base_and_runtime_tail(&expr.steps) + } + VariableLocation::ComputedAddress(steps) => link_time_base_and_runtime_tail(steps), + _ => None, + } + } +} + impl RuntimeCapabilities { pub fn supports_requirement(&self, requirement: &RuntimeRequirement) -> bool { match requirement { @@ -360,6 +512,89 @@ impl RuntimeCapabilities { } } +fn direct_value_address_origin(location: &VariableLocation) -> Option { + match location { + VariableLocation::AbsoluteAddressValue(expr) => Some(address_origin_for_steps(&expr.steps)), + _ => None, + } +} + +fn address_origin_for_steps(steps: &[ComputeStep]) -> AddressOrigin { + if fold_constant_steps(steps).is_some() { + return AddressOrigin::LinkTime; + } + + if link_time_base_and_runtime_tail(steps).is_some() { + return AddressOrigin::LinkTimeBase; + } + + if steps_reference_runtime_state(steps) { + AddressOrigin::RuntimeDerived + } else { + AddressOrigin::Unknown + } +} + +fn fold_constant_steps(steps: &[ComputeStep]) -> Option { + let mut const_stack: Vec = Vec::new(); + for step in steps { + match step { + ComputeStep::PushConstant(value) => const_stack.push(*value), + ComputeStep::Add => { + let rhs = const_stack.pop()?; + let lhs = const_stack.pop()?; + const_stack.push(lhs.saturating_add(rhs)); + } + _ => return None, + } + } + + if const_stack.len() == 1 && const_stack[0] >= 0 { + Some(const_stack[0] as u64) + } else { + None + } +} + +fn link_time_base_and_runtime_tail(steps: &[ComputeStep]) -> Option<(u64, &[ComputeStep])> { + let Some(ComputeStep::PushConstant(base)) = steps.first() else { + return None; + }; + + if *base < 0 { + return None; + } + + for step in steps.iter().skip(1) { + match step { + ComputeStep::LoadRegister(_) => { + break; + } + ComputeStep::Dereference { .. } => { + return Some((*base as u64, &steps[1..])); + } + _ => {} + } + } + + None +} + +fn steps_reference_runtime_state(steps: &[ComputeStep]) -> bool { + steps.iter().any(|step| match step { + ComputeStep::LoadRegister(_) + | ComputeStep::Dereference { .. } + | ComputeStep::EntryValueLookup { .. } => true, + ComputeStep::If { + then_branch, + else_branch, + } => { + steps_reference_runtime_state(then_branch) || steps_reference_runtime_state(else_branch) + } + _ => false, + }) +} + trait VariableLocationLoweringExt { fn lowering_kind(&self) -> VariableLoweringKind; fn runtime_requirements(&self) -> Vec; @@ -762,6 +997,7 @@ mod tests { VariableReadPlan { name: "value".to_string(), type_name: "int".to_string(), + access_path: VariableAccessPath::default(), dwarf_type: None, declaration: None, type_id: None, @@ -822,6 +1058,118 @@ mod tests { assert!(lowering.required_registers.is_empty()); } + #[test] + fn materialization_plan_preserves_link_time_address_origin() { + let plan = read_plan(VariableLocation::Address(AddressExpr::constant(0x1000))); + let materialized = plan.materialization_plan(&capabilities(true)); + + match materialized.materialization { + VariableMaterialization::UserMemoryRead { address } => { + assert_eq!(address.origin, AddressOrigin::LinkTime); + assert_eq!(address.constant_link_time_address(), Some(0x1000)); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + + #[test] + fn materialization_plan_marks_static_base_before_deref() { + let plan = read_plan(VariableLocation::ComputedAddress(vec![ + ComputeStep::PushConstant(0x3000), + ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }, + ComputeStep::PushConstant(16), + ComputeStep::Add, + ])); + let materialized = plan.materialization_plan(&capabilities(true)); + + match materialized.materialization { + VariableMaterialization::UserMemoryRead { address } => { + assert_eq!(address.origin, AddressOrigin::LinkTimeBase); + let (base, tail) = address + .link_time_base_and_runtime_tail() + .expect("link-time base"); + assert_eq!(base, 0x3000); + assert_eq!(tail.len(), 3); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + + #[test] + fn materialization_plan_preserves_arithmetic_before_first_deref() { + let plan = read_plan(VariableLocation::ComputedAddress(vec![ + ComputeStep::PushConstant(0x3000), + ComputeStep::PushConstant(8), + ComputeStep::Add, + ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }, + ])); + let materialized = plan.materialization_plan(&capabilities(true)); + + match materialized.materialization { + VariableMaterialization::UserMemoryRead { address } => { + assert_eq!(address.origin, AddressOrigin::LinkTimeBase); + let (base, tail) = address + .link_time_base_and_runtime_tail() + .expect("link-time base"); + assert_eq!(base, 0x3000); + assert_eq!( + tail, + &[ + ComputeStep::PushConstant(8), + ComputeStep::Add, + ComputeStep::Dereference { + size: MemoryAccessSize::U64, + }, + ] + ); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + + #[test] + fn materialization_plan_keeps_absolute_address_value_direct() { + let plan = read_plan(VariableLocation::AbsoluteAddressValue( + AddressExpr::constant(0x2000), + )); + let materialized = plan.materialization_plan(&capabilities(false)); + + match materialized.materialization { + VariableMaterialization::DirectValue { + address_origin, + location, + } => { + assert_eq!(address_origin, Some(AddressOrigin::LinkTime)); + assert!(matches!( + location, + VariableLocation::AbsoluteAddressValue(_) + )); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + + #[test] + fn materialization_plan_surfaces_piece_locations_without_first_piece_fallback() { + let plan = read_plan(VariableLocation::Pieces(vec![PieceLocation { + bit_offset: 0, + bit_size: 32, + location: Box::new(VariableLocation::RegisterValue { dwarf_reg: 0 }), + }])); + let materialized = plan.materialization_plan(&capabilities(true)); + + match materialized.materialization { + VariableMaterialization::Composite { pieces } => { + assert_eq!(pieces.len(), 1); + } + other => panic!("unexpected materialization: {other:?}"), + } + } + #[test] fn absolute_address_value_lowers_without_user_memory_read() { let plan = read_plan(VariableLocation::AbsoluteAddressValue( @@ -940,6 +1288,7 @@ mod tests { let planned = plan.plan_access_path(&access).expect("field access"); assert_eq!(planned.name, "value.fd"); + assert_eq!(planned.access_path, access); assert_eq!(planned.dwarf_type, Some(int_type)); assert_eq!( planned.location, @@ -948,6 +1297,13 @@ mod tests { offset: -20, } ); + assert_eq!( + planned + .materialization_plan(&capabilities(true)) + .access_path + .segments, + vec![VariableAccessSegment::Field("fd".to_string())] + ); } #[test]