diff --git a/Cargo.toml b/Cargo.toml index 7e835aa..73e859e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ exclude = ["/.github", "/.vscode", "/tests", "/fixtures", "/big-fixtures"] [dependencies] gimli = "0.27.0" +object = { version = ">=0.30", optional = true } thiserror = "1.0.30" macho-unwind-info = "0.3.0" fallible-iterator = "0.2.0" diff --git a/src/arcdata.rs b/src/arcdata.rs index 5fe1695..932d8a8 100644 --- a/src/arcdata.rs +++ b/src/arcdata.rs @@ -8,7 +8,7 @@ impl> Deref for ArcData { type Target = [u8]; fn deref(&self) -> &Self::Target { - &*self.0 + &self.0 } } diff --git a/src/dwarf.rs b/src/dwarf.rs index abf8354..50ccc46 100644 --- a/src/dwarf.rs +++ b/src/dwarf.rs @@ -1,13 +1,15 @@ -use std::{marker::PhantomData, ops::Range}; +use std::marker::PhantomData; use gimli::{ - BaseAddresses, CfaRule, CieOrFde, DebugFrame, EhFrame, EhFrameHdr, Encoding, EndianSlice, - Evaluation, EvaluationResult, EvaluationStorage, Expression, LittleEndian, Location, - ParsedEhFrameHdr, Reader, ReaderOffset, Register, RegisterRule, UnwindContext, - UnwindContextStorage, UnwindOffset, UnwindSection, UnwindTableRow, Value, + CfaRule, CieOrFde, DebugFrame, EhFrame, EhFrameHdr, Encoding, EndianSlice, Evaluation, + EvaluationResult, EvaluationStorage, Expression, LittleEndian, Location, ParsedEhFrameHdr, + Reader, ReaderOffset, Register, RegisterRule, UnwindContext, UnwindContextStorage, + UnwindOffset, UnwindSection, UnwindTableRow, Value, }; -use crate::{arch::Arch, unwind_result::UnwindResult, ModuleSvmaInfo}; +pub(crate) use gimli::BaseAddresses; + +use crate::{arch::Arch, unwind_result::UnwindResult, ModuleSectionInfo}; #[derive(thiserror::Error, Debug, Clone, Copy, PartialEq, Eq)] pub enum DwarfUnwinderError { @@ -90,9 +92,9 @@ impl<'a, R: Reader, A: DwarfUnwinding, S: UnwindContextStorage + EvaluationSt unwind_section_type: UnwindSectionType, eh_frame_hdr_data: Option<&'a [u8]>, unwind_context: &'a mut UnwindContext, - svma_info: &ModuleSvmaInfo, + bases: BaseAddresses, + base_svma: u64, ) -> Self { - let bases = base_addresses_for_sections(svma_info); let eh_frame_hdr = match eh_frame_hdr_data { Some(eh_frame_hdr_data) => { let hdr = EhFrameHdr::new(eh_frame_hdr_data, unwind_section_data.endian()); @@ -109,7 +111,7 @@ impl<'a, R: Reader, A: DwarfUnwinding, S: UnwindContextStorage + EvaluationSt eh_frame_hdr, unwind_context, bases, - base_svma: svma_info.base_svma, + base_svma, _arch: PhantomData, } } @@ -180,19 +182,21 @@ impl<'a, R: Reader, A: DwarfUnwinding, S: UnwindContextStorage + EvaluationSt } } -fn base_addresses_for_sections(svma_info: &ModuleSvmaInfo) -> BaseAddresses { - fn start_addr(range: &Option>) -> u64 { - if let Some(range) = range { - range.start - } else { - 0 - } - } +pub(crate) fn base_addresses_for_sections( + section_info: &mut impl ModuleSectionInfo, +) -> BaseAddresses { + let mut start_addr = |names: &[&[u8]]| -> u64 { + names + .iter() + .find_map(|name| section_info.section_svma_range(name)) + .map(|r| r.start) + .unwrap_or_default() + }; BaseAddresses::default() - .set_eh_frame(start_addr(&svma_info.eh_frame)) - .set_eh_frame_hdr(start_addr(&svma_info.eh_frame_hdr)) - .set_text(start_addr(&svma_info.text)) - .set_got(start_addr(&svma_info.got)) + .set_eh_frame(start_addr(&[b"__eh_frame", b".eh_frame"])) + .set_eh_frame_hdr(start_addr(&[b"__eh_frame_hdr", b".eh_frame_hdr"])) + .set_text(start_addr(&[b"__text", b".text"])) + .set_got(start_addr(&[b"__got", b".got"])) } #[derive(thiserror::Error, Debug, Clone, Copy, PartialEq, Eq)] @@ -279,26 +283,26 @@ impl DwarfCfiIndex { }) } - pub fn try_new_eh_frame( + pub fn try_new_eh_frame( eh_frame_data: &[u8], - svma_info: &ModuleSvmaInfo, + section_info: &mut impl ModuleSectionInfo, ) -> Result { - let bases = base_addresses_for_sections(svma_info); + let bases = base_addresses_for_sections(section_info); let mut eh_frame = EhFrame::from(EndianSlice::new(eh_frame_data, LittleEndian)); eh_frame.set_address_size(8); - Self::try_new(eh_frame, bases, svma_info.base_svma) + Self::try_new(eh_frame, bases, section_info.base_svma()) } - pub fn try_new_debug_frame( + pub fn try_new_debug_frame( debug_frame_data: &[u8], - svma_info: &ModuleSvmaInfo, + section_info: &mut impl ModuleSectionInfo, ) -> Result { - let bases = base_addresses_for_sections(svma_info); + let bases = base_addresses_for_sections(section_info); let mut debug_frame = DebugFrame::from(EndianSlice::new(debug_frame_data, LittleEndian)); debug_frame.set_address_size(8); - Self::try_new(debug_frame, bases, svma_info.base_svma) + Self::try_new(debug_frame, bases, section_info.base_svma()) } pub fn fde_offset_for_relative_address(&self, rel_lookup_address: u32) -> Option { diff --git a/src/lib.rs b/src/lib.rs index 83e02d1..f5af49f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,10 +47,10 @@ //! //! ## Example //! -//! ```rust -//! # fn test_root_doc_comment() { +//! ``` +//! use std::ops::Range; //! use framehop::aarch64::{CacheAarch64, UnwindRegsAarch64, UnwinderAarch64}; -//! use framehop::{FrameAddress, Module, ModuleSvmaInfo, ModuleUnwindData, TextByteData}; +//! use framehop::{ExplicitModuleSectionInfo, FrameAddress, Module, ModuleSectionInfo}; //! //! let mut cache = CacheAarch64::<_>::new(); //! let mut unwinder = UnwinderAarch64::new(); @@ -59,21 +59,20 @@ //! "mybinary".to_string(), //! 0x1003fc000..0x100634000, //! 0x1003fc000, -//! ModuleSvmaInfo { +//! ExplicitModuleSectionInfo { //! base_svma: 0x100000000, -//! text: Some(0x100000b64..0x1001d2d18), -//! text_env: None, -//! stubs: Some(0x1001d2d18..0x1001d309c), -//! stub_helper: Some(0x1001d309c..0x1001d3438), -//! eh_frame: Some(0x100237f80..0x100237ffc), -//! eh_frame_hdr: None, -//! got: Some(0x100238000..0x100238010), +//! text_svma: Some(0x100000b64..0x1001d2d18), +//! text: Some(vec![/* __text */]), +//! stubs_svma: Some(0x1001d2d18..0x1001d309c), +//! stub_helper_svma: Some(0x1001d309c..0x1001d3438), +//! got_svma: Some(0x100238000..0x100238010), +//! unwind_info: Some(vec![/* __unwind_info */]), +//! eh_frame_svma: Some(0x100237f80..0x100237ffc), +//! eh_frame: Some(vec![/* __eh_frame */]), +//! text_segment_file_range: Some(0x1003fc000..0x100634000), +//! text_segment: Some(vec![/* __TEXT */]), +//! ..Default::default() //! }, -//! ModuleUnwindData::CompactUnwindInfoAndEhFrame(vec![/* __unwind_info */], None), -//! Some(TextByteData::new( -//! vec![/* __TEXT */], -//! 0x1003fc000..0x100634000, -//! )), //! ); //! unwinder.add_module(module); //! @@ -109,7 +108,6 @@ //! FrameAddress::from_return_address(0x1003fc000 + 0x12ca28).unwrap() //! ] //! ); -//! # } //! ``` mod add_signed; @@ -137,7 +135,7 @@ pub use code_address::FrameAddress; pub use error::Error; pub use rule_cache::CacheStats; pub use unwinder::{ - Module, ModuleSvmaInfo, ModuleUnwindData, TextByteData, UnwindIterator, Unwinder, + ExplicitModuleSectionInfo, Module, ModuleSectionInfo, UnwindIterator, Unwinder, }; /// The unwinder cache for the native CPU architecture. diff --git a/src/unwinder.rs b/src/unwinder.rs index 8cfe6b3..4197c11 100644 --- a/src/unwinder.rs +++ b/src/unwinder.rs @@ -18,7 +18,6 @@ use crate::FrameAddress; use std::marker::PhantomData; use std::sync::atomic::{AtomicU16, Ordering}; use std::{ - fmt::Debug, ops::{Deref, Range}, sync::Arc, }; @@ -385,32 +384,37 @@ impl< { let is_first_frame = !address.is_return_address(); let unwind_result = match &module.unwind_data { - ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame(unwind_data, eh_frame_data) => { + ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame { + unwind_info, + eh_frame, + stubs_svma: stubs, + stub_helper_svma: stub_helper, + base_addresses, + text_data, + } => { // eprintln!("unwinding with cui and eh_frame in module {}", module.name); - let text_bytes = module.text_data.as_ref().and_then(|data| { - let offset_from_base = - u32::try_from(data.avma_range.start.checked_sub(module.base_avma)?).ok()?; + let text_bytes = text_data.as_ref().and_then(|data| { + let offset_from_base = u32::try_from(data.svma_range.start).ok()?; Some(TextBytes::new(offset_from_base, &data.bytes[..])) }); - let stubs_range = if let Some(stubs_range) = &module.svma_info.stubs { + let stubs_range = if let Some(stubs_range) = stubs { ( - (stubs_range.start - module.svma_info.base_svma) as u32, - (stubs_range.end - module.svma_info.base_svma) as u32, + (stubs_range.start - module.base_svma) as u32, + (stubs_range.end - module.base_svma) as u32, + ) + } else { + (0, 0) + }; + let stub_helper_range = if let Some(stub_helper_range) = stub_helper { + ( + (stub_helper_range.start - module.base_svma) as u32, + (stub_helper_range.end - module.base_svma) as u32, ) } else { (0, 0) }; - let stub_helper_range = - if let Some(stub_helper_range) = &module.svma_info.stub_helper { - ( - (stub_helper_range.start - module.svma_info.base_svma) as u32, - (stub_helper_range.end - module.svma_info.base_svma) as u32, - ) - } else { - (0, 0) - }; let mut unwinder = CompactUnwindInfoUnwinder::::new( - &unwind_data[..], + &unwind_info[..], text_bytes, stubs_range, stub_helper_range, @@ -420,7 +424,7 @@ impl< match unwind_result { CuiUnwindResult::ExecRule(rule) => UnwindResult::ExecRule(rule), CuiUnwindResult::NeedDwarf(fde_offset) => { - let eh_frame_data = match eh_frame_data { + let eh_frame_data = match eh_frame { Some(data) => ArcData(data.clone()), None => return Err(UnwinderError::NoDwarfData), }; @@ -429,7 +433,8 @@ impl< UnwindSectionType::EhFrame, None, &mut cache.gimli_unwind_context, - &module.svma_info, + base_addresses.clone(), + module.base_svma, ); dwarf_unwinder.unwind_frame_with_fde( regs, @@ -441,15 +446,20 @@ impl< } } } - ModuleUnwindDataInternal::EhFrameHdrAndEhFrame(eh_frame_hdr, eh_frame_data) => { + ModuleUnwindDataInternal::EhFrameHdrAndEhFrame { + eh_frame_hdr, + eh_frame, + base_addresses, + } => { let eh_frame_hdr_data = &eh_frame_hdr[..]; - let eh_frame_data = ArcData(eh_frame_data.clone()); + let eh_frame_data = ArcData(eh_frame.clone()); let mut dwarf_unwinder = DwarfUnwinder::<_, A, P::GimliStorage>::new( EndianReader::new(eh_frame_data, LittleEndian), UnwindSectionType::EhFrame, Some(eh_frame_hdr_data), &mut cache.gimli_unwind_context, - &module.svma_info, + base_addresses.clone(), + module.base_svma, ); let fde_offset = dwarf_unwinder .get_fde_offset_for_relative_address(rel_lookup_address) @@ -462,14 +472,19 @@ impl< read_stack, )? } - ModuleUnwindDataInternal::DwarfCfiIndexAndEhFrame(index, eh_frame_data) => { - let eh_frame_data = ArcData(eh_frame_data.clone()); + ModuleUnwindDataInternal::DwarfCfiIndexAndEhFrame { + index, + eh_frame, + base_addresses, + } => { + let eh_frame_data = ArcData(eh_frame.clone()); let mut dwarf_unwinder = DwarfUnwinder::<_, A, P::GimliStorage>::new( EndianReader::new(eh_frame_data, LittleEndian), UnwindSectionType::EhFrame, None, &mut cache.gimli_unwind_context, - &module.svma_info, + base_addresses.clone(), + module.base_svma, ); let fde_offset = index .fde_offset_for_relative_address(rel_lookup_address) @@ -482,14 +497,19 @@ impl< read_stack, )? } - ModuleUnwindDataInternal::DwarfCfiIndexAndDebugFrame(index, debug_frame_data) => { - let debug_frame_data = ArcData(debug_frame_data.clone()); + ModuleUnwindDataInternal::DwarfCfiIndexAndDebugFrame { + index, + debug_frame, + base_addresses, + } => { + let debug_frame_data = ArcData(debug_frame.clone()); let mut dwarf_unwinder = DwarfUnwinder::<_, A, P::GimliStorage>::new( EndianReader::new(debug_frame_data, LittleEndian), UnwindSectionType::DebugFrame, None, &mut cache.gimli_unwind_context, - &module.svma_info, + base_addresses.clone(), + module.base_svma, ); let fde_offset = index .fde_offset_for_relative_address(rel_lookup_address) @@ -520,62 +540,111 @@ impl< /// module, e.g. `Vec`. But it could also be a wrapper around mapped memory from /// a file or a different process, for example. It just needs to provide a slice of /// bytes via its `Deref` implementation. -pub enum ModuleUnwindData> { +enum ModuleUnwindDataInternal> { /// Used on macOS, with mach-O binaries. Compact unwind info is in the `__unwind_info` /// section and is sometimes supplemented with DWARF CFI information in the `__eh_frame` - /// section. - CompactUnwindInfoAndEhFrame(D, Option), + /// section. `__stubs` and `__stub_helper` ranges are used by the unwinder. + CompactUnwindInfoAndEhFrame { + unwind_info: D, + eh_frame: Option>, + stubs_svma: Option>, + stub_helper_svma: Option>, + base_addresses: crate::dwarf::BaseAddresses, + text_data: Option>, + }, /// Used with ELF binaries (Linux and friends), in the `.eh_frame_hdr` and `.eh_frame` /// sections. Contains an index and DWARF CFI. - EhFrameHdrAndEhFrame(D, D), + EhFrameHdrAndEhFrame { + eh_frame_hdr: D, + eh_frame: Arc, + base_addresses: crate::dwarf::BaseAddresses, + }, /// Used with ELF binaries (Linux and friends), in the `.eh_frame` section. Contains /// DWARF CFI. We create a binary index for the FDEs when a module with this unwind /// data type is added. - EhFrame(D), + DwarfCfiIndexAndEhFrame { + index: DwarfCfiIndex, + eh_frame: Arc, + base_addresses: crate::dwarf::BaseAddresses, + }, /// Used with ELF binaries (Linux and friends), in the `.debug_frame` section. Contains /// DWARF CFI. We create a binary index for the FDEs when a module with this unwind /// data type is added. - DebugFrame(D), + DwarfCfiIndexAndDebugFrame { + index: DwarfCfiIndex, + debug_frame: Arc, + base_addresses: crate::dwarf::BaseAddresses, + }, /// No unwind information is used. Unwinding in this module will use a fallback rule /// (usually frame pointer unwinding). None, } -enum ModuleUnwindDataInternal> { - CompactUnwindInfoAndEhFrame(D, Option>), - EhFrameHdrAndEhFrame(D, Arc), - DwarfCfiIndexAndEhFrame(DwarfCfiIndex, Arc), - DwarfCfiIndexAndDebugFrame(DwarfCfiIndex, Arc), - None, -} - impl> ModuleUnwindDataInternal { - fn new(unwind_data: ModuleUnwindData, svma_info: &ModuleSvmaInfo) -> Self { - match unwind_data { - ModuleUnwindData::CompactUnwindInfoAndEhFrame(cui, eh_frame) => { - ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame(cui, eh_frame.map(Arc::new)) - } - ModuleUnwindData::EhFrameHdrAndEhFrame(eh_frame_hdr, eh_frame) => { - ModuleUnwindDataInternal::EhFrameHdrAndEhFrame(eh_frame_hdr, Arc::new(eh_frame)) + fn new(section_info: &mut impl ModuleSectionInfo) -> Self { + use crate::dwarf::base_addresses_for_sections; + + if let Some(unwind_info) = section_info.section_data(b"__unwind_info") { + let eh_frame = section_info.section_data(b"__eh_frame"); + let stubs = section_info.section_svma_range(b"__stubs"); + let stub_helper = section_info.section_svma_range(b"__stub_helper"); + // Get the bytes of the executable code (instructions). + // + // In mach-O objects, executable code is stored in the `__TEXT` segment, which contains + // multiple executable sections such as `__text`, `__stubs`, and `__stub_helper`. If we + // don't have the full `__TEXT` segment contents, we can fall back to the contents of + // just the `__text` section. + let text_data = section_info + .segment_data(b"__TEXT") + .zip(section_info.segment_file_range(b"__TEXT")) + .or_else(|| { + section_info + .section_data(b"__text") + .zip(section_info.section_file_range(b"__text")) + }) + .map(|(bytes, svma_range)| TextByteData { bytes, svma_range }); + ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame { + unwind_info, + eh_frame: eh_frame.map(Arc::new), + stubs_svma: stubs, + stub_helper_svma: stub_helper, + base_addresses: base_addresses_for_sections(section_info), + text_data, } - ModuleUnwindData::EhFrame(eh_frame) => { - match DwarfCfiIndex::try_new_eh_frame(&eh_frame, svma_info) { - Ok(index) => { - ModuleUnwindDataInternal::DwarfCfiIndexAndEhFrame(index, Arc::new(eh_frame)) - } - Err(_) => ModuleUnwindDataInternal::None, + } else if let Some(eh_frame) = section_info + .section_data(b".eh_frame") + .or_else(|| section_info.section_data(b"__eh_frame")) + { + if let Some(eh_frame_hdr) = section_info + .section_data(b".eh_frame_hdr") + .or_else(|| section_info.section_data(b"__eh_frame_hdr")) + { + ModuleUnwindDataInternal::EhFrameHdrAndEhFrame { + eh_frame_hdr, + eh_frame: Arc::new(eh_frame), + base_addresses: base_addresses_for_sections(section_info), } - } - ModuleUnwindData::DebugFrame(debug_frame) => { - match DwarfCfiIndex::try_new_debug_frame(&debug_frame, svma_info) { - Ok(index) => ModuleUnwindDataInternal::DwarfCfiIndexAndDebugFrame( + } else { + match DwarfCfiIndex::try_new_eh_frame(&eh_frame, section_info) { + Ok(index) => ModuleUnwindDataInternal::DwarfCfiIndexAndEhFrame { index, - Arc::new(debug_frame), - ), + eh_frame: Arc::new(eh_frame), + base_addresses: base_addresses_for_sections(section_info), + }, Err(_) => ModuleUnwindDataInternal::None, } } - ModuleUnwindData::None => ModuleUnwindDataInternal::None, + } else if let Some(debug_frame) = section_info.section_data(b".debug_frame") { + match DwarfCfiIndex::try_new_debug_frame(&debug_frame, section_info) { + Ok(index) => ModuleUnwindDataInternal::DwarfCfiIndexAndDebugFrame { + index, + debug_frame: Arc::new(debug_frame), + base_addresses: base_addresses_for_sections(section_info), + }, + Err(_) => ModuleUnwindDataInternal::None, + } + } else { + ModuleUnwindDataInternal::None } } } @@ -597,32 +666,9 @@ impl> ModuleUnwindDataInternal { /// module, e.g. `Vec`. But it could also be a wrapper around mapped memory from /// a file or a different process, for example. It just needs to provide a slice of /// bytes via its `Deref` implementation. -pub struct TextByteData> { - bytes: D, - avma_range: Range, -} - -impl> TextByteData { - /// Supply the bytes which cover `avma_range` in the process virtual memory. - /// Both arguments should have the same length. - pub fn new(bytes: D, avma_range: Range) -> Self { - Self { bytes, avma_range } - } - - /// Return a byte slice for the requested range, if in-bounds. - pub fn get_bytes(&self, avma_range: Range) -> Option<&[u8]> { - let rel_start = avma_range.start.checked_sub(self.avma_range.start)?; - let rel_start = usize::try_from(rel_start).ok()?; - let rel_end = avma_range.end.checked_sub(self.avma_range.start)?; - let rel_end = usize::try_from(rel_end).ok()?; - self.bytes.get(rel_start..rel_end) - } - - /// The address range covered by the supplied bytes, in process virtual memory. - /// "Actual virtual memory address range" - pub fn avma_range(&self) -> Range { - self.avma_range.clone() - } +struct TextByteData> { + pub bytes: D, + pub svma_range: Range, } /// Information about a module that is loaded in a process. You might know this under a @@ -647,43 +693,80 @@ pub struct Module> { /// The base address of this module, in the process's address space. On Linux, the base /// address can sometimes be different from the start address of the mapped range. base_avma: u64, - /// Information about various addresses in the module. - svma_info: ModuleSvmaInfo, + /// The base address of this module, according to the module. + base_svma: u64, /// The unwind data that should be used for unwinding addresses from this module. unwind_data: ModuleUnwindDataInternal, - /// The raw assembly bytes of this module. Used for instruction analysis to ensure - /// correct unwinding inside function prologues and epilogues. - text_data: Option>, } -/// The addresses of various sections in the module. +/// Information about a module's sections (and segments). +/// +/// This trait is used as an interface to module information, and each function with `&mut self` is +/// called at most once with a particular argument (e.g., `section_data(b".text")` will be called +/// at most once, so it can move data out of the underlying type if desired). +/// +/// Type arguments: +/// +/// - `D`: The type for section data. This allows carrying owned data on the module, e.g. +/// `Vec`. But it could also be a wrapper around mapped memory from a file or a different +/// process, for example. +pub trait ModuleSectionInfo { + /// Return the base address stated in the module. + /// + /// For mach-O objects, this is the vmaddr of the __TEXT segment. For ELF objects, this is + /// zero. For PE objects, this is the image base address. + /// + /// This is used to convert between SVMAs and relative addresses. + fn base_svma(&self) -> u64; + + /// Get the given section's memory range, as stated in the module. + fn section_svma_range(&mut self, name: &[u8]) -> Option>; + + /// Get the given section's file range in the module. + fn section_file_range(&mut self, name: &[u8]) -> Option>; + + /// Get the given section's data. + fn section_data(&mut self, name: &[u8]) -> Option; + + /// Get the given segment's file range in the module. + fn segment_file_range(&mut self, _name: &[u8]) -> Option> { + None + } + + /// Get the given segment's data. + fn segment_data(&mut self, _name: &[u8]) -> Option { + None + } +} + +/// Explicit addresses and data of various sections in the module. This implements +/// the `ModuleSectionInfo` trait. /// -/// These are SVMAs, "stated virtual memory addresses", i.e. addresses as stated -/// in the object, as opposed to AVMAs, "actual virtual memory addresses", i.e. addresses -/// in the virtual memory of the profiled process. +/// Unless otherwise stated, these are SVMAs, "stated virtual memory addresses", i.e. addresses as +/// stated in the object, as opposed to AVMAs, "actual virtual memory addresses", i.e. addresses in +/// the virtual memory of the profiled process. /// /// Code addresses inside a module's unwind information are usually written down as SVMAs, /// or as relative addresses. For example, DWARF CFI can have code addresses expressed as /// relative-to-.text addresses or as absolute SVMAs. And mach-O compact unwind info /// contains addresses relative to the image base address. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct ModuleSvmaInfo { +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct ExplicitModuleSectionInfo { /// The image base address, as stated in the object. For mach-O objects, this is the /// vmaddr of the `__TEXT` segment. For ELF objects, this is zero. /// /// This is used to convert between SVMAs and relative addresses. pub base_svma: u64, - /// The address range of the `__text` or `.text` section. This is where most of the - /// compiled code is stored. + /// The address range of the `__text` or `.text` section. This is where most of the compiled + /// code is stored. /// /// This is used to detect whether we need to do instruction analysis for an address. - pub text: Option>, - /// The address range of the `text_env` section, if present. If present, this contains - /// functions which have been marked as "cold". It stores executable code, just like - /// the text section. + pub text_svma: Option>, + /// The data of the `__text` or `.text` section. This is where most of the compiled code is + /// stored. /// - /// This is used to detect whether we need to do instruction analysis for an address. - pub text_env: Option>, + /// This is used to handle function prologues and epilogues in some cases. + pub text: Option, /// The address range of the mach-O `__stubs` section. Contains small pieces of /// executable code for calling imported functions. Code inside this section is not /// covered by the unwind information in `__unwind_info`. @@ -691,7 +774,7 @@ pub struct ModuleSvmaInfo { /// This is used to exclude addresses in this section from incorrectly applying /// `__unwind_info` opcodes. It is also used to infer unwind rules for the known /// structure of stub functions. - pub stubs: Option>, + pub stubs_svma: Option>, /// The address range of the mach-O `__stub_helper` section. Contains small pieces of /// executable code for calling imported functions. Code inside this section is not /// covered by the unwind information in `__unwind_info`. @@ -700,16 +783,130 @@ pub struct ModuleSvmaInfo { /// `__unwind_info` opcodes. It is also used to infer unwind rules for the known /// structure of stub helper /// functions. - pub stub_helper: Option>, - /// The address range of the `__eh_frame` or `.eh_frame` section. This is used during - /// DWARF CFI processing, to resolve eh_frame-relative addresses. - pub eh_frame: Option>, - /// The address range of the `.eh_frame_hdr` section. This is used during - /// DWARF CFI processing, to resolve eh_frame_hdr-relative addresses. - pub eh_frame_hdr: Option>, + pub stub_helper_svma: Option>, /// The address range of the `.got` section (Global Offset Table). This is used /// during DWARF CFI processing, to resolve got-relative addresses. - pub got: Option>, + pub got_svma: Option>, + /// The data of the `__unwind_info` section of mach-O binaries. + pub unwind_info: Option, + /// The address range of the `__eh_frame` or `.eh_frame` section. This is used during DWARF CFI + /// processing, to resolve eh_frame-relative addresses. + pub eh_frame_svma: Option>, + /// The data of the `__eh_frame` or `.eh_frame` section. This is used during DWARF CFI + /// processing, to resolve eh_frame-relative addresses. + pub eh_frame: Option, + /// The address range of the `.eh_frame_hdr` section. This is used during DWARF CFI processing, + /// to resolve eh_frame_hdr-relative addresses. + pub eh_frame_hdr_svma: Option>, + /// The data of the `.eh_frame_hdr` section. This is used during DWARF CFI processing, to + /// resolve eh_frame_hdr-relative addresses. + pub eh_frame_hdr: Option, + /// The data of the `.debug_frame` section. The related address range is not needed. + pub debug_frame: Option, + /// The file range of the `__TEXT` segment of mach-O binaries, or the `__text` section if the + /// segment is unavailable. + pub text_segment_file_range: Option>, + /// The data of the `__TEXT` segment of mach-O binaries, or the `__text` section if the segment + /// is unavailable. + pub text_segment: Option, +} + +impl ModuleSectionInfo for ExplicitModuleSectionInfo +where + D: Deref, +{ + fn base_svma(&self) -> u64 { + self.base_svma + } + + fn section_svma_range(&mut self, name: &[u8]) -> Option> { + match name { + b"__text" | b".text" => self.text_svma.take(), + b"__stubs" => self.stubs_svma.take(), + b"__stub_helper" => self.stub_helper_svma.take(), + b"__eh_frame" | b".eh_frame" => self.eh_frame_svma.take(), + b"__eh_frame_hdr" | b".eh_frame_hdr" => self.eh_frame_hdr_svma.take(), + b"__got" | b".got" => self.got_svma.take(), + _ => None, + } + } + fn section_file_range(&mut self, _name: &[u8]) -> Option> { + None + } + fn section_data(&mut self, name: &[u8]) -> Option { + match name { + b"__text" | b".text" => self.text.take(), + b"__unwind_info" => self.unwind_info.take(), + b"__eh_frame" | b".eh_frame" => self.eh_frame.take(), + b"__eh_frame_hdr" | b".eh_frame_hdr" => self.eh_frame_hdr.take(), + b"__debug_frame" | b".debug_frame" => self.debug_frame.take(), + _ => None, + } + } + fn segment_file_range(&mut self, name: &[u8]) -> Option> { + match name { + b"__TEXT" => self.text_segment_file_range.take(), + _ => None, + } + } + fn segment_data(&mut self, name: &[u8]) -> Option { + match name { + b"__TEXT" => self.text_segment.take(), + _ => None, + } + } +} + +#[cfg(feature = "object")] +mod object { + use super::{ModuleSectionInfo, Range}; + use object::read::{Object, ObjectSection, ObjectSegment}; + + impl<'data: 'file, 'file, O, D> ModuleSectionInfo for &'file O + where + O: Object<'data, 'file>, + D: From<&'data [u8]>, + { + fn base_svma(&self) -> u64 { + if let Some(text_segment) = self.segments().find(|s| s.name() == Ok(Some("__TEXT"))) { + // This is a mach-O image. "Relative addresses" are relative to the + // vmaddr of the __TEXT segment. + return text_segment.address(); + } + + // For PE binaries, relative_address_base() returns the image base address. + // Otherwise it returns zero. This gives regular ELF images a base address of zero, + // which is what we want. + self.relative_address_base() + } + + fn section_svma_range(&mut self, name: &[u8]) -> Option> { + let section = self.section_by_name_bytes(name)?; + Some(section.address()..section.address() + section.size()) + } + + fn section_file_range(&mut self, name: &[u8]) -> Option> { + let section = self.section_by_name_bytes(name)?; + let (start, size) = section.file_range()?; + Some(start..start + size) + } + + fn section_data(&mut self, name: &[u8]) -> Option { + let section = self.section_by_name_bytes(name)?; + section.data().ok().map(|data| data.into()) + } + + fn segment_file_range(&mut self, name: &[u8]) -> Option> { + let segment = self.segments().find(|s| s.name_bytes() == Ok(Some(name)))?; + let (start, size) = segment.file_range(); + Some(start..start + size) + } + + fn segment_data(&mut self, name: &[u8]) -> Option { + let segment = self.segments().find(|s| s.name_bytes() == Ok(Some(name)))?; + segment.data().ok().map(|data| data.into()) + } + } } impl> Module { @@ -717,18 +914,16 @@ impl> Module { name: String, avma_range: std::ops::Range, base_avma: u64, - svma_info: ModuleSvmaInfo, - unwind_data: ModuleUnwindData, - text_data: Option>, + mut section_info: impl ModuleSectionInfo, ) -> Self { - let unwind_data = ModuleUnwindDataInternal::new(unwind_data, &svma_info); + let unwind_data = ModuleUnwindDataInternal::new(&mut section_info); + Self { name, avma_range, base_avma, - svma_info, + base_svma: section_info.base_svma(), unwind_data, - text_data, } } } diff --git a/tests/integration_tests/common.rs b/tests/integration_tests/common.rs index bc7f50f..aa2815b 100644 --- a/tests/integration_tests/common.rs +++ b/tests/integration_tests/common.rs @@ -12,97 +12,65 @@ where let mut file = std::fs::File::open(objpath).unwrap(); file.read_to_end(&mut buf).unwrap(); - fn section_data<'a>(section: &impl ObjectSection<'a>) -> Option> { - section.data().ok().map(|data| data.to_owned()) - } - let file = object::File::parse(&buf[..]).expect("Could not parse object file"); - let base_svma = relative_address_base(&file); - - let text = file.section_by_name(".text"); - let stubs = file.section_by_name("__stubs"); - let stub_helper = file.section_by_name("__stub_helper"); - let text_env = file.section_by_name("__text_env"); - let unwind_info = file.section_by_name("__unwind_info"); - let eh_frame = file.section_by_name(".eh_frame"); - let got = file.section_by_name(".got"); - let eh_frame_hdr = file.section_by_name(".eh_frame_hdr"); - let debug_frame = file - .section_by_name(".debug_frame") - .or_else(|| file.section_by_name("__zdebug_frame")); - - let unwind_data = match ( - unwind_info.as_ref().and_then(section_data), - eh_frame.as_ref().and_then(section_data), - eh_frame_hdr.as_ref().and_then(section_data), - debug_frame, - ) { - (Some(unwind_info), eh_frame, _, _) => { - framehop::ModuleUnwindData::CompactUnwindInfoAndEhFrame(unwind_info, eh_frame) + struct Module<'a>(object::File<'a, &'a [u8]>); + + impl ModuleSectionInfo> for Module<'_> { + fn base_svma(&self) -> u64 { + relative_address_base(&self.0) } - (None, Some(eh_frame), Some(eh_frame_hdr), _) => { - framehop::ModuleUnwindData::EhFrameHdrAndEhFrame(eh_frame_hdr, eh_frame) + + fn section_svma_range(&mut self, name: &[u8]) -> Option> { + let section = self.0.section_by_name_bytes(name)?; + Some(section.address()..section.address() + section.size()) + } + + fn section_file_range(&mut self, name: &[u8]) -> Option> { + let section = self.0.section_by_name_bytes(name)?; + let (start, size) = section.file_range()?; + Some(start..start + size) } - (None, Some(eh_frame), None, _) => framehop::ModuleUnwindData::EhFrame(eh_frame), - (None, None, _, Some(debug_frame)) => { - eprintln!("Have debug_frame!"); - if let Some(section_data) = get_uncompressed_section_data(&debug_frame) { - let debug_frame_data = Vec::from(section_data); - framehop::ModuleUnwindData::DebugFrame(debug_frame_data) - } else { - framehop::ModuleUnwindData::None + + fn section_data(&mut self, name: &[u8]) -> Option> { + match self.0.section_by_name_bytes(name) { + Some(section) => section.data().ok().map(|data| data.to_owned()), + None if name == b".debug_frame" => { + let section = self.0.section_by_name_bytes(b"__zdebug_frame")?; + get_uncompressed_section_data(§ion).map(|d| d.into_owned()) + } + None => None, } } - (None, None, _, _) => framehop::ModuleUnwindData::None, - }; - let text_data = if let Some(text_segment) = file - .segments() - .find(|segment| segment.name_bytes() == Ok(Some(b"__TEXT"))) - { - let (start, size) = text_segment.file_range(); - let avma_range = base_avma + start..base_avma + start + size; - text_segment - .data() - .ok() - .map(|data| TextByteData::new(data.to_owned(), avma_range)) - } else if let Some(text_section) = &text { - if let Some((start, size)) = text_section.file_range() { - let avma_range = base_avma + start..base_avma + start + size; - text_section - .data() - .ok() - .map(|data| TextByteData::new(data.to_owned(), avma_range)) - } else { - None + fn segment_file_range(&mut self, name: &[u8]) -> Option> { + let segment = self + .0 + .segments() + .find(|s| s.name_bytes() == Ok(Some(name)))?; + let (start, size) = segment.file_range(); + Some(start..start + size) } - } else { - None - }; - fn svma_range<'a>(section: &Option>) -> Option> { - section - .as_ref() - .map(|section| section.address()..section.address() + section.size()) + fn segment_data(&mut self, name: &[u8]) -> Option> { + let segment = self + .0 + .segments() + .find(|s| s.name_bytes() == Ok(Some(name)))?; + segment.data().ok().map(|data| data.to_owned()) + } } + #[cfg(not(feature = "object"))] + let section_info = Module(file); + #[cfg(feature = "object")] + let section_info = &file; + let module = framehop::Module::new( objpath.to_string_lossy().to_string(), base_avma..(base_avma + buf.len() as u64), base_avma, - ModuleSvmaInfo { - base_svma, - text: svma_range(&text), - text_env: svma_range(&text_env), - stubs: svma_range(&stubs), - stub_helper: svma_range(&stub_helper), - eh_frame: svma_range(&eh_frame), - eh_frame_hdr: svma_range(&eh_frame_hdr), - got: svma_range(&got), - }, - unwind_data, - text_data, + section_info, ); unwinder.add_module(module); } diff --git a/tests/integration_tests/macos.rs b/tests/integration_tests/macos.rs index 5a0fcb5..aef95bb 100644 --- a/tests/integration_tests/macos.rs +++ b/tests/integration_tests/macos.rs @@ -76,28 +76,26 @@ fn test_basic() { #[test] fn test_root_doc_comment() { use framehop::aarch64::{CacheAarch64, UnwindRegsAarch64, UnwinderAarch64}; - use framehop::{FrameAddress, Module, ModuleSvmaInfo, ModuleUnwindData, TextByteData}; + use framehop::{ExplicitModuleSectionInfo, FrameAddress, Module}; + let mut cache = CacheAarch64::<_>::new(); let mut unwinder = UnwinderAarch64::new(); let module = Module::new( "mybinary".to_string(), 0x1003fc000..0x100634000, 0x1003fc000, - ModuleSvmaInfo { + ExplicitModuleSectionInfo { base_svma: 0x100000000, - text: Some(0x100000b64..0x1001d2d18), - text_env: None, - stubs: Some(0x1001d2d18..0x1001d309c), - stub_helper: Some(0x1001d309c..0x1001d3438), - eh_frame: Some(0x100237f80..0x100237ffc), - eh_frame_hdr: None, - got: Some(0x100238000..0x100238010), + text_svma: Some(0x100000b64..0x1001d2d18), + text: Some(vec![]), + stubs_svma: Some(0x1001d2d18..0x1001d309c), + stub_helper_svma: Some(0x1001d309c..0x1001d3438), + eh_frame_svma: Some(0x100237f80..0x100237ffc), + got_svma: Some(0x100238000..0x100238010), + text_segment_file_range: Some(0x1003fc000..0x100634000), + text_segment: Some(vec![]), + ..Default::default() }, - ModuleUnwindData::CompactUnwindInfoAndEhFrame(vec![/* __unwind_info */], None), - Some(TextByteData::new( - vec![/* __TEXT */], - 0x1003fc000..0x100634000, - )), ); unwinder.add_module(module);