From 012d42d52f6c5eb3f6bc89cac148742458fec41d Mon Sep 17 00:00:00 2001 From: Alex Franchuk Date: Thu, 20 Jul 2023 17:03:31 -0400 Subject: [PATCH 1/5] Change the Module creation interface. There is now a more general `ModuleSectionInfo` trait which is used to query sections/segments as needed. This moves a lot of cumbersome conditional code into the library to make clients' lives easier. This also adds `object` as an optional dependency, where (if enabled) the trait is implemented for references to `object::read::Object` types. --- Cargo.toml | 1 + src/dwarf.rs | 62 ++--- src/lib.rs | 62 +++-- src/unwinder.rs | 364 +++++++++++++++++------------- tests/integration_tests/common.rs | 120 ++++------ tests/integration_tests/macos.rs | 73 ++++-- 6 files changed, 391 insertions(+), 291 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7e835aa..04c9d83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ exclude = ["/.github", "/.vscode", "/tests", "/fixtures", "/big-fixtures"] [dependencies] gimli = "0.27.0" +object = { version = "0.30", optional = true } thiserror = "1.0.30" macho-unwind-info = "0.3.0" fallible-iterator = "0.2.0" diff --git a/src/dwarf.rs b/src/dwarf.rs index abf8354..a465219 100644 --- a/src/dwarf.rs +++ b/src/dwarf.rs @@ -1,13 +1,15 @@ -use std::{marker::PhantomData, ops::Range}; +use std::marker::PhantomData; use gimli::{ - BaseAddresses, CfaRule, CieOrFde, DebugFrame, EhFrame, EhFrameHdr, Encoding, EndianSlice, - Evaluation, EvaluationResult, EvaluationStorage, Expression, LittleEndian, Location, - ParsedEhFrameHdr, Reader, ReaderOffset, Register, RegisterRule, UnwindContext, - UnwindContextStorage, UnwindOffset, UnwindSection, UnwindTableRow, Value, + CfaRule, CieOrFde, DebugFrame, EhFrame, EhFrameHdr, Encoding, EndianSlice, Evaluation, + EvaluationResult, EvaluationStorage, Expression, LittleEndian, Location, ParsedEhFrameHdr, + Reader, ReaderOffset, Register, RegisterRule, UnwindContext, UnwindContextStorage, + UnwindOffset, 
UnwindSection, UnwindTableRow, Value, }; -use crate::{arch::Arch, unwind_result::UnwindResult, ModuleSvmaInfo}; +pub(crate) use gimli::BaseAddresses; + +use crate::{arch::Arch, unwind_result::UnwindResult, ModuleSectionInfo}; #[derive(thiserror::Error, Debug, Clone, Copy, PartialEq, Eq)] pub enum DwarfUnwinderError { @@ -90,9 +92,9 @@ impl<'a, R: Reader, A: DwarfUnwinding, S: UnwindContextStorage + EvaluationSt unwind_section_type: UnwindSectionType, eh_frame_hdr_data: Option<&'a [u8]>, unwind_context: &'a mut UnwindContext, - svma_info: &ModuleSvmaInfo, + bases: BaseAddresses, + base_svma: u64, ) -> Self { - let bases = base_addresses_for_sections(svma_info); let eh_frame_hdr = match eh_frame_hdr_data { Some(eh_frame_hdr_data) => { let hdr = EhFrameHdr::new(eh_frame_hdr_data, unwind_section_data.endian()); @@ -109,7 +111,7 @@ impl<'a, R: Reader, A: DwarfUnwinding, S: UnwindContextStorage + EvaluationSt eh_frame_hdr, unwind_context, bases, - base_svma: svma_info.base_svma, + base_svma, _arch: PhantomData, } } @@ -180,19 +182,21 @@ impl<'a, R: Reader, A: DwarfUnwinding, S: UnwindContextStorage + EvaluationSt } } -fn base_addresses_for_sections(svma_info: &ModuleSvmaInfo) -> BaseAddresses { - fn start_addr(range: &Option>) -> u64 { - if let Some(range) = range { - range.start - } else { - 0 - } - } +pub(crate) fn base_addresses_for_sections( + section_info: &impl ModuleSectionInfo, +) -> BaseAddresses { + let start_addr = |names: &[&[u8]]| -> u64 { + names + .into_iter() + .find_map(|name| section_info.section_svma_range(name)) + .map(|r| r.start) + .unwrap_or_default() + }; BaseAddresses::default() - .set_eh_frame(start_addr(&svma_info.eh_frame)) - .set_eh_frame_hdr(start_addr(&svma_info.eh_frame_hdr)) - .set_text(start_addr(&svma_info.text)) - .set_got(start_addr(&svma_info.got)) + .set_eh_frame(start_addr(&[b"__eh_frame", b".eh_frame"])) + .set_eh_frame_hdr(start_addr(&[b"__eh_frame_hdr", b".eh_frame_hdr"])) + .set_text(start_addr(&[b"__text", b".text"])) + 
.set_got(start_addr(&[b"__got", b".got"])) } #[derive(thiserror::Error, Debug, Clone, Copy, PartialEq, Eq)] @@ -279,26 +283,26 @@ impl DwarfCfiIndex { }) } - pub fn try_new_eh_frame( + pub fn try_new_eh_frame( eh_frame_data: &[u8], - svma_info: &ModuleSvmaInfo, + section_info: &impl ModuleSectionInfo, ) -> Result { - let bases = base_addresses_for_sections(svma_info); + let bases = base_addresses_for_sections(section_info); let mut eh_frame = EhFrame::from(EndianSlice::new(eh_frame_data, LittleEndian)); eh_frame.set_address_size(8); - Self::try_new(eh_frame, bases, svma_info.base_svma) + Self::try_new(eh_frame, bases, section_info.base_svma()) } - pub fn try_new_debug_frame( + pub fn try_new_debug_frame( debug_frame_data: &[u8], - svma_info: &ModuleSvmaInfo, + section_info: &impl ModuleSectionInfo, ) -> Result { - let bases = base_addresses_for_sections(svma_info); + let bases = base_addresses_for_sections(section_info); let mut debug_frame = DebugFrame::from(EndianSlice::new(debug_frame_data, LittleEndian)); debug_frame.set_address_size(8); - Self::try_new(debug_frame, bases, svma_info.base_svma) + Self::try_new(debug_frame, bases, section_info.base_svma()) } pub fn fde_offset_for_relative_address(&self, rel_lookup_address: u32) -> Option { diff --git a/src/lib.rs b/src/lib.rs index 83e02d1..1165f57 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,33 +47,54 @@ //! //! ## Example //! -//! ```rust -//! # fn test_root_doc_comment() { +//! ``` +//! use std::ops::Range; //! use framehop::aarch64::{CacheAarch64, UnwindRegsAarch64, UnwinderAarch64}; -//! use framehop::{FrameAddress, Module, ModuleSvmaInfo, ModuleUnwindData, TextByteData}; +//! use framehop::{FrameAddress, Module, ModuleSectionInfo}; //! //! let mut cache = CacheAarch64::<_>::new(); //! let mut unwinder = UnwinderAarch64::new(); //! +//! struct FixedSectionInfo; +//! +//! impl ModuleSectionInfo> for FixedSectionInfo { +//! fn base_svma(&self) -> u64 { 0x100000000 } +//! 
fn section_svma_range(&self, name: &[u8]) -> Option> { +//! match name { +//! b"__text" => Some(0x100000b64..0x1001d2d18), +//! b"__stubs" => Some(0x1001d2d18..0x1001d309c), +//! b"__stub_helper" => Some(0x1001d309c..0x1001d3438), +//! b"__eh_frame" => Some(0x100237f80..0x100237ffc), +//! b"__got" => Some(0x100238000..0x100238010), +//! _ => None, +//! } +//! } +//! fn section_file_range(&self, _name: &[u8]) -> Option> { None } +//! fn section_data(&self, name: &[u8]) -> Option> { +//! match name { +//! b"__unwind_info" => Some(vec![/* __unwind_info */]), +//! _ => None, +//! } +//! } +//! fn segment_file_range(&self, name: &[u8]) -> Option> { +//! match name { +//! b"__TEXT" => Some(0x1003fc000..0x100634000), +//! _ => None, +//! } +//! } +//! fn segment_data(&self, name: &[u8]) -> Option> { +//! match name { +//! b"__TEXT" => Some(vec![]), +//! _ => None, +//! } +//! } +//! } +//! //! let module = Module::new( //! "mybinary".to_string(), //! 0x1003fc000..0x100634000, //! 0x1003fc000, -//! ModuleSvmaInfo { -//! base_svma: 0x100000000, -//! text: Some(0x100000b64..0x1001d2d18), -//! text_env: None, -//! stubs: Some(0x1001d2d18..0x1001d309c), -//! stub_helper: Some(0x1001d309c..0x1001d3438), -//! eh_frame: Some(0x100237f80..0x100237ffc), -//! eh_frame_hdr: None, -//! got: Some(0x100238000..0x100238010), -//! }, -//! ModuleUnwindData::CompactUnwindInfoAndEhFrame(vec![/* __unwind_info */], None), -//! Some(TextByteData::new( -//! vec![/* __TEXT */], -//! 0x1003fc000..0x100634000, -//! )), +//! FixedSectionInfo, //! ); //! unwinder.add_module(module); //! @@ -109,7 +130,6 @@ //! FrameAddress::from_return_address(0x1003fc000 + 0x12ca28).unwrap() //! ] //! ); -//! # } //! 
``` mod add_signed; @@ -136,9 +156,7 @@ pub use cache::{AllocationPolicy, MayAllocateDuringUnwind, MustNotAllocateDuring pub use code_address::FrameAddress; pub use error::Error; pub use rule_cache::CacheStats; -pub use unwinder::{ - Module, ModuleSvmaInfo, ModuleUnwindData, TextByteData, UnwindIterator, Unwinder, -}; +pub use unwinder::{Module, ModuleSectionInfo, UnwindIterator, Unwinder}; /// The unwinder cache for the native CPU architecture. #[cfg(target_arch = "aarch64")] diff --git a/src/unwinder.rs b/src/unwinder.rs index 8cfe6b3..f406033 100644 --- a/src/unwinder.rs +++ b/src/unwinder.rs @@ -18,7 +18,6 @@ use crate::FrameAddress; use std::marker::PhantomData; use std::sync::atomic::{AtomicU16, Ordering}; use std::{ - fmt::Debug, ops::{Deref, Range}, sync::Arc, }; @@ -385,32 +384,37 @@ impl< { let is_first_frame = !address.is_return_address(); let unwind_result = match &module.unwind_data { - ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame(unwind_data, eh_frame_data) => { + ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame { + unwind_info, + eh_frame, + stubs, + stub_helper, + base_addresses, + text_data, + } => { // eprintln!("unwinding with cui and eh_frame in module {}", module.name); - let text_bytes = module.text_data.as_ref().and_then(|data| { - let offset_from_base = - u32::try_from(data.avma_range.start.checked_sub(module.base_avma)?).ok()?; + let text_bytes = text_data.as_ref().and_then(|data| { + let offset_from_base = u32::try_from(data.svma_range.start).ok()?; Some(TextBytes::new(offset_from_base, &data.bytes[..])) }); - let stubs_range = if let Some(stubs_range) = &module.svma_info.stubs { + let stubs_range = if let Some(stubs_range) = stubs { ( - (stubs_range.start - module.svma_info.base_svma) as u32, - (stubs_range.end - module.svma_info.base_svma) as u32, + (stubs_range.start - module.base_svma) as u32, + (stubs_range.end - module.base_svma) as u32, + ) + } else { + (0, 0) + }; + let stub_helper_range = if let 
Some(stub_helper_range) = stub_helper { + ( + (stub_helper_range.start - module.base_svma) as u32, + (stub_helper_range.end - module.base_svma) as u32, ) } else { (0, 0) }; - let stub_helper_range = - if let Some(stub_helper_range) = &module.svma_info.stub_helper { - ( - (stub_helper_range.start - module.svma_info.base_svma) as u32, - (stub_helper_range.end - module.svma_info.base_svma) as u32, - ) - } else { - (0, 0) - }; let mut unwinder = CompactUnwindInfoUnwinder::::new( - &unwind_data[..], + &unwind_info[..], text_bytes, stubs_range, stub_helper_range, @@ -420,7 +424,7 @@ impl< match unwind_result { CuiUnwindResult::ExecRule(rule) => UnwindResult::ExecRule(rule), CuiUnwindResult::NeedDwarf(fde_offset) => { - let eh_frame_data = match eh_frame_data { + let eh_frame_data = match eh_frame { Some(data) => ArcData(data.clone()), None => return Err(UnwinderError::NoDwarfData), }; @@ -429,7 +433,8 @@ impl< UnwindSectionType::EhFrame, None, &mut cache.gimli_unwind_context, - &module.svma_info, + base_addresses.clone(), + module.base_svma, ); dwarf_unwinder.unwind_frame_with_fde( regs, @@ -441,15 +446,20 @@ impl< } } } - ModuleUnwindDataInternal::EhFrameHdrAndEhFrame(eh_frame_hdr, eh_frame_data) => { + ModuleUnwindDataInternal::EhFrameHdrAndEhFrame { + eh_frame_hdr, + eh_frame, + base_addresses, + } => { let eh_frame_hdr_data = &eh_frame_hdr[..]; - let eh_frame_data = ArcData(eh_frame_data.clone()); + let eh_frame_data = ArcData(eh_frame.clone()); let mut dwarf_unwinder = DwarfUnwinder::<_, A, P::GimliStorage>::new( EndianReader::new(eh_frame_data, LittleEndian), UnwindSectionType::EhFrame, Some(eh_frame_hdr_data), &mut cache.gimli_unwind_context, - &module.svma_info, + base_addresses.clone(), + module.base_svma, ); let fde_offset = dwarf_unwinder .get_fde_offset_for_relative_address(rel_lookup_address) @@ -462,14 +472,19 @@ impl< read_stack, )? 
} - ModuleUnwindDataInternal::DwarfCfiIndexAndEhFrame(index, eh_frame_data) => { - let eh_frame_data = ArcData(eh_frame_data.clone()); + ModuleUnwindDataInternal::DwarfCfiIndexAndEhFrame { + index, + eh_frame, + base_addresses, + } => { + let eh_frame_data = ArcData(eh_frame.clone()); let mut dwarf_unwinder = DwarfUnwinder::<_, A, P::GimliStorage>::new( EndianReader::new(eh_frame_data, LittleEndian), UnwindSectionType::EhFrame, None, &mut cache.gimli_unwind_context, - &module.svma_info, + base_addresses.clone(), + module.base_svma, ); let fde_offset = index .fde_offset_for_relative_address(rel_lookup_address) @@ -482,14 +497,19 @@ impl< read_stack, )? } - ModuleUnwindDataInternal::DwarfCfiIndexAndDebugFrame(index, debug_frame_data) => { - let debug_frame_data = ArcData(debug_frame_data.clone()); + ModuleUnwindDataInternal::DwarfCfiIndexAndDebugFrame { + index, + debug_frame, + base_addresses, + } => { + let debug_frame_data = ArcData(debug_frame.clone()); let mut dwarf_unwinder = DwarfUnwinder::<_, A, P::GimliStorage>::new( EndianReader::new(debug_frame_data, LittleEndian), UnwindSectionType::DebugFrame, None, &mut cache.gimli_unwind_context, - &module.svma_info, + base_addresses.clone(), + module.base_svma, ); let fde_offset = index .fde_offset_for_relative_address(rel_lookup_address) @@ -520,62 +540,102 @@ impl< /// module, e.g. `Vec`. But it could also be a wrapper around mapped memory from /// a file or a different process, for example. It just needs to provide a slice of /// bytes via its `Deref` implementation. -pub enum ModuleUnwindData> { +enum ModuleUnwindDataInternal> { /// Used on macOS, with mach-O binaries. Compact unwind info is in the `__unwind_info` /// section and is sometimes supplemented with DWARF CFI information in the `__eh_frame` - /// section. - CompactUnwindInfoAndEhFrame(D, Option), + /// section. `__stubs` and `__stub_helper` ranges are used by the unwinder. 
+ CompactUnwindInfoAndEhFrame { + unwind_info: D, + eh_frame: Option>, + stubs: Option>, + stub_helper: Option>, + base_addresses: crate::dwarf::BaseAddresses, + text_data: Option>, + }, /// Used with ELF binaries (Linux and friends), in the `.eh_frame_hdr` and `.eh_frame` /// sections. Contains an index and DWARF CFI. - EhFrameHdrAndEhFrame(D, D), + EhFrameHdrAndEhFrame { + eh_frame_hdr: D, + eh_frame: Arc, + base_addresses: crate::dwarf::BaseAddresses, + }, /// Used with ELF binaries (Linux and friends), in the `.eh_frame` section. Contains /// DWARF CFI. We create a binary index for the FDEs when a module with this unwind /// data type is added. - EhFrame(D), + DwarfCfiIndexAndEhFrame { + index: DwarfCfiIndex, + eh_frame: Arc, + base_addresses: crate::dwarf::BaseAddresses, + }, /// Used with ELF binaries (Linux and friends), in the `.debug_frame` section. Contains /// DWARF CFI. We create a binary index for the FDEs when a module with this unwind /// data type is added. - DebugFrame(D), + DwarfCfiIndexAndDebugFrame { + index: DwarfCfiIndex, + debug_frame: Arc, + base_addresses: crate::dwarf::BaseAddresses, + }, /// No unwind information is used. Unwinding in this module will use a fallback rule /// (usually frame pointer unwinding). 
None, } -enum ModuleUnwindDataInternal> { - CompactUnwindInfoAndEhFrame(D, Option>), - EhFrameHdrAndEhFrame(D, Arc), - DwarfCfiIndexAndEhFrame(DwarfCfiIndex, Arc), - DwarfCfiIndexAndDebugFrame(DwarfCfiIndex, Arc), - None, -} - impl> ModuleUnwindDataInternal { - fn new(unwind_data: ModuleUnwindData, svma_info: &ModuleSvmaInfo) -> Self { - match unwind_data { - ModuleUnwindData::CompactUnwindInfoAndEhFrame(cui, eh_frame) => { - ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame(cui, eh_frame.map(Arc::new)) - } - ModuleUnwindData::EhFrameHdrAndEhFrame(eh_frame_hdr, eh_frame) => { - ModuleUnwindDataInternal::EhFrameHdrAndEhFrame(eh_frame_hdr, Arc::new(eh_frame)) + fn new(section_info: &impl ModuleSectionInfo) -> Self { + use crate::dwarf::base_addresses_for_sections; + + if let Some(unwind_info) = section_info.section_data(b"__unwind_info") { + let eh_frame = section_info.section_data(b"__eh_frame"); + let stubs = section_info.section_svma_range(b"__stubs"); + let stub_helper = section_info.section_svma_range(b"__stub_helper"); + const TEXT_SECTIONS: &[&[u8]] = &[b"__text", b".text"]; + let text_data = section_info + .segment_data(b"__TEXT") + .zip(section_info.segment_file_range(b"__TEXT")) + .or_else(|| { + TEXT_SECTIONS.into_iter().find_map(|name| { + section_info + .section_data(name) + .zip(section_info.section_file_range(name)) + }) + }) + .map(|(bytes, svma_range)| TextByteData { bytes, svma_range }); + ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame { + unwind_info, + eh_frame: eh_frame.map(Arc::new), + stubs, + stub_helper, + base_addresses: base_addresses_for_sections(section_info), + text_data, } - ModuleUnwindData::EhFrame(eh_frame) => { - match DwarfCfiIndex::try_new_eh_frame(&eh_frame, svma_info) { - Ok(index) => { - ModuleUnwindDataInternal::DwarfCfiIndexAndEhFrame(index, Arc::new(eh_frame)) - } - Err(_) => ModuleUnwindDataInternal::None, + } else if let Some(eh_frame) = section_info.section_data(b".eh_frame") { + if let Some(eh_frame_hdr) = 
section_info.section_data(b".eh_frame_hdr") { + ModuleUnwindDataInternal::EhFrameHdrAndEhFrame { + eh_frame_hdr, + eh_frame: Arc::new(eh_frame), + base_addresses: base_addresses_for_sections(section_info), } - } - ModuleUnwindData::DebugFrame(debug_frame) => { - match DwarfCfiIndex::try_new_debug_frame(&debug_frame, svma_info) { - Ok(index) => ModuleUnwindDataInternal::DwarfCfiIndexAndDebugFrame( + } else { + match DwarfCfiIndex::try_new_eh_frame(&eh_frame, section_info) { + Ok(index) => ModuleUnwindDataInternal::DwarfCfiIndexAndEhFrame { index, - Arc::new(debug_frame), - ), + eh_frame: Arc::new(eh_frame), + base_addresses: base_addresses_for_sections(section_info), + }, Err(_) => ModuleUnwindDataInternal::None, } } - ModuleUnwindData::None => ModuleUnwindDataInternal::None, + } else if let Some(debug_frame) = section_info.section_data(b".debug_frame") { + match DwarfCfiIndex::try_new_debug_frame(&debug_frame, section_info) { + Ok(index) => ModuleUnwindDataInternal::DwarfCfiIndexAndDebugFrame { + index, + debug_frame: Arc::new(debug_frame), + base_addresses: base_addresses_for_sections(section_info), + }, + Err(_) => ModuleUnwindDataInternal::None, + } + } else { + ModuleUnwindDataInternal::None } } } @@ -597,32 +657,9 @@ impl> ModuleUnwindDataInternal { /// module, e.g. `Vec`. But it could also be a wrapper around mapped memory from /// a file or a different process, for example. It just needs to provide a slice of /// bytes via its `Deref` implementation. -pub struct TextByteData> { - bytes: D, - avma_range: Range, -} - -impl> TextByteData { - /// Supply the bytes which cover `avma_range` in the process virtual memory. - /// Both arguments should have the same length. - pub fn new(bytes: D, avma_range: Range) -> Self { - Self { bytes, avma_range } - } - - /// Return a byte slice for the requested range, if in-bounds. 
- pub fn get_bytes(&self, avma_range: Range) -> Option<&[u8]> { - let rel_start = avma_range.start.checked_sub(self.avma_range.start)?; - let rel_start = usize::try_from(rel_start).ok()?; - let rel_end = avma_range.end.checked_sub(self.avma_range.start)?; - let rel_end = usize::try_from(rel_end).ok()?; - self.bytes.get(rel_start..rel_end) - } - - /// The address range covered by the supplied bytes, in process virtual memory. - /// "Actual virtual memory address range" - pub fn avma_range(&self) -> Range { - self.avma_range.clone() - } +struct TextByteData> { + pub bytes: D, + pub svma_range: Range, } /// Information about a module that is loaded in a process. You might know this under a @@ -647,69 +684,96 @@ pub struct Module> { /// The base address of this module, in the process's address space. On Linux, the base /// address can sometimes be different from the start address of the mapped range. base_avma: u64, - /// Information about various addresses in the module. - svma_info: ModuleSvmaInfo, + /// The base address of this module, according to the module. + base_svma: u64, /// The unwind data that should be used for unwinding addresses from this module. unwind_data: ModuleUnwindDataInternal, - /// The raw assembly bytes of this module. Used for instruction analysis to ensure - /// correct unwinding inside function prologues and epilogues. - text_data: Option>, } -/// The addresses of various sections in the module. -/// -/// These are SVMAs, "stated virtual memory addresses", i.e. addresses as stated -/// in the object, as opposed to AVMAs, "actual virtual memory addresses", i.e. addresses -/// in the virtual memory of the profiled process. +/// Type arguments: /// -/// Code addresses inside a module's unwind information are usually written down as SVMAs, -/// or as relative addresses. For example, DWARF CFI can have code addresses expressed as -/// relative-to-.text addresses or as absolute SVMAs. 
And mach-O compact unwind info -/// contains addresses relative to the image base address. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct ModuleSvmaInfo { - /// The image base address, as stated in the object. For mach-O objects, this is the - /// vmaddr of the `__TEXT` segment. For ELF objects, this is zero. - /// - /// This is used to convert between SVMAs and relative addresses. - pub base_svma: u64, - /// The address range of the `__text` or `.text` section. This is where most of the - /// compiled code is stored. - /// - /// This is used to detect whether we need to do instruction analysis for an address. - pub text: Option>, - /// The address range of the `text_env` section, if present. If present, this contains - /// functions which have been marked as "cold". It stores executable code, just like - /// the text section. - /// - /// This is used to detect whether we need to do instruction analysis for an address. - pub text_env: Option>, - /// The address range of the mach-O `__stubs` section. Contains small pieces of - /// executable code for calling imported functions. Code inside this section is not - /// covered by the unwind information in `__unwind_info`. +/// - `D`: The type for section data. This allows carrying owned data on the module, e.g. +/// `Vec`. But it could also be a wrapper around mapped memory from a file or a different +/// process, for example. +pub trait ModuleSectionInfo { + /// Return the base address stated in the module. /// - /// This is used to exclude addresses in this section from incorrectly applying - /// `__unwind_info` opcodes. It is also used to infer unwind rules for the known - /// structure of stub functions. - pub stubs: Option>, - /// The address range of the mach-O `__stub_helper` section. Contains small pieces of - /// executable code for calling imported functions. Code inside this section is not - /// covered by the unwind information in `__unwind_info`. 
+ /// For mach-O objects, this is the vmaddr of the __TEXT segment. For ELF objects, this is + /// zero. For PE objects, this is the image base address. /// - /// This is used to exclude addresses in this section from incorrectly applying - /// `__unwind_info` opcodes. It is also used to infer unwind rules for the known - /// structure of stub helper - /// functions. - pub stub_helper: Option>, - /// The address range of the `__eh_frame` or `.eh_frame` section. This is used during - /// DWARF CFI processing, to resolve eh_frame-relative addresses. - pub eh_frame: Option>, - /// The address range of the `.eh_frame_hdr` section. This is used during - /// DWARF CFI processing, to resolve eh_frame_hdr-relative addresses. - pub eh_frame_hdr: Option>, - /// The address range of the `.got` section (Global Offset Table). This is used - /// during DWARF CFI processing, to resolve got-relative addresses. - pub got: Option>, + /// This is used to convert between SVMAs and relative addresses. + fn base_svma(&self) -> u64; + + /// Get the given section's memory range, as stated in the module. + fn section_svma_range(&self, name: &[u8]) -> Option>; + + /// Get the given section's file range in the module. + fn section_file_range(&self, name: &[u8]) -> Option>; + + /// Get the given section's data. + fn section_data(&self, name: &[u8]) -> Option; + + /// Get the given segment's file range in the module. + fn segment_file_range(&self, _name: &[u8]) -> Option> { + None + } + + /// Get the given segment's data. 
+ fn segment_data(&self, _name: &[u8]) -> Option { + None + } +} + +#[cfg(feature = "object")] +mod object { + use super::{ModuleSectionInfo, Range}; + use object::read::{Object, ObjectSection, ObjectSegment}; + + impl<'data: 'file, 'file, O, D> ModuleSectionInfo for &'file O + where + O: Object<'data, 'file>, + D: From<&'data [u8]>, + { + fn base_svma(&self) -> u64 { + if let Some(text_segment) = self.segments().find(|s| s.name() == Ok(Some("__TEXT"))) { + // This is a mach-O image. "Relative addresses" are relative to the + // vmaddr of the __TEXT segment. + return text_segment.address(); + } + + // For PE binaries, relative_address_base() returns the image base address. + // Otherwise it returns zero. This gives regular ELF images a base address of zero, + // which is what we want. + self.relative_address_base() + } + + fn section_svma_range(&self, name: &[u8]) -> Option> { + let section = self.section_by_name_bytes(name)?; + Some(section.address()..section.address() + section.size()) + } + + fn section_file_range(&self, name: &[u8]) -> Option> { + let section = self.section_by_name_bytes(name)?; + let (start, size) = section.file_range()?; + Some(start..start + size) + } + + fn section_data(&self, name: &[u8]) -> Option { + let section = self.section_by_name_bytes(name)?; + section.data().ok().map(|data| data.into()) + } + + fn segment_file_range(&self, name: &[u8]) -> Option> { + let segment = self.segments().find(|s| s.name_bytes() == Ok(Some(name)))?; + let (start, size) = segment.file_range(); + Some(start..start + size) + } + + fn segment_data(&self, name: &[u8]) -> Option { + let segment = self.segments().find(|s| s.name_bytes() == Ok(Some(name)))?; + segment.data().ok().map(|data| data.into()) + } + } } impl> Module { @@ -717,18 +781,16 @@ impl> Module { name: String, avma_range: std::ops::Range, base_avma: u64, - svma_info: ModuleSvmaInfo, - unwind_data: ModuleUnwindData, - text_data: Option>, + section_info: impl ModuleSectionInfo, ) -> Self { - let 
unwind_data = ModuleUnwindDataInternal::new(unwind_data, &svma_info); + let unwind_data = ModuleUnwindDataInternal::new(&section_info); + Self { name, avma_range, base_avma, - svma_info, + base_svma: section_info.base_svma(), unwind_data, - text_data, } } } diff --git a/tests/integration_tests/common.rs b/tests/integration_tests/common.rs index bc7f50f..935878c 100644 --- a/tests/integration_tests/common.rs +++ b/tests/integration_tests/common.rs @@ -12,97 +12,65 @@ where let mut file = std::fs::File::open(objpath).unwrap(); file.read_to_end(&mut buf).unwrap(); - fn section_data<'a>(section: &impl ObjectSection<'a>) -> Option> { - section.data().ok().map(|data| data.to_owned()) - } - let file = object::File::parse(&buf[..]).expect("Could not parse object file"); - let base_svma = relative_address_base(&file); - - let text = file.section_by_name(".text"); - let stubs = file.section_by_name("__stubs"); - let stub_helper = file.section_by_name("__stub_helper"); - let text_env = file.section_by_name("__text_env"); - let unwind_info = file.section_by_name("__unwind_info"); - let eh_frame = file.section_by_name(".eh_frame"); - let got = file.section_by_name(".got"); - let eh_frame_hdr = file.section_by_name(".eh_frame_hdr"); - let debug_frame = file - .section_by_name(".debug_frame") - .or_else(|| file.section_by_name("__zdebug_frame")); - - let unwind_data = match ( - unwind_info.as_ref().and_then(section_data), - eh_frame.as_ref().and_then(section_data), - eh_frame_hdr.as_ref().and_then(section_data), - debug_frame, - ) { - (Some(unwind_info), eh_frame, _, _) => { - framehop::ModuleUnwindData::CompactUnwindInfoAndEhFrame(unwind_info, eh_frame) + struct Module<'a>(object::File<'a, &'a [u8]>); + + impl ModuleSectionInfo> for Module<'_> { + fn base_svma(&self) -> u64 { + relative_address_base(&self.0) } - (None, Some(eh_frame), Some(eh_frame_hdr), _) => { - framehop::ModuleUnwindData::EhFrameHdrAndEhFrame(eh_frame_hdr, eh_frame) + + fn section_svma_range(&self, name: &[u8]) 
-> Option> { + let section = self.0.section_by_name_bytes(name)?; + Some(section.address()..section.address() + section.size()) + } + + fn section_file_range(&self, name: &[u8]) -> Option> { + let section = self.0.section_by_name_bytes(name)?; + let (start, size) = section.file_range()?; + Some(start..start + size) } - (None, Some(eh_frame), None, _) => framehop::ModuleUnwindData::EhFrame(eh_frame), - (None, None, _, Some(debug_frame)) => { - eprintln!("Have debug_frame!"); - if let Some(section_data) = get_uncompressed_section_data(&debug_frame) { - let debug_frame_data = Vec::from(section_data); - framehop::ModuleUnwindData::DebugFrame(debug_frame_data) - } else { - framehop::ModuleUnwindData::None + + fn section_data(&self, name: &[u8]) -> Option> { + match self.0.section_by_name_bytes(name) { + Some(section) => section.data().ok().map(|data| data.to_owned()), + None if name == b".debug_frame" => { + let section = self.0.section_by_name_bytes(b"__zdebug_frame")?; + get_uncompressed_section_data(&section).map(|d| d.into_owned()) + } + None => None, } } - (None, None, _, _) => framehop::ModuleUnwindData::None, - }; - let text_data = if let Some(text_segment) = file - .segments() - .find(|segment| segment.name_bytes() == Ok(Some(b"__TEXT"))) - { - let (start, size) = text_segment.file_range(); - let avma_range = base_avma + start..base_avma + start + size; - text_segment - .data() - .ok() - .map(|data| TextByteData::new(data.to_owned(), avma_range)) - } else if let Some(text_section) = &text { - if let Some((start, size)) = text_section.file_range() { - let avma_range = base_avma + start..base_avma + start + size; - text_section - .data() - .ok() - .map(|data| TextByteData::new(data.to_owned(), avma_range)) - } else { - None + fn segment_file_range(&self, name: &[u8]) -> Option> { + let segment = self + .0 + .segments() + .find(|s| s.name_bytes() == Ok(Some(name)))?; + let (start, size) = segment.file_range(); + Some(start..start + size) } - } else { - None - }; - fn 
svma_range<'a>(section: &Option>) -> Option> { - section - .as_ref() - .map(|section| section.address()..section.address() + section.size()) + fn segment_data(&self, name: &[u8]) -> Option> { + let segment = self + .0 + .segments() + .find(|s| s.name_bytes() == Ok(Some(name)))?; + segment.data().ok().map(|data| data.to_owned()) + } } + #[cfg(not(feature = "object"))] + let section_info = Module(file); + #[cfg(feature = "object")] + let section_info = &file; + let module = framehop::Module::new( objpath.to_string_lossy().to_string(), base_avma..(base_avma + buf.len() as u64), base_avma, - ModuleSvmaInfo { - base_svma, - text: svma_range(&text), - text_env: svma_range(&text_env), - stubs: svma_range(&stubs), - stub_helper: svma_range(&stub_helper), - eh_frame: svma_range(&eh_frame), - eh_frame_hdr: svma_range(&eh_frame_hdr), - got: svma_range(&got), - }, - unwind_data, - text_data, + section_info, ); unwinder.add_module(module); } diff --git a/tests/integration_tests/macos.rs b/tests/integration_tests/macos.rs index 5a0fcb5..2e8623b 100644 --- a/tests/integration_tests/macos.rs +++ b/tests/integration_tests/macos.rs @@ -76,7 +76,60 @@ fn test_basic() { #[test] fn test_root_doc_comment() { use framehop::aarch64::{CacheAarch64, UnwindRegsAarch64, UnwinderAarch64}; - use framehop::{FrameAddress, Module, ModuleSvmaInfo, ModuleUnwindData, TextByteData}; + use framehop::{FrameAddress, Module, ModuleSectionInfo}; + + struct ModuleSvmaInfo { + base_svma: u64, + text: std::ops::Range, + stubs: std::ops::Range, + stub_helper: std::ops::Range, + eh_frame: std::ops::Range, + got: std::ops::Range, + text_segment: std::ops::Range, + } + + impl ModuleSectionInfo> for ModuleSvmaInfo { + fn base_svma(&self) -> u64 { + self.base_svma + } + + fn section_svma_range(&self, name: &[u8]) -> Option> { + match name { + b"__text" => Some(self.text.clone()), + b"__stubs" => Some(self.stubs.clone()), + b"__stub_helper" => Some(self.stub_helper.clone()), + b"__eh_frame" => 
Some(self.eh_frame.clone()), + b"__got" => Some(self.got.clone()), + _ => None, + } + } + + fn section_file_range(&self, _name: &[u8]) -> Option> { + None + } + + fn section_data(&self, name: &[u8]) -> Option> { + match name { + b"__text" => Some(vec![]), + _ => None, + } + } + + fn segment_file_range(&self, name: &[u8]) -> Option> { + match name { + b"__TEXT" => Some(self.text_segment.clone()), + _ => None, + } + } + + fn segment_data(&self, name: &[u8]) -> Option> { + match name { + b"__TEXT" => Some(vec![]), + _ => None, + } + } + } + let mut cache = CacheAarch64::<_>::new(); let mut unwinder = UnwinderAarch64::new(); let module = Module::new( @@ -85,19 +138,13 @@ fn test_root_doc_comment() { 0x1003fc000, ModuleSvmaInfo { base_svma: 0x100000000, - text: Some(0x100000b64..0x1001d2d18), - text_env: None, - stubs: Some(0x1001d2d18..0x1001d309c), - stub_helper: Some(0x1001d309c..0x1001d3438), - eh_frame: Some(0x100237f80..0x100237ffc), - eh_frame_hdr: None, - got: Some(0x100238000..0x100238010), + text: 0x100000b64..0x1001d2d18, + stubs: 0x1001d2d18..0x1001d309c, + stub_helper: 0x1001d309c..0x1001d3438, + eh_frame: 0x100237f80..0x100237ffc, + got: 0x100238000..0x100238010, + text_segment: 0x1003fc000..0x100634000, }, - ModuleUnwindData::CompactUnwindInfoAndEhFrame(vec![/* __unwind_info */], None), - Some(TextByteData::new( - vec![/* __TEXT */], - 0x1003fc000..0x100634000, - )), ); unwinder.add_module(module); From b20d751cc45b3bcd892acf66d757856709330ef4 Mon Sep 17 00:00:00 2001 From: Alex Franchuk Date: Tue, 25 Jul 2023 10:54:03 -0400 Subject: [PATCH 2/5] Add the ExplicitModuleSectionInfo struct. 
--- src/dwarf.rs | 10 +- src/lib.rs | 56 ++++------- src/unwinder.rs | 152 +++++++++++++++++++++++++++--- tests/integration_tests/common.rs | 10 +- tests/integration_tests/macos.rs | 71 +++----------- 5 files changed, 177 insertions(+), 122 deletions(-) diff --git a/src/dwarf.rs b/src/dwarf.rs index a465219..50ccc46 100644 --- a/src/dwarf.rs +++ b/src/dwarf.rs @@ -183,11 +183,11 @@ impl<'a, R: Reader, A: DwarfUnwinding, S: UnwindContextStorage + EvaluationSt } pub(crate) fn base_addresses_for_sections( - section_info: &impl ModuleSectionInfo, + section_info: &mut impl ModuleSectionInfo, ) -> BaseAddresses { - let start_addr = |names: &[&[u8]]| -> u64 { + let mut start_addr = |names: &[&[u8]]| -> u64 { names - .into_iter() + .iter() .find_map(|name| section_info.section_svma_range(name)) .map(|r| r.start) .unwrap_or_default() @@ -285,7 +285,7 @@ impl DwarfCfiIndex { pub fn try_new_eh_frame( eh_frame_data: &[u8], - section_info: &impl ModuleSectionInfo, + section_info: &mut impl ModuleSectionInfo, ) -> Result { let bases = base_addresses_for_sections(section_info); let mut eh_frame = EhFrame::from(EndianSlice::new(eh_frame_data, LittleEndian)); @@ -296,7 +296,7 @@ impl DwarfCfiIndex { pub fn try_new_debug_frame( debug_frame_data: &[u8], - section_info: &impl ModuleSectionInfo, + section_info: &mut impl ModuleSectionInfo, ) -> Result { let bases = base_addresses_for_sections(section_info); let mut debug_frame = DebugFrame::from(EndianSlice::new(debug_frame_data, LittleEndian)); diff --git a/src/lib.rs b/src/lib.rs index 1165f57..f5af49f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -50,51 +50,29 @@ //! ``` //! use std::ops::Range; //! use framehop::aarch64::{CacheAarch64, UnwindRegsAarch64, UnwinderAarch64}; -//! use framehop::{FrameAddress, Module, ModuleSectionInfo}; +//! use framehop::{ExplicitModuleSectionInfo, FrameAddress, Module, ModuleSectionInfo}; //! //! let mut cache = CacheAarch64::<_>::new(); //! let mut unwinder = UnwinderAarch64::new(); //! -//! 
struct FixedSectionInfo; -//! -//! impl ModuleSectionInfo> for FixedSectionInfo { -//! fn base_svma(&self) -> u64 { 0x100000000 } -//! fn section_svma_range(&self, name: &[u8]) -> Option> { -//! match name { -//! b"__text" => Some(0x100000b64..0x1001d2d18), -//! b"__stubs" => Some(0x1001d2d18..0x1001d309c), -//! b"__stub_helper" => Some(0x1001d309c..0x1001d3438), -//! b"__eh_frame" => Some(0x100237f80..0x100237ffc), -//! b"__got" => Some(0x100238000..0x100238010), -//! _ => None, -//! } -//! } -//! fn section_file_range(&self, _name: &[u8]) -> Option> { None } -//! fn section_data(&self, name: &[u8]) -> Option> { -//! match name { -//! b"__unwind_info" => Some(vec![/* __unwind_info */]), -//! _ => None, -//! } -//! } -//! fn segment_file_range(&self, name: &[u8]) -> Option> { -//! match name { -//! b"__TEXT" => Some(0x1003fc000..0x100634000), -//! _ => None, -//! } -//! } -//! fn segment_data(&self, name: &[u8]) -> Option> { -//! match name { -//! b"__TEXT" => Some(vec![]), -//! _ => None, -//! } -//! } -//! } -//! //! let module = Module::new( //! "mybinary".to_string(), //! 0x1003fc000..0x100634000, //! 0x1003fc000, -//! FixedSectionInfo, +//! ExplicitModuleSectionInfo { +//! base_svma: 0x100000000, +//! text_svma: Some(0x100000b64..0x1001d2d18), +//! text: Some(vec![/* __text */]), +//! stubs_svma: Some(0x1001d2d18..0x1001d309c), +//! stub_helper_svma: Some(0x1001d309c..0x1001d3438), +//! got_svma: Some(0x100238000..0x100238010), +//! unwind_info: Some(vec![/* __unwind_info */]), +//! eh_frame_svma: Some(0x100237f80..0x100237ffc), +//! eh_frame: Some(vec![/* __eh_frame */]), +//! text_segment_file_range: Some(0x1003fc000..0x100634000), +//! text_segment: Some(vec![/* __TEXT */]), +//! ..Default::default() +//! }, //! ); //! unwinder.add_module(module); //! 
@@ -156,7 +134,9 @@ pub use cache::{AllocationPolicy, MayAllocateDuringUnwind, MustNotAllocateDuring pub use code_address::FrameAddress; pub use error::Error; pub use rule_cache::CacheStats; -pub use unwinder::{Module, ModuleSectionInfo, UnwindIterator, Unwinder}; +pub use unwinder::{ + ExplicitModuleSectionInfo, Module, ModuleSectionInfo, UnwindIterator, Unwinder, +}; /// The unwinder cache for the native CPU architecture. #[cfg(target_arch = "aarch64")] diff --git a/src/unwinder.rs b/src/unwinder.rs index f406033..8ce8359 100644 --- a/src/unwinder.rs +++ b/src/unwinder.rs @@ -581,7 +581,7 @@ enum ModuleUnwindDataInternal> { } impl> ModuleUnwindDataInternal { - fn new(section_info: &impl ModuleSectionInfo) -> Self { + fn new(section_info: &mut impl ModuleSectionInfo) -> Self { use crate::dwarf::base_addresses_for_sections; if let Some(unwind_info) = section_info.section_data(b"__unwind_info") { @@ -593,7 +593,7 @@ impl> ModuleUnwindDataInternal { .segment_data(b"__TEXT") .zip(section_info.segment_file_range(b"__TEXT")) .or_else(|| { - TEXT_SECTIONS.into_iter().find_map(|name| { + TEXT_SECTIONS.iter().find_map(|name| { section_info .section_data(name) .zip(section_info.section_file_range(name)) @@ -690,6 +690,12 @@ pub struct Module> { unwind_data: ModuleUnwindDataInternal, } +/// Information about a module's sections (and segments). +/// +/// This trait is used as an interface to module information, and each function with `&mut self` is +/// called at most once with a particular argument (e.g., `section_data(b".text")` will be called +/// at most once, so it can move data out of the underlying type if desired). +/// /// Type arguments: /// /// - `D`: The type for section data. This allows carrying owned data on the module, e.g. @@ -705,23 +711,141 @@ pub trait ModuleSectionInfo { fn base_svma(&self) -> u64; /// Get the given section's memory range, as stated in the module. 
- fn section_svma_range(&self, name: &[u8]) -> Option>; + fn section_svma_range(&mut self, name: &[u8]) -> Option>; /// Get the given section's file range in the module. - fn section_file_range(&self, name: &[u8]) -> Option>; + fn section_file_range(&mut self, name: &[u8]) -> Option>; /// Get the given section's data. - fn section_data(&self, name: &[u8]) -> Option; + fn section_data(&mut self, name: &[u8]) -> Option; /// Get the given segment's file range in the module. - fn segment_file_range(&self, _name: &[u8]) -> Option> { + fn segment_file_range(&mut self, _name: &[u8]) -> Option> { None } /// Get the given segment's data. - fn segment_data(&self, _name: &[u8]) -> Option { + fn segment_data(&mut self, _name: &[u8]) -> Option { + None + } +} + +/// Explicit addresses and data of various sections in the module. This implements +/// the `ModuleSectionInfo` trait. +/// +/// Unless otherwise stated, these are SVMAs, "stated virtual memory addresses", i.e. addresses as +/// stated in the object, as opposed to AVMAs, "actual virtual memory addresses", i.e. addresses in +/// the virtual memory of the profiled process. +/// +/// Code addresses inside a module's unwind information are usually written down as SVMAs, +/// or as relative addresses. For example, DWARF CFI can have code addresses expressed as +/// relative-to-.text addresses or as absolute SVMAs. And mach-O compact unwind info +/// contains addresses relative to the image base address. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct ExplicitModuleSectionInfo { + /// The image base address, as stated in the object. For mach-O objects, this is the + /// vmaddr of the `__TEXT` segment. For ELF objects, this is zero. + /// + /// This is used to convert between SVMAs and relative addresses. + pub base_svma: u64, + /// The address range of the `__text` or `.text` section. This is where most of the compiled + /// code is stored. 
+ /// + /// This is used to detect whether we need to do instruction analysis for an address. + pub text_svma: Option>, + /// The data of the `__text` or `.text` section. This is where most of the compiled code is + /// stored. + /// + /// This is used to handle function prologues and epilogues in some cases. + pub text: Option, + /// The address range of the mach-O `__stubs` section. Contains small pieces of + /// executable code for calling imported functions. Code inside this section is not + /// covered by the unwind information in `__unwind_info`. + /// + /// This is used to exclude addresses in this section from incorrectly applying + /// `__unwind_info` opcodes. It is also used to infer unwind rules for the known + /// structure of stub functions. + pub stubs_svma: Option>, + /// The address range of the mach-O `__stub_helper` section. Contains small pieces of + /// executable code for calling imported functions. Code inside this section is not + /// covered by the unwind information in `__unwind_info`. + /// + /// This is used to exclude addresses in this section from incorrectly applying + /// `__unwind_info` opcodes. It is also used to infer unwind rules for the known + /// structure of stub helper + /// functions. + pub stub_helper_svma: Option>, + /// The address range of the `.got` section (Global Offset Table). This is used + /// during DWARF CFI processing, to resolve got-relative addresses. + pub got_svma: Option>, + /// The data of the `__unwind_info` section of mach-O binaries. + pub unwind_info: Option, + /// The address range of the `__eh_frame` or `.eh_frame` section. This is used during DWARF CFI + /// processing, to resolve eh_frame-relative addresses. + pub eh_frame_svma: Option>, + /// The data of the `__eh_frame` or `.eh_frame` section. This is used during DWARF CFI + /// processing, to resolve eh_frame-relative addresses. + pub eh_frame: Option, + /// The address range of the `.eh_frame_hdr` section. 
This is used during DWARF CFI processing, + /// to resolve eh_frame_hdr-relative addresses. + pub eh_frame_hdr_svma: Option>, + /// The data of the `.eh_frame_hdr` section. This is used during DWARF CFI processing, to + /// resolve eh_frame_hdr-relative addresses. + pub eh_frame_hdr: Option, + /// The data of the `.debug_frame` section. The related address range is not needed. + pub debug_frame: Option, + /// The file range of the `__TEXT` segment of mach-O binaries, or the `__text` section if the + /// segment is unavailable. + pub text_segment_file_range: Option>, + /// The data of the `__TEXT` segment of mach-O binaries, or the `__text` section if the segment + /// is unavailable. + pub text_segment: Option, +} + +impl ModuleSectionInfo for ExplicitModuleSectionInfo +where + D: Deref, +{ + fn base_svma(&self) -> u64 { + self.base_svma + } + + fn section_svma_range(&mut self, name: &[u8]) -> Option> { + match name { + b"__text" | b".text" => self.text_svma.take(), + b"__stubs" => self.stubs_svma.take(), + b"__stub_helper" => self.stub_helper_svma.take(), + b"__eh_frame" | b".eh_frame" => self.eh_frame_svma.take(), + b"__eh_frame_hdr" | b".eh_frame_hdr" => self.eh_frame_hdr_svma.take(), + b"__got" | b".got" => self.got_svma.take(), + _ => None, + } + } + fn section_file_range(&mut self, _name: &[u8]) -> Option> { None } + fn section_data(&mut self, name: &[u8]) -> Option { + match name { + b"__text" | b".text" => self.text.take(), + b"__unwind_info" => self.unwind_info.take(), + b"__eh_frame" | b".eh_frame" => self.eh_frame.take(), + b"__eh_frame_hdr" | b".eh_frame_hdr" => self.eh_frame_hdr.take(), + b"__debug_frame" | b".debug_frame" => self.debug_frame.take(), + _ => None, + } + } + fn segment_file_range(&mut self, name: &[u8]) -> Option> { + match name { + b"__TEXT" => self.text_segment_file_range.take(), + _ => None, + } + } + fn segment_data(&mut self, name: &[u8]) -> Option { + match name { + b"__TEXT" => self.text_segment.take(), + _ => None, + } + } } 
#[cfg(feature = "object")] @@ -747,29 +871,29 @@ mod object { self.relative_address_base() } - fn section_svma_range(&self, name: &[u8]) -> Option> { + fn section_svma_range(&mut self, name: &[u8]) -> Option> { let section = self.section_by_name_bytes(name)?; Some(section.address()..section.address() + section.size()) } - fn section_file_range(&self, name: &[u8]) -> Option> { + fn section_file_range(&mut self, name: &[u8]) -> Option> { let section = self.section_by_name_bytes(name)?; let (start, size) = section.file_range()?; Some(start..start + size) } - fn section_data(&self, name: &[u8]) -> Option { + fn section_data(&mut self, name: &[u8]) -> Option { let section = self.section_by_name_bytes(name)?; section.data().ok().map(|data| data.into()) } - fn segment_file_range(&self, name: &[u8]) -> Option> { + fn segment_file_range(&mut self, name: &[u8]) -> Option> { let segment = self.segments().find(|s| s.name_bytes() == Ok(Some(name)))?; let (start, size) = segment.file_range(); Some(start..start + size) } - fn segment_data(&self, name: &[u8]) -> Option { + fn segment_data(&mut self, name: &[u8]) -> Option { let segment = self.segments().find(|s| s.name_bytes() == Ok(Some(name)))?; segment.data().ok().map(|data| data.into()) } @@ -781,9 +905,9 @@ impl> Module { name: String, avma_range: std::ops::Range, base_avma: u64, - section_info: impl ModuleSectionInfo, + mut section_info: impl ModuleSectionInfo, ) -> Self { - let unwind_data = ModuleUnwindDataInternal::new(§ion_info); + let unwind_data = ModuleUnwindDataInternal::new(&mut section_info); Self { name, diff --git a/tests/integration_tests/common.rs b/tests/integration_tests/common.rs index 935878c..aa2815b 100644 --- a/tests/integration_tests/common.rs +++ b/tests/integration_tests/common.rs @@ -21,18 +21,18 @@ where relative_address_base(&self.0) } - fn section_svma_range(&self, name: &[u8]) -> Option> { + fn section_svma_range(&mut self, name: &[u8]) -> Option> { let section = 
self.0.section_by_name_bytes(name)?; Some(section.address()..section.address() + section.size()) } - fn section_file_range(&self, name: &[u8]) -> Option> { + fn section_file_range(&mut self, name: &[u8]) -> Option> { let section = self.0.section_by_name_bytes(name)?; let (start, size) = section.file_range()?; Some(start..start + size) } - fn section_data(&self, name: &[u8]) -> Option> { + fn section_data(&mut self, name: &[u8]) -> Option> { match self.0.section_by_name_bytes(name) { Some(section) => section.data().ok().map(|data| data.to_owned()), None if name == b".debug_frame" => { @@ -43,7 +43,7 @@ where } } - fn segment_file_range(&self, name: &[u8]) -> Option> { + fn segment_file_range(&mut self, name: &[u8]) -> Option> { let segment = self .0 .segments() @@ -52,7 +52,7 @@ where Some(start..start + size) } - fn segment_data(&self, name: &[u8]) -> Option> { + fn segment_data(&mut self, name: &[u8]) -> Option> { let segment = self .0 .segments() diff --git a/tests/integration_tests/macos.rs b/tests/integration_tests/macos.rs index 2e8623b..aef95bb 100644 --- a/tests/integration_tests/macos.rs +++ b/tests/integration_tests/macos.rs @@ -76,59 +76,7 @@ fn test_basic() { #[test] fn test_root_doc_comment() { use framehop::aarch64::{CacheAarch64, UnwindRegsAarch64, UnwinderAarch64}; - use framehop::{FrameAddress, Module, ModuleSectionInfo}; - - struct ModuleSvmaInfo { - base_svma: u64, - text: std::ops::Range, - stubs: std::ops::Range, - stub_helper: std::ops::Range, - eh_frame: std::ops::Range, - got: std::ops::Range, - text_segment: std::ops::Range, - } - - impl ModuleSectionInfo> for ModuleSvmaInfo { - fn base_svma(&self) -> u64 { - self.base_svma - } - - fn section_svma_range(&self, name: &[u8]) -> Option> { - match name { - b"__text" => Some(self.text.clone()), - b"__stubs" => Some(self.stubs.clone()), - b"__stub_helper" => Some(self.stub_helper.clone()), - b"__eh_frame" => Some(self.eh_frame.clone()), - b"__got" => Some(self.got.clone()), - _ => None, - } - } - 
- fn section_file_range(&self, _name: &[u8]) -> Option> { - None - } - - fn section_data(&self, name: &[u8]) -> Option> { - match name { - b"__text" => Some(vec![]), - _ => None, - } - } - - fn segment_file_range(&self, name: &[u8]) -> Option> { - match name { - b"__TEXT" => Some(self.text_segment.clone()), - _ => None, - } - } - - fn segment_data(&self, name: &[u8]) -> Option> { - match name { - b"__TEXT" => Some(vec![]), - _ => None, - } - } - } + use framehop::{ExplicitModuleSectionInfo, FrameAddress, Module}; let mut cache = CacheAarch64::<_>::new(); let mut unwinder = UnwinderAarch64::new(); @@ -136,14 +84,17 @@ fn test_root_doc_comment() { "mybinary".to_string(), 0x1003fc000..0x100634000, 0x1003fc000, - ModuleSvmaInfo { + ExplicitModuleSectionInfo { base_svma: 0x100000000, - text: 0x100000b64..0x1001d2d18, - stubs: 0x1001d2d18..0x1001d309c, - stub_helper: 0x1001d309c..0x1001d3438, - eh_frame: 0x100237f80..0x100237ffc, - got: 0x100238000..0x100238010, - text_segment: 0x1003fc000..0x100634000, + text_svma: Some(0x100000b64..0x1001d2d18), + text: Some(vec![]), + stubs_svma: Some(0x1001d2d18..0x1001d309c), + stub_helper_svma: Some(0x1001d309c..0x1001d3438), + eh_frame_svma: Some(0x100237f80..0x100237ffc), + got_svma: Some(0x100238000..0x100238010), + text_segment_file_range: Some(0x1003fc000..0x100634000), + text_segment: Some(vec![]), + ..Default::default() }, ); unwinder.add_module(module); From e7eb1695c2032a16aa5d6bcc9618aad05f9032f3 Mon Sep 17 00:00:00 2001 From: Alex Franchuk Date: Tue, 25 Jul 2023 10:56:30 -0400 Subject: [PATCH 3/5] Fix clippy lints. 
--- src/arcdata.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arcdata.rs b/src/arcdata.rs index 5fe1695..932d8a8 100644 --- a/src/arcdata.rs +++ b/src/arcdata.rs @@ -8,7 +8,7 @@ impl> Deref for ArcData { type Target = [u8]; fn deref(&self) -> &Self::Target { - &*self.0 + &self.0 } } From 1f6fb68b79016f85c3b4e0f8b640fb473f083727 Mon Sep 17 00:00:00 2001 From: Alex Franchuk Date: Tue, 25 Jul 2023 11:36:13 -0400 Subject: [PATCH 4/5] Make `object` version more permissive. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 04c9d83..73e859e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ exclude = ["/.github", "/.vscode", "/tests", "/fixtures", "/big-fixtures"] [dependencies] gimli = "0.27.0" -object = { version = "0.30", optional = true } +object = { version = ">=0.30", optional = true } thiserror = "1.0.30" macho-unwind-info = "0.3.0" fallible-iterator = "0.2.0" From 46fe218470f75fe7c715b10ca64e27d9063d6274 Mon Sep 17 00:00:00 2001 From: Alex Franchuk Date: Mon, 27 Nov 2023 09:27:10 -0500 Subject: [PATCH 5/5] Rename a few fields for clarity. Don't check unnecessary sections on mach-O. Always check for `__eh_frame`/`__eh_frame_hdr` in case there's a wonky compiler out there. 
--- src/unwinder.rs | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/unwinder.rs b/src/unwinder.rs index 8ce8359..4197c11 100644 --- a/src/unwinder.rs +++ b/src/unwinder.rs @@ -387,8 +387,8 @@ impl< ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame { unwind_info, eh_frame, - stubs, - stub_helper, + stubs_svma: stubs, + stub_helper_svma: stub_helper, base_addresses, text_data, } => { @@ -547,8 +547,8 @@ enum ModuleUnwindDataInternal> { CompactUnwindInfoAndEhFrame { unwind_info: D, eh_frame: Option>, - stubs: Option>, - stub_helper: Option>, + stubs_svma: Option>, + stub_helper_svma: Option>, base_addresses: crate::dwarf::BaseAddresses, text_data: Option>, }, @@ -588,28 +588,37 @@ impl> ModuleUnwindDataInternal { let eh_frame = section_info.section_data(b"__eh_frame"); let stubs = section_info.section_svma_range(b"__stubs"); let stub_helper = section_info.section_svma_range(b"__stub_helper"); - const TEXT_SECTIONS: &[&[u8]] = &[b"__text", b".text"]; + // Get the bytes of the executable code (instructions). + // + // In mach-O objects, executable code is stored in the `__TEXT` segment, which contains + // multiple executable sections such as `__text`, `__stubs`, and `__stub_helper`. If we + // don't have the full `__TEXT` segment contents, we can fall back to the contents of + // just the `__text` section. 
let text_data = section_info .segment_data(b"__TEXT") .zip(section_info.segment_file_range(b"__TEXT")) .or_else(|| { - TEXT_SECTIONS.iter().find_map(|name| { - section_info - .section_data(name) - .zip(section_info.section_file_range(name)) - }) + section_info + .section_data(b"__text") + .zip(section_info.section_file_range(b"__text")) }) .map(|(bytes, svma_range)| TextByteData { bytes, svma_range }); ModuleUnwindDataInternal::CompactUnwindInfoAndEhFrame { unwind_info, eh_frame: eh_frame.map(Arc::new), - stubs, - stub_helper, + stubs_svma: stubs, + stub_helper_svma: stub_helper, base_addresses: base_addresses_for_sections(section_info), text_data, } - } else if let Some(eh_frame) = section_info.section_data(b".eh_frame") { - if let Some(eh_frame_hdr) = section_info.section_data(b".eh_frame_hdr") { + } else if let Some(eh_frame) = section_info + .section_data(b".eh_frame") + .or_else(|| section_info.section_data(b"__eh_frame")) + { + if let Some(eh_frame_hdr) = section_info + .section_data(b".eh_frame_hdr") + .or_else(|| section_info.section_data(b"__eh_frame_hdr")) + { ModuleUnwindDataInternal::EhFrameHdrAndEhFrame { eh_frame_hdr, eh_frame: Arc::new(eh_frame),