Skip to content

Commit

Permalink
Send SIGSTOP to linux/android processes before doing other procfs/ptr…
Browse files Browse the repository at this point in the history
…ace things. (#108)

* Send SIGSTOP to linux/android processes before doing other procfs/ptrace
things.

If this fails, we continue as we used to. This is an attempt to get a
consistent/static process state.

Closes #28.

* Fix mac

* Fix warning about unused doc comment

* Allow user customization of timeout

Not sure this is needed, but better to allow users to customize this rather than rely on a hardcoded value

* Update nix

---------

Co-authored-by: Jake Shadle <jake.shadle@embark-studios.com>
  • Loading branch information
afranchuk and Jake-Shadle committed Mar 21, 2024
1 parent 799b675 commit a018d34
Show file tree
Hide file tree
Showing 10 changed files with 146 additions and 49 deletions.
12 changes: 10 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Expand Up @@ -13,6 +13,7 @@ bitflags = "2.4"
byteorder = "1.4"
cfg-if = "1.0"
crash-context = "0.6"
log = "0.4"
memoffset = "0.9"
minidump-common = "0.21"
scroll = "0.12"
Expand All @@ -25,10 +26,11 @@ goblin = "0.8"
memmap2 = "0.9"

[target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies]
nix = { version = "0.27", default-features = false, features = [
nix = { version = "0.28", default-features = false, features = [
"mman",
"process",
"ptrace",
"signal",
"user",
] }
# Used for parsing procfs info.
Expand Down
26 changes: 12 additions & 14 deletions src/bin/test.rs
Expand Up @@ -8,14 +8,14 @@ pub type Result<T> = std::result::Result<T, Error>;
mod linux {
use super::*;
use minidump_writer::{
minidump_writer::STOP_TIMEOUT,
ptrace_dumper::{PtraceDumper, AT_SYSINFO_EHDR},
LINUX_GATE_LIBRARY_NAME,
};
use nix::{
sys::mman::{mmap, MapFlags, ProtFlags},
sys::mman::{mmap_anonymous, MapFlags, ProtFlags},
unistd::getppid,
};
use std::os::fd::BorrowedFd;

macro_rules! test {
($x:expr, $errmsg:expr) => {
Expand All @@ -29,13 +29,13 @@ mod linux {

fn test_setup() -> Result<()> {
let ppid = getppid();
PtraceDumper::new(ppid.as_raw())?;
PtraceDumper::new(ppid.as_raw(), STOP_TIMEOUT)?;
Ok(())
}

fn test_thread_list() -> Result<()> {
let ppid = getppid();
let dumper = PtraceDumper::new(ppid.as_raw())?;
let dumper = PtraceDumper::new(ppid.as_raw(), STOP_TIMEOUT)?;
test!(!dumper.threads.is_empty(), "No threads")?;
test!(
dumper
Expand All @@ -51,7 +51,7 @@ mod linux {

fn test_copy_from_process(stack_var: usize, heap_var: usize) -> Result<()> {
let ppid = getppid().as_raw();
let mut dumper = PtraceDumper::new(ppid)?;
let mut dumper = PtraceDumper::new(ppid, STOP_TIMEOUT)?;
dumper.suspend_threads()?;
let stack_res = PtraceDumper::copy_from_process(ppid, stack_var as *mut libc::c_void, 1)?;

Expand All @@ -73,7 +73,7 @@ mod linux {

fn test_find_mappings(addr1: usize, addr2: usize) -> Result<()> {
let ppid = getppid();
let dumper = PtraceDumper::new(ppid.as_raw())?;
let dumper = PtraceDumper::new(ppid.as_raw(), STOP_TIMEOUT)?;
dumper
.find_mapping(addr1)
.ok_or("No mapping for addr1 found")?;
Expand All @@ -90,7 +90,7 @@ mod linux {
let ppid = getppid().as_raw();
let exe_link = format!("/proc/{}/exe", ppid);
let exe_name = std::fs::read_link(exe_link)?.into_os_string();
let mut dumper = PtraceDumper::new(getppid().as_raw())?;
let mut dumper = PtraceDumper::new(getppid().as_raw(), STOP_TIMEOUT)?;
let mut found_exe = None;
for (idx, mapping) in dumper.mappings.iter().enumerate() {
if mapping.name.as_ref().map(|x| x.into()).as_ref() == Some(&exe_name) {
Expand All @@ -107,7 +107,7 @@ mod linux {

fn test_merged_mappings(path: String, mapped_mem: usize, mem_size: usize) -> Result<()> {
// Now check that PtraceDumper interpreted the mappings properly.
let dumper = PtraceDumper::new(getppid().as_raw())?;
let dumper = PtraceDumper::new(getppid().as_raw(), STOP_TIMEOUT)?;
let mut mapping_count = 0;
for map in &dumper.mappings {
if map
Expand All @@ -129,7 +129,7 @@ mod linux {

fn test_linux_gate_mapping_id() -> Result<()> {
let ppid = getppid().as_raw();
let mut dumper = PtraceDumper::new(ppid)?;
let mut dumper = PtraceDumper::new(ppid, STOP_TIMEOUT)?;
let mut found_linux_gate = false;
for mut mapping in dumper.mappings.clone() {
if mapping.name == Some(LINUX_GATE_LIBRARY_NAME.into()) {
Expand All @@ -148,7 +148,7 @@ mod linux {

fn test_mappings_include_linux_gate() -> Result<()> {
let ppid = getppid().as_raw();
let dumper = PtraceDumper::new(ppid)?;
let dumper = PtraceDumper::new(ppid, STOP_TIMEOUT)?;
let linux_gate_loc = dumper.auxv[&AT_SYSINFO_EHDR];
test!(linux_gate_loc != 0, "linux_gate_loc == 0")?;
let mut found_linux_gate = false;
Expand Down Expand Up @@ -215,18 +215,16 @@ mod linux {
let memory_size = std::num::NonZeroUsize::new(page_size.unwrap() as usize).unwrap();
// Get some memory to be mapped by the child-process
let mapped_mem = unsafe {
mmap::<BorrowedFd>(
mmap_anonymous(
None,
memory_size,
ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
MapFlags::MAP_PRIVATE | MapFlags::MAP_ANON,
None,
0,
)
.unwrap()
};

println!("{} {}", mapped_mem as usize, memory_size);
println!("{} {}", mapped_mem.as_ptr() as usize, memory_size);
loop {
std::thread::park();
}
Expand Down
21 changes: 19 additions & 2 deletions src/linux/minidump_writer.rs
Expand Up @@ -13,14 +13,21 @@ use crate::{
mem_writer::{Buffer, MemoryArrayWriter, MemoryWriter, MemoryWriterError},
minidump_format::*,
};
use std::io::{Seek, Write};
use std::{
io::{Seek, Write},
time::Duration,
};

pub enum CrashingThreadContext {
None,
CrashContext(MDLocationDescriptor),
CrashContextPlusAddress((MDLocationDescriptor, usize)),
}

/// The default timeout after a `SIGSTOP` after which minidump writing proceeds
/// regardless of the process state
pub const STOP_TIMEOUT: Duration = Duration::from_millis(100);

pub struct MinidumpWriter {
pub process_id: Pid,
pub blamed_thread: Pid,
Expand All @@ -34,6 +41,7 @@ pub struct MinidumpWriter {
pub sanitize_stack: bool,
pub crash_context: Option<CrashContext>,
pub crashing_thread_context: CrashingThreadContext,
pub stop_timeout: Duration,
}

// This doesn't work yet:
Expand Down Expand Up @@ -62,6 +70,7 @@ impl MinidumpWriter {
sanitize_stack: false,
crash_context: None,
crashing_thread_context: CrashingThreadContext::None,
stop_timeout: STOP_TIMEOUT,
}
}

Expand Down Expand Up @@ -100,10 +109,18 @@ impl MinidumpWriter {
self
}

/// Sets the timeout after `SIGSTOP` is sent to the process, if the process
/// has not stopped by the time the timeout has reached, we proceed with
/// minidump generation
pub fn stop_timeout(&mut self, duration: Duration) -> &mut Self {
self.stop_timeout = duration;
self
}

/// Generates a minidump and writes to the destination provided. Returns the in-memory
/// version of the minidump as well.
pub fn dump(&mut self, destination: &mut (impl Write + Seek)) -> Result<Vec<u8>> {
let mut dumper = PtraceDumper::new(self.process_id)?;
let mut dumper = PtraceDumper::new(self.process_id, self.stop_timeout)?;
dumper.suspend_threads()?;
dumper.late_init()?;

Expand Down
82 changes: 76 additions & 6 deletions src/linux/ptrace_dumper.rs
Expand Up @@ -15,10 +15,20 @@ use crate::{
use goblin::elf;
use nix::{
errno::Errno,
sys::{ptrace, wait},
sys::{ptrace, signal, wait},
};
use procfs_core::{
process::{MMPermissions, ProcState, Stat},
FromRead, ProcError,
};
use std::{
collections::HashMap,
ffi::c_void,
io::BufReader,
path,
result::Result,
time::{Duration, Instant},
};
use procfs_core::process::MMPermissions;
use std::{collections::HashMap, ffi::c_void, io::BufReader, path, result::Result};

#[derive(Debug, Clone)]
pub struct Thread {
Expand All @@ -29,6 +39,7 @@ pub struct Thread {
#[derive(Debug)]
pub struct PtraceDumper {
pub pid: Pid,
process_stopped: bool,
threads_suspended: bool,
pub threads: Vec<Thread>,
pub auxv: HashMap<AuxvType, AuxvType>,
Expand All @@ -45,9 +56,27 @@ impl Drop for PtraceDumper {
fn drop(&mut self) {
// Always try to resume all threads (e.g. in case of error)
let _ = self.resume_threads();
// Always allow the process to continue.
let _ = self.continue_process();
}
}

#[derive(Debug, thiserror::Error)]
enum StopProcessError {
#[error("Failed to stop the process")]
Stop(#[from] Errno),
#[error("Failed to get the process state")]
State(#[from] ProcError),
#[error("Timeout waiting for process to stop")]
Timeout,
}

#[derive(Debug, thiserror::Error)]
enum ContinueProcessError {
#[error("Failed to continue the process")]
Continue(#[from] Errno),
}

/// PTRACE_DETACH the given pid.
///
/// This handles special errno cases (ESRCH) which we won't consider errors.
Expand All @@ -67,21 +96,26 @@ fn ptrace_detach(child: Pid) -> Result<(), DumperError> {
impl PtraceDumper {
/// Constructs a dumper for extracting information of a given process
/// with a process ID of |pid|.
pub fn new(pid: Pid) -> Result<Self, InitError> {
pub fn new(pid: Pid, stop_timeout: Duration) -> Result<Self, InitError> {
let mut dumper = PtraceDumper {
pid,
process_stopped: false,
threads_suspended: false,
threads: Vec::new(),
auxv: HashMap::new(),
mappings: Vec::new(),
page_size: 0,
};
dumper.init()?;
dumper.init(stop_timeout)?;
Ok(dumper)
}

// TODO: late_init for chromeos and android
pub fn init(&mut self) -> Result<(), InitError> {
pub fn init(&mut self, stop_timeout: Duration) -> Result<(), InitError> {
// Stopping the process is best-effort.
if let Err(e) = self.stop_process(stop_timeout) {
log::warn!("failed to stop process {}: {e}", self.pid);
}
self.read_auxv()?;
self.enumerate_threads()?;
self.enumerate_mappings()?;
Expand Down Expand Up @@ -207,6 +241,42 @@ impl PtraceDumper {
result
}

/// Send SIGSTOP to the process so that we can get a consistent state.
///
/// This will block waiting for the process to stop until `timeout` has passed.
fn stop_process(&mut self, timeout: Duration) -> Result<(), StopProcessError> {
signal::kill(nix::unistd::Pid::from_raw(self.pid), Some(signal::SIGSTOP))?;

// Something like waitpid for non-child processes would be better, but we have no such
// tool, so we poll the status.
const POLL_INTERVAL: Duration = Duration::from_millis(1);
let proc_file = format!("/proc/{}/stat", self.pid);
let end = Instant::now() + timeout;

loop {
if let Ok(ProcState::Stopped) = Stat::from_file(&proc_file)?.state() {
self.process_stopped = true;
return Ok(());
}

std::thread::sleep(POLL_INTERVAL);
if Instant::now() > end {
return Err(StopProcessError::Timeout);
}
}
}

/// Send SIGCONT to the process to continue.
///
/// Unlike `stop_process`, this function does not wait for the process to continue.
fn continue_process(&mut self) -> Result<(), ContinueProcessError> {
if self.process_stopped {
signal::kill(nix::unistd::Pid::from_raw(self.pid), Some(signal::SIGCONT))?;
}
self.process_stopped = false;
Ok(())
}

/// Parse /proc/$pid/task to list all the threads of the process identified by
/// pid.
fn enumerate_threads(&mut self) -> Result<(), InitError> {
Expand Down
3 changes: 2 additions & 1 deletion src/mac/mach.rs
Expand Up @@ -590,7 +590,8 @@ pub fn sysctl_by_name<T: Sized + Default>(name: &[u8]) -> T {
0,
) != 0
{
// log?
// TODO convert to ascii characters when logging?
log::warn!("failed to get sysctl for {name:?}");
T::default()
} else {
out
Expand Down
8 changes: 5 additions & 3 deletions src/mac/streams/exception.rs
Expand Up @@ -69,9 +69,11 @@ impl MinidumpWriter {
} else {
// For all other exceptions types, the value in the code
// _should_ never exceed 32 bits, crashpad does an actual
// range check here, but since we don't really log anything
// else at the moment I'll punt that for now
// TODO: log/do something if exc.code > u32::MAX
// range check here.
if code > u32::MAX.into() {
// TODO: do something more than logging?
log::warn!("exception code {code:#018x} exceeds the expected 32 bits");
}
code as u32
};

Expand Down
4 changes: 2 additions & 2 deletions src/mac/streams/thread_names.rs
Expand Up @@ -25,8 +25,8 @@ impl MinidumpWriter {
// not a critical failure
let name_loc = match Self::write_thread_name(buffer, dumper, tid) {
Ok(loc) => loc,
Err(_err) => {
// TODO: log error
Err(err) => {
log::warn!("failed to write thread name for thread {tid}: {err}");
write_string_to_location(buffer, "")?
}
};
Expand Down

0 comments on commit a018d34

Please sign in to comment.