Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add signal handler on *nix with troubleshooting and stacktrace #1340

Merged
merged 3 commits into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 54 additions & 12 deletions crates/re_viewer/src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1514,23 +1514,65 @@ fn debug_menu(options: &mut AppOptions, ui: &mut egui::Ui) {

ui.separator();

#[allow(clippy::manual_assert)]
if ui.button("panic!").clicked() {
panic!("Intentional panic");
}
ui.menu_button("Crash", |ui| {
#[allow(clippy::manual_assert)]
if ui.button("panic!").clicked() {
panic!("Intentional panic");
}

if ui.button("panic! during unwind").clicked() {
// Zero-sized helper whose destructor always panics.
// Dropping a value of this type while another panic is already unwinding
// therefore raises a second panic from inside `Drop::drop`.
struct PanicOnDrop {}

impl Drop for PanicOnDrop {
fn drop(&mut self) {
// Intentional: this type exists only to panic when it is dropped.
panic!("Second intentional panic in Drop::drop");
}
}

if ui.button("panic! during unwind").clicked() {
struct PanicOnDrop {}
let _this_will_panic_when_dropped = PanicOnDrop {};
panic!("First intentional panic");
}

impl Drop for PanicOnDrop {
fn drop(&mut self) {
panic!("Second intentional panic in Drop::drop");
if ui.button("SEGFAULT").clicked() {
// Taken from https://github.com/EmbarkStudios/crash-handling/blob/main/sadness-generator/src/lib.rs

/// This is the fixed address used to generate a segfault. It's possible that
/// this address is mapped and writable by your process, in which case a
/// crash may not occur.
#[cfg(target_pointer_width = "64")]
pub const SEGFAULT_ADDRESS: u64 = u32::MAX as u64 + 0x42;
#[cfg(target_pointer_width = "32")]
pub const SEGFAULT_ADDRESS: u32 = 0x42;

let bad_ptr: *mut u8 = SEGFAULT_ADDRESS as _;
#[allow(unsafe_code)]
// SAFETY: this is not safe. We are _trying_ to crash.
unsafe {
std::ptr::write_volatile(bad_ptr, 1);
}
}

let _this_will_panic_when_dropped = PanicOnDrop {};
panic!("First intentional panic");
}
if ui.button("Stack overflow").clicked() {
// Taken from https://github.com/EmbarkStudios/crash-handling/blob/main/sadness-generator/src/lib.rs
// Recurses until the stack is exhausted.
//
// Every frame carries a 256-byte buffer, and the `+ 1` applied to the result
// of the recursive call keeps that call out of tail position.
fn recurse(data: u64) -> u64 {
let mut buff = [0u8; 256];
buff[..9].copy_from_slice(b"junk data");

// Fold every byte of the buffer into `result`; the non-zero "junk data"
// bytes guarantee that `result` ends up larger than `data`.
let mut result = data;
for c in buff {
result += c as u64;
}

// `result` is only zero if the sum above came out as zero, which cannot
// happen for the small positive seed used by the caller, so the recursive
// branch is the one that is taken.
if result == 0 {
result
} else {
recurse(result) + 1
}
}

recurse(42);
}
});
}

// ---
Expand Down
5 changes: 5 additions & 0 deletions crates/rerun/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,8 @@ clap = { workspace = true, features = ["derive"] }
mimalloc = "0.1.29"
puffin_http = "0.11"
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }

# Native unix dependencies:
[target.'cfg(not(any(target_arch = "wasm32", target_os = "windows")))'.dependencies]
backtrace = "0.3"
libc = "0.2"
124 changes: 124 additions & 0 deletions crates/rerun/src/crash_handler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/// Installs all crash handlers that are available on the current target.
///
/// The panic hook is installed unconditionally. The signal handler is only
/// compiled in on targets that are neither `wasm32` nor Windows.
pub fn install_crash_handlers() {
    install_panic_hook();

    #[cfg(not(any(target_arch = "wasm32", target_os = "windows")))]
    install_signal_handler();
}

/// Wraps the currently installed panic hook with one that also prints a link
/// to the Rerun troubleshooting guide.
///
/// The previous hook is taken out of the global slot, captured by the new hook
/// and invoked first, so whatever it printed before is still printed.
fn install_panic_hook() {
    let wrapped_hook = std::panic::take_hook();

    std::panic::set_hook(Box::new(move |panic_info| {
        // Delegate to the hook that was installed before us; this prints the callstack etc.
        wrapped_hook(panic_info);

        eprintln!(
            "\nTroubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting"
        );
    }));
}

/// Installs a handler for a fixed set of fatal and termination signals.
///
/// When one of the listed signals is delivered, the handler:
/// 1. writes the signal name and a troubleshooting link to stderr,
/// 2. prints a trimmed callstack,
/// 3. writes the notice again so it ends up at the bottom of the terminal,
/// 4. restores the default disposition for the signal and re-raises it.
#[cfg(not(target_arch = "wasm32"))]
#[cfg(not(target_os = "windows"))]
// Talking to the C signal API requires `unsafe`.
#[allow(unsafe_code)]
// `libc::signal` takes the handler as an integer-typed `sighandler_t`.
#[allow(clippy::fn_to_numeric_cast_any)]
fn install_signal_handler() {
    // SAFETY: we're installing a signal handler.
    unsafe {
        for signum in [
            libc::SIGABRT,
            libc::SIGBUS,
            libc::SIGFPE,
            libc::SIGILL,
            libc::SIGINT,
            libc::SIGSEGV,
            libc::SIGTERM,
        ] {
            libc::signal(
                signum,
                signal_handler as *const fn(libc::c_int) as libc::sighandler_t,
            );
        }
    }

    /// The handler that is registered for every signal listed above.
    unsafe extern "C" fn signal_handler(signal_number: libc::c_int) {
        let signal_name = match signal_number {
            libc::SIGABRT => "SIGABRT",
            libc::SIGBUS => "SIGBUS",
            libc::SIGFPE => "SIGFPE",
            libc::SIGILL => "SIGILL",
            libc::SIGINT => "SIGINT",
            libc::SIGSEGV => "SIGSEGV",
            libc::SIGTERM => "SIGTERM",
            _ => "UNKNOWN SIGNAL",
        };

        // There are very few things that are safe to do in a signal handler,
        // but writing to stderr is one of them.
        // So we first print out what happened to stderr so we're sure that gets out,
        // then we do the unsafe things, like logging the stack trace.
        // We take care not to allocate any memory along the way.
        write_signal_notice(signal_name);

        // Ok, we printed the most important things.
        // Let's do less important things that require memory allocations.
        // Allocating memory can lead to deadlocks if the signal
        // was triggered from the system's memory management functions.
        print_callstack();

        // We seem to have managed printing the callstack - great!
        // Then let's print the important stuff _again_ so it is visible
        // at the bottom of the user's terminal:
        write_signal_notice(signal_name);

        // We are done!
        // Call the default signal handler (which usually terminates the app):
        // SAFETY: we're calling a signal handler
        unsafe {
            libc::signal(signal_number, libc::SIG_DFL);
            libc::raise(signal_number);
        }
    }

    /// Writes the "caught a signal" notice and the troubleshooting link to stderr.
    ///
    /// Only uses string literals and the passed-in `&str`, so it does not allocate.
    fn write_signal_notice(signal_name: &str) {
        write_to_stderr("\n");
        write_to_stderr("Rerun caught a signal: ");
        write_to_stderr(signal_name);
        write_to_stderr("\n");
        write_to_stderr(
            "Troubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting\n\n",
        );
    }

    /// Best-effort write of `text` to stderr using the raw `write` call.
    ///
    /// `write` may accept fewer bytes than requested, so this keeps writing the
    /// remainder until everything is out or a call fails / makes no progress.
    fn write_to_stderr(text: &str) {
        let mut remaining = text.as_bytes();

        while !remaining.is_empty() {
            // SAFETY: writing to stderr is fine, even in a signal handler.
            // The pointer and length describe the live `remaining` slice.
            let result = unsafe {
                libc::write(
                    libc::STDERR_FILENO,
                    remaining.as_ptr().cast(),
                    remaining.len(),
                )
            };

            match usize::try_from(result) {
                Ok(written) if written > 0 => {
                    // `get` instead of indexing: never panic inside a signal handler.
                    remaining = remaining.get(written..).unwrap_or_default();
                }
                // Negative (error) or zero (no progress): nothing more we can do here.
                _ => break,
            }
        }
    }

    /// Captures the current backtrace, trims the handler's own frames and the
    /// runtime start-up frames, and writes what is left to stderr.
    ///
    /// This allocates, which is why it is only called after the notice has been written.
    fn print_callstack() {
        let backtrace = backtrace::Backtrace::new();
        let stack = format!("{backtrace:?}");

        // Trim it a bit:
        let mut stack = stack.as_str();

        // Drop everything up to and including the signal handler's own frame.
        let start_pattern = "install_signal_handler::signal_handler\n";
        if let Some(start_offset) = stack.find(start_pattern) {
            stack = &stack[start_offset + start_pattern.len()..];
        }

        // Drop the frames below the short-backtrace marker.
        if let Some(end_offset) =
            stack.find("std::sys_common::backtrace::__rust_begin_short_backtrace")
        {
            stack = &stack[..end_offset];
        }

        write_to_stderr(stack);
    }
}
1 change: 1 addition & 0 deletions crates/rerun/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@

#![warn(missing_docs)] // Let's keep this crate well-documented!

mod crash_handler;
mod run;

pub use run::{run, CallSource};
Expand Down
16 changes: 1 addition & 15 deletions crates/rerun/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ fn run_analytics(cmd: &AnalyticsCommands) -> Result<(), re_analytics::cli::CliEr
}

async fn run_impl(call_source: CallSource, args: Args) -> anyhow::Result<()> {
install_panic_hook();
crate::crash_handler::install_crash_handlers();

let mut profiler = re_viewer::Profiler::default();
if args.profile {
Expand Down Expand Up @@ -332,17 +332,3 @@ fn parse_max_latency(max_latency: Option<&String>) -> f32 {
.unwrap_or_else(|err| panic!("Failed to parse max_latency ({max_latency:?}): {err}"))
})
}

/// Replaces the global panic hook with one that first runs the previously
/// installed hook and then prints a link to the Rerun troubleshooting guide.
fn install_panic_hook() {
    let earlier_hook = std::panic::take_hook();

    std::panic::set_hook(Box::new(move |panic_info| {
        // This prints the callstack etc
        earlier_hook(panic_info);

        eprintln!(
            "\nTroubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting"
        );
    }));
}