From 41fde0430fb9b4730aa620ae9efcd15e9d208e1b Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Mon, 24 Nov 2025 12:04:39 -0800 Subject: [PATCH] Log events for SupervisionError::UnhandledFaultHook Differential Revision: D87787311 --- monarch_hyperactor/src/v1/actor_mesh.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/monarch_hyperactor/src/v1/actor_mesh.rs b/monarch_hyperactor/src/v1/actor_mesh.rs index 01fa7484b..fd773f11e 100644 --- a/monarch_hyperactor/src/v1/actor_mesh.rs +++ b/monarch_hyperactor/src/v1/actor_mesh.rs @@ -266,6 +266,11 @@ impl PythonActorMeshImpl { .extract::() .unwrap(); tracing::error!( + name = "ActorMeshStatus", + status = "SupervisionError::UnhandledFaultHook", + actor_name = failure.mesh_name, + event = %failure.event, + rank = failure.rank, "unhandled event reached unhandled_fault_hook: {}, which is exiting the process with code {}", failure, code @@ -275,6 +280,11 @@ impl PythonActorMeshImpl { // The callback raised some other exception, and there's // no way to handle it. Just exit the process anyways tracing::error!( + name = "ActorMeshStatus", + status = "SupervisionError::UnhandledFaultHook", + actor_name = failure.mesh_name, + event = %failure.event, + rank = failure.rank, "unhandled event reached unhandled_fault_hook: {}, which raised an exception: {:?}. \ Exiting the process with code 1", failure, @@ -284,6 +294,11 @@ impl PythonActorMeshImpl { } } else { tracing::warn!( + name = "ActorMeshStatus", + status = "SupervisionError::UnhandledFaultHook", + actor_name = failure.mesh_name, + event = %failure.event, + rank = failure.rank, "unhandled event reached unhandled_fault_hook: {}, but that function produced no exception or crash. Ignoring the error", failure );