diff --git a/monarch_hyperactor/src/code_sync/rsync.rs b/monarch_hyperactor/src/code_sync/rsync.rs index 93f8c27e4..31bd50504 100644 --- a/monarch_hyperactor/src/code_sync/rsync.rs +++ b/monarch_hyperactor/src/code_sync/rsync.rs @@ -53,6 +53,7 @@ use tokio::process::Child; use tokio::process::Command; #[cfg(feature = "packaged_rsync")] use tokio::sync::OnceCell; +use tracing::warn; use crate::code_sync::WorkspaceLocation; @@ -69,7 +70,7 @@ async fn get_rsync_bin_path() -> Result<&'static Path> { Ok(RSYNC_BIN_PATH .get_or_try_init(|| async { tokio::task::spawn_blocking(|| { - let mut tmp = tempfile::NamedTempFile::new()?; + let mut tmp = tempfile::NamedTempFile::with_prefix("rsync.")?; let rsync_bin = include_bytes!("rsync.bin"); tmp.write_all(rsync_bin)?; let bin_path = tmp.into_temp_path(); @@ -239,7 +240,6 @@ pub async fn do_rsync(addr: &SocketAddr, workspace: &Path) -> Result Result<()> { + pub async fn shutdown(mut self) -> Result { + let logs = fs::read_to_string(self.state.path().join("log")).await; let id = self.child.id().context("missing pid")?; let pid = Pid::from_raw(id as i32); signal::kill(pid, Signal::SIGINT)?; let status = self.child.wait().await?; // rsync exists with 20 when sent SIGINT. ensure!(status.code() == Some(20)); - Ok(()) + Ok(logs?) } } @@ -412,7 +412,7 @@ where let instance = actor_mesh.proc_mesh().client(); let (rsync_conns_tx, rsync_conns_rx) = instance.open_port::(); - let ((), results) = try_join!( + let res = try_join!( rsync_conns_rx .take(actor_mesh.shape().slice().len()) .err_into::() @@ -443,11 +443,25 @@ where .await?; anyhow::Ok(res) }, - )?; + ); - daemon.shutdown().await?; + // Kill rsync server and attempt to grab the logs. + let logs = daemon.shutdown().await; - Ok(results) + // Return results, attaching rsync daemon logs on error. + match res { + Ok(((), results)) => { + let _ = logs?; + Ok(results) + } + Err(err) => match logs { + Ok(logs) => Err(err).with_context(|| format!("rsync server logs: {}", logs)), + Err(shutdown_err) => { + warn!("failed to read logs from rsync daemon: {:?}", shutdown_err); + Err(err) + } + }, + } } #[cfg(test)]