diff --git a/sn_node/src/comm/peer_session.rs b/sn_node/src/comm/peer_session.rs index 1deb10f973..77985d495a 100644 --- a/sn_node/src/comm/peer_session.rs +++ b/sn_node/src/comm/peer_session.rs @@ -90,7 +90,7 @@ impl PeerSession { "Failed sending {msg_id:?} to {:?} {err:?}", self.link.peer() ); - Error::CmdSendError + Error::AdultCmdSendError(*self.link.peer()) }) } diff --git a/sn_node/src/node/data/records.rs b/sn_node/src/node/data/records.rs index eefbe36ffb..395877eb98 100644 --- a/sn_node/src/node/data/records.rs +++ b/sn_node/src/node/data/records.rs @@ -144,6 +144,9 @@ impl MyNode { } Err(error) => { error!("{msg_id:?} Error when replicating to adult {peer:?}: {error:?}"); + if let Error::AdultCmdSendError(peer) = error { + context.log_node_issue(peer.name(), IssueType::Communication); + } last_error = Some(error); } } diff --git a/sn_node/src/node/error.rs b/sn_node/src/node/error.rs index 605a07b378..d103015bcb 100644 --- a/sn_node/src/node/error.rs +++ b/sn_node/src/node/error.rs @@ -158,6 +158,9 @@ pub enum Error { /// Error Sending Cmd in to node for processing #[error("Error Sending Cmd in to node for processing.")] CmdSendError, + /// Error sending a Cmd to an adult for processing; carries the adult peer the send failed for + #[error("Error Sending Cmd to adult {0:?} for processing.")] + AdultCmdSendError(Peer), /// Network Knowledge error. 
#[error("Network data error:: {0}")] NetworkKnowledge(#[from] sn_interface::network_knowledge::Error), diff --git a/sn_node/src/node/mod.rs b/sn_node/src/node/mod.rs index 2ae177d219..9e98406a8a 100644 --- a/sn_node/src/node/mod.rs +++ b/sn_node/src/node/mod.rs @@ -146,6 +146,8 @@ mod core { #[debug(skip)] pub(crate) comm: Comm, pub(crate) joins_allowed: bool, + #[debug(skip)] + pub(crate) dysfunction_cmds_sender: mpsc::Sender<DysCmds>, } impl NodeContext { @@ -158,6 +160,21 @@ mod core { .section_auth_by_name(name) .map_err(Error::from) } + + /// Log an issue in dysfunction + /// Spawns a task to send this in case the channel is full, so that we don't hold up + /// processing around this (as this can be called during DKG, for example) + pub(crate) fn log_node_issue(&self, name: XorName, issue: IssueType) { + trace!("Logging issue {issue:?} in dysfunction for {name}"); + let dysf_sender = self.dysfunction_cmds_sender.clone(); + // TODO: do we need to kill the node if we fail tracking dysf? + let _handle = tokio::spawn(async move { + if let Err(error) = dysf_sender.send(DysCmds::TrackIssue(name, issue)).await { + // Log the issue, and error. We need to be wary of actually hitting this. + warn!("Could not send DysCmds through dysfunctional_cmds_tx: {error}"); + } + }); + } } impl MyNode { @@ -176,6 +193,7 @@ mod core { comm: self.comm.clone(), joins_allowed: self.joins_allowed, data_storage: self.data_storage.clone(), + dysfunction_cmds_sender: self.dysfunction_cmds_sender.clone(), } }