Skip to content

Commit

Permalink
chore(node): improve error on failed send + track dysf
Browse files Browse the repository at this point in the history
  • Loading branch information
joshuef committed Dec 22, 2022
1 parent ea50066 commit 54180f6
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 1 deletion.
2 changes: 1 addition & 1 deletion sn_node/src/comm/peer_session.rs
Expand Up @@ -90,7 +90,7 @@ impl PeerSession {
"Failed sending {msg_id:?} to {:?} {err:?}",
self.link.peer()
);
Error::CmdSendError
Error::AdultCmdSendError(*self.link.peer())
})
}

Expand Down
3 changes: 3 additions & 0 deletions sn_node/src/node/data/records.rs
Expand Up @@ -144,6 +144,9 @@ impl MyNode {
}
Err(error) => {
error!("{msg_id:?} Error when replicating to adult {peer:?}: {error:?}");
if let Error::AdultCmdSendError(peer) = error {
context.log_node_issue(peer.name(), IssueType::Communication);
}
last_error = Some(error);
}
}
Expand Down
3 changes: 3 additions & 0 deletions sn_node/src/node/error.rs
Expand Up @@ -158,6 +158,9 @@ pub enum Error {
/// Error Sending Cmd in to node for processing
#[error("Error Sending Cmd in to node for processing.")]
CmdSendError,
/// Error sending Cmd to an adult for processing; carries the peer the send failed for
#[error("Error Sending Cmd to adult {0:?} for processing.")]
AdultCmdSendError(Peer),
/// Network Knowledge error.
#[error("Network data error:: {0}")]
NetworkKnowledge(#[from] sn_interface::network_knowledge::Error),
Expand Down
18 changes: 18 additions & 0 deletions sn_node/src/node/mod.rs
Expand Up @@ -146,6 +146,8 @@ mod core {
#[debug(skip)]
pub(crate) comm: Comm,
pub(crate) joins_allowed: bool,
#[debug(skip)]
pub(crate) dysfunction_cmds_sender: mpsc::Sender<DysCmds>,
}

impl NodeContext {
Expand All @@ -158,6 +160,21 @@ mod core {
.section_auth_by_name(name)
.map_err(Error::from)
}

/// Report `issue` against the node `name` to dysfunction tracking.
///
/// The command is pushed onto the dysfunction channel from a separately spawned
/// task rather than awaited inline, so that a full channel does not hold up the
/// caller (this can be called during DKG, for example). A failed send is only
/// logged as a warning; nothing is returned to the caller.
pub(crate) fn log_node_issue(&self, name: XorName, issue: IssueType) {
    trace!("Logging issue {issue:?} in dysfunction for {name}");

    // The spawned task needs its own sender handle and the command to deliver.
    let sender = self.dysfunction_cmds_sender.clone();
    let track_cmd = DysCmds::TrackIssue(name, issue);

    // TODO: do we need to kill the node if we fail tracking dysf?
    let _handle = tokio::spawn(async move {
        match sender.send(track_cmd).await {
            Ok(()) => {}
            // Log the issue, and error. We need to be wary of actually hitting this.
            Err(error) => {
                warn!("Could not send DysCmds through dysfunctional_cmds_tx: {error}");
            }
        }
    });
}
}

impl MyNode {
Expand All @@ -176,6 +193,7 @@ mod core {
comm: self.comm.clone(),
joins_allowed: self.joins_allowed,
data_storage: self.data_storage.clone(),
dysfunction_cmds_sender: self.dysfunction_cmds_sender.clone(),
}
}

Expand Down

0 comments on commit 54180f6

Please sign in to comment.