diff --git a/resources/scripts/all_nodes_joined.sh b/resources/scripts/all_nodes_joined.sh index c9eb6671ce..3ca0d6033c 100755 --- a/resources/scripts/all_nodes_joined.sh +++ b/resources/scripts/all_nodes_joined.sh @@ -6,27 +6,55 @@ then exit 1 fi -DEFAULT_ELDER_COUNT=7 -ELDER_COUNT="${SN_ELDER_COUNT:-$DEFAULT_ELDER_COUNT}" DEFAULT_NODE_COUNT=30 +DEFAULT_LOG_DIR="$HOME/.safe/node/local-test-network" + NODE_COUNT="${NODE_COUNT:-$DEFAULT_NODE_COUNT}" -# It's better to use the network health test in rust as it's type safe. -# This is needed for windows though at the moment due to the logfile locking... -echo "Checking logfiles to check all nodes have joined" -log_dir="$HOME/.safe/node/local-test-network" +log_dir="${LOG_DIR:-$DEFAULT_LOG_DIR}" +echo "Checking nodes log files to verify all nodes have joined. Logs path: $log_dir" # -u needed here to search log dirs -nodes_joined=$(rg "ReceivedJoinApproval" "$log_dir" -g "*.log*" -u -c | wc -l) +nodes_ips=$(rg "connection info:.*" "$log_dir" -g "*.log*" -u | rg "(127.0.0.1:\d{5})" -or '$1') +nodes_ips_count=$(echo "$nodes_ips" | wc -l) -echo "Joined nodes found: $nodes_joined ." +echo "Number of nodes: $nodes_ips_count" -# Node count will always be 1 less than total nodes as genesis does not join anything. -if ! [[ $nodes_joined -gt $(($NODE_COUNT - 2)) ]] +if [[ $nodes_ips_count -ne $NODE_COUNT ]] then - echo "Some nodes have not joined successfully! expected $(($NODE_COUNT -1)), we have $nodes_joined" + echo "Unexpected number of joined nodes. Expected $NODE_COUNT, we have $nodes_ips_count:" + echo "$nodes_ips" exit 100 else - echo "All nodes have joined!" - exit 0 + echo "All nodes have joined. Nodes IPs:" + echo "$nodes_ips" +fi + +# We'll use the logs from the last node that joined, to obtain the +# list of members in the network knowledge they share with AE messages. +last_node_log_dig="$log_dir/sn-node-$NODE_COUNT" +members=$(rg ".* msg: AntiEntropy \{.* kind: Update \{ members: \{(SectionSigned \{.*\})+ \}.*" -or '$1' "$last_node_log_dig" -g "*.log*" | tail -1 | rg "(127.0.0.1:\d{5})" -or '$1') + +echo "" +echo "Checking if nodes in network knowledge match the list of nodes IPs..." + +invalid_member_found=false +for m in $members +do + if grep -q "$m" <<< "$nodes_ips" + then + echo "Node $m is a valid member" + else + echo "Node $m in network knowledge was not found in the list of nodes IPs" + invalid_member_found=true + fi +done + +echo "" +if $invalid_member_found +then + echo "At least one member in the network knowledge was found invalid" + exit 100 +else + echo "All good, members in the network knowledge found in the list of nodes IPs!" fi diff --git a/sn_client/src/sessions/messaging.rs b/sn_client/src/sessions/messaging.rs index 141cf1dcc9..4c573adeaa 100644 --- a/sn_client/src/sessions/messaging.rs +++ b/sn_client/src/sessions/messaging.rs @@ -91,7 +91,7 @@ impl Session { let wire_msg = WireMsg::new_msg(msg_id, payload, kind, dst); let send_cmd_tasks = self.send_msg(elders.clone(), wire_msg).await?; - trace!("Cmd msg {:?} sent", msg_id); + trace!("Cmd msg {msg_id:?} sent"); // We wait for ALL the Acks get received. // The AE messages are handled by the tasks, hence no extra wait is required. @@ -107,7 +107,7 @@ impl Session { elders: Vec, mut send_cmd_tasks: JoinSet, ) -> Result<()> { - debug!("----> init of check for acks for {:?}", msg_id); + debug!("----> Init of check for acks for {msg_id:?}"); let expected_acks = elders.len(); let mut received_acks = BTreeSet::default(); let mut received_errors = BTreeSet::default(); @@ -154,10 +154,7 @@ impl Session { // exit if too many errors: if failures.len() + received_errors.len() >= expected_acks { - error!( - "Received majority of error response for cmd {:?}: {:?}", - msg_id, error - ); + error!("Received majority of error response for cmd {msg_id:?}: {error:?}"); return Err(Error::CmdError { source: error, msg_id, @@ -228,12 +225,9 @@ impl Session { let msg_id = MsgId::new(); debug!( - "Sending query message {:?}, from {}, {:?} to the {} Elders closest to data name: {:?}", - msg_id, + "Sending query message {msg_id:?}, from {}, {query:?} to \ + the {elders_len} Elders closest to data name: {elders:?}", endpoint.public_addr(), - query, - elders_len, - elders ); let dst = Dst { @@ -291,23 +285,23 @@ impl Session { }; // let's see if we have a positive response... - debug!("Response to {msg_id:?}: {:?}", response); + debug!("Response to {msg_id:?}: {response:?}"); match *response { QueryResponse::GetChunk(Ok(chunk)) => { if let Some(chunk_addr) = chunk_addr { // We are dealing with Chunk query responses, thus we validate its hash // matches its xorname, if so, we don't need to await for more responses - debug!("Chunk QueryResponse received is: {:#?}", chunk); + debug!("Chunk QueryResponse received is: {chunk:#?}"); if chunk_addr.name() == chunk.name() { - trace!("Valid Chunk received for {:?}", msg_id); + trace!("Valid Chunk received for {msg_id:?}"); valid_response = Some(QueryResponse::GetChunk(Ok(chunk))); break; } else { // the Chunk content doesn't match its XorName, // this is suspicious and it could be a byzantine node - warn!("We received an invalid Chunk response from one of the nodes"); + warn!("We received an invalid Chunk response from one of the nodes for {msg_id:?}"); discarded_responses += 1; } } @@ -371,7 +365,7 @@ impl Session { // we've looped over all responses... // if any are valid, lets return it if let Some(response) = valid_response { - debug!("valid response innnn!!! : {:?}", response); + debug!("Valid response in!!!: {response:?}"); return Ok(QueryResult { response }); // otherwise, if we've got an error in // we can return that too @@ -431,14 +425,18 @@ impl Session { if let Some(sap) = a_close_sap { let sap_elders = sap.elders_vec(); let section_pk = sap.section_key(); - trace!("SAP elders found {:?}", sap_elders); + trace!("SAP elders found {sap_elders:?}"); // Supermajority of elders is expected. let targets_count = supermajority(sap_elders.len()); // any SAP that does not hold elders_count() is indicative of a broken network (after genesis) if sap_elders.len() < targets_count { - error!("Insufficient knowledge to send to address {:?}, elders for this section: {sap_elders:?} ({targets_count} needed), section PK is: {section_pk:?}", dst_address); + error!( + "Insufficient knowledge to send to address {dst_address:?}, \ + elders for this section: {sap_elders:?} ({targets_count} needed), \ + section PK is: {section_pk:?}" + ); return Err(Error::InsufficientElderKnowledge { connections: sap_elders.len(), required: targets_count, @@ -459,7 +457,7 @@ impl Session { wire_msg: WireMsg, ) -> Result> { let msg_id = wire_msg.msg_id(); - debug!("---> send msg {msg_id:?} going out."); + debug!("---> Send msg {msg_id:?} going out."); let bytes = wire_msg.serialize()?; let mut tasks = JoinSet::new();