From 0103c7f1556c5b2f5e0a223201b5331f4f16921a Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Thu, 21 Mar 2024 17:27:01 +0200 Subject: [PATCH 01/47] authority-discovery: Make changing of peer-id while active a bit more robust In the case when nodes don't persist their node-key or they want to generate a new one while being in the active set, things go wrong because both the old addresses and the new ones will still be present in DHT, so because of the distributed nature of the DHT both will survive in the network until the old ones expire, which is 36 hours. Nodes in the network will randomly resolve the authority-id to the old address or the new one. More details in: https://github.com/paritytech/polkadot-sdk/issues/3673 This PR proposes we mitigate this problem, by: 1. Let the query for a DHT key retrieve all the results, that is usually bounded by the replication factor which is 20; currently we interrupt the query on the first result. 2. Modify the authority-discovery service to keep all the discovered addresses around for 24h since an address was last seen. 3. Plumb through other subsystems where the assumption was that an authority-id will resolve only to one PeerId. Currently, the authority-discovery keeps just the last record it received from DHT and queries the DHT every 10 minutes. But they could always receive only the old address, only the new address or a flip-flop between them depending on what node wins the race to provide the record. 4. Update gossip-support to try to resolve authorities more often than every session. This would give us a lot more chances for the nodes in the network to also discover not only the old address of the node but also the new one and should improve the time it takes for a node to be properly connected in the network. The behaviour won't be deterministic because there is no guarantee that all nodes will see the new record at least once, since they could query only nodes that have the old one. 
Signed-off-by: Alexandru Gheorghe --- .../node/network/gossip-support/src/lib.rs | 30 ++++++++++--- .../client/authority-discovery/src/worker.rs | 27 ++++++++---- .../src/worker/addr_cache.rs | 42 +++++++++++++++---- substrate/client/network/src/discovery.rs | 11 +++-- 4 files changed, 86 insertions(+), 24 deletions(-) diff --git a/polkadot/node/network/gossip-support/src/lib.rs b/polkadot/node/network/gossip-support/src/lib.rs index 9f33cd5d8a31..9c1fa26a1bc6 100644 --- a/polkadot/node/network/gossip-support/src/lib.rs +++ b/polkadot/node/network/gossip-support/src/lib.rs @@ -69,6 +69,12 @@ const BACKOFF_DURATION: Duration = Duration::from_secs(5); #[cfg(test)] const BACKOFF_DURATION: Duration = Duration::from_millis(500); +// The authorithy_discovery queries runs every ten minutes, +// so no point in trying more often than that, so let's +// try re-resolving the authorithies every 10 minutes and force +// the reconnection to the ones that changed their address. +const TRY_RECONNECT_AFTER: Duration = Duration::from_secs(60 * 10); + /// Duration after which we consider low connectivity a problem. /// /// Especially at startup low connectivity is expected (authority discovery cache needs to be @@ -91,6 +97,14 @@ pub struct GossipSupport { // `None` otherwise. last_failure: Option, + // Validators can restart during a session, so if they change + // their PeerID, we will connect to them in the best case after + // a session, so we need to try more often to resolved peers and + // reconnect to them. The authorithy_discovery queries runs every ten + // minutes, so no point in trying more often than that, so let's + // try reconnecting every 10 minutes here as well. + last_connection_request: Option, + /// First time we did not reach our connectivity threshold. 
/// /// This is the time of the first failed attempt to connect to >2/3 of all validators in a @@ -131,6 +145,7 @@ where keystore, last_session_index: None, last_failure: None, + last_connection_request: None, failure_start: None, resolved_authorities: HashMap::new(), connected_authorities: HashMap::new(), @@ -196,7 +211,11 @@ where for leaf in leaves { let current_index = util::request_session_index_for_child(leaf, sender).await.await??; let since_failure = self.last_failure.map(|i| i.elapsed()).unwrap_or_default(); - let force_request = since_failure >= BACKOFF_DURATION; + let since_last_reconnect = + self.last_connection_request.map(|i| i.elapsed()).unwrap_or_default(); + + let force_request = + since_failure >= BACKOFF_DURATION || since_last_reconnect >= TRY_RECONNECT_AFTER; let leaf_session = Some((current_index, leaf)); let maybe_new_session = match self.last_session_index { Some(i) if current_index <= i => None, @@ -248,7 +267,7 @@ where // connections to a much broader set of validators. { let mut connections = authorities_past_present_future(sender, leaf).await?; - + self.last_connection_request = Some(Instant::now()); // Remove all of our locally controlled validator indices so we don't connect to // ourself. 
let connections = @@ -405,10 +424,11 @@ where .await .into_iter() .flat_map(|list| list.into_iter()) - .find_map(|addr| parse_addr(addr).ok().map(|(p, _)| p)); + .flat_map(|addr| parse_addr(addr).ok().map(|(p, _)| p)) + .collect::>(); - if let Some(p) = peer_id { - authority_ids.entry(p).or_default().insert(authority); + for p in peer_id { + authority_ids.entry(p).or_default().insert(authority.clone()); } } diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 9bccb96ff378..eb898aaec5ef 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -26,7 +26,7 @@ use std::{ collections::{HashMap, HashSet}, marker::PhantomData, sync::Arc, - time::Duration, + time::{Duration, Instant}, }; use futures::{channel::mpsc, future, stream::Fuse, FutureExt, Stream, StreamExt}; @@ -139,6 +139,11 @@ pub struct Worker { /// Set of in-flight lookups. in_flight_lookups: HashMap, + /// Set of lookups we can still received records. + /// These are the entries in the `in_flight_lookups` for which + /// we got at least on successfull result. + known_lookups: HashMap, + addr_cache: addr_cache::AddrCache, metrics: Option, @@ -237,6 +242,7 @@ where query_interval, pending_lookups: Vec::new(), in_flight_lookups: HashMap::new(), + known_lookups: HashMap::new(), addr_cache, role, metrics, @@ -292,9 +298,7 @@ where fn process_message_from_service(&self, msg: ServicetoWorkerMsg) { match msg { ServicetoWorkerMsg::GetAddressesByAuthorityId(authority, sender) => { - let _ = sender.send( - self.addr_cache.get_addresses_by_authority_id(&authority).map(Clone::clone), - ); + let _ = sender.send(self.addr_cache.get_addresses_by_authority_id(&authority)); }, ServicetoWorkerMsg::GetAuthorityIdsByPeerId(peer_id, sender) => { let _ = sender @@ -408,6 +412,7 @@ where // Ignore all still in-flight lookups. 
Those that are still in-flight are likely stalled as // query interval ticks are far enough apart for all lookups to succeed. self.in_flight_lookups.clear(); + self.known_lookups.clear(); if let Some(metrics) = &self.metrics { metrics @@ -500,10 +505,16 @@ where .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? .ok_or(Error::ReceivingDhtValueFoundEventWithNoRecords)?; - let authority_id: AuthorityId = self - .in_flight_lookups - .remove(&remote_key) - .ok_or(Error::ReceivingUnexpectedRecord)?; + let authority_id: AuthorityId = + if let Some(authority_id) = self.in_flight_lookups.remove(&remote_key) { + authority_id + } else if let Some(authority_id) = self.known_lookups.get(&remote_key) { + authority_id.clone() + } else { + return Err(Error::ReceivingUnexpectedRecord); + }; + + self.known_lookups.insert(remote_key.clone(), authority_id.clone()); let local_peer_id = self.network.local_peer_id(); diff --git a/substrate/client/authority-discovery/src/worker/addr_cache.rs b/substrate/client/authority-discovery/src/worker/addr_cache.rs index 8084b7f0a6df..b6af11f73886 100644 --- a/substrate/client/authority-discovery/src/worker/addr_cache.rs +++ b/substrate/client/authority-discovery/src/worker/addr_cache.rs @@ -21,7 +21,10 @@ use libp2p::{ PeerId, }; use sp_authority_discovery::AuthorityId; -use std::collections::{hash_map::Entry, HashMap, HashSet}; +use std::{ + collections::{hash_map::Entry, HashMap, HashSet}, + time::Instant, +}; /// Cache for [`AuthorityId`] -> [`HashSet`] and [`PeerId`] -> [`HashSet`] /// mappings. @@ -33,7 +36,7 @@ pub(super) struct AddrCache { /// Since we may store the mapping across several sessions, a single /// `PeerId` might correspond to multiple `AuthorityId`s. However, /// it's not expected that a single `AuthorityId` can have multiple `PeerId`s. 
- authority_id_to_addresses: HashMap>, + authority_id_to_addresses: HashMap>, peer_id_to_authority_ids: HashMap>, } @@ -48,8 +51,12 @@ impl AddrCache { /// Inserts the given [`AuthorityId`] and [`Vec`] pair for future lookups by /// [`AuthorityId`] or [`PeerId`]. pub fn insert(&mut self, authority_id: AuthorityId, addresses: Vec) { - let addresses = addresses.into_iter().collect::>(); - let peer_ids = addresses_to_peer_ids(&addresses); + let mut addresses = addresses + .into_iter() + .map(|addr| (addr, Instant::now() + std::time::Duration::from_secs(24 * 60 * 60))) + .collect::>(); + + let mut peer_ids = addresses_to_peer_ids(&addresses); if peer_ids.is_empty() { log::debug!( @@ -74,9 +81,26 @@ impl AddrCache { "Found addresses for authority {authority_id:?}: {addresses:?}", ); + let old_addresses = + self.authority_id_to_addresses.get(&authority_id).cloned().unwrap_or_default(); + + let time_now = Instant::now(); + + let to_keep_addresses = old_addresses + .iter() + .filter(|(addr, expires)| **expires >= time_now && !addresses.contains_key(addr)) + .map(|(addr, expires)| (addr.clone(), expires.clone())) + .collect::>(); + + addresses.extend(to_keep_addresses.clone()); + let old_addresses = self.authority_id_to_addresses.insert(authority_id.clone(), addresses); + let old_peer_ids = addresses_to_peer_ids(&old_addresses.unwrap_or_default()); + let to_kepp_peer_ids = addresses_to_peer_ids(&to_keep_addresses); + peer_ids.extend(to_kepp_peer_ids); + // Add the new peer ids peer_ids.difference(&old_peer_ids).for_each(|new_peer_id| { self.peer_id_to_authority_ids @@ -118,8 +142,10 @@ impl AddrCache { pub fn get_addresses_by_authority_id( &self, authority_id: &AuthorityId, - ) -> Option<&HashSet> { - self.authority_id_to_addresses.get(authority_id) + ) -> Option> { + self.authority_id_to_addresses + .get(authority_id) + .map(|result| result.keys().map(|addr| addr.clone()).collect::>()) } /// Returns the [`AuthorityId`]s for the given [`PeerId`]. 
@@ -170,8 +196,8 @@ fn peer_id_from_multiaddr(addr: &Multiaddr) -> Option { }) } -fn addresses_to_peer_ids(addresses: &HashSet) -> HashSet { - addresses.iter().filter_map(peer_id_from_multiaddr).collect::>() +fn addresses_to_peer_ids(addresses: &HashMap) -> HashSet { + addresses.keys().filter_map(peer_id_from_multiaddr).collect::>() } #[cfg(test)] diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index 77c26266aac4..8c03f17d3bbd 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -72,7 +72,7 @@ use libp2p::{ }, PeerId, }; -use log::{debug, info, trace, warn}; +use log::{debug, error, info, trace, warn}; use sp_core::hexdisplay::HexDisplay; use std::{ cmp, @@ -784,8 +784,13 @@ impl NetworkBehaviour for DiscoveryBehaviour { // Let's directly finish the query, as we are only interested in a // quorum of 1. if let Some(kad) = self.kademlia.as_mut() { - if let Some(mut query) = kad.query_mut(&id) { - query.finish(); + if let Some(query) = kad.query_mut(&id) { + // Let the query continue, to increase the chances we + // discover all possible addresses, for the cases where more + // addresses might exist in DHT, for example when the node + // changes its PeerId. 
+ + // query.finish(); } } From 41726774e2df5b0b05227010cdf33a7c89f62d4e Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Fri, 22 Mar 2024 08:55:18 +0200 Subject: [PATCH 02/47] Make clippy happy Signed-off-by: Alexandru Gheorghe --- substrate/client/authority-discovery/src/worker.rs | 2 +- .../authority-discovery/src/worker/addr_cache.rs | 10 +++++----- .../client/authority-discovery/src/worker/tests.rs | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index eb898aaec5ef..5e47856b1ac4 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -26,7 +26,7 @@ use std::{ collections::{HashMap, HashSet}, marker::PhantomData, sync::Arc, - time::{Duration, Instant}, + time::Duration, }; use futures::{channel::mpsc, future, stream::Fuse, FutureExt, Stream, StreamExt}; diff --git a/substrate/client/authority-discovery/src/worker/addr_cache.rs b/substrate/client/authority-discovery/src/worker/addr_cache.rs index b6af11f73886..76c31d7fdaf7 100644 --- a/substrate/client/authority-discovery/src/worker/addr_cache.rs +++ b/substrate/client/authority-discovery/src/worker/addr_cache.rs @@ -89,7 +89,7 @@ impl AddrCache { let to_keep_addresses = old_addresses .iter() .filter(|(addr, expires)| **expires >= time_now && !addresses.contains_key(addr)) - .map(|(addr, expires)| (addr.clone(), expires.clone())) + .map(|(addr, expires)| (addr.clone(), *expires)) .collect::>(); addresses.extend(to_keep_addresses.clone()); @@ -279,7 +279,7 @@ mod tests { cache.insert(third.0.clone(), vec![third.1.clone()]); assert_eq!( - Some(&HashSet::from([third.1.clone()])), + Some(HashSet::from([third.1.clone()])), cache.get_addresses_by_authority_id(&third.0), "Expect `get_addresses_by_authority_id` to return addresses of third authority.", ); @@ -373,7 +373,7 @@ mod tests { 
cache.get_authority_ids_by_peer_id(&peer_id_from_multiaddr(&multiaddr3).unwrap()) ); assert_eq!( - &HashSet::from([multiaddr2.clone(), multiaddr3.clone(), multiaddr4.clone()]), + HashSet::from([multiaddr2.clone(), multiaddr3.clone(), multiaddr4.clone()]), cache.get_addresses_by_authority_id(&authority1).unwrap(), ); @@ -403,11 +403,11 @@ mod tests { assert_eq!(2, addr_cache.num_authority_ids()); assert_eq!( - &HashSet::from([addr.clone()]), + HashSet::from([addr.clone()]), addr_cache.get_addresses_by_authority_id(&authority_id0).unwrap() ); assert_eq!( - &HashSet::from([addr]), + HashSet::from([addr]), addr_cache.get_addresses_by_authority_id(&authority_id1).unwrap() ); } diff --git a/substrate/client/authority-discovery/src/worker/tests.rs b/substrate/client/authority-discovery/src/worker/tests.rs index c29120881940..47b53513b1bc 100644 --- a/substrate/client/authority-discovery/src/worker/tests.rs +++ b/substrate/client/authority-discovery/src/worker/tests.rs @@ -533,7 +533,7 @@ impl DhtValueFoundTester { &mut self, strict_record_validation: bool, values: Vec<(KademliaKey, Vec)>, - ) -> Option<&HashSet> { + ) -> Option> { let (_dht_event_tx, dht_event_rx) = channel(1); let local_test_api = Arc::new(TestApi { authorities: vec![self.remote_authority_public.into()] }); @@ -597,7 +597,7 @@ fn strict_accept_address_with_peer_signature() { let cached_remote_addresses = tester.process_value_found(true, kv_pairs); assert_eq!( - Some(&HashSet::from([addr])), + Some(HashSet::from([addr])), cached_remote_addresses, "Expect worker to only cache `Multiaddr`s with `PeerId`s.", ); @@ -675,7 +675,7 @@ fn do_not_cache_addresses_without_peer_id() { let cached_remote_addresses = tester.process_value_found(false, kv_pairs); assert_eq!( - Some(&HashSet::from([multiaddr_with_peer_id])), + Some(HashSet::from([multiaddr_with_peer_id])), cached_remote_addresses, "Expect worker to only cache `Multiaddr`s with `PeerId`s.", ); From 64f38d28b859833fb4cabdcf48d8d8071cae554f Mon Sep 17 
00:00:00 2001 From: Alexandru Gheorghe Date: Fri, 22 Mar 2024 09:32:33 +0200 Subject: [PATCH 03/47] Fix warnings Signed-off-by: Alexandru Gheorghe --- substrate/client/network/src/discovery.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index 8c03f17d3bbd..2a07b0620db0 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -72,7 +72,7 @@ use libp2p::{ }, PeerId, }; -use log::{debug, error, info, trace, warn}; +use log::{debug, info, trace, warn}; use sp_core::hexdisplay::HexDisplay; use std::{ cmp, @@ -220,7 +220,6 @@ impl DiscoveryConfig { // auto-insertion and instead add peers manually. config.set_kbucket_inserts(KademliaBucketInserts::Manual); config.disjoint_query_paths(kademlia_disjoint_query_paths); - let store = MemoryStore::new(local_peer_id); let mut kad = Kademlia::with_config(local_peer_id, store, config); @@ -784,7 +783,7 @@ impl NetworkBehaviour for DiscoveryBehaviour { // Let's directly finish the query, as we are only interested in a // quorum of 1. 
if let Some(kad) = self.kademlia.as_mut() { - if let Some(query) = kad.query_mut(&id) { + if let Some(_query) = kad.query_mut(&id) { // Let the query continue, to increase the chances we // discover all possible addresses, for the cases where more // addresses might exist in DHT, for example when the node From ce87688a837ea75a306aab7b24adc9674b17b04a Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Mon, 25 Mar 2024 10:11:10 +0200 Subject: [PATCH 04/47] Refactor gossip support Signed-off-by: Alexandru Gheorghe --- polkadot/node/network/bridge/src/network.rs | 14 +++ polkadot/node/network/bridge/src/tx/mod.rs | 16 +++ .../network/bridge/src/validator_discovery.rs | 38 +++++++ .../node/network/gossip-support/src/lib.rs | 105 ++++++++++++++---- polkadot/node/subsystem-types/src/messages.rs | 10 ++ .../client/authority-discovery/src/worker.rs | 2 +- substrate/client/network/src/discovery.rs | 10 +- 7 files changed, 172 insertions(+), 23 deletions(-) diff --git a/polkadot/node/network/bridge/src/network.rs b/polkadot/node/network/bridge/src/network.rs index 21bed019256a..a966f7f9e219 100644 --- a/polkadot/node/network/bridge/src/network.rs +++ b/polkadot/node/network/bridge/src/network.rs @@ -205,6 +205,12 @@ pub trait Network: Clone + Send + 'static { multiaddresses: HashSet, ) -> Result<(), String>; + async fn add_peers_to_reserved_set( + &mut self, + protocol: ProtocolName, + multiaddresses: HashSet, + ) -> Result<(), String>; + /// Removes the peers for the protocol's peer set (both reserved and non-reserved). 
async fn remove_from_peers_set( &mut self, @@ -241,6 +247,14 @@ impl Network for Arc> { NetworkService::set_reserved_peers(&**self, protocol, multiaddresses) } + async fn add_peers_to_reserved_set( + &mut self, + protocol: ProtocolName, + multiaddresses: HashSet, + ) -> Result<(), String> { + NetworkService::add_peers_to_reserved_set(&**self, protocol, multiaddresses) + } + async fn remove_from_peers_set( &mut self, protocol: ProtocolName, diff --git a/polkadot/node/network/bridge/src/tx/mod.rs b/polkadot/node/network/bridge/src/tx/mod.rs index d5be6f01c337..58825419bd62 100644 --- a/polkadot/node/network/bridge/src/tx/mod.rs +++ b/polkadot/node/network/bridge/src/tx/mod.rs @@ -362,6 +362,22 @@ where .await; return (network_service, authority_discovery_service) }, + + NetworkBridgeTxMessage::AddToResolvedValidators { validator_addrs, peer_set } => { + gum::trace!( + target: LOG_TARGET, + action = "AddToResolvedValidators", + peer_set = ?peer_set, + ?validator_addrs, + "Received a resolved validator connection request", + ); + + let all_addrs = validator_addrs.into_iter().flatten().collect(); + let network_service = validator_discovery + .on_add_to_resolved_request(all_addrs, peer_set, network_service) + .await; + return (network_service, authority_discovery_service) + }, } (network_service, authority_discovery_service) } diff --git a/polkadot/node/network/bridge/src/validator_discovery.rs b/polkadot/node/network/bridge/src/validator_discovery.rs index b11af8a8a089..b07ebd7b8a23 100644 --- a/polkadot/node/network/bridge/src/validator_discovery.rs +++ b/polkadot/node/network/bridge/src/validator_discovery.rs @@ -102,6 +102,44 @@ impl Service { network_service } + /// Connect to already resolved addresses. 
+ pub async fn on_add_to_resolved_request( + &mut self, + newly_requested: HashSet, + peer_set: PeerSet, + mut network_service: N, + ) -> N { + let state = &mut self.state[peer_set]; + let new_peer_ids: HashSet = extract_peer_ids(newly_requested.iter().cloned()); + let num_peers = new_peer_ids.len(); + + state.previously_requested.extend(new_peer_ids); + + gum::debug!( + target: LOG_TARGET, + ?peer_set, + ?num_peers, + "New add to resolved validators request", + ); + + // ask the network to connect to these nodes and not disconnect + // from them until they are removed from the set. + // + // for peer-set management, the main protocol name should be used regardless of + // the negotiated version. + if let Err(e) = network_service + .add_peers_to_reserved_set( + self.peerset_protocol_names.get_main_name(peer_set), + newly_requested, + ) + .await + { + gum::warn!(target: LOG_TARGET, err = ?e, "AuthorityDiscoveryService returned an invalid multiaddress"); + } + + network_service + } + /// On a new connection request, a peer set update will be issued. /// It will ask the network to connect to the validators and not disconnect /// from them at least until the next request is issued for the same peer set. diff --git a/polkadot/node/network/gossip-support/src/lib.rs b/polkadot/node/network/gossip-support/src/lib.rs index 9c1fa26a1bc6..449b25a89c53 100644 --- a/polkadot/node/network/gossip-support/src/lib.rs +++ b/polkadot/node/network/gossip-support/src/lib.rs @@ -70,10 +70,10 @@ const BACKOFF_DURATION: Duration = Duration::from_secs(5); const BACKOFF_DURATION: Duration = Duration::from_millis(500); // The authorithy_discovery queries runs every ten minutes, -// so no point in trying more often than that, so let's -// try re-resolving the authorithies every 10 minutes and force -// the reconnection to the ones that changed their address. 
-const TRY_RECONNECT_AFTER: Duration = Duration::from_secs(60 * 10); +// so it make sense to run a bit more often than that to +// detect changes as often as we can, but not too often since +// it won't help. +const TRY_RERESOLVE_AUTHORITIES: Duration = Duration::from_secs(5 * 60); /// Duration after which we consider low connectivity a problem. /// @@ -101,8 +101,8 @@ pub struct GossipSupport { // their PeerID, we will connect to them in the best case after // a session, so we need to try more often to resolved peers and // reconnect to them. The authorithy_discovery queries runs every ten - // minutes, so no point in trying more often than that, so let's - // try reconnecting every 10 minutes here as well. + // minutes, so we can't detect changes in the address more often + // that that. last_connection_request: Option, /// First time we did not reach our connectivity threshold. @@ -214,16 +214,19 @@ where let since_last_reconnect = self.last_connection_request.map(|i| i.elapsed()).unwrap_or_default(); - let force_request = - since_failure >= BACKOFF_DURATION || since_last_reconnect >= TRY_RECONNECT_AFTER; + let force_request = since_failure >= BACKOFF_DURATION; + let re_resolve_authorities = since_last_reconnect >= TRY_RERESOLVE_AUTHORITIES; let leaf_session = Some((current_index, leaf)); let maybe_new_session = match self.last_session_index { Some(i) if current_index <= i => None, _ => leaf_session, }; - let maybe_issue_connection = - if force_request { leaf_session } else { maybe_new_session }; + let maybe_issue_connection = if force_request || re_resolve_authorities { + leaf_session + } else { + maybe_new_session + }; if let Some((session_index, relay_parent)) = maybe_issue_connection { let session_info = @@ -278,7 +281,12 @@ where // to clean up all connections. 
Vec::new() }; - self.issue_connection_request(sender, connections).await; + + if force_request || is_new_session { + self.issue_connection_request(sender, connections).await; + } else if re_resolve_authorities { + self.issue_connection_request_to_changed(sender, connections).await; + } } if is_new_session { @@ -343,17 +351,14 @@ where authority_check_result } - async fn issue_connection_request( + async fn resolve_authorities( &mut self, - sender: &mut Sender, authorities: Vec, - ) where - Sender: overseer::GossipSupportSenderTrait, - { - let num = authorities.len(); + ) -> (Vec>, HashMap>, usize) { let mut validator_addrs = Vec::with_capacity(authorities.len()); - let mut failures = 0; let mut resolved = HashMap::with_capacity(authorities.len()); + let mut failures = 0; + for authority in authorities { if let Some(addrs) = self.authority_discovery.get_addresses_by_authority_id(authority.clone()).await @@ -369,6 +374,57 @@ where ); } } + (validator_addrs, resolved, failures) + } + + async fn issue_connection_request_to_changed( + &mut self, + sender: &mut Sender, + authorities: Vec, + ) where + Sender: overseer::GossipSupportSenderTrait, + { + let (_, resolved, _) = self.resolve_authorities(authorities).await; + + let mut changed = Vec::new(); + + for (authority, new_addresses) in &resolved { + match self.resolved_authorities.get(authority) { + Some(old_address) if !old_address.is_superset(new_addresses) => + changed.push(new_addresses.clone()), + None => changed.push(new_addresses.clone()), + _ => {}, + } + } + gum::debug!( + target: LOG_TARGET, + num_changed = ?changed.len(), + ?changed, + "Issuing a connection request to changed validators" + ); + if !changed.is_empty() { + self.resolved_authorities = resolved; + + sender + .send_message(NetworkBridgeTxMessage::AddToResolvedValidators { + validator_addrs: changed, + peer_set: PeerSet::Validation, + }) + .await; + } + } + + async fn issue_connection_request( + &mut self, + sender: &mut Sender, + authorities: Vec, 
+ ) where + Sender: overseer::GossipSupportSenderTrait, + { + let num = authorities.len(); + + let (validator_addrs, resolved, failures) = self.resolve_authorities(authorities).await; + self.resolved_authorities = resolved; gum::debug!(target: LOG_TARGET, %num, "Issuing a connection request"); @@ -418,16 +474,23 @@ where { let mut authority_ids: HashMap> = HashMap::new(); for authority in authorities { - let peer_id = self + let peer_ids = self .authority_discovery .get_addresses_by_authority_id(authority.clone()) .await .into_iter() .flat_map(|list| list.into_iter()) .flat_map(|addr| parse_addr(addr).ok().map(|(p, _)| p)) - .collect::>(); + .collect::>(); + + gum::trace!( + target: LOG_TARGET, + ?peer_ids, + ?authority, + "Resolved to peer ids" + ); - for p in peer_id { + for p in peer_ids { authority_ids.entry(p).or_default().insert(authority.clone()); } } diff --git a/polkadot/node/subsystem-types/src/messages.rs b/polkadot/node/subsystem-types/src/messages.rs index eeb684149c80..5241414cbca0 100644 --- a/polkadot/node/subsystem-types/src/messages.rs +++ b/polkadot/node/subsystem-types/src/messages.rs @@ -442,6 +442,16 @@ pub enum NetworkBridgeTxMessage { /// The peer set we want the connection on. peer_set: PeerSet, }, + + /// Extends the known validators set with new peers we already know the `Multiaddrs`, this is + /// usually needed for validators that change their address mid-session. it is usually called + /// after a ConnectToResolvedValidators at the beginning of the session. + AddToResolvedValidators { + /// Each entry corresponds to the addresses of an already resolved validator. + validator_addrs: Vec>, + /// The peer set we want the connection on. + peer_set: PeerSet, + }, } /// Availability Distribution Message. 
diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 5e47856b1ac4..0a62baa63ac3 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -141,7 +141,7 @@ pub struct Worker { /// Set of lookups we can still received records. /// These are the entries in the `in_flight_lookups` for which - /// we got at least on successfull result. + /// we got at least one successfull result. known_lookups: HashMap, addr_cache: addr_cache::AddrCache, diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index 2a07b0620db0..487e54846d15 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -775,9 +775,11 @@ impl NetworkBehaviour for DiscoveryBehaviour { Ok(GetRecordOk::FoundRecord(r)) => { debug!( target: "sub-libp2p", - "Libp2p => Found record ({:?}) with value: {:?}", + "Libp2p => Found record ({:?}) with value: {:?} id {:?} stats {:?}", r.record.key, r.record.value, + id, + stats, ); // Let's directly finish the query, as we are only interested in a @@ -805,6 +807,12 @@ impl NetworkBehaviour for DiscoveryBehaviour { Ok(GetRecordOk::FinishedWithNoAdditionalRecord { cache_candidates, }) => { + debug!( + target: "sub-libp2p", + "Libp2p => Finished with no-additional-record {:?} stats {:?}", + id, + stats, + ); // We always need to remove the record to not leak any data! 
if let Some(record) = self.records_to_publish.remove(&id) { if cache_candidates.is_empty() { From a69ba99e46e4781cf7239399acc68a4a057a3415 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Mon, 25 Mar 2024 10:33:58 +0200 Subject: [PATCH 05/47] Make clippy happy Signed-off-by: Alexandru Gheorghe --- polkadot/node/network/bridge/src/network.rs | 1 + polkadot/node/network/bridge/src/rx/tests.rs | 8 ++++++++ polkadot/node/network/bridge/src/tx/tests.rs | 8 ++++++++ polkadot/node/network/bridge/src/validator_discovery.rs | 9 +++++++++ substrate/client/network/src/discovery.rs | 3 ++- 5 files changed, 28 insertions(+), 1 deletion(-) diff --git a/polkadot/node/network/bridge/src/network.rs b/polkadot/node/network/bridge/src/network.rs index a966f7f9e219..e82a32d5c873 100644 --- a/polkadot/node/network/bridge/src/network.rs +++ b/polkadot/node/network/bridge/src/network.rs @@ -205,6 +205,7 @@ pub trait Network: Clone + Send + 'static { multiaddresses: HashSet, ) -> Result<(), String>; + /// Ask the network to extend the reserved set with these nodes. 
async fn add_peers_to_reserved_set( &mut self, protocol: ProtocolName, diff --git a/polkadot/node/network/bridge/src/rx/tests.rs b/polkadot/node/network/bridge/src/rx/tests.rs index 6847b8a7e24d..c29148497cea 100644 --- a/polkadot/node/network/bridge/src/rx/tests.rs +++ b/polkadot/node/network/bridge/src/rx/tests.rs @@ -124,6 +124,14 @@ impl Network for TestNetwork { Ok(()) } + async fn add_peers_to_reserved_set( + &mut self, + _protocol: ProtocolName, + _: HashSet, + ) -> Result<(), String> { + Ok(()) + } + async fn remove_from_peers_set( &mut self, _protocol: ProtocolName, diff --git a/polkadot/node/network/bridge/src/tx/tests.rs b/polkadot/node/network/bridge/src/tx/tests.rs index c3cf0f322f68..6842c36f419c 100644 --- a/polkadot/node/network/bridge/src/tx/tests.rs +++ b/polkadot/node/network/bridge/src/tx/tests.rs @@ -148,6 +148,14 @@ impl Network for TestNetwork { Ok(()) } + async fn add_peers_to_reserved_set( + &mut self, + _protocol: ProtocolName, + _: HashSet, + ) -> Result<(), String> { + Ok(()) + } + async fn remove_from_peers_set( &mut self, _protocol: ProtocolName, diff --git a/polkadot/node/network/bridge/src/validator_discovery.rs b/polkadot/node/network/bridge/src/validator_discovery.rs index b07ebd7b8a23..16047154ba34 100644 --- a/polkadot/node/network/bridge/src/validator_discovery.rs +++ b/polkadot/node/network/bridge/src/validator_discovery.rs @@ -270,6 +270,15 @@ mod tests { Ok(()) } + async fn add_peers_to_reserved_set( + &mut self, + _protocol: ProtocolName, + multiaddresses: HashSet, + ) -> Result<(), String> { + self.peers_set.extend(extract_peer_ids(multiaddresses.into_iter())); + Ok(()) + } + async fn remove_from_peers_set( &mut self, _protocol: ProtocolName, diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index 23977485ec2d..1693775190bc 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -818,9 +818,10 @@ impl NetworkBehaviour for 
DiscoveryBehaviour { }) => { debug!( target: "sub-libp2p", - "Libp2p => Finished with no-additional-record {:?} stats {:?}", + "Libp2p => Finished with no-additional-record {:?} stats {:?} took {:?} ms", id, stats, + stats.duration().map(|val| val.as_millis()) ); // We always need to remove the record to not leak any data! if let Some(record) = self.records_to_publish.remove(&id) { From 13057634fea0b434a0bf88dc5166c51ac921eb2a Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 08:26:06 +0300 Subject: [PATCH 06/47] Some other hacks Signed-off-by: Alexandru Gheorghe --- Cargo.lock | 92 +++++-------- Cargo.toml | 4 + .../client/authority-discovery/src/worker.rs | 130 ++++++++++++++---- .../src/worker/addr_cache.rs | 15 +- .../src/worker/schema/dht-v2.proto | 6 + substrate/client/network/src/behaviour.rs | 12 +- substrate/client/network/src/discovery.rs | 12 +- substrate/client/network/src/event.rs | 7 +- substrate/client/network/src/service.rs | 8 ++ .../client/network/src/service/traits.rs | 8 +- 10 files changed, 199 insertions(+), 95 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 81eb682a27d7..8980aef460f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7290,8 +7290,6 @@ checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "libp2p" version = "0.51.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f35eae38201a993ece6bdc823292d6abd1bffed1c4d0f4a3517d2bd8e1d917fe" dependencies = [ "bytes", "futures", @@ -7323,8 +7321,6 @@ dependencies = [ [[package]] name = "libp2p-allow-block-list" version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "510daa05efbc25184458db837f6f9a5143888f1caa742426d92e1833ddd38a50" dependencies = [ "libp2p-core", "libp2p-identity", @@ -7335,8 +7331,6 @@ dependencies = [ [[package]] name = "libp2p-connection-limits" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "4caa33f1d26ed664c4fe2cca81a08c8e07d4c1c04f2f4ac7655c2dd85467fda0" dependencies = [ "libp2p-core", "libp2p-identity", @@ -7347,8 +7341,6 @@ dependencies = [ [[package]] name = "libp2p-core" version = "0.39.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c1df63c0b582aa434fb09b2d86897fa2b419ffeccf934b36f87fcedc8e835c2" dependencies = [ "either", "fnv", @@ -7359,7 +7351,7 @@ dependencies = [ "log", "multiaddr", "multihash 0.17.0", - "multistream-select", + "multistream-select 0.12.1", "once_cell", "parking_lot 0.12.1", "pin-project", @@ -7375,11 +7367,10 @@ dependencies = [ [[package]] name = "libp2p-dns" version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146ff7034daae62077c415c2376b8057368042df6ab95f5432ad5e88568b1554" dependencies = [ "futures", "libp2p-core", + "libp2p-identity", "log", "parking_lot 0.12.1", "smallvec", @@ -7389,8 +7380,6 @@ dependencies = [ [[package]] name = "libp2p-identify" version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5455f472243e63b9c497ff320ded0314254a9eb751799a39c283c6f20b793f3c" dependencies = [ "asynchronous-codec", "either", @@ -7411,8 +7400,6 @@ dependencies = [ [[package]] name = "libp2p-identity" version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "276bb57e7af15d8f100d3c11cbdd32c6752b7eef4ba7a18ecf464972c07abcce" dependencies = [ "bs58 0.4.0", "ed25519-dalek", @@ -7429,8 +7416,6 @@ dependencies = [ [[package]] name = "libp2p-kad" version = "0.43.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39d5ef876a2b2323d63c258e63c2f8e36f205fe5a11f0b3095d59635650790ff" dependencies = [ "arrayvec 0.7.4", "asynchronous-codec", @@ -7457,8 +7442,6 @@ dependencies = [ [[package]] name = "libp2p-mdns" version = "0.43.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"19983e1f949f979a928f2c603de1cf180cc0dc23e4ac93a62651ccb18341460b" dependencies = [ "data-encoding", "futures", @@ -7478,11 +7461,10 @@ dependencies = [ [[package]] name = "libp2p-metrics" version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a42ec91e227d7d0dafa4ce88b333cdf5f277253873ab087555c92798db2ddd46" dependencies = [ "libp2p-core", "libp2p-identify", + "libp2p-identity", "libp2p-kad", "libp2p-ping", "libp2p-swarm", @@ -7492,8 +7474,6 @@ dependencies = [ [[package]] name = "libp2p-noise" version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3673da89d29936bc6435bafc638e2f184180d554ce844db65915113f86ec5e" dependencies = [ "bytes", "curve25519-dalek 3.2.0", @@ -7515,14 +7495,13 @@ dependencies = [ [[package]] name = "libp2p-ping" version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e57759c19c28a73ef1eb3585ca410cefb72c1a709fcf6de1612a378e4219202" dependencies = [ "either", "futures", "futures-timer", "instant", "libp2p-core", + "libp2p-identity", "libp2p-swarm", "log", "rand", @@ -7532,8 +7511,6 @@ dependencies = [ [[package]] name = "libp2p-quic" version = "0.7.0-alpha.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6b26abd81cd2398382a1edfe739b539775be8a90fa6914f39b2ab49571ec735" dependencies = [ "bytes", "futures", @@ -7554,8 +7531,6 @@ dependencies = [ [[package]] name = "libp2p-request-response" version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffdb374267d42dc5ed5bc53f6e601d4a64ac5964779c6e40bb9e4f14c1e30d5" dependencies = [ "async-trait", "futures", @@ -7570,8 +7545,6 @@ dependencies = [ [[package]] name = "libp2p-swarm" version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "903b3d592d7694e56204d211f29d31bc004be99386644ba8731fc3e3ef27b296" dependencies = [ "either", "fnv", @@ -7591,25 +7564,22 @@ 
dependencies = [ [[package]] name = "libp2p-swarm-derive" version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fba456131824ab6acd4c7bf61e9c0f0a3014b5fc9868ccb8e10d344594cdc4f" dependencies = [ "heck 0.4.1", "quote", - "syn 1.0.109", + "syn 2.0.53", ] [[package]] name = "libp2p-tcp" version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d33698596d7722d85d3ab0c86c2c322254fce1241e91208e3679b4eb3026cf" dependencies = [ "futures", "futures-timer", "if-watch", "libc", "libp2p-core", + "libp2p-identity", "log", "socket2 0.4.9", "tokio", @@ -7618,8 +7588,6 @@ dependencies = [ [[package]] name = "libp2p-tls" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff08d13d0dc66e5e9ba6279c1de417b84fa0d0adc3b03e5732928c180ec02781" dependencies = [ "futures", "futures-rustls", @@ -7637,8 +7605,6 @@ dependencies = [ [[package]] name = "libp2p-wasm-ext" version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77dff9d32353a5887adb86c8afc1de1a94d9e8c3bc6df8b2201d7cdf5c848f43" dependencies = [ "futures", "js-sys", @@ -7651,27 +7617,24 @@ dependencies = [ [[package]] name = "libp2p-websocket" version = "0.41.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "111273f7b3d3510524c752e8b7a5314b7f7a1fee7e68161c01a7d72cbb06db9f" dependencies = [ "either", "futures", "futures-rustls", "libp2p-core", + "libp2p-identity", "log", "parking_lot 0.12.1", "quicksink", "rw-stream-sink", "soketto", "url", - "webpki-roots 0.22.6", + "webpki-roots 0.23.1", ] [[package]] name = "libp2p-yamux" version = "0.43.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd21d950662700a385d4c6d68e2f5f54d778e97068cdd718522222ef513bda" dependencies = [ "futures", "libp2p-core", @@ -8421,6 +8384,18 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +[[package]] +name = "multistream-select" +version = "0.12.1" +dependencies = [ + "bytes", + "futures", + "log", + "pin-project", + "smallvec", + "unsigned-varint", +] + [[package]] name = "multistream-select" version = "0.12.1" @@ -14460,8 +14435,6 @@ dependencies = [ [[package]] name = "quick-protobuf-codec" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1693116345026436eb2f10b677806169c1a1260c1c60eaaffe3fb5a29ae23d8b" dependencies = [ "asynchronous-codec", "bytes", @@ -15453,6 +15426,16 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a716eb65e3158e90e17cd93d855216e27bde02745ab842f2cab4a39dba1bacf" +[[package]] +name = "rustls-webpki" +version = "0.100.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6a5fc258f1c1276dfe3016516945546e2d5383911efc0fc4f1cdc5df3a4ae3" +dependencies = [ + "ring 0.16.20", + "untrusted 0.7.1", +] + [[package]] name = "rustls-webpki" version = "0.101.4" @@ -15506,8 +15489,6 @@ dependencies = [ [[package]] name = "rw-stream-sink" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26338f5e09bb721b85b135ea05af7767c90b52f6de4f087d4f4a3a9d64e7dc04" dependencies = [ "futures", "pin-project", @@ -16304,7 +16285,7 @@ dependencies = [ "linked_hash_set", "log", "mockall", - "multistream-select", + "multistream-select 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", "parity-scale-codec", "parking_lot 0.12.1", "partial_sort", @@ -21840,11 +21821,11 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.22.6" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" +checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338" dependencies = [ - 
"webpki", + "rustls-webpki 0.100.3", ] [[package]] @@ -22372,12 +22353,11 @@ dependencies = [ [[package]] name = "x509-parser" -version = "0.14.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ecbeb7b67ce215e40e3cc7f2ff902f94a223acf44995934763467e7b1febc8" +checksum = "7069fba5b66b9193bd2c5d3d4ff12b839118f6bcbef5328efafafb5395cf63da" dependencies = [ "asn1-rs", - "base64 0.13.1", "data-encoding", "der-parser", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index e6162830375f..ca22e38f12f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -633,3 +633,7 @@ wasmi = { opt-level = 3 } x25519-dalek = { opt-level = 3 } yamux = { opt-level = 3 } zeroize = { opt-level = 3 } + +[patch.crates-io] +libp2p = { path = "../rust-libp2p/libp2p" } +libp2p-identity = { path = "../rust-libp2p/identity" } \ No newline at end of file diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index e0fed1ef4bd1..e3b06dc45ad5 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -26,7 +26,7 @@ use std::{ collections::{HashMap, HashSet}, marker::PhantomData, sync::Arc, - time::Duration, + time::{Duration, SystemTime, UNIX_EPOCH}, }; use futures::{channel::mpsc, future, stream::Fuse, FutureExt, Stream, StreamExt}; @@ -34,7 +34,9 @@ use futures::{channel::mpsc, future, stream::Fuse, FutureExt, Stream, StreamExt} use addr_cache::AddrCache; use codec::{Decode, Encode}; use ip_network::IpNetwork; -use libp2p::{core::multiaddr, identity::PublicKey, multihash::Multihash, Multiaddr, PeerId}; +use libp2p::{ + core::multiaddr, identity::PublicKey, kad::PeerRecord, multihash::Multihash, Multiaddr, PeerId, +}; use linked_hash_set::LinkedHashSet; use multihash_codetable::{Code, MultihashDigest}; @@ -157,6 +159,9 @@ pub struct Worker { /// we got at least one successfull result. 
known_lookups: HashMap, + /// Peers with old records + last_known_record: HashMap, Option)>, + addr_cache: addr_cache::AddrCache, metrics: Option, @@ -285,6 +290,7 @@ where role, metrics, phantom: PhantomData, + last_known_record: HashMap::new(), } } @@ -336,7 +342,9 @@ where fn process_message_from_service(&self, msg: ServicetoWorkerMsg) { match msg { ServicetoWorkerMsg::GetAddressesByAuthorityId(authority, sender) => { - let _ = sender.send(self.addr_cache.get_addresses_by_authority_id(&authority)); + let _ = sender.send( + self.addr_cache.get_addresses_by_authority_id(&authority).map(Clone::clone), + ); }, ServicetoWorkerMsg::GetAuthorityIdsByPeerId(peer_id, sender) => { let _ = sender @@ -526,7 +534,7 @@ where } if log_enabled!(log::Level::Debug) { - let hashes: Vec<_> = v.iter().map(|(hash, _value)| hash.clone()).collect(); + let hashes: Vec<_> = v.iter().map(|record| record.record.key.clone()).collect(); debug!(target: LOG_TARGET, "Value for hash '{:?}' found on Dht.", hashes); } @@ -583,12 +591,16 @@ where } } - fn handle_dht_value_found_event(&mut self, values: Vec<(KademliaKey, Vec)>) -> Result<()> { + fn handle_dht_value_found_event(&mut self, values: Vec) -> Result<()> { // Ensure `values` is not empty and all its keys equal. - let remote_key = single(values.iter().map(|(key, _)| key.clone())) + let remote_key = single(values.iter().map(|record| record.record.key.clone())) .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? .ok_or(Error::ReceivingDhtValueFoundEventWithNoRecords)?; + let mut peer_id = single(values.iter().map(|record| record.peer.clone())) + .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? 
+ .ok_or(Error::ReceivingDhtValueFoundEventWithNoRecords)?; + let values_clone = values.clone(); let authority_id: AuthorityId = if let Some(authority_id) = self.in_flight_lookups.remove(&remote_key) { authority_id @@ -601,13 +613,17 @@ where self.known_lookups.insert(remote_key.clone(), authority_id.clone()); let local_peer_id = self.network.local_peer_id(); - - let remote_addresses: Vec = values + let kadmelia_key = remote_key; + let remote_addresses: Vec<(Multiaddr, u128)> = values .into_iter() - .map(|(_k, v)| { - let schema::SignedAuthorityRecord { record, auth_signature, peer_signature } = - schema::SignedAuthorityRecord::decode(v.as_slice()) - .map_err(Error::DecodingProto)?; + .map(|record| { + let schema::SignedAuthorityRecord { + record, + auth_signature, + peer_signature, + creation_time, + } = schema::SignedAuthorityRecord::decode(record.record.value.as_slice()) + .map_err(Error::DecodingProto)?; let auth_signature = AuthoritySignature::decode(&mut &auth_signature[..]) .map_err(Error::EncodingDecodingScale)?; @@ -615,14 +631,19 @@ where if !AuthorityPair::verify(&auth_signature, &record, &authority_id) { return Err(Error::VerifyingDhtPayload) } - - let addresses: Vec = schema::AuthorityRecord::decode(record.as_slice()) - .map(|a| a.addresses) - .map_err(Error::DecodingProto)? - .into_iter() - .map(|a| a.try_into()) - .collect::>() - .map_err(Error::ParsingMultiaddress)?; + let creation_time: u128 = creation_time + .map(|creation_time| { + u128::decode(&mut &creation_time.timestamp[..]).unwrap_or_default() + }) + .unwrap_or_default(); + let addresses: Vec<(Multiaddr, u128)> = + schema::AuthorityRecord::decode(record.as_slice()) + .map(|a| a.addresses) + .map_err(Error::DecodingProto)? 
+ .into_iter() + .map(|a| a.try_into().map(|addr| (addr, creation_time))) + .collect::>() + .map_err(Error::ParsingMultiaddress)?; let get_peer_id = |a: &Multiaddr| match a.iter().last() { Some(multiaddr::Protocol::P2p(key)) => PeerId::from_multihash(key).ok(), @@ -630,12 +651,12 @@ where }; // Ignore [`Multiaddr`]s without [`PeerId`] or with own addresses. - let addresses: Vec = addresses + let addresses: Vec<(Multiaddr, u128)> = addresses .into_iter() - .filter(|a| get_peer_id(a).filter(|p| *p != local_peer_id).is_some()) + .filter(|a| get_peer_id(&a.0).filter(|p| *p != local_peer_id).is_some()) .collect(); - let remote_peer_id = single(addresses.iter().map(get_peer_id)) + let remote_peer_id = single(addresses.iter().map(|a| get_peer_id(&a.0))) .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentPeerIds)? // different peer_id in records .flatten() .ok_or(Error::ReceivingDhtValueFoundEventWithNoPeerIds)?; // no records with peer_id in them @@ -662,14 +683,65 @@ where } Ok(addresses) }) - .collect::>>>()? + .collect::>>>()? .into_iter() .flatten() .take(MAX_ADDRESSES_PER_AUTHORITY) .collect(); - if !remote_addresses.is_empty() { - self.addr_cache.insert(authority_id, remote_addresses); + let newest_timestamp = single(remote_addresses.iter().map(|(_addr, timestamp)| *timestamp)) + .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? 
+ .unwrap_or_default(); + + let mut needs_update = false; + let last_value = self.last_known_record.entry(kadmelia_key).or_default(); + if newest_timestamp > last_value.0 { + last_value.0 = newest_timestamp; + last_value.1 = Default::default(); + last_value.2 = values_clone.first().cloned(); + needs_update = true; + let peers_that_need_updating = last_value.1.clone(); + log::error!( + "convergence: Updating stored value on peers newest_timestamp {:?} === last_value {:?} peers_that_need_updating {:?}", + newest_timestamp, + last_value.0, + peers_that_need_updating + ); + for record in values_clone { + self.network.put_record_to(record, peers_that_need_updating.clone()); + } + // TODO update records on peers with old records. + } else if newest_timestamp == last_value.0 { + // Same record just update in case this is a record from old nods that don't have + // timestamp. + needs_update = true; + log::error!( + "convergence: Peer already has the latest value newest_timestamp {:?} == last_value {:?}", + newest_timestamp, + last_value.0 + ); + } else { + let wtf_peer_id = peer_id.clone(); + let peers_that_need_updating: HashSet = peer_id.take().into_iter().collect(); + log::error!( + "convergence: Peers informs us of old value newest_timestamp {:?} == last_value {:?} == peers_that_need_updating {:?} wtf_peer_id {:?}", + newest_timestamp, + last_value, + peers_that_need_updating, + wtf_peer_id + ); + for record in last_value.2.iter() { + self.network.put_record_to(record.clone(), peers_that_need_updating.clone()); + } + } + + if let Some(peer_id) = peer_id { + last_value.1.insert(peer_id); + } + + if !remote_addresses.is_empty() && needs_update { + self.addr_cache + .insert(authority_id, remote_addresses.into_iter().map(|(addr, _)| addr).collect()); if let Some(metrics) = &self.metrics { metrics .known_authorities_count @@ -777,11 +849,17 @@ fn sign_record_with_authority_ids( // Scale encode let auth_signature = auth_signature.encode(); + let creation_time = 
SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); + log::error!("Publish record created at {:?}", creation_time); + + let creation_time = creation_time.encode(); + let creation_time = schema::Timestamp { timestamp: creation_time }; let signed_record = schema::SignedAuthorityRecord { record: serialized_record.clone(), auth_signature, peer_signature: peer_signature.clone(), + creation_time: Some(creation_time), } .encode_to_vec(); diff --git a/substrate/client/authority-discovery/src/worker/addr_cache.rs b/substrate/client/authority-discovery/src/worker/addr_cache.rs index 76c31d7fdaf7..36c4269c85a1 100644 --- a/substrate/client/authority-discovery/src/worker/addr_cache.rs +++ b/substrate/client/authority-discovery/src/worker/addr_cache.rs @@ -36,7 +36,7 @@ pub(super) struct AddrCache { /// Since we may store the mapping across several sessions, a single /// `PeerId` might correspond to multiple `AuthorityId`s. However, /// it's not expected that a single `AuthorityId` can have multiple `PeerId`s. - authority_id_to_addresses: HashMap>, + authority_id_to_addresses: HashMap>, peer_id_to_authority_ids: HashMap>, } @@ -50,10 +50,15 @@ impl AddrCache { /// Inserts the given [`AuthorityId`] and [`Vec`] pair for future lookups by /// [`AuthorityId`] or [`PeerId`]. 
- pub fn insert(&mut self, authority_id: AuthorityId, addresses: Vec) { + pub fn insert(&mut self, authority_id: AuthorityId, addresses: Vec<(Multiaddr, u128)>) { + let newest_address = addresses + .iter() + .map(|(_, creation_time)| *creation_time) + .max() + .unwrap_or_default(); let mut addresses = addresses .into_iter() - .map(|addr| (addr, Instant::now() + std::time::Duration::from_secs(24 * 60 * 60))) + .map(|(addr, creation_time)| (addr, creation_time)) .collect::>(); let mut peer_ids = addresses_to_peer_ids(&addresses); @@ -88,7 +93,7 @@ impl AddrCache { let to_keep_addresses = old_addresses .iter() - .filter(|(addr, expires)| **expires >= time_now && !addresses.contains_key(addr)) + .filter(|(addr, expires)| creation_tim&& !addresses.contains_key(addr)) .map(|(addr, expires)| (addr.clone(), *expires)) .collect::>(); @@ -196,7 +201,7 @@ fn peer_id_from_multiaddr(addr: &Multiaddr) -> Option { }) } -fn addresses_to_peer_ids(addresses: &HashMap) -> HashSet { +fn addresses_to_peer_ids(addresses: &HashMap) -> HashSet { addresses.keys().filter_map(peer_id_from_multiaddr).collect::>() } diff --git a/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto b/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto index fdbadb426630..f039eb0a4df1 100644 --- a/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto +++ b/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto @@ -13,6 +13,10 @@ message PeerSignature { bytes public_key = 2; } +message Timestamp { + bytes timestamp = 1; +} + // Then we need to serialize the authority record and signature to send them over the wire. message SignedAuthorityRecord { bytes record = 1; @@ -20,4 +24,6 @@ message SignedAuthorityRecord { // Even if there are multiple `record.addresses`, all of them have the same peer id. // Old versions are missing this field. It is optional in order to provide compatibility both ways. 
PeerSignature peer_signature = 3; + // Utc timestamp when this record was created + Timestamp creation_time = 4; } diff --git a/substrate/client/network/src/behaviour.rs b/substrate/client/network/src/behaviour.rs index 1f234683392f..163c3e6aea90 100644 --- a/substrate/client/network/src/behaviour.rs +++ b/substrate/client/network/src/behaviour.rs @@ -31,8 +31,12 @@ use crate::{ use futures::channel::oneshot; use libp2p::{ - core::Multiaddr, identify::Info as IdentifyInfo, identity::PublicKey, kad::RecordKey, - swarm::NetworkBehaviour, PeerId, + core::Multiaddr, + identify::Info as IdentifyInfo, + identity::PublicKey, + kad::{PeerRecord, RecordKey}, + swarm::NetworkBehaviour, + PeerId, }; use parking_lot::Mutex; @@ -279,6 +283,10 @@ impl Behaviour { pub fn put_value(&mut self, key: RecordKey, value: Vec) { self.discovery.put_value(key, value); } + + pub fn put_record_to(&mut self, record: PeerRecord, peers: HashSet) { + self.discovery.put_record_to(record, peers); + } } impl From for BehaviourOut { diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index 1693775190bc..0657b28d2825 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -58,7 +58,7 @@ use libp2p::{ handler::KademliaHandler, record::store::{MemoryStore, RecordStore}, GetClosestPeersError, GetRecordOk, Kademlia, KademliaBucketInserts, KademliaConfig, - KademliaEvent, QueryId, QueryResult, Quorum, Record, RecordKey, + KademliaEvent, PeerRecord, QueryId, QueryResult, Quorum, Record, RecordKey, }, mdns::{self, tokio::Behaviour as TokioMdns}, multiaddr::Protocol, @@ -408,6 +408,12 @@ impl DiscoveryBehaviour { } } + pub fn put_record_to(&mut self, record: PeerRecord, peers: HashSet) { + if let Some(kad) = self.kademlia.as_mut() { + kad.put_record_to(record.record, peers.into_iter(), Quorum::One); + } + } + /// Returns the number of nodes in each Kademlia kbucket for each Kademlia instance. 
/// /// Identifies Kademlia instances by their [`ProtocolId`] and kbuckets by the base 2 logarithm @@ -474,7 +480,7 @@ pub enum DiscoveryOut { /// The DHT yielded results for the record request. /// /// Returning the result grouped in (key, value) pairs as well as the request duration. - ValueFound(Vec<(RecordKey, Vec)>, Duration), + ValueFound(Vec, Duration), /// The record requested was not found in the DHT. /// @@ -809,7 +815,7 @@ impl NetworkBehaviour for DiscoveryBehaviour { self.records_to_publish.insert(id, r.record.clone()); DiscoveryOut::ValueFound( - vec![(r.record.key, r.record.value)], + vec![r], stats.duration().unwrap_or_default(), ) }, diff --git a/substrate/client/network/src/event.rs b/substrate/client/network/src/event.rs index dc4fd53a49aa..6823189ff101 100644 --- a/substrate/client/network/src/event.rs +++ b/substrate/client/network/src/event.rs @@ -22,7 +22,10 @@ use crate::types::ProtocolName; use bytes::Bytes; -use libp2p::{kad::record::Key, PeerId}; +use libp2p::{ + kad::{record::Key, PeerRecord}, + PeerId, +}; use sc_network_common::role::ObservedRole; @@ -31,7 +34,7 @@ use sc_network_common::role::ObservedRole; #[must_use] pub enum DhtEvent { /// The value was found. - ValueFound(Vec<(Key, Vec)>), + ValueFound(Vec), /// The requested record has not been found in the DHT. 
ValueNotFound(Key), diff --git a/substrate/client/network/src/service.rs b/substrate/client/network/src/service.rs index 47e23337633b..2c44b8355952 100644 --- a/substrate/client/network/src/service.rs +++ b/substrate/client/network/src/service.rs @@ -57,6 +57,7 @@ use crate::{ use codec::DecodeAll; use either::Either; use futures::{channel::oneshot, prelude::*}; +use libp2p::kad::PeerRecord; #[allow(deprecated)] use libp2p::{ connection_limits::Exceeded, @@ -823,6 +824,10 @@ where fn put_value(&self, key: KademliaKey, value: Vec) { let _ = self.to_worker.unbounded_send(ServiceToWorkerMsg::PutValue(key, value)); } + + fn put_record_to(&self, record: PeerRecord, peers: HashSet) { + let _ = self.to_worker.unbounded_send(ServiceToWorkerMsg::PutRecordTo(record, peers)); + } } #[async_trait::async_trait] @@ -1157,6 +1162,7 @@ impl<'a> NotificationSenderReadyT for NotificationSenderReady<'a> { enum ServiceToWorkerMsg { GetValue(KademliaKey), PutValue(KademliaKey, Vec), + PutRecordTo(PeerRecord, HashSet), AddKnownAddress(PeerId, Multiaddr), EventStream(out_events::Sender), Request { @@ -1284,6 +1290,8 @@ where self.network_service.behaviour_mut().get_value(key), ServiceToWorkerMsg::PutValue(key, value) => self.network_service.behaviour_mut().put_value(key, value), + ServiceToWorkerMsg::PutRecordTo(record, peers) => + self.network_service.behaviour_mut().put_record_to(record, peers), ServiceToWorkerMsg::AddKnownAddress(peer_id, addr) => self.network_service.behaviour_mut().add_known_address(peer_id, addr), ServiceToWorkerMsg::EventStream(sender) => self.event_streams.push(sender), diff --git a/substrate/client/network/src/service/traits.rs b/substrate/client/network/src/service/traits.rs index 74ddb986c247..f392fe13a38e 100644 --- a/substrate/client/network/src/service/traits.rs +++ b/substrate/client/network/src/service/traits.rs @@ -31,7 +31,7 @@ use crate::{ }; use futures::{channel::oneshot, Stream}; -use libp2p::{Multiaddr, PeerId}; +use libp2p::{kad::PeerRecord, 
Multiaddr, PeerId}; use sc_network_common::role::ObservedRole; @@ -62,6 +62,8 @@ pub trait NetworkDHTProvider { /// Start putting a value in the DHT. fn put_value(&self, key: KademliaKey, value: Vec); + + fn put_record_to(&self, record: PeerRecord, peers: HashSet); } impl NetworkDHTProvider for Arc @@ -76,6 +78,10 @@ where fn put_value(&self, key: KademliaKey, value: Vec) { T::put_value(self, key, value) } + + fn put_record_to(&self, record: PeerRecord, peers: HashSet) { + T::put_record_to(self, record, peers) + } } /// Provides an ability to set a fork sync request for a particular block. From 7e16e58702a61687cf907bb9e70779e3f8a447d8 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 11:07:38 +0300 Subject: [PATCH 07/47] Add more changes Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/worker.rs | 73 ++++++++++++++++--- .../src/worker/schema/dht-v2.proto | 1 + substrate/client/network/src/discovery.rs | 9 ++- 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index e3b06dc45ad5..cc859938f1bc 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -226,8 +226,8 @@ where // interval for `publish_interval`, `query_interval` and `priority_group_set_interval` // instead of a constant interval. let publish_interval = - ExpIncInterval::new(Duration::from_secs(2), config.max_publish_interval); - let query_interval = ExpIncInterval::new(Duration::from_secs(2), config.max_query_interval); + ExpIncInterval::new(Duration::from_secs(2), Duration::from_secs(60 * 10)); + let query_interval = ExpIncInterval::new(Duration::from_secs(2), Duration::from_secs(90)); // An `ExpIncInterval` is overkill here because the interval is constant, but consistency // is more simple. 
@@ -621,7 +621,7 @@ where record, auth_signature, peer_signature, - creation_time, + creation_time: creation_time_info, } = schema::SignedAuthorityRecord::decode(record.record.value.as_slice()) .map_err(Error::DecodingProto)?; @@ -631,7 +631,28 @@ where if !AuthorityPair::verify(&auth_signature, &record, &authority_id) { return Err(Error::VerifyingDhtPayload) } - let creation_time: u128 = creation_time + + if let Some(creation_time_info) = creation_time_info.as_ref() { + let creation_time_signature = + AuthoritySignature::decode(&mut &creation_time_info.signature[..]) + .map_err(Error::EncodingDecodingScale)?; + if !AuthorityPair::verify( + &creation_time_signature, + &creation_time_info.timestamp, + &authority_id, + ) { + log::error!( + "Could not verify public key {:?}, {:?}", + creation_time_info.timestamp, + creation_time_info.signature + ); + return Err(Error::VerifyingDhtPayload) + } + log::error!("Public key verified"); + } + + let creation_time: u128 = creation_time_info + .as_ref() .map(|creation_time| { u128::decode(&mut &creation_time.timestamp[..]).unwrap_or_default() }) @@ -668,7 +689,10 @@ where if let Some(peer_signature) = peer_signature { let public_key = PublicKey::try_decode_protobuf(&peer_signature.public_key) .map_err(Error::ParsingLibp2pIdentity)?; - let signature = Signature { public_key, bytes: peer_signature.signature }; + let signature = Signature { + public_key: public_key.clone(), + bytes: peer_signature.signature, + }; if !signature.verify(record, &remote_peer_id) { return Err(Error::VerifyingDhtPayload) @@ -702,7 +726,8 @@ where needs_update = true; let peers_that_need_updating = last_value.1.clone(); log::error!( - "convergence: Updating stored value on peers newest_timestamp {:?} === last_value {:?} peers_that_need_updating {:?}", + "convergence: {:?} Updating stored value on peers newest_timestamp {:?} === last_value {:?} peers_that_need_updating {:?}", + authority_id, newest_timestamp, last_value.0, peers_that_need_updating @@ 
-716,15 +741,28 @@ where // timestamp. needs_update = true; log::error!( - "convergence: Peer already has the latest value newest_timestamp {:?} == last_value {:?}", + "convergence: {:?} Peer already has the latest value newest_timestamp {:?} == last_value {:?}", + authority_id, newest_timestamp, last_value.0 ); } else { let wtf_peer_id = peer_id.clone(); let peers_that_need_updating: HashSet = peer_id.take().into_iter().collect(); + if last_value.0 - newest_timestamp > Duration::from_secs(20 * 60).as_nanos() { + log::error!( + "convergence: {:?} Peers informs us of really old value {:?} seconds newest_timestamp {:?} == last_value {:?} == peers_that_need_updating {:?} wtf_peer_id {:?}", + authority_id, + newest_timestamp, + last_value, + peers_that_need_updating, + wtf_peer_id, + (last_value.0 - newest_timestamp) / 1000 / 1000 / 1000 + ); + } log::error!( - "convergence: Peers informs us of old value newest_timestamp {:?} == last_value {:?} == peers_that_need_updating {:?} wtf_peer_id {:?}", + "convergence: {:?} Peers informs us of old value newest_timestamp {:?} == last_value {:?} == peers_that_need_updating {:?} wtf_peer_id {:?}", + authority_id, newest_timestamp, last_value, peers_that_need_updating, @@ -853,7 +891,24 @@ fn sign_record_with_authority_ids( log::error!("Publish record created at {:?}", creation_time); let creation_time = creation_time.encode(); - let creation_time = schema::Timestamp { timestamp: creation_time }; + let creation_time_signature = key_store + .sr25519_sign(key_types::AUTHORITY_DISCOVERY, key.as_ref(), &creation_time) + .map_err(|e| Error::CannotSign(format!("{}. Key: {:?}", e, key)))? + .ok_or_else(|| { + Error::CannotSign(format!("Could not find key in keystore. 
Key: {:?}", key)) + })?; + + log::error!( + "Publish record created signature {:?} ==== {:?} ===== {:?}", + creation_time, + creation_time_signature, + creation_time_signature.encode() + ); + + let creation_time = schema::Timestamp { + timestamp: creation_time, + signature: creation_time_signature.encode(), + }; let signed_record = schema::SignedAuthorityRecord { record: serialized_record.clone(), diff --git a/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto b/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto index f039eb0a4df1..498ba5d7192b 100644 --- a/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto +++ b/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto @@ -15,6 +15,7 @@ message PeerSignature { message Timestamp { bytes timestamp = 1; + bytes signature = 2; } // Then we need to serialize the authority record and signature to send them over the wire. diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index 0657b28d2825..f798c3caf3e6 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -410,7 +410,14 @@ impl DiscoveryBehaviour { pub fn put_record_to(&mut self, record: PeerRecord, peers: HashSet) { if let Some(kad) = self.kademlia.as_mut() { - kad.put_record_to(record.record, peers.into_iter(), Quorum::One); + if peers.is_empty() { + warn!(target: "sub-libp2p", "Updating our record in the store"); + if let Err(e) = kad.store_mut().put(record.record) { + warn!(target: "sub-libp2p", "Failed to update our record"); + } + } else { + kad.put_record_to(record.record, peers.into_iter(), Quorum::One); + } } } From a82ebedb9e9e181935fe7a97cdd4f96db8ffc518 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 11:15:41 +0300 Subject: [PATCH 08/47] More refactoring Signed-off-by: Alexandru Gheorghe --- substrate/client/authority-discovery/build.rs | 6 +++- 
.../client/authority-discovery/src/worker.rs | 4 +-- .../src/worker/schema/dht-v2.proto | 7 ---- .../src/worker/schema/dht-v3.proto | 33 +++++++++++++++++++ 4 files changed, 40 insertions(+), 10 deletions(-) create mode 100644 substrate/client/authority-discovery/src/worker/schema/dht-v3.proto diff --git a/substrate/client/authority-discovery/build.rs b/substrate/client/authority-discovery/build.rs index 83076ac8c893..cdabc1a74427 100644 --- a/substrate/client/authority-discovery/build.rs +++ b/substrate/client/authority-discovery/build.rs @@ -18,7 +18,11 @@ fn main() { prost_build::compile_protos( - &["src/worker/schema/dht-v1.proto", "src/worker/schema/dht-v2.proto"], + &[ + "src/worker/schema/dht-v1.proto", + "src/worker/schema/dht-v2.proto", + "src/worker/schema/dht-v3.proto", + ], &["src/worker/schema"], ) .unwrap(); diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index cc859938f1bc..1da121ecb944 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -63,7 +63,7 @@ mod schema { #[cfg(test)] mod tests; - include!(concat!(env!("OUT_DIR"), "/authority_discovery_v2.rs")); + include!(concat!(env!("OUT_DIR"), "/authority_discovery_v3.rs")); } #[cfg(test)] pub mod tests; @@ -905,7 +905,7 @@ fn sign_record_with_authority_ids( creation_time_signature.encode() ); - let creation_time = schema::Timestamp { + let creation_time = schema::TimestampInfo { timestamp: creation_time, signature: creation_time_signature.encode(), }; diff --git a/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto b/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto index 498ba5d7192b..fdbadb426630 100644 --- a/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto +++ b/substrate/client/authority-discovery/src/worker/schema/dht-v2.proto @@ -13,11 +13,6 @@ message PeerSignature { bytes public_key = 2; } -message 
Timestamp { - bytes timestamp = 1; - bytes signature = 2; -} - // Then we need to serialize the authority record and signature to send them over the wire. message SignedAuthorityRecord { bytes record = 1; @@ -25,6 +20,4 @@ message SignedAuthorityRecord { // Even if there are multiple `record.addresses`, all of them have the same peer id. // Old versions are missing this field. It is optional in order to provide compatibility both ways. PeerSignature peer_signature = 3; - // Utc timestamp when this record was created - Timestamp creation_time = 4; } diff --git a/substrate/client/authority-discovery/src/worker/schema/dht-v3.proto b/substrate/client/authority-discovery/src/worker/schema/dht-v3.proto new file mode 100644 index 000000000000..980f1cdc3104 --- /dev/null +++ b/substrate/client/authority-discovery/src/worker/schema/dht-v3.proto @@ -0,0 +1,33 @@ +syntax = "proto3"; + +package authority_discovery_v3; + +// First we need to serialize the addresses in order to be able to sign them. +message AuthorityRecord { + // Possibly multiple `MultiAddress`es through which the node can be + repeated bytes addresses = 1; +} + +message PeerSignature { + bytes signature = 1; + bytes public_key = 2; +} + +// Information regarding the creation data of the record +message TimestampInfo { + // Time since UNIX_EPOCH in nanoseconds, scale encoded + bytes timestamp = 1; + // A signature of the creation time. + bytes signature = 2; +} + +// Then we need to serialize the authority record and signature to send them over the wire. +message SignedAuthorityRecord { + bytes record = 1; + bytes auth_signature = 2; + // Even if there are multiple `record.addresses`, all of them have the same peer id. + // Old versions are missing this field. It is optional in order to provide compatibility both ways. + PeerSignature peer_signature = 3; + // Information regarding the creation data of this record. 
+ TimestampInfo creation_time = 4; +} From d569ab3669f0ce88af8ecfa8bd8c729b40273e1f Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 11:50:42 +0300 Subject: [PATCH 09/47] A bit more refactoring Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/worker.rs | 109 ++++++++---------- 1 file changed, 47 insertions(+), 62 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 1da121ecb944..ef101747be70 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -160,7 +160,7 @@ pub struct Worker { known_lookups: HashMap, /// Peers with old records - last_known_record: HashMap, Option)>, + last_known_record: HashMap, addr_cache: addr_cache::AddrCache, @@ -171,6 +171,16 @@ pub struct Worker { phantom: PhantomData, } +#[derive(Debug, Clone, Default)] +struct RecordInfo { + // Time since UNIX_EPOCH in nanoseconds. + creation_time: u128, + // Peers that we know have this record. + peers: HashSet, + // The record itself. + record: Option, +} + /// Wrapper for [`AuthorityDiscoveryApi`](sp_authority_discovery::AuthorityDiscoveryApi). Can be /// be implemented by any struct without dependency on the runtime. #[async_trait::async_trait] @@ -713,70 +723,22 @@ where .take(MAX_ADDRESSES_PER_AUTHORITY) .collect(); - let newest_timestamp = single(remote_addresses.iter().map(|(_addr, timestamp)| *timestamp)) - .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? 
- .unwrap_or_default(); - let mut needs_update = false; - let last_value = self.last_known_record.entry(kadmelia_key).or_default(); - if newest_timestamp > last_value.0 { - last_value.0 = newest_timestamp; - last_value.1 = Default::default(); - last_value.2 = values_clone.first().cloned(); - needs_update = true; - let peers_that_need_updating = last_value.1.clone(); - log::error!( - "convergence: {:?} Updating stored value on peers newest_timestamp {:?} === last_value {:?} peers_that_need_updating {:?}", - authority_id, - newest_timestamp, - last_value.0, - peers_that_need_updating - ); - for record in values_clone { - self.network.put_record_to(record, peers_that_need_updating.clone()); - } - // TODO update records on peers with old records. - } else if newest_timestamp == last_value.0 { - // Same record just update in case this is a record from old nods that don't have - // timestamp. - needs_update = true; - log::error!( - "convergence: {:?} Peer already has the latest value newest_timestamp {:?} == last_value {:?}", - authority_id, - newest_timestamp, - last_value.0 - ); - } else { - let wtf_peer_id = peer_id.clone(); - let peers_that_need_updating: HashSet = peer_id.take().into_iter().collect(); - if last_value.0 - newest_timestamp > Duration::from_secs(20 * 60).as_nanos() { - log::error!( - "convergence: {:?} Peers informs us of really old value {:?} seconds newest_timestamp {:?} == last_value {:?} == peers_that_need_updating {:?} wtf_peer_id {:?}", - authority_id, - newest_timestamp, - last_value, - peers_that_need_updating, - wtf_peer_id, - (last_value.0 - newest_timestamp) / 1000 / 1000 / 1000 - ); - } - log::error!( - "convergence: {:?} Peers informs us of old value newest_timestamp {:?} == last_value {:?} == peers_that_need_updating {:?} wtf_peer_id {:?}", - authority_id, - newest_timestamp, - last_value, - peers_that_need_updating, - wtf_peer_id + let records_creation_time = + single(remote_addresses.iter().map(|(_addr, timestamp)| *timestamp)) + 
.map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? + .unwrap_or_default(); + + for record in values_clone { + needs_update |= self.handle_new_record( + kadmelia_key.clone(), + RecordInfo { + creation_time: records_creation_time, + peers: peer_id.iter().map(|peer| *peer).collect(), + record: Some(record), + }, ); - for record in last_value.2.iter() { - self.network.put_record_to(record.clone(), peers_that_need_updating.clone()); - } - } - - if let Some(peer_id) = peer_id { - last_value.1.insert(peer_id); } - if !remote_addresses.is_empty() && needs_update { self.addr_cache .insert(authority_id, remote_addresses.into_iter().map(|(addr, _)| addr).collect()); @@ -789,6 +751,29 @@ where Ok(()) } + fn handle_new_record(&mut self, kadmelia_key: KademliaKey, new_record: RecordInfo) -> bool { + let last_value = self.last_known_record.entry(kadmelia_key.clone()).or_default(); + if new_record.creation_time > last_value.creation_time { + let peers_that_need_updating = last_value.peers.clone(); + for record in new_record.record.iter() { + self.network.put_record_to(record.clone(), peers_that_need_updating.clone()); + } + self.last_known_record.insert(kadmelia_key, new_record); + true + // TODO update records on peers with old records. + } else if new_record.creation_time == last_value.creation_time { + // Same record just update in case this is a record from old nods that don't have + // timestamp. + last_value.peers.extend(new_record.peers); + true + } else { + for record in last_value.record.iter() { + self.network.put_record_to(record.clone(), new_record.peers.clone()); + } + false + } + } + /// Retrieve our public keys within the current and next authority set. // A node might have multiple authority discovery keys within its keystore, e.g. an old one and // one for the upcoming session. 
In addition it could be participating in the current and (/ or) From 1c9a40d8c30010cbdfe9e5a31dbc4f47bed1ea77 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 12:00:12 +0300 Subject: [PATCH 10/47] A few more improvements Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/error.rs | 3 +++ .../client/authority-discovery/src/worker.rs | 24 ++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/substrate/client/authority-discovery/src/error.rs b/substrate/client/authority-discovery/src/error.rs index ca685115d497..4aa67cb10f4a 100644 --- a/substrate/client/authority-discovery/src/error.rs +++ b/substrate/client/authority-discovery/src/error.rs @@ -75,4 +75,7 @@ pub enum Error { #[error("Unable to fetch best block.")] BestBlockFetchingError, + + #[error("Received dht value found event with records received from different peers.")] + ReceivingDhtValueFoundFromDifferentPeerIds, } diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index ef101747be70..8c1a8673a066 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -159,7 +159,9 @@ pub struct Worker { /// we got at least one successfull result. known_lookups: HashMap, - /// Peers with old records + /// Last known record by key, here we always keep the record with + /// the highest creation time and we don't accept records older than + /// that. last_known_record: HashMap, addr_cache: addr_cache::AddrCache, @@ -176,7 +178,7 @@ struct RecordInfo { // Time since UNIX_EPOCH in nanoseconds. creation_time: u128, // Peers that we know have this record. - peers: HashSet, + peers_with_record: HashSet, // The record itself. record: Option, } @@ -236,8 +238,8 @@ where // interval for `publish_interval`, `query_interval` and `priority_group_set_interval` // instead of a constant interval. 
let publish_interval = - ExpIncInterval::new(Duration::from_secs(2), Duration::from_secs(60 * 10)); - let query_interval = ExpIncInterval::new(Duration::from_secs(2), Duration::from_secs(90)); + ExpIncInterval::new(Duration::from_secs(2), config.max_publish_interval); + let query_interval = ExpIncInterval::new(Duration::from_secs(2), config.max_query_interval); // An `ExpIncInterval` is overkill here because the interval is constant, but consistency // is more simple. @@ -607,8 +609,8 @@ where .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? .ok_or(Error::ReceivingDhtValueFoundEventWithNoRecords)?; - let mut peer_id = single(values.iter().map(|record| record.peer.clone())) - .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? + let answering_peer_id = single(values.iter().map(|record| record.peer.clone())) + .map_err(|_| Error::ReceivingDhtValueFoundFromDifferentPeerIds)? .ok_or(Error::ReceivingDhtValueFoundEventWithNoRecords)?; let values_clone = values.clone(); let authority_id: AuthorityId = @@ -728,13 +730,13 @@ where single(remote_addresses.iter().map(|(_addr, timestamp)| *timestamp)) .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? 
.unwrap_or_default(); - + for record in values_clone { needs_update |= self.handle_new_record( kadmelia_key.clone(), RecordInfo { creation_time: records_creation_time, - peers: peer_id.iter().map(|peer| *peer).collect(), + peers_with_record: answering_peer_id.iter().map(|peer| *peer).collect(), record: Some(record), }, ); @@ -754,7 +756,7 @@ where fn handle_new_record(&mut self, kadmelia_key: KademliaKey, new_record: RecordInfo) -> bool { let last_value = self.last_known_record.entry(kadmelia_key.clone()).or_default(); if new_record.creation_time > last_value.creation_time { - let peers_that_need_updating = last_value.peers.clone(); + let peers_that_need_updating = last_value.peers_with_record.clone(); for record in new_record.record.iter() { self.network.put_record_to(record.clone(), peers_that_need_updating.clone()); } @@ -764,11 +766,11 @@ where } else if new_record.creation_time == last_value.creation_time { // Same record just update in case this is a record from old nods that don't have // timestamp. 
- last_value.peers.extend(new_record.peers); + last_value.peers_with_record.extend(new_record.peers_with_record); true } else { for record in last_value.record.iter() { - self.network.put_record_to(record.clone(), new_record.peers.clone()); + self.network.put_record_to(record.clone(), new_record.peers_with_record.clone()); } false } From 91e647caf4e13f0d4c4cb6e5afce1b245fca983e Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 12:19:15 +0300 Subject: [PATCH 11/47] Fixup even more Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/error.rs | 6 +++ .../client/authority-discovery/src/worker.rs | 47 ++++++++----------- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/substrate/client/authority-discovery/src/error.rs b/substrate/client/authority-discovery/src/error.rs index 4aa67cb10f4a..cad8fa6673e8 100644 --- a/substrate/client/authority-discovery/src/error.rs +++ b/substrate/client/authority-discovery/src/error.rs @@ -78,4 +78,10 @@ pub enum Error { #[error("Received dht value found event with records received from different peers.")] ReceivingDhtValueFoundFromDifferentPeerIds, + + #[error("Failed to verify a dht creation time signature.")] + VerifyingDhtPayloadCreationTime, + + #[error("Received dht value found event with different record creation time.")] + ReceivingDhtValueFoundWithDifferentRecordCreationTime, } diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 8c1a8673a066..3cd922b06843 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -609,10 +609,6 @@ where .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? .ok_or(Error::ReceivingDhtValueFoundEventWithNoRecords)?; - let answering_peer_id = single(values.iter().map(|record| record.peer.clone())) - .map_err(|_| Error::ReceivingDhtValueFoundFromDifferentPeerIds)? 
- .ok_or(Error::ReceivingDhtValueFoundEventWithNoRecords)?; - let values_clone = values.clone(); let authority_id: AuthorityId = if let Some(authority_id) = self.in_flight_lookups.remove(&remote_key) { authority_id @@ -627,7 +623,7 @@ where let local_peer_id = self.network.local_peer_id(); let kadmelia_key = remote_key; let remote_addresses: Vec<(Multiaddr, u128)> = values - .into_iter() + .iter() .map(|record| { let schema::SignedAuthorityRecord { record, @@ -653,14 +649,8 @@ where &creation_time_info.timestamp, &authority_id, ) { - log::error!( - "Could not verify public key {:?}, {:?}", - creation_time_info.timestamp, - creation_time_info.signature - ); - return Err(Error::VerifyingDhtPayload) + return Err(Error::VerifyingDhtPayloadCreationTime) } - log::error!("Public key verified"); } let creation_time: u128 = creation_time_info @@ -668,7 +658,8 @@ where .map(|creation_time| { u128::decode(&mut &creation_time.timestamp[..]).unwrap_or_default() }) - .unwrap_or_default(); + .unwrap_or_default(); // 0 is a sane default for records that do not have creation time present. + let addresses: Vec<(Multiaddr, u128)> = schema::AuthorityRecord::decode(record.as_slice()) .map(|a| a.addresses) @@ -725,14 +716,19 @@ where .take(MAX_ADDRESSES_PER_AUTHORITY) .collect(); - let mut needs_update = false; + let mut addr_cache_needs_update = false; + + let answering_peer_id = single(values.iter().map(|record| record.peer.clone())) + .map_err(|_| Error::ReceivingDhtValueFoundFromDifferentPeerIds)? + .ok_or(Error::ReceivingDhtValueFoundEventWithNoRecords)?; + let records_creation_time = single(remote_addresses.iter().map(|(_addr, timestamp)| *timestamp)) - .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentKeys)? + .map_err(|_| Error::ReceivingDhtValueFoundWithDifferentRecordCreationTime)? 
.unwrap_or_default(); - for record in values_clone { - needs_update |= self.handle_new_record( + for record in values { + addr_cache_needs_update |= self.handle_new_record( kadmelia_key.clone(), RecordInfo { creation_time: records_creation_time, @@ -741,7 +737,8 @@ where }, ); } - if !remote_addresses.is_empty() && needs_update { + + if !remote_addresses.is_empty() && addr_cache_needs_update { self.addr_cache .insert(authority_id, remote_addresses.into_iter().map(|(addr, _)| addr).collect()); if let Some(metrics) = &self.metrics { @@ -762,7 +759,6 @@ where } self.last_known_record.insert(kadmelia_key, new_record); true - // TODO update records on peers with old records. } else if new_record.creation_time == last_value.creation_time { // Same record just update in case this is a record from old nods that don't have // timestamp. @@ -874,8 +870,10 @@ fn sign_record_with_authority_ids( // Scale encode let auth_signature = auth_signature.encode(); - let creation_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos(); - log::error!("Publish record created at {:?}", creation_time); + let creation_time = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|time| time.as_nanos()) + .unwrap_or_default(); let creation_time = creation_time.encode(); let creation_time_signature = key_store @@ -885,13 +883,6 @@ fn sign_record_with_authority_ids( Error::CannotSign(format!("Could not find key in keystore. 
Key: {:?}", key)) })?; - log::error!( - "Publish record created signature {:?} ==== {:?} ===== {:?}", - creation_time, - creation_time_signature, - creation_time_signature.encode() - ); - let creation_time = schema::TimestampInfo { timestamp: creation_time, signature: creation_time_signature.encode(), From 0a53ec2bb1523a995bd3f5c719720bfb399de897 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 12:21:11 +0300 Subject: [PATCH 12/47] Another something Signed-off-by: Alexandru Gheorghe --- .../src/worker/addr_cache.rs | 52 ++++--------------- 1 file changed, 9 insertions(+), 43 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker/addr_cache.rs b/substrate/client/authority-discovery/src/worker/addr_cache.rs index 36c4269c85a1..709d461bd616 100644 --- a/substrate/client/authority-discovery/src/worker/addr_cache.rs +++ b/substrate/client/authority-discovery/src/worker/addr_cache.rs @@ -21,10 +21,7 @@ use libp2p::{ PeerId, }; use sp_authority_discovery::AuthorityId; -use std::{ - collections::{hash_map::Entry, HashMap, HashSet}, - time::Instant, -}; +use std::collections::{hash_map::Entry, HashMap, HashSet}; /// Cache for [`AuthorityId`] -> [`HashSet`] and [`PeerId`] -> [`HashSet`] /// mappings. @@ -36,7 +33,7 @@ pub(super) struct AddrCache { /// Since we may store the mapping across several sessions, a single /// `PeerId` might correspond to multiple `AuthorityId`s. However, /// it's not expected that a single `AuthorityId` can have multiple `PeerId`s. - authority_id_to_addresses: HashMap>, + authority_id_to_addresses: HashMap>, peer_id_to_authority_ids: HashMap>, } @@ -50,18 +47,9 @@ impl AddrCache { /// Inserts the given [`AuthorityId`] and [`Vec`] pair for future lookups by /// [`AuthorityId`] or [`PeerId`]. 
- pub fn insert(&mut self, authority_id: AuthorityId, addresses: Vec<(Multiaddr, u128)>) { - let newest_address = addresses - .iter() - .map(|(_, creation_time)| *creation_time) - .max() - .unwrap_or_default(); - let mut addresses = addresses - .into_iter() - .map(|(addr, creation_time)| (addr, creation_time)) - .collect::>(); - - let mut peer_ids = addresses_to_peer_ids(&addresses); + pub fn insert(&mut self, authority_id: AuthorityId, addresses: Vec) { + let addresses = addresses.into_iter().collect::>(); + let peer_ids = addresses_to_peer_ids(&addresses); if peer_ids.is_empty() { log::debug!( @@ -70,7 +58,6 @@ impl AddrCache { authority_id, addresses, ); - return } else if peer_ids.len() > 1 { log::warn!( @@ -80,32 +67,14 @@ impl AddrCache { peer_ids ); } - log::debug!( target: super::LOG_TARGET, "Found addresses for authority {authority_id:?}: {addresses:?}", ); - let old_addresses = - self.authority_id_to_addresses.get(&authority_id).cloned().unwrap_or_default(); - - let time_now = Instant::now(); - - let to_keep_addresses = old_addresses - .iter() - .filter(|(addr, expires)| creation_tim&& !addresses.contains_key(addr)) - .map(|(addr, expires)| (addr.clone(), *expires)) - .collect::>(); - - addresses.extend(to_keep_addresses.clone()); - let old_addresses = self.authority_id_to_addresses.insert(authority_id.clone(), addresses); - let old_peer_ids = addresses_to_peer_ids(&old_addresses.unwrap_or_default()); - let to_kepp_peer_ids = addresses_to_peer_ids(&to_keep_addresses); - peer_ids.extend(to_kepp_peer_ids); - // Add the new peer ids peer_ids.difference(&old_peer_ids).for_each(|new_peer_id| { self.peer_id_to_authority_ids @@ -113,7 +82,6 @@ impl AddrCache { .or_default() .insert(authority_id.clone()); }); - // Remove the old peer ids self.remove_authority_id_from_peer_ids(&authority_id, old_peer_ids.difference(&peer_ids)); } @@ -147,10 +115,8 @@ impl AddrCache { pub fn get_addresses_by_authority_id( &self, authority_id: &AuthorityId, - ) -> Option> { - 
self.authority_id_to_addresses - .get(authority_id) - .map(|result| result.keys().map(|addr| addr.clone()).collect::>()) + ) -> Option<&HashSet> { + self.authority_id_to_addresses.get(authority_id) } /// Returns the [`AuthorityId`]s for the given [`PeerId`]. @@ -201,8 +167,8 @@ fn peer_id_from_multiaddr(addr: &Multiaddr) -> Option { }) } -fn addresses_to_peer_ids(addresses: &HashMap) -> HashSet { - addresses.keys().filter_map(peer_id_from_multiaddr).collect::>() +fn addresses_to_peer_ids(addresses: &HashSet) -> HashSet { + addresses.iter().filter_map(peer_id_from_multiaddr).collect::>() } #[cfg(test)] From ef6ddb65b88a97f66a759be74b46ee50e98d50c9 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 12:30:08 +0300 Subject: [PATCH 13/47] Fixup everything Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/worker.rs | 8 +++++- substrate/client/network/src/behaviour.rs | 10 ++++++-- substrate/client/network/src/discovery.rs | 17 +++++++------ substrate/client/network/src/service.rs | 25 +++++++++++++++---- .../client/network/src/service/traits.rs | 11 +++++--- 5 files changed, 53 insertions(+), 18 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 3cd922b06843..0bfcc8c749e0 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -766,7 +766,13 @@ where true } else { for record in last_value.record.iter() { - self.network.put_record_to(record.clone(), new_record.peers_with_record.clone()); + self.network.put_record_to( + record.clone(), + new_record.peers_with_record.clone(), + // If this is empty it means we received the answer from our node local + // storage, so we need to update that as well. 
+ new_record.peers_with_record.is_empty(), + ); } false } diff --git a/substrate/client/network/src/behaviour.rs b/substrate/client/network/src/behaviour.rs index 163c3e6aea90..7d953c93fc65 100644 --- a/substrate/client/network/src/behaviour.rs +++ b/substrate/client/network/src/behaviour.rs @@ -284,8 +284,14 @@ impl Behaviour { self.discovery.put_value(key, value); } - pub fn put_record_to(&mut self, record: PeerRecord, peers: HashSet) { - self.discovery.put_record_to(record, peers); + /// Puts a record into DHT, on the provided Peers + pub fn put_record_to( + &mut self, + record: PeerRecord, + peers: HashSet, + update_local_storage: bool, + ) { + self.discovery.put_record_to(record, peers, update_local_storage); } } diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index f798c3caf3e6..5a49d8ab974b 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -408,16 +408,19 @@ impl DiscoveryBehaviour { } } - pub fn put_record_to(&mut self, record: PeerRecord, peers: HashSet) { + pub fn put_record_to( + &mut self, + record: PeerRecord, + peers: HashSet, + update_local_storage: bool, + ) { if let Some(kad) = self.kademlia.as_mut() { - if peers.is_empty() { - warn!(target: "sub-libp2p", "Updating our record in the store"); - if let Err(e) = kad.store_mut().put(record.record) { - warn!(target: "sub-libp2p", "Failed to update our record"); + if update_local_storage { + if let Err(e) = kad.store_mut().put(record.record.clone()) { + warn!(target: "sub-libp2p", "Failed to update local starage"); } - } else { - kad.put_record_to(record.record, peers.into_iter(), Quorum::One); } + kad.put_record_to(record.record, peers.into_iter(), Quorum::One); } } diff --git a/substrate/client/network/src/service.rs b/substrate/client/network/src/service.rs index 2c44b8355952..67d54dabebb1 100644 --- a/substrate/client/network/src/service.rs +++ b/substrate/client/network/src/service.rs @@ -825,8 
+825,17 @@ where let _ = self.to_worker.unbounded_send(ServiceToWorkerMsg::PutValue(key, value)); } - fn put_record_to(&self, record: PeerRecord, peers: HashSet) { - let _ = self.to_worker.unbounded_send(ServiceToWorkerMsg::PutRecordTo(record, peers)); + fn put_record_to( + &self, + record: PeerRecord, + peers: HashSet, + update_local_storage: bool, + ) { + let _ = self.to_worker.unbounded_send(ServiceToWorkerMsg::PutRecordTo { + record, + peers, + update_local_storage, + }); } } @@ -1162,7 +1171,11 @@ impl<'a> NotificationSenderReadyT for NotificationSenderReady<'a> { enum ServiceToWorkerMsg { GetValue(KademliaKey), PutValue(KademliaKey, Vec), - PutRecordTo(PeerRecord, HashSet), + PutRecordTo { + record: PeerRecord, + peers: HashSet, + update_local_storage: bool, + }, AddKnownAddress(PeerId, Multiaddr), EventStream(out_events::Sender), Request { @@ -1290,8 +1303,10 @@ where self.network_service.behaviour_mut().get_value(key), ServiceToWorkerMsg::PutValue(key, value) => self.network_service.behaviour_mut().put_value(key, value), - ServiceToWorkerMsg::PutRecordTo(record, peers) => - self.network_service.behaviour_mut().put_record_to(record, peers), + ServiceToWorkerMsg::PutRecordTo { record, peers, update_local_storage } => self + .network_service + .behaviour_mut() + .put_record_to(record, peers, update_local_storage), ServiceToWorkerMsg::AddKnownAddress(peer_id, addr) => self.network_service.behaviour_mut().add_known_address(peer_id, addr), ServiceToWorkerMsg::EventStream(sender) => self.event_streams.push(sender), diff --git a/substrate/client/network/src/service/traits.rs b/substrate/client/network/src/service/traits.rs index f392fe13a38e..535e7b90535d 100644 --- a/substrate/client/network/src/service/traits.rs +++ b/substrate/client/network/src/service/traits.rs @@ -63,7 +63,7 @@ pub trait NetworkDHTProvider { /// Start putting a value in the DHT. 
fn put_value(&self, key: KademliaKey, value: Vec); - fn put_record_to(&self, record: PeerRecord, peers: HashSet); + fn put_record_to(&self, record: PeerRecord, peers: HashSet, update_local_storage: bool); } impl NetworkDHTProvider for Arc @@ -79,8 +79,13 @@ where T::put_value(self, key, value) } - fn put_record_to(&self, record: PeerRecord, peers: HashSet) { - T::put_record_to(self, record, peers) + fn put_record_to( + &self, + record: PeerRecord, + peers: HashSet, + update_local_storage: bool, + ) { + T::put_record_to(self, record, peers, update_local_storage) } } From b3eb61533b666e4cf9d65fecd3de8abfb2ae4cef Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 12:34:17 +0300 Subject: [PATCH 14/47] Post refactoring Signed-off-by: Alexandru Gheorghe --- substrate/client/authority-discovery/src/worker.rs | 8 +++++++- substrate/client/network/src/discovery.rs | 11 ++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 0bfcc8c749e0..b42baceb3f22 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -755,7 +755,13 @@ where if new_record.creation_time > last_value.creation_time { let peers_that_need_updating = last_value.peers_with_record.clone(); for record in new_record.record.iter() { - self.network.put_record_to(record.clone(), peers_that_need_updating.clone()); + self.network.put_record_to( + record.clone(), + peers_that_need_updating.clone(), + // If this is empty it means we received the answer from our node local + // storage, so we need to update that as well. 
+ new_record.peers_with_record.is_empty(), + ); } self.last_known_record.insert(kadmelia_key, new_record); true diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index 5a49d8ab974b..3a0befb26bc7 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -408,6 +408,9 @@ impl DiscoveryBehaviour { } } + /// Puts a record into the DHT on the provided `peers` + /// + /// If `update_local_storage` is true, the local storage is update as well. pub fn put_record_to( &mut self, record: PeerRecord, @@ -416,7 +419,7 @@ impl DiscoveryBehaviour { ) { if let Some(kad) = self.kademlia.as_mut() { if update_local_storage { - if let Err(e) = kad.store_mut().put(record.record.clone()) { + if let Err(_e) = kad.store_mut().put(record.record.clone()) { warn!(target: "sub-libp2p", "Failed to update local starage"); } } @@ -816,6 +819,12 @@ impl NetworkBehaviour for DiscoveryBehaviour { // addresses might exist in DHT, for example when the node // changes its PeerId. + // TODO: We need to determine what is the right balance + // here, between creating too many queries in our network + // and robustness in the presence of bad records. + // With letting the query finish by itself we receive around + // 20 answers with 3 of them happening in parallel. 
+ // More details here: https://github.com/libp2p/specs/blob/master/kad-dht/README.md#definitions // query.finish(); } } From f6d4b29e1b8f941bd9f4e6f49736cc38c5527f77 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 14:13:43 +0300 Subject: [PATCH 15/47] Fixup Cargo's Signed-off-by: Alexandru Gheorghe --- Cargo.lock | 92 +++++++++++++++++++++++++++++++++--------------------- Cargo.toml | 4 --- 2 files changed, 56 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8980aef460f6..81eb682a27d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7290,6 +7290,8 @@ checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "libp2p" version = "0.51.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f35eae38201a993ece6bdc823292d6abd1bffed1c4d0f4a3517d2bd8e1d917fe" dependencies = [ "bytes", "futures", @@ -7321,6 +7323,8 @@ dependencies = [ [[package]] name = "libp2p-allow-block-list" version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "510daa05efbc25184458db837f6f9a5143888f1caa742426d92e1833ddd38a50" dependencies = [ "libp2p-core", "libp2p-identity", @@ -7331,6 +7335,8 @@ dependencies = [ [[package]] name = "libp2p-connection-limits" version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4caa33f1d26ed664c4fe2cca81a08c8e07d4c1c04f2f4ac7655c2dd85467fda0" dependencies = [ "libp2p-core", "libp2p-identity", @@ -7341,6 +7347,8 @@ dependencies = [ [[package]] name = "libp2p-core" version = "0.39.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c1df63c0b582aa434fb09b2d86897fa2b419ffeccf934b36f87fcedc8e835c2" dependencies = [ "either", "fnv", @@ -7351,7 +7359,7 @@ dependencies = [ "log", "multiaddr", "multihash 0.17.0", - "multistream-select 0.12.1", + "multistream-select", "once_cell", "parking_lot 0.12.1", "pin-project", @@ -7367,10 +7375,11 @@ dependencies = 
[ [[package]] name = "libp2p-dns" version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146ff7034daae62077c415c2376b8057368042df6ab95f5432ad5e88568b1554" dependencies = [ "futures", "libp2p-core", - "libp2p-identity", "log", "parking_lot 0.12.1", "smallvec", @@ -7380,6 +7389,8 @@ dependencies = [ [[package]] name = "libp2p-identify" version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5455f472243e63b9c497ff320ded0314254a9eb751799a39c283c6f20b793f3c" dependencies = [ "asynchronous-codec", "either", @@ -7400,6 +7411,8 @@ dependencies = [ [[package]] name = "libp2p-identity" version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "276bb57e7af15d8f100d3c11cbdd32c6752b7eef4ba7a18ecf464972c07abcce" dependencies = [ "bs58 0.4.0", "ed25519-dalek", @@ -7416,6 +7429,8 @@ dependencies = [ [[package]] name = "libp2p-kad" version = "0.43.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39d5ef876a2b2323d63c258e63c2f8e36f205fe5a11f0b3095d59635650790ff" dependencies = [ "arrayvec 0.7.4", "asynchronous-codec", @@ -7442,6 +7457,8 @@ dependencies = [ [[package]] name = "libp2p-mdns" version = "0.43.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19983e1f949f979a928f2c603de1cf180cc0dc23e4ac93a62651ccb18341460b" dependencies = [ "data-encoding", "futures", @@ -7461,10 +7478,11 @@ dependencies = [ [[package]] name = "libp2p-metrics" version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a42ec91e227d7d0dafa4ce88b333cdf5f277253873ab087555c92798db2ddd46" dependencies = [ "libp2p-core", "libp2p-identify", - "libp2p-identity", "libp2p-kad", "libp2p-ping", "libp2p-swarm", @@ -7474,6 +7492,8 @@ dependencies = [ [[package]] name = "libp2p-noise" version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9c3673da89d29936bc6435bafc638e2f184180d554ce844db65915113f86ec5e" dependencies = [ "bytes", "curve25519-dalek 3.2.0", @@ -7495,13 +7515,14 @@ dependencies = [ [[package]] name = "libp2p-ping" version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e57759c19c28a73ef1eb3585ca410cefb72c1a709fcf6de1612a378e4219202" dependencies = [ "either", "futures", "futures-timer", "instant", "libp2p-core", - "libp2p-identity", "libp2p-swarm", "log", "rand", @@ -7511,6 +7532,8 @@ dependencies = [ [[package]] name = "libp2p-quic" version = "0.7.0-alpha.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6b26abd81cd2398382a1edfe739b539775be8a90fa6914f39b2ab49571ec735" dependencies = [ "bytes", "futures", @@ -7531,6 +7554,8 @@ dependencies = [ [[package]] name = "libp2p-request-response" version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffdb374267d42dc5ed5bc53f6e601d4a64ac5964779c6e40bb9e4f14c1e30d5" dependencies = [ "async-trait", "futures", @@ -7545,6 +7570,8 @@ dependencies = [ [[package]] name = "libp2p-swarm" version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "903b3d592d7694e56204d211f29d31bc004be99386644ba8731fc3e3ef27b296" dependencies = [ "either", "fnv", @@ -7564,22 +7591,25 @@ dependencies = [ [[package]] name = "libp2p-swarm-derive" version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fba456131824ab6acd4c7bf61e9c0f0a3014b5fc9868ccb8e10d344594cdc4f" dependencies = [ "heck 0.4.1", "quote", - "syn 2.0.53", + "syn 1.0.109", ] [[package]] name = "libp2p-tcp" version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d33698596d7722d85d3ab0c86c2c322254fce1241e91208e3679b4eb3026cf" dependencies = [ "futures", "futures-timer", "if-watch", "libc", "libp2p-core", - "libp2p-identity", "log", "socket2 0.4.9", "tokio", @@ -7588,6 
+7618,8 @@ dependencies = [ [[package]] name = "libp2p-tls" version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff08d13d0dc66e5e9ba6279c1de417b84fa0d0adc3b03e5732928c180ec02781" dependencies = [ "futures", "futures-rustls", @@ -7605,6 +7637,8 @@ dependencies = [ [[package]] name = "libp2p-wasm-ext" version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77dff9d32353a5887adb86c8afc1de1a94d9e8c3bc6df8b2201d7cdf5c848f43" dependencies = [ "futures", "js-sys", @@ -7617,24 +7651,27 @@ dependencies = [ [[package]] name = "libp2p-websocket" version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "111273f7b3d3510524c752e8b7a5314b7f7a1fee7e68161c01a7d72cbb06db9f" dependencies = [ "either", "futures", "futures-rustls", "libp2p-core", - "libp2p-identity", "log", "parking_lot 0.12.1", "quicksink", "rw-stream-sink", "soketto", "url", - "webpki-roots 0.23.1", + "webpki-roots 0.22.6", ] [[package]] name = "libp2p-yamux" version = "0.43.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd21d950662700a385d4c6d68e2f5f54d778e97068cdd718522222ef513bda" dependencies = [ "futures", "libp2p-core", @@ -8384,18 +8421,6 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" -[[package]] -name = "multistream-select" -version = "0.12.1" -dependencies = [ - "bytes", - "futures", - "log", - "pin-project", - "smallvec", - "unsigned-varint", -] - [[package]] name = "multistream-select" version = "0.12.1" @@ -14435,6 +14460,8 @@ dependencies = [ [[package]] name = "quick-protobuf-codec" version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1693116345026436eb2f10b677806169c1a1260c1c60eaaffe3fb5a29ae23d8b" dependencies = [ "asynchronous-codec", "bytes", @@ -15426,16 +15453,6 @@ version = 
"1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a716eb65e3158e90e17cd93d855216e27bde02745ab842f2cab4a39dba1bacf" -[[package]] -name = "rustls-webpki" -version = "0.100.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6a5fc258f1c1276dfe3016516945546e2d5383911efc0fc4f1cdc5df3a4ae3" -dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", -] - [[package]] name = "rustls-webpki" version = "0.101.4" @@ -15489,6 +15506,8 @@ dependencies = [ [[package]] name = "rw-stream-sink" version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26338f5e09bb721b85b135ea05af7767c90b52f6de4f087d4f4a3a9d64e7dc04" dependencies = [ "futures", "pin-project", @@ -16285,7 +16304,7 @@ dependencies = [ "linked_hash_set", "log", "mockall", - "multistream-select 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", + "multistream-select", "parity-scale-codec", "parking_lot 0.12.1", "partial_sort", @@ -21821,11 +21840,11 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.23.1" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338" +checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" dependencies = [ - "rustls-webpki 0.100.3", + "webpki", ] [[package]] @@ -22353,11 +22372,12 @@ dependencies = [ [[package]] name = "x509-parser" -version = "0.15.1" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7069fba5b66b9193bd2c5d3d4ff12b839118f6bcbef5328efafafb5395cf63da" +checksum = "e0ecbeb7b67ce215e40e3cc7f2ff902f94a223acf44995934763467e7b1febc8" dependencies = [ "asn1-rs", + "base64 0.13.1", "data-encoding", "der-parser", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index ca22e38f12f6..e6162830375f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -633,7 +633,3 @@ wasmi = { opt-level = 3 
} x25519-dalek = { opt-level = 3 } yamux = { opt-level = 3 } zeroize = { opt-level = 3 } - -[patch.crates-io] -libp2p = { path = "../rust-libp2p/libp2p" } -libp2p-identity = { path = "../rust-libp2p/identity" } \ No newline at end of file From 1ae17595766fb3a222d87e4b11a00c4d1248d886 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 16:55:04 +0300 Subject: [PATCH 16/47] Make clippy happy Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/worker.rs | 2 +- .../src/worker/addr_cache.rs | 8 +-- .../src/worker/schema/tests.rs | 1 + .../authority-discovery/src/worker/tests.rs | 61 +++++++++++++++++-- .../client/network/src/service/traits.rs | 3 + 5 files changed, 64 insertions(+), 11 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index b42baceb3f22..b9b90b6ee3bb 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -718,7 +718,7 @@ where let mut addr_cache_needs_update = false; - let answering_peer_id = single(values.iter().map(|record| record.peer.clone())) + let answering_peer_id = single(values.iter().map(|record| record.peer)) .map_err(|_| Error::ReceivingDhtValueFoundFromDifferentPeerIds)? 
.ok_or(Error::ReceivingDhtValueFoundEventWithNoRecords)?; diff --git a/substrate/client/authority-discovery/src/worker/addr_cache.rs b/substrate/client/authority-discovery/src/worker/addr_cache.rs index 709d461bd616..891475ca4ed7 100644 --- a/substrate/client/authority-discovery/src/worker/addr_cache.rs +++ b/substrate/client/authority-discovery/src/worker/addr_cache.rs @@ -250,7 +250,7 @@ mod tests { cache.insert(third.0.clone(), vec![third.1.clone()]); assert_eq!( - Some(HashSet::from([third.1.clone()])), + Some(&HashSet::from([third.1.clone()])), cache.get_addresses_by_authority_id(&third.0), "Expect `get_addresses_by_authority_id` to return addresses of third authority.", ); @@ -344,7 +344,7 @@ mod tests { cache.get_authority_ids_by_peer_id(&peer_id_from_multiaddr(&multiaddr3).unwrap()) ); assert_eq!( - HashSet::from([multiaddr2.clone(), multiaddr3.clone(), multiaddr4.clone()]), + &HashSet::from([multiaddr2.clone(), multiaddr3.clone(), multiaddr4.clone()]), cache.get_addresses_by_authority_id(&authority1).unwrap(), ); @@ -374,11 +374,11 @@ mod tests { assert_eq!(2, addr_cache.num_authority_ids()); assert_eq!( - HashSet::from([addr.clone()]), + &HashSet::from([addr.clone()]), addr_cache.get_addresses_by_authority_id(&authority_id0).unwrap() ); assert_eq!( - HashSet::from([addr]), + &HashSet::from([addr]), addr_cache.get_addresses_by_authority_id(&authority_id1).unwrap() ); } diff --git a/substrate/client/authority-discovery/src/worker/schema/tests.rs b/substrate/client/authority-discovery/src/worker/schema/tests.rs index c765e4e5384d..5105b4f7396d 100644 --- a/substrate/client/authority-discovery/src/worker/schema/tests.rs +++ b/substrate/client/authority-discovery/src/worker/schema/tests.rs @@ -74,6 +74,7 @@ fn v1_decodes_v2() { record: vec_record_v2.clone(), auth_signature: vec_auth_signature.clone(), peer_signature: Some(peer_signature_v2.clone()), + creation_time: None, }; let mut vec_signed_record_v2 = vec![]; signed_record_v2.encode(&mut 
vec_signed_record_v2).unwrap(); diff --git a/substrate/client/authority-discovery/src/worker/tests.rs b/substrate/client/authority-discovery/src/worker/tests.rs index 4b79105b24b9..44597f854f86 100644 --- a/substrate/client/authority-discovery/src/worker/tests.rs +++ b/substrate/client/authority-discovery/src/worker/tests.rs @@ -32,7 +32,7 @@ use futures::{ use libp2p::{ core::multiaddr, identity::{Keypair, SigningError}, - kad::record::Key as KademliaKey, + kad::{record::Key as KademliaKey, Record}, PeerId, }; use prometheus_endpoint::prometheus::default_registry; @@ -119,6 +119,7 @@ sp_api::mock_impl_runtime_apis! { pub enum TestNetworkEvent { GetCalled(KademliaKey), PutCalled(KademliaKey, Vec), + PutToCalled(PeerRecord, HashSet, bool), } pub struct TestNetwork { @@ -128,6 +129,7 @@ pub struct TestNetwork { // Whenever functions on `TestNetwork` are called, the function arguments are added to the // vectors below. pub put_value_call: Arc)>>>, + pub put_value_to_call: Arc, bool)>>>, pub get_value_call: Arc>>, event_sender: mpsc::UnboundedSender, event_receiver: Option>, @@ -149,6 +151,7 @@ impl Default for TestNetwork { external_addresses: vec!["/ip6/2001:db8::/tcp/30333".parse().unwrap()], put_value_call: Default::default(), get_value_call: Default::default(), + put_value_to_call: Default::default(), event_sender: tx, event_receiver: Some(rx), } @@ -179,6 +182,23 @@ impl NetworkDHTProvider for TestNetwork { .unbounded_send(TestNetworkEvent::GetCalled(key.clone())) .unwrap(); } + + fn put_record_to( + &self, + record: PeerRecord, + peers: HashSet, + update_local_storage: bool, + ) { + self.put_value_to_call.lock().unwrap().push(( + record.clone(), + peers.clone(), + update_local_storage, + )); + self.event_sender + .clone() + .unbounded_send(TestNetworkEvent::PutToCalled(record, peers, update_local_storage)) + .unwrap(); + } } impl NetworkStateInfo for TestNetwork { @@ -323,7 +343,10 @@ fn publish_discover_cycle() { let dht_event = { let (key, value) = 
network.put_value_call.lock().unwrap().pop().unwrap(); - DhtEvent::ValueFound(vec![(key, value)]) + DhtEvent::ValueFound(vec![PeerRecord { + peer: None, + record: Record { key, value, publisher: None, expires: None }, + }]) }; // Node B discovering node A's address. @@ -477,7 +500,13 @@ fn dont_stop_polling_dht_event_stream_after_bogus_event() { remote_public_key.clone(), &remote_key_store, None, - ); + ) + .into_iter() + .map(|(key, value)| PeerRecord { + peer: None, + record: Record { key, value, publisher: None, expires: None }, + }) + .collect(); DhtEvent::ValueFound(kv_pairs) }; dht_event_tx.send(dht_event).await.expect("Channel has capacity of 1."); @@ -554,14 +583,26 @@ impl DhtValueFoundTester { block_on(local_worker.refill_pending_lookups_queue()).unwrap(); local_worker.start_new_lookups(); - drop(local_worker.handle_dht_value_found_event(values)); + drop( + local_worker.handle_dht_value_found_event( + values + .into_iter() + .map(|(key, value)| PeerRecord { + peer: None, + record: Record { key, value, publisher: None, expires: None }, + }) + .collect(), + ), + ); self.local_worker = Some(local_worker); self.local_worker .as_ref() .map(|w| { - w.addr_cache.get_addresses_by_authority_id(&self.remote_authority_public.into()) + w.addr_cache + .get_addresses_by_authority_id(&self.remote_authority_public.into()) + .cloned() }) .unwrap() } @@ -819,11 +860,19 @@ fn lookup_throttling() { remote_key, &remote_key_store, None, - ); + ) + .into_iter() + .map(|(key, value)| PeerRecord { + peer: None, + record: Record { key, value, publisher: None, expires: None }, + }) + .collect(); DhtEvent::ValueFound(kv_pairs) }; dht_event_tx.send(dht_event).await.expect("Channel has capacity of 1."); + assert!(matches!(receiver.next().await, Some(TestNetworkEvent::PutToCalled(_, _, _)))); + // Assert worker to trigger another lookup. 
assert!(matches!(receiver.next().await, Some(TestNetworkEvent::GetCalled(_)))); assert_eq!( diff --git a/substrate/client/network/src/service/traits.rs b/substrate/client/network/src/service/traits.rs index 535e7b90535d..5959ae90e662 100644 --- a/substrate/client/network/src/service/traits.rs +++ b/substrate/client/network/src/service/traits.rs @@ -63,6 +63,9 @@ pub trait NetworkDHTProvider { /// Start putting a value in the DHT. fn put_value(&self, key: KademliaKey, value: Vec); + /// Start putting the record to `peers`. + /// + /// If `update_local_storage` is true the local storage is udpated as well. fn put_record_to(&self, record: PeerRecord, peers: HashSet, update_local_storage: bool); } From 7b56bc8ad89e0846e793e5d4eff5b20b13e17817 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 17:59:30 +0300 Subject: [PATCH 17/47] Some minor tweaks Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/worker.rs | 35 +++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index b9b90b6ee3bb..425a319afef7 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -729,6 +729,7 @@ where for record in values { addr_cache_needs_update |= self.handle_new_record( + &authority_id, kadmelia_key.clone(), RecordInfo { creation_time: records_creation_time, @@ -750,10 +751,15 @@ where Ok(()) } - fn handle_new_record(&mut self, kadmelia_key: KademliaKey, new_record: RecordInfo) -> bool { - let last_value = self.last_known_record.entry(kadmelia_key.clone()).or_default(); - if new_record.creation_time > last_value.creation_time { - let peers_that_need_updating = last_value.peers_with_record.clone(); + fn handle_new_record( + &mut self, + authority_id: &AuthorityId, + kadmelia_key: KademliaKey, + new_record: RecordInfo, + ) -> bool { + let 
current_record_info = self.last_known_record.entry(kadmelia_key.clone()).or_default(); + if new_record.creation_time > current_record_info.creation_time { + let peers_that_need_updating = current_record_info.peers_with_record.clone(); for record in new_record.record.iter() { self.network.put_record_to( record.clone(), @@ -763,15 +769,30 @@ where new_record.peers_with_record.is_empty(), ); } + debug!( + target: LOG_TARGET, + "Found a newer record for {:?} new record creation time {:?} old record creation time {:?}", + authority_id, new_record.creation_time, current_record_info.creation_time + ); self.last_known_record.insert(kadmelia_key, new_record); true - } else if new_record.creation_time == last_value.creation_time { + } else if new_record.creation_time == current_record_info.creation_time { // Same record just update in case this is a record from old nods that don't have // timestamp. - last_value.peers_with_record.extend(new_record.peers_with_record); + debug!( + target: LOG_TARGET, + "Found same record for {:?} record creation time {:?}", + authority_id, new_record.creation_time + ); + current_record_info.peers_with_record.extend(new_record.peers_with_record); true } else { - for record in last_value.record.iter() { + debug!( + target: LOG_TARGET, + "Found old record for {:?} received record creation time {:?} current record creation time {:?}", + authority_id, new_record.creation_time, current_record_info.creation_time, + ); + for record in current_record_info.record.iter() { self.network.put_record_to( record.clone(), new_record.peers_with_record.clone(), From d610df62c4669e4751a7b929c8ea0bfbaf8e55b0 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 19:14:59 +0300 Subject: [PATCH 18/47] Minor updates Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/worker.rs | 2 +- substrate/client/network/src/discovery.rs | 27 ++++++++----------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git 
a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 425a319afef7..8e0fdf96e7b9 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -907,7 +907,7 @@ fn sign_record_with_authority_ids( .duration_since(UNIX_EPOCH) .map(|time| time.as_nanos()) .unwrap_or_default(); - + debug!(target: LOG_TARGET, "Publish address with creation time{:?}", creation_time); let creation_time = creation_time.encode(); let creation_time_signature = key_store .sr25519_sign(key_types::AUTHORITY_DISCOVERY, key.as_ref(), &creation_time) diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index 3a0befb26bc7..74077fc7ac9d 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -810,22 +810,17 @@ impl NetworkBehaviour for DiscoveryBehaviour { stats, ); - // Let's directly finish the query, as we are only interested in a - // quorum of 1. - if let Some(kad) = self.kademlia.as_mut() { - if let Some(_query) = kad.query_mut(&id) { - // Let the query continue, to increase the chances we - // discover all possible addresses, for the cases where more - // addresses might exist in DHT, for example when the node - // changes its PeerId. - - // TODO: We need to determine what is the right balance - // here, between creating too many queries in our network - // and robustness in the presence of bad records. - // With letting the query finish by itself we receive around - // 20 answers with 3 of them happening in parallel. - // More details here: https://github.com/libp2p/specs/blob/master/kad-dht/README.md#definitions - // query.finish(); + // Let's directly finish the query if we are above 4. 
+ // This number should small enough to make sure we don't + // unnecessarily flood the network with queries, but high + // enough to make sure we also touch peers which might have + // old record, so that we can update them once we notice + // they have old records. + if stats.num_successes() > 4 { + if let Some(kad) = self.kademlia.as_mut() { + if let Some(mut query) = kad.query_mut(&id) { + query.finish(); + } } } From 4263dc40a25bd249d99dd323780ce1b4c59fe20b Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 3 Apr 2024 19:24:28 +0300 Subject: [PATCH 19/47] Minor review feedback Signed-off-by: Alexandru Gheorghe --- polkadot/node/network/gossip-support/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/network/gossip-support/src/lib.rs b/polkadot/node/network/gossip-support/src/lib.rs index ba3b3dd7f544..5cb59375a596 100644 --- a/polkadot/node/network/gossip-support/src/lib.rs +++ b/polkadot/node/network/gossip-support/src/lib.rs @@ -390,7 +390,7 @@ where for (authority, new_addresses) in &resolved { match self.resolved_authorities.get(authority) { - Some(old_address) if !old_address.is_superset(new_addresses) => + Some(old_addresses) if !old_addresses.is_superset(new_addresses) => changed.push(new_addresses.clone()), None => changed.push(new_addresses.clone()), _ => {}, From 4d7b1646eaaebe716ac2984e00f976edcde62728 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Fri, 5 Apr 2024 16:24:01 +0300 Subject: [PATCH 20/47] Add authorithy-discovery-unittests Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/worker.rs | 44 +-- .../authority-discovery/src/worker/tests.rs | 269 +++++++++++++++++- 2 files changed, 282 insertions(+), 31 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 8e0fdf96e7b9..126af62bae2a 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ 
b/substrate/client/authority-discovery/src/worker.rs @@ -467,6 +467,7 @@ where Some(peer_signature), key_store.as_ref(), keys_vec, + true, )?; self.latest_published_kad_keys = kv_pairs.iter().map(|(k, _)| k.clone()).collect(); @@ -766,7 +767,7 @@ where peers_that_need_updating.clone(), // If this is empty it means we received the answer from our node local // storage, so we need to update that as well. - new_record.peers_with_record.is_empty(), + current_record_info.peers_with_record.is_empty(), ); } debug!( @@ -890,6 +891,7 @@ fn sign_record_with_authority_ids( peer_signature: Option, key_store: &dyn Keystore, keys: Vec, + include_creation_time: bool, ) -> Result)>> { let mut result = Vec::with_capacity(keys.len()); @@ -903,29 +905,33 @@ fn sign_record_with_authority_ids( // Scale encode let auth_signature = auth_signature.encode(); - let creation_time = SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|time| time.as_nanos()) - .unwrap_or_default(); - debug!(target: LOG_TARGET, "Publish address with creation time{:?}", creation_time); - let creation_time = creation_time.encode(); - let creation_time_signature = key_store - .sr25519_sign(key_types::AUTHORITY_DISCOVERY, key.as_ref(), &creation_time) - .map_err(|e| Error::CannotSign(format!("{}. Key: {:?}", e, key)))? - .ok_or_else(|| { - Error::CannotSign(format!("Could not find key in keystore. Key: {:?}", key)) - })?; - - let creation_time = schema::TimestampInfo { - timestamp: creation_time, - signature: creation_time_signature.encode(), + let creation_time = if include_creation_time { + let creation_time = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|time| time.as_nanos()) + .unwrap_or_default(); + debug!(target: LOG_TARGET, "Publish address with creation time{:?}", creation_time); + let creation_time = creation_time.encode(); + let creation_time_signature = key_store + .sr25519_sign(key_types::AUTHORITY_DISCOVERY, key.as_ref(), &creation_time) + .map_err(|e| Error::CannotSign(format!("{}. 
Key: {:?}", e, key)))? + .ok_or_else(|| { + Error::CannotSign(format!("Could not find key in keystore. Key: {:?}", key)) + })?; + + let creation_time = schema::TimestampInfo { + timestamp: creation_time, + signature: creation_time_signature.encode(), + }; + Some(creation_time) + } else { + None }; - let signed_record = schema::SignedAuthorityRecord { record: serialized_record.clone(), auth_signature, peer_signature: peer_signature.clone(), - creation_time: Some(creation_time), + creation_time, } .encode_to_vec(); diff --git a/substrate/client/authority-discovery/src/worker/tests.rs b/substrate/client/authority-discovery/src/worker/tests.rs index 44597f854f86..8c4060d7f113 100644 --- a/substrate/client/authority-discovery/src/worker/tests.rs +++ b/substrate/client/authority-discovery/src/worker/tests.rs @@ -233,6 +233,7 @@ fn build_dht_event( public_key: AuthorityId, key_store: &MemoryKeystore, network: Option<&Signer>, + include_creation_time: bool, ) -> Vec<(KademliaKey, Vec)> { let serialized_record = serialize_authority_record(serialize_addresses(addresses.into_iter())).unwrap(); @@ -243,6 +244,7 @@ fn build_dht_event( peer_signature, key_store, vec![public_key.into()], + include_creation_time, ) .unwrap(); // There is always a single item in it, because we signed it with a single key @@ -500,6 +502,7 @@ fn dont_stop_polling_dht_event_stream_after_bogus_event() { remote_public_key.clone(), &remote_key_store, None, + true, ) .into_iter() .map(|(key, value)| PeerRecord { @@ -570,15 +573,20 @@ impl DhtValueFoundTester { let local_key_store = MemoryKeystore::new(); let (_to_worker, from_service) = mpsc::channel(0); - let mut local_worker = Worker::new( - from_service, - local_test_api, - local_network.clone(), - Box::pin(dht_event_rx), - Role::PublishAndDiscover(Arc::new(local_key_store)), - None, - WorkerConfig { strict_record_validation, ..Default::default() }, - ); + let local_worker = if let Some(local_work) = self.local_worker.as_mut() { + local_work + } else { 
+ self.local_worker = Some(Worker::new( + from_service, + local_test_api, + local_network.clone(), + Box::pin(dht_event_rx), + Role::PublishAndDiscover(Arc::new(local_key_store)), + None, + WorkerConfig { strict_record_validation, ..Default::default() }, + )); + self.local_worker.as_mut().unwrap() + }; block_on(local_worker.refill_pending_lookups_queue()).unwrap(); local_worker.start_new_lookups(); @@ -588,15 +596,13 @@ impl DhtValueFoundTester { values .into_iter() .map(|(key, value)| PeerRecord { - peer: None, + peer: Some(PeerId::random()), record: Record { key, value, publisher: None, expires: None }, }) .collect(), ), ); - self.local_worker = Some(local_worker); - self.local_worker .as_ref() .map(|w| { @@ -618,6 +624,7 @@ fn limit_number_of_addresses_added_to_cache_per_authority() { tester.remote_authority_public.into(), &tester.remote_key_store, None, + true, ); let cached_remote_addresses = tester.process_value_found(false, kv_pairs); @@ -633,15 +640,248 @@ fn strict_accept_address_with_peer_signature() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), + true, + ); + + let cached_remote_addresses = tester.process_value_found(true, kv_pairs); + + assert_eq!( + Some(HashSet::from([addr])), + cached_remote_addresses, + "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + ); +} + +#[test] +fn strict_accept_address_without_creation_time() { + let mut tester = DhtValueFoundTester::new(); + let addr = tester.multiaddr_with_peer_id(1); + let kv_pairs = build_dht_event( + vec![addr.clone()], + tester.remote_authority_public.into(), + &tester.remote_key_store, + Some(&TestSigner { keypair: &tester.remote_node_key }), + false, + ); + + let cached_remote_addresses = tester.process_value_found(true, kv_pairs); + + assert_eq!( + Some(HashSet::from([addr])), + cached_remote_addresses, + "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + ); +} + +#[test] +fn 
keep_last_received_if_no_creation_time() { + let mut tester: DhtValueFoundTester = DhtValueFoundTester::new(); + let addr = tester.multiaddr_with_peer_id(1); + let kv_pairs = build_dht_event( + vec![addr.clone()], + tester.remote_authority_public.into(), + &tester.remote_key_store, + Some(&TestSigner { keypair: &tester.remote_node_key }), + false, + ); + + let cached_remote_addresses = tester.process_value_found(true, kv_pairs); + + assert_eq!( + Some(HashSet::from([addr])), + cached_remote_addresses, + "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + ); + + assert!(tester + .local_worker + .as_ref() + .map(|worker| worker.network.put_value_to_call.lock().unwrap().is_empty()) + .unwrap_or_default()); + + let addr2 = tester.multiaddr_with_peer_id(2); + let kv_pairs = build_dht_event( + vec![addr2.clone()], + tester.remote_authority_public.into(), + &tester.remote_key_store, + Some(&TestSigner { keypair: &tester.remote_node_key }), + false, + ); + + let cached_remote_addresses = tester.process_value_found(true, kv_pairs); + + assert_eq!( + Some(HashSet::from([addr2])), + cached_remote_addresses, + "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + ); + assert!(tester + .local_worker + .as_ref() + .map(|worker| worker.network.put_value_to_call.lock().unwrap().is_empty()) + .unwrap_or_default()); +} + +#[test] +fn records_with_incorrectly_signed_creation_time_are_ignored() { + let mut tester: DhtValueFoundTester = DhtValueFoundTester::new(); + let addr = tester.multiaddr_with_peer_id(1); + let kv_pairs = build_dht_event( + vec![addr.clone()], + tester.remote_authority_public.into(), + &tester.remote_key_store, + Some(&TestSigner { keypair: &tester.remote_node_key }), + true, ); let cached_remote_addresses = tester.process_value_found(true, kv_pairs); + assert_eq!( + Some(HashSet::from([addr.clone()])), + cached_remote_addresses, + "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + ); + assert!(tester + .local_worker + .as_ref() + 
.map(|worker| worker.network.put_value_to_call.lock().unwrap().len() == 1) + .unwrap_or_default()); + + let alternative_key = tester + .remote_key_store + .sr25519_generate_new(key_types::AUTHORITY_DISCOVERY, None) + .unwrap(); + + let addr2 = tester.multiaddr_with_peer_id(2); + let mut kv_pairs = build_dht_event( + vec![addr2.clone()], + alternative_key.into(), + &tester.remote_key_store, + Some(&TestSigner { keypair: &tester.remote_node_key }), + true, + ); + let kademlia_key = hash_authority_id(tester.remote_authority_public.as_slice()); + for key in kv_pairs.iter_mut() { + key.0 = kademlia_key.clone(); + } + let cached_remote_addresses = tester.process_value_found(true, kv_pairs); + assert_eq!( Some(HashSet::from([addr])), cached_remote_addresses, "Expect worker to only cache `Multiaddr`s with `PeerId`s.", ); + assert!(tester + .local_worker + .as_ref() + .map(|worker| worker.network.put_value_to_call.lock().unwrap().len() == 1) + .unwrap_or_default()); +} + +#[test] +fn newer_records_overwrite_older_ones() { + let mut tester: DhtValueFoundTester = DhtValueFoundTester::new(); + let old_record = tester.multiaddr_with_peer_id(1); + let kv_pairs = build_dht_event( + vec![old_record.clone()], + tester.remote_authority_public.into(), + &tester.remote_key_store, + Some(&TestSigner { keypair: &tester.remote_node_key }), + true, + ); + + let cached_remote_addresses = tester.process_value_found(true, kv_pairs); + + assert_eq!( + Some(HashSet::from([old_record])), + cached_remote_addresses, + "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + ); + + let update_peers_info = tester + .local_worker + .as_ref() + .map(|worker| worker.network.put_value_to_call.lock().unwrap().remove(0)) + .unwrap(); + assert!(matches!(update_peers_info, (_, _, true))); + + let new_record = tester.multiaddr_with_peer_id(2); + let kv_pairs = build_dht_event( + vec![new_record.clone()], + tester.remote_authority_public.into(), + &tester.remote_key_store, + Some(&TestSigner { keypair: 
&tester.remote_node_key }), + true, + ); + + let cached_remote_addresses = tester.process_value_found(true, kv_pairs); + + assert_eq!( + Some(HashSet::from([new_record])), + cached_remote_addresses, + "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + ); + + let result = tester + .local_worker + .as_ref() + .map(|worker| worker.network.put_value_to_call.lock().unwrap().first().unwrap().clone()) + .unwrap(); + assert!(matches!(result, (_, _, false))); + assert_eq!(result.1, update_peers_info.0.peer.into_iter().collect()); +} + +#[test] +fn older_records_dont_affect_newer_ones() { + let mut tester: DhtValueFoundTester = DhtValueFoundTester::new(); + let old_record = tester.multiaddr_with_peer_id(1); + let old_kv_pairs = build_dht_event( + vec![old_record.clone()], + tester.remote_authority_public.into(), + &tester.remote_key_store, + Some(&TestSigner { keypair: &tester.remote_node_key }), + true, + ); + + let new_record = tester.multiaddr_with_peer_id(2); + let kv_pairs = build_dht_event( + vec![new_record.clone()], + tester.remote_authority_public.into(), + &tester.remote_key_store, + Some(&TestSigner { keypair: &tester.remote_node_key }), + true, + ); + + let cached_remote_addresses = tester.process_value_found(true, kv_pairs); + + assert_eq!( + Some(HashSet::from([new_record.clone()])), + cached_remote_addresses, + "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + ); + + let update_peers_info = tester + .local_worker + .as_ref() + .map(|worker| worker.network.put_value_to_call.lock().unwrap().remove(0)) + .unwrap(); + assert!(matches!(update_peers_info, (_, _, true))); + + let cached_remote_addresses = tester.process_value_found(true, old_kv_pairs); + + assert_eq!( + Some(HashSet::from([new_record])), + cached_remote_addresses, + "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + ); + + let update_peers_info = tester + .local_worker + .as_ref() + .map(|worker| worker.network.put_value_to_call.lock().unwrap().remove(0)) + 
.unwrap(); + assert!(matches!(update_peers_info, (_, _, false))); + assert_ne!(update_peers_info.1, update_peers_info.0.peer.into_iter().collect()); } #[test] @@ -653,6 +893,7 @@ fn reject_address_with_rogue_peer_signature() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &rogue_remote_node_key }), + true, ); let cached_remote_addresses = tester.process_value_found(false, kv_pairs); @@ -671,6 +912,7 @@ fn reject_address_with_invalid_peer_signature() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), + true, ); // tamper with the signature let mut record = schema::SignedAuthorityRecord::decode(kv_pairs[0].1.as_slice()).unwrap(); @@ -693,6 +935,7 @@ fn reject_address_without_peer_signature() { tester.remote_authority_public.into(), &tester.remote_key_store, None, + true, ); let cached_remote_addresses = tester.process_value_found(true, kv_pairs); @@ -711,6 +954,7 @@ fn do_not_cache_addresses_without_peer_id() { tester.remote_authority_public.into(), &tester.remote_key_store, None, + true, ); let cached_remote_addresses = tester.process_value_found(false, kv_pairs); @@ -860,6 +1104,7 @@ fn lookup_throttling() { remote_key, &remote_key_store, None, + true, ) .into_iter() .map(|(key, value)| PeerRecord { From 7cc3333f052a3f4fb5aa3b4e838dd21994083c1c Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Fri, 5 Apr 2024 17:31:42 +0300 Subject: [PATCH 21/47] Add unittest for gossip-support changes Signed-off-by: Alexandru Gheorghe --- .../node/network/gossip-support/src/lib.rs | 4 + .../node/network/gossip-support/src/tests.rs | 315 ++++++++++++++++++ 2 files changed, 319 insertions(+) diff --git a/polkadot/node/network/gossip-support/src/lib.rs b/polkadot/node/network/gossip-support/src/lib.rs index 5cb59375a596..0d8f6545f75c 100644 --- a/polkadot/node/network/gossip-support/src/lib.rs +++ b/polkadot/node/network/gossip-support/src/lib.rs @@ -73,8 
+73,12 @@ const BACKOFF_DURATION: Duration = Duration::from_millis(500); // so it make sense to run a bit more often than that to // detect changes as often as we can, but not too often since // it won't help. +#[cfg(not(test))] const TRY_RERESOLVE_AUTHORITIES: Duration = Duration::from_secs(5 * 60); +#[cfg(test)] +const TRY_RERESOLVE_AUTHORITIES: Duration = Duration::from_secs(2); + /// Duration after which we consider low connectivity a problem. /// /// Especially at startup low connectivity is expected (authority discovery cache needs to be diff --git a/polkadot/node/network/gossip-support/src/tests.rs b/polkadot/node/network/gossip-support/src/tests.rs index cce78df38f30..3889bea4ce62 100644 --- a/polkadot/node/network/gossip-support/src/tests.rs +++ b/polkadot/node/network/gossip-support/src/tests.rs @@ -118,6 +118,14 @@ impl MockAuthorityDiscovery { } } + fn change_address_for_authority(&self, authority_id: AuthorityDiscoveryId) -> PeerId { + let new_peer_id = PeerId::random(); + let addr = Multiaddr::empty().with(Protocol::P2p(new_peer_id.into())); + self.addrs.lock().insert(authority_id.clone(), HashSet::from([addr])); + self.authorities.lock().insert(new_peer_id, HashSet::from([authority_id])); + new_peer_id + } + fn authorities(&self) -> HashMap> { self.authorities.lock().clone() } @@ -807,6 +815,313 @@ fn issues_update_authorities_after_session() { ); } +// Test we connect to authorities that changed their address `TRY_RERESOLVE_AUTHORITIES` rate +// and that is is no-op if no authority changed. 
+#[test] +fn test_quickly_connect_to_authorithies_that_changed_address() { + let hash = Hash::repeat_byte(0xAA); + + let authorities = PAST_PRESENT_FUTURE_AUTHORITIES.clone(); + let authority_that_changes_address = authorities.get(5).unwrap().clone(); + + let mut authority_discovery_mock = MockAuthorityDiscovery::new(authorities); + + test_harness( + make_subsystem_with_authority_discovery(authority_discovery_mock.clone()), + |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + // 1. Initialize with the first leaf in the session. + overseer_signal_active_leaves(overseer, hash).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionIndexForChild(tx), + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(1)).unwrap(); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(s, tx), + )) => { + assert_eq!(relay_parent, hash); + assert_eq!(s, 1); + let mut session_info = make_session_info(); + session_info.discovery_keys = PAST_PRESENT_FUTURE_AUTHORITIES.clone(); + tx.send(Ok(Some(session_info))).unwrap(); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::Authorities(tx), + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(PAST_PRESENT_FUTURE_AUTHORITIES.clone())).unwrap(); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::ConnectToResolvedValidators { + validator_addrs, + peer_set, + }) => { + let all_without_ferdie: Vec<_> = PAST_PRESENT_FUTURE_AUTHORITIES + .iter() + .cloned() + .filter(|p| p != &Sr25519Keyring::Ferdie.public().into()) + .collect(); + + let addrs = get_multiaddrs(all_without_ferdie, authority_discovery_mock.clone()).await; + + 
assert_eq!(validator_addrs, addrs); + assert_eq!(peer_set, PeerSet::Validation); + } + ); + + // Ensure neighbors are unaffected + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + _, + RuntimeApiRequest::CurrentBabeEpoch(tx), + )) => { + let _ = tx.send(Ok(BabeEpoch { + epoch_index: 2 as _, + start_slot: 0.into(), + duration: 200, + authorities: vec![(Sr25519Keyring::Alice.public().into(), 1)], + randomness: [0u8; 32], + config: BabeEpochConfiguration { + c: (1, 4), + allowed_slots: AllowedSlots::PrimarySlots, + }, + })).unwrap(); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridgeRx(NetworkBridgeRxMessage::NewGossipTopology { + session: _, + local_index: _, + canonical_shuffling: _, + shuffled_indices: _, + }) => { + + } + ); + + // 2. Connect all authorities that are known so far. + let known_authorities = authority_discovery_mock.authorities(); + for (peer_id, _id) in known_authorities.iter() { + let msg = + GossipSupportMessage::NetworkBridgeUpdate(NetworkBridgeEvent::PeerConnected( + *peer_id, + ObservedRole::Authority, + ValidationVersion::V3.into(), + None, + )); + overseer.send(FromOrchestra::Communication { msg }).await + } + + // 3. Send a new leaf after TRY_RERESOLVE_AUTHORITIES, we should notice + // UpdateAuthorithies is emitted for all ConnectedPeers. 
+ Delay::new(TRY_RERESOLVE_AUTHORITIES).await; + let hash = Hash::repeat_byte(0xBB); + overseer_signal_active_leaves(overseer, hash).await; + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionIndexForChild(tx), + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(1)).unwrap(); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(s, tx), + )) => { + assert_eq!(relay_parent, hash); + assert_eq!(s, 1); + let mut session_info = make_session_info(); + session_info.discovery_keys = PAST_PRESENT_FUTURE_AUTHORITIES.clone(); + tx.send(Ok(Some(session_info))).unwrap(); + + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::Authorities(tx), + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(PAST_PRESENT_FUTURE_AUTHORITIES.clone())).unwrap(); + } + ); + + for _ in 0..known_authorities.len() { + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridgeRx(NetworkBridgeRxMessage::UpdatedAuthorityIds { + peer_id, + authority_ids, + }) => { + assert_eq!(authority_discovery_mock.get_authority_ids_by_peer_id(peer_id).await.unwrap_or_default(), authority_ids); + } + ); + } + + // 4. At next re-resolve no-authorithy changes their address, so it should be no-op. 
+ Delay::new(TRY_RERESOLVE_AUTHORITIES).await; + let hash = Hash::repeat_byte(0xCC); + overseer_signal_active_leaves(overseer, hash).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionIndexForChild(tx), + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(1)).unwrap(); + } + ); + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(s, tx), + )) => { + assert_eq!(relay_parent, hash); + assert_eq!(s, 1); + let mut session_info = make_session_info(); + session_info.discovery_keys = PAST_PRESENT_FUTURE_AUTHORITIES.clone(); + tx.send(Ok(Some(session_info))).unwrap(); + + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::Authorities(tx), + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(PAST_PRESENT_FUTURE_AUTHORITIES.clone())).unwrap(); + } + ); + assert!(overseer.recv().timeout(TIMEOUT).await.is_none()); + + // Change address for one authorithy and check we try to connect to it and + // that we emit UpdateAuthorityID for the old PeerId and the new one. 
+ Delay::new(TRY_RERESOLVE_AUTHORITIES).await; + let changed_peerid = authority_discovery_mock + .change_address_for_authority(authority_that_changes_address.clone()); + let hash = Hash::repeat_byte(0xDD); + let msg = GossipSupportMessage::NetworkBridgeUpdate(NetworkBridgeEvent::PeerConnected( + changed_peerid, + ObservedRole::Authority, + ValidationVersion::V3.into(), + None, + )); + overseer.send(FromOrchestra::Communication { msg }).await; + + overseer_signal_active_leaves(overseer, hash).await; + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionIndexForChild(tx), + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(1)).unwrap(); + } + ); + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::SessionInfo(s, tx), + )) => { + assert_eq!(relay_parent, hash); + assert_eq!(s, 1); + let mut session_info = make_session_info(); + session_info.discovery_keys = PAST_PRESENT_FUTURE_AUTHORITIES.clone(); + tx.send(Ok(Some(session_info))).unwrap(); + + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::Authorities(tx), + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(PAST_PRESENT_FUTURE_AUTHORITIES.clone())).unwrap(); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::AddToResolvedValidators { + validator_addrs, + peer_set, + }) => { + let expected = get_address_map(vec![authority_that_changes_address.clone()], authority_discovery_mock.clone()).await; + let expected: HashSet = expected.into_values().flat_map(|v| v.into_iter()).collect(); + assert_eq!(validator_addrs.into_iter().flat_map(|v| v.into_iter()).collect::>(), expected); + assert_eq!(peer_set, PeerSet::Validation); + } + ); + + assert_matches!( + 
overseer_recv(overseer).await, + AllMessages::NetworkBridgeRx(NetworkBridgeRxMessage::UpdatedAuthorityIds { + peer_id, + authority_ids, + }) => { + assert_eq!(authority_discovery_mock.get_authority_ids_by_peer_id(peer_id).await.unwrap(), HashSet::from([authority_that_changes_address.clone()])); + assert!(authority_ids.is_empty()); + } + ); + + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridgeRx(NetworkBridgeRxMessage::UpdatedAuthorityIds { + peer_id, + authority_ids, + }) => { + assert_eq!(authority_ids, HashSet::from([authority_that_changes_address])); + assert_eq!(changed_peerid, peer_id); + } + ); + + assert!(overseer.recv().timeout(TIMEOUT).await.is_none()); + + virtual_overseer + }, + ); +} + #[test] fn disconnect_when_not_in_past_present_future() { sp_tracing::try_init_simple(); From 42afffca2807c142f5a4d02c38311fcd01f3f4a5 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 17 Apr 2024 18:31:51 +0300 Subject: [PATCH 22/47] Simplify indentation levels Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/worker.rs | 176 +++++++++--------- 1 file changed, 83 insertions(+), 93 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 10b222c08047..14f4dab155eb 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -621,107 +621,98 @@ where self.known_lookups.insert(remote_key.clone(), authority_id.clone()); let local_peer_id = self.network.local_peer_id(); - let kademlia_key = remote_key; - let remote_addresses: Vec<(Multiaddr, u128)> = { - let schema::SignedAuthorityRecord { - record, - auth_signature, - peer_signature, - creation_time: creation_time_info, - } = schema::SignedAuthorityRecord::decode(values.record.value.as_slice()) - .map_err(Error::DecodingProto)?; - - let auth_signature = AuthoritySignature::decode(&mut &auth_signature[..]) - 
.map_err(Error::EncodingDecodingScale)?; - - if !AuthorityPair::verify(&auth_signature, &record, &authority_id) { - return Err(Error::VerifyingDhtPayload) - } - if let Some(creation_time_info) = creation_time_info.as_ref() { - let creation_time_signature = - AuthoritySignature::decode(&mut &creation_time_info.signature[..]) - .map_err(Error::EncodingDecodingScale)?; - if !AuthorityPair::verify( - &creation_time_signature, - &creation_time_info.timestamp, - &authority_id, - ) { - return Err(Error::VerifyingDhtPayloadCreationTime) - } + let schema::SignedAuthorityRecord { + record, + auth_signature, + peer_signature, + creation_time: creation_time_info, + } = schema::SignedAuthorityRecord::decode(values.record.value.as_slice()) + .map_err(Error::DecodingProto)?; + + let auth_signature = AuthoritySignature::decode(&mut &auth_signature[..]) + .map_err(Error::EncodingDecodingScale)?; + + if !AuthorityPair::verify(&auth_signature, &record, &authority_id) { + return Err(Error::VerifyingDhtPayload) + } + + if let Some(creation_time_info) = creation_time_info.as_ref() { + let creation_time_signature = + AuthoritySignature::decode(&mut &creation_time_info.signature[..]) + .map_err(Error::EncodingDecodingScale)?; + if !AuthorityPair::verify( + &creation_time_signature, + &creation_time_info.timestamp, + &authority_id, + ) { + return Err(Error::VerifyingDhtPayloadCreationTime) } + } - let creation_time: u128 = creation_time_info - .as_ref() - .map(|creation_time| { - u128::decode(&mut &creation_time.timestamp[..]).unwrap_or_default() - }) - .unwrap_or_default(); // 0 is a sane default for records that do not have creation time present. - - let addresses: Vec<(Multiaddr, u128)> = - schema::AuthorityRecord::decode(record.as_slice()) - .map(|a| a.addresses) - .map_err(Error::DecodingProto)? 
- .into_iter() - .map(|a| a.try_into().map(|addr| (addr, creation_time))) - .collect::>() - .map_err(Error::ParsingMultiaddress)?; - - let get_peer_id = |a: &Multiaddr| match a.iter().last() { - Some(multiaddr::Protocol::P2p(key)) => PeerId::from_multihash(key).ok(), - _ => None, - }; + let records_creation_time: u128 = creation_time_info + .as_ref() + .map(|creation_time| { + u128::decode(&mut &creation_time.timestamp[..]).unwrap_or_default() + }) + .unwrap_or_default(); // 0 is a sane default for records that do not have creation time present. - // Ignore [`Multiaddr`]s without [`PeerId`] or with own addresses. - let addresses: Vec<(Multiaddr, u128)> = addresses - .into_iter() - .filter(|a| get_peer_id(&a.0).filter(|p| *p != local_peer_id).is_some()) - .collect(); - - let remote_peer_id = single(addresses.iter().map(|a| get_peer_id(&a.0))) - .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentPeerIds)? // different peer_id in records - .flatten() - .ok_or(Error::ReceivingDhtValueFoundEventWithNoPeerIds)?; // no records with peer_id in them - - // At this point we know all the valid multiaddresses from the record, know that - // each of them belong to the same PeerId, we just need to check if the record is - // properly signed by the owner of the PeerId - - if let Some(peer_signature) = peer_signature { - match self.network.verify( - remote_peer_id.into(), - &peer_signature.public_key, - &peer_signature.signature, - &record, - ) { - Ok(true) => {}, - Ok(false) => return Err(Error::VerifyingDhtPayload), - Err(error) => return Err(Error::ParsingLibp2pIdentity(error)), - } - } else if self.strict_record_validation { - return Err(Error::MissingPeerIdSignature) - } else { - debug!( - target: LOG_TARGET, - "Received unsigned authority discovery record from {}", authority_id - ); + let addresses: Vec = schema::AuthorityRecord::decode(record.as_slice()) + .map(|a| a.addresses) + .map_err(Error::DecodingProto)? 
+ .into_iter() + .map(|a| a.try_into()) + .collect::>() + .map_err(Error::ParsingMultiaddress)?; + + let get_peer_id = |a: &Multiaddr| match a.iter().last() { + Some(multiaddr::Protocol::P2p(key)) => PeerId::from_multihash(key).ok(), + _ => None, + }; + + // Ignore [`Multiaddr`]s without [`PeerId`] or with own addresses. + let addresses: Vec = addresses + .into_iter() + .filter(|a| get_peer_id(&a).filter(|p| *p != local_peer_id).is_some()) + .collect(); + + let remote_peer_id = single(addresses.iter().map(|a| get_peer_id(&a))) + .map_err(|_| Error::ReceivingDhtValueFoundEventWithDifferentPeerIds)? // different peer_id in records + .flatten() + .ok_or(Error::ReceivingDhtValueFoundEventWithNoPeerIds)?; // no records with peer_id in them + + // At this point we know all the valid multiaddresses from the record, know that + // each of them belong to the same PeerId, we just need to check if the record is + // properly signed by the owner of the PeerId + + if let Some(peer_signature) = peer_signature { + match self.network.verify( + remote_peer_id.into(), + &peer_signature.public_key, + &peer_signature.signature, + &record, + ) { + Ok(true) => {}, + Ok(false) => return Err(Error::VerifyingDhtPayload), + Err(error) => return Err(Error::ParsingLibp2pIdentity(error)), } - Ok::, Error>(addresses) - }? - .into_iter() - .take(MAX_ADDRESSES_PER_AUTHORITY) - .collect(); + } else if self.strict_record_validation { + return Err(Error::MissingPeerIdSignature) + } else { + debug!( + target: LOG_TARGET, + "Received unsigned authority discovery record from {}", authority_id + ); + } - let answering_peer_id = values.peer.map(|peer| peer.into()); + let remote_addresses: Vec = + addresses.into_iter().take(MAX_ADDRESSES_PER_AUTHORITY).collect(); - let records_creation_time = - single(remote_addresses.iter().map(|(_addr, timestamp)| *timestamp)) - .map_err(|_| Error::ReceivingDhtValueFoundWithDifferentRecordCreationTime)? 
- .unwrap_or_default(); + let answering_peer_id = values.peer.map(|peer| peer.into()); let addr_cache_needs_update = self.handle_new_record( &authority_id, - kademlia_key.clone(), + remote_key.clone(), RecordInfo { creation_time: records_creation_time, peers_with_record: answering_peer_id.into_iter().collect(), @@ -730,8 +721,7 @@ where ); if !remote_addresses.is_empty() && addr_cache_needs_update { - self.addr_cache - .insert(authority_id, remote_addresses.into_iter().map(|(addr, _)| addr).collect()); + self.addr_cache.insert(authority_id, remote_addresses); if let Some(metrics) = &self.metrics { metrics .known_authorities_count From 28ac0a24474b2d4d1e901110fbf80d6f6618fa73 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 17 Apr 2024 18:34:20 +0300 Subject: [PATCH 23/47] Minor cleanups Signed-off-by: Alexandru Gheorghe --- substrate/client/authority-discovery/src/worker.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 14f4dab155eb..27fa9f322ac3 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -605,9 +605,9 @@ where } } - fn handle_dht_value_found_event(&mut self, values: PeerRecord) -> Result<()> { + fn handle_dht_value_found_event(&mut self, peer_record: PeerRecord) -> Result<()> { // Ensure `values` is not empty and all its keys equal. 
- let remote_key = values.record.key.clone(); + let remote_key = peer_record.record.key.clone(); let authority_id: AuthorityId = if let Some(authority_id) = self.in_flight_lookups.remove(&remote_key) { @@ -627,7 +627,7 @@ where auth_signature, peer_signature, creation_time: creation_time_info, - } = schema::SignedAuthorityRecord::decode(values.record.value.as_slice()) + } = schema::SignedAuthorityRecord::decode(peer_record.record.value.as_slice()) .map_err(Error::DecodingProto)?; let auth_signature = AuthoritySignature::decode(&mut &auth_signature[..]) @@ -708,7 +708,7 @@ where let remote_addresses: Vec = addresses.into_iter().take(MAX_ADDRESSES_PER_AUTHORITY).collect(); - let answering_peer_id = values.peer.map(|peer| peer.into()); + let answering_peer_id = peer_record.peer.map(|peer| peer.into()); let addr_cache_needs_update = self.handle_new_record( &authority_id, @@ -716,7 +716,7 @@ where RecordInfo { creation_time: records_creation_time, peers_with_record: answering_peer_id.into_iter().collect(), - record: Some(values.record), + record: Some(peer_record.record), }, ); From bd21fda2b62d58f19897c48664048bc3f20a4ad7 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 24 Apr 2024 13:16:33 +0300 Subject: [PATCH 24/47] Update litep2p Signed-off-by: Alexandru Gheorghe --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 62479cce2a0e..f4b392e7856d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8188,7 +8188,7 @@ dependencies = [ [[package]] name = "litep2p" version = "0.3.0" -source = "git+https://github.com/paritytech/litep2p?branch=master#b142c9eb611fb2fe78d2830266a3675b37299ceb" +source = "git+https://github.com/paritytech/litep2p?branch=master#08112ca642cea3809625ae0abde05dc0dc46b4f3" dependencies = [ "async-trait", "bs58 0.4.0", @@ -20449,9 +20449,9 @@ dependencies = [ [[package]] name = "str0m" -version = "0.2.0" +version = "0.4.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee48572247f422dcbe68630c973f8296fbd5157119cd36a3223e48bf83d47727" +checksum = "d3f10d3f68e60168d81110410428a435dbde28cc5525f5f7c6fdec92dbdc2800" dependencies = [ "combine", "crc", From 57f1b385bf574d59e0f0eea8e7adfa5709f661ab Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 24 Apr 2024 14:26:16 +0300 Subject: [PATCH 25/47] Use put_valut_to from litep2p Signed-off-by: Alexandru Gheorghe --- .../client/network/src/litep2p/discovery.rs | 15 +++++++-- substrate/client/network/src/litep2p/mod.rs | 25 +++++++++++---- .../client/network/src/litep2p/service.rs | 32 ++++++++++++++++--- 3 files changed, 59 insertions(+), 13 deletions(-) diff --git a/substrate/client/network/src/litep2p/discovery.rs b/substrate/client/network/src/litep2p/discovery.rs index 27f4d5473722..5c4139aa1a0e 100644 --- a/substrate/client/network/src/litep2p/discovery.rs +++ b/substrate/client/network/src/litep2p/discovery.rs @@ -36,7 +36,7 @@ use litep2p::{ identify::{Config as IdentifyConfig, IdentifyEvent}, kademlia::{ Config as KademliaConfig, ConfigBuilder as KademliaConfigBuilder, KademliaEvent, - KademliaHandle, QueryId, Quorum, Record, RecordKey, + KademliaHandle, PeerRecord, QueryId, Quorum, Record, RecordKey, }, ping::{Config as PingConfig, PingEvent}, }, @@ -125,7 +125,7 @@ pub enum DiscoveryEvent { query_id: QueryId, /// Record. - record: Record, + record: PeerRecord, }, /// Record was successfully stored on the DHT. @@ -336,6 +336,17 @@ impl Discovery { .await } + /// Put record to given peers. + pub async fn put_value_to_peers( + &mut self, + record: Record, + peers: Vec, + ) -> QueryId { + self.kademlia_handle + .put_record_to_peers(record, peers.into_iter().map(|peer| peer.into()).collect()) + .await + } + /// Check if the observed address is a known address. 
fn is_known_address(known: &Multiaddr, observed: &Multiaddr) -> bool { let mut known = known.iter(); diff --git a/substrate/client/network/src/litep2p/mod.rs b/substrate/client/network/src/litep2p/mod.rs index 021a86968be6..79020e061618 100644 --- a/substrate/client/network/src/litep2p/mod.rs +++ b/substrate/client/network/src/litep2p/mod.rs @@ -696,6 +696,13 @@ impl NetworkBackend for Litep2pNetworkBac let query_id = self.discovery.put_value(key.clone(), value).await; self.pending_put_values.insert(query_id, (key, Instant::now())); } + + NetworkServiceCommand::PutValueTo { record, peers, update_local_storage: _ } => { + let kademlia_key = record.key.to_vec().into(); + let query_id = self.discovery.put_value_to_peers(record, peers).await; + self.pending_put_values.insert(query_id, (kademlia_key, Instant::now())); + } + NetworkServiceCommand::EventStream { tx } => { self.event_streams.push(tx); } @@ -805,7 +812,7 @@ impl NetworkBackend for Litep2pNetworkBac log::trace!( target: LOG_TARGET, "`GET_VALUE` for {:?} ({query_id:?}) succeeded", - record.key, + record.record.key, ); self.event_streams.send( @@ -813,12 +820,18 @@ impl NetworkBackend for Litep2pNetworkBac DhtEvent::ValueFound( PeerRecord { record: Record { - key: record.key.to_vec().into(), - value: record.value, - publisher: None, - expires: None + key: record.record.key.to_vec().into(), + value: record.record.value, + publisher: record.record.publisher.map(|peer_id| { + let peer_id: sc_network_types::PeerId = peer_id.into(); + peer_id.into() + }), + expires: record.record.expires, }, - peer: None, + peer: record.peer.map(|peer_id| { + let peer_id: sc_network_types::PeerId = peer_id.into(); + peer_id.into() + }), } ) ) diff --git a/substrate/client/network/src/litep2p/service.rs b/substrate/client/network/src/litep2p/service.rs index b75a7dcca064..21ed7835cdc1 100644 --- a/substrate/client/network/src/litep2p/service.rs +++ b/substrate/client/network/src/litep2p/service.rs @@ -36,7 +36,7 @@ use crate::{ use 
codec::DecodeAll; use futures::{channel::oneshot, stream::BoxStream}; use libp2p::{identity::SigningError, kad::record::Key as KademliaKey, Multiaddr}; -use litep2p::crypto::ed25519::Keypair; +use litep2p::{crypto::ed25519::Keypair, protocol::libp2p::kademlia::Record}; use parking_lot::RwLock; use sc_network_common::{ @@ -73,6 +73,16 @@ pub enum NetworkServiceCommand { value: Vec, }, + /// Put value to DHT. + PutValueTo { + /// Record. + record: Record, + /// Peers we want to put the record. + peers: Vec, + /// If we should update the local storage or not. + update_local_storage: bool, + }, + /// Query network status. Status { /// `oneshot::Sender` for sending the status. @@ -237,11 +247,23 @@ impl NetworkDHTProvider for Litep2pNetworkService { fn put_record_to( &self, - _record: libp2p::kad::Record, - _peers: HashSet, - _update_local_storage: bool, + record: libp2p::kad::Record, + peers: HashSet, + update_local_storage: bool, ) { - todo!("Litep2p does not implement put_record_to"); + let _ = self.cmd_tx.unbounded_send(NetworkServiceCommand::PutValueTo { + record: Record { + key: record.key.to_vec().into(), + value: record.value, + publisher: record.publisher.map(|peer_id| { + let peer_id: sc_network_types::PeerId = peer_id.into(); + peer_id.into() + }), + expires: record.expires, + }, + peers: peers.into_iter().collect(), + update_local_storage, + }); } } From 9f56b02673aa6c2121057a10b9b54a66eca74d41 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 24 Apr 2024 14:42:38 +0300 Subject: [PATCH 26/47] Minor feedback Signed-off-by: Alexandru Gheorghe --- substrate/client/network/src/litep2p/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/substrate/client/network/src/litep2p/mod.rs b/substrate/client/network/src/litep2p/mod.rs index 79020e061618..1be74a0e4e4f 100644 --- a/substrate/client/network/src/litep2p/mod.rs +++ b/substrate/client/network/src/litep2p/mod.rs @@ -836,7 +836,6 @@ impl NetworkBackend for Litep2pNetworkBac ) ) ); - 
log::error!(target: LOG_TARGET, "Fix logic differences between litep2p and libp2p"); if let Some(ref metrics) = self.metrics { metrics From 6d196c40eb2075b035ea338d4e025bbc03637e2b Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Thu, 25 Apr 2024 13:36:42 +0300 Subject: [PATCH 27/47] Minor changes Signed-off-by: Alexandru Gheorghe --- substrate/client/authority-discovery/src/worker.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 27fa9f322ac3..a5ae4d5ce447 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -162,7 +162,7 @@ pub struct Worker { /// Last known record by key, here we always keep the record with /// the highest creation time and we don't accept records older than /// that. - last_known_record: HashMap, + last_known_records: HashMap, addr_cache: addr_cache::AddrCache, @@ -301,7 +301,7 @@ where role, metrics, phantom: PhantomData, - last_known_record: HashMap::new(), + last_known_records: HashMap::new(), } } @@ -738,7 +738,7 @@ where new_record: RecordInfo, ) -> bool { let current_record_info = self - .last_known_record + .last_known_records .entry(kademlia_key.clone()) .or_insert_with(|| new_record.clone()); if new_record.creation_time > current_record_info.creation_time { @@ -757,7 +757,7 @@ where "Found a newer record for {:?} new record creation time {:?} old record creation time {:?}", authority_id, new_record.creation_time, current_record_info.creation_time ); - self.last_known_record.insert(kademlia_key, new_record); + self.last_known_records.insert(kademlia_key, new_record); true } else if new_record.creation_time == current_record_info.creation_time { // Same record just update in case this is a record from old nods that don't have @@ -898,7 +898,7 @@ fn sign_record_with_authority_ids( .duration_since(UNIX_EPOCH) .map(|time| 
time.as_nanos()) .unwrap_or_default(); - debug!(target: LOG_TARGET, "Publish address with creation time{:?}", creation_time); + debug!(target: LOG_TARGET, "Publish address with creation time {:?}", creation_time); let creation_time = creation_time.encode(); let creation_time_signature = key_store .sr25519_sign(key_types::AUTHORITY_DISCOVERY, key.as_ref(), &creation_time) From 9979f8fef0bf1ec09a41bea629b574cca8697e82 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Thu, 25 Apr 2024 14:13:02 +0300 Subject: [PATCH 28/47] Minor updates Signed-off-by: Alexandru Gheorghe --- substrate/client/network/src/litep2p/discovery.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/substrate/client/network/src/litep2p/discovery.rs b/substrate/client/network/src/litep2p/discovery.rs index 5c4139aa1a0e..e4d22c91400d 100644 --- a/substrate/client/network/src/litep2p/discovery.rs +++ b/substrate/client/network/src/litep2p/discovery.rs @@ -50,6 +50,7 @@ use schnellru::{ByLength, LruMap}; use std::{ cmp, collections::{HashMap, HashSet, VecDeque}, + num::NonZeroUsize, pin::Pin, sync::Arc, task::{Context, Poll}, @@ -68,6 +69,9 @@ const MDNS_QUERY_INTERVAL: Duration = Duration::from_secs(30); /// Minimum number of confirmations received before an address is verified. const MIN_ADDRESS_CONFIRMATIONS: usize = 5; +// The minimum number of peers we expect an answer before we terminate the request. +const GET_RECORD_REDUNDANCY_FACTOR: usize = 4; + /// Discovery events. #[derive(Debug)] pub enum DiscoveryEvent { @@ -325,7 +329,10 @@ impl Discovery { /// Start Kademlia `GET_VALUE` query for `key`. 
pub async fn get_value(&mut self, key: KademliaKey) -> QueryId { self.kademlia_handle - .get_record(RecordKey::new(&key.to_vec()), Quorum::One) + .get_record( + RecordKey::new(&key.to_vec()), + Quorum::N(NonZeroUsize::new(GET_RECORD_REDUNDANCY_FACTOR).unwrap()), + ) .await } From fd5dc46cf42a459a8cfe58394dc3f8932021800e Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Tue, 30 Apr 2024 16:45:56 +0300 Subject: [PATCH 29/47] Integrate with https://github.com/paritytech/litep2p/pull/96 Signed-off-by: Alexandru Gheorghe --- Cargo.lock | 46 +++++++-------- Cargo.toml | 3 + .../client/network/src/litep2p/discovery.rs | 19 ++++--- substrate/client/network/src/litep2p/mod.rs | 56 +++++++++++++------ 4 files changed, 77 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f4b392e7856d..82f4b02bb28f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1393,7 +1393,7 @@ dependencies = [ "rand_chacha 0.3.1", "rand_core 0.6.4", "ring 0.1.0", - "sha2 0.10.7", + "sha2 0.10.8", "sp-ark-bls12-381", "sp-ark-ed-on-bls12-381-bandersnatch", "zeroize", @@ -5071,7 +5071,7 @@ dependencies = [ "ed25519 2.2.2", "rand_core 0.6.4", "serde", - "sha2 0.10.7", + "sha2 0.10.8", "subtle 2.5.0", "zeroize", ] @@ -5101,7 +5101,7 @@ dependencies = [ "hashbrown 0.14.3", "hex", "rand_core 0.6.4", - "sha2 0.10.7", + "sha2 0.10.8", "zeroize", ] @@ -7339,7 +7339,7 @@ dependencies = [ "elliptic-curve", "once_cell", "serdect", - "sha2 0.10.7", + "sha2 0.10.8", ] [[package]] @@ -7770,7 +7770,7 @@ dependencies = [ "multihash 0.17.0", "quick-protobuf", "rand 0.8.5", - "sha2 0.10.7", + "sha2 0.10.8", "thiserror", "zeroize", ] @@ -7795,7 +7795,7 @@ dependencies = [ "log", "quick-protobuf", "rand 0.8.5", - "sha2 0.10.7", + "sha2 0.10.8", "smallvec", "thiserror", "uint", @@ -7853,7 +7853,7 @@ dependencies = [ "once_cell", "quick-protobuf", "rand 0.8.5", - "sha2 0.10.7", + "sha2 0.10.8", "snow", "static_assertions", "thiserror", @@ -8188,7 +8188,7 @@ dependencies = [ [[package]] name = "litep2p" 
version = "0.3.0" -source = "git+https://github.com/paritytech/litep2p?branch=master#08112ca642cea3809625ae0abde05dc0dc46b4f3" +source = "git+https://github.com/paritytech//litep2p?branch=lenxv/expose-peer-records#b6fb4c0c77f211006997e118c11f8d2aadea1f58" dependencies = [ "async-trait", "bs58 0.4.0", @@ -8215,7 +8215,7 @@ dependencies = [ "ring 0.16.20", "rustls 0.20.8", "serde", - "sha2 0.10.7", + "sha2 0.10.8", "simple-dns", "smallvec", "snow", @@ -8751,7 +8751,7 @@ dependencies = [ "core2", "digest 0.10.7", "multihash-derive 0.8.0", - "sha2 0.10.7", + "sha2 0.10.8", "sha3", "unsigned-varint", ] @@ -8768,7 +8768,7 @@ dependencies = [ "core2", "digest 0.10.7", "multihash-derive 0.8.0", - "sha2 0.10.7", + "sha2 0.10.8", "sha3", "unsigned-varint", ] @@ -8798,7 +8798,7 @@ dependencies = [ "ripemd", "serde", "sha1", - "sha2 0.10.7", + "sha2 0.10.8", "sha3", "strobe-rs", ] @@ -12552,7 +12552,7 @@ checksum = "56af0a30af74d0445c0bf6d9d051c979b516a1a5af790d251daee76005420a48" dependencies = [ "once_cell", "pest", - "sha2 0.10.7", + "sha2 0.10.8", ] [[package]] @@ -17896,7 +17896,7 @@ dependencies = [ "merlin", "rand_core 0.6.4", "serde_bytes", - "sha2 0.10.7", + "sha2 0.10.8", "subtle 2.5.0", "zeroize", ] @@ -18309,9 +18309,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.7" +version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ "cfg-if", "cpufeatures", @@ -18569,7 +18569,7 @@ dependencies = [ "schnorrkel 0.10.2", "serde", "serde_json", - "sha2 0.10.7", + "sha2 0.10.8", "sha3", "siphasher", "slab", @@ -18636,7 +18636,7 @@ dependencies = [ "rand_core 0.6.4", "ring 0.16.20", "rustc_version 0.4.0", - "sha2 0.10.7", + "sha2 0.10.8", "subtle 2.5.0", ] @@ -19484,7 +19484,7 @@ dependencies = [ "byteorder", "criterion 0.4.0", "digest 0.10.7", - "sha2 0.10.7", + 
"sha2 0.10.8", "sha3", "sp-crypto-hashing-proc-macro", "twox-hash", @@ -19905,7 +19905,7 @@ dependencies = [ "parity-scale-codec", "rand 0.8.5", "scale-info", - "sha2 0.10.7", + "sha2 0.10.8", "sp-api", "sp-application-crypto", "sp-core", @@ -20602,7 +20602,7 @@ dependencies = [ "pbkdf2", "rustc-hex", "schnorrkel 0.11.4", - "sha2 0.10.7", + "sha2 0.10.8", "zeroize", ] @@ -22298,7 +22298,7 @@ dependencies = [ "rand 0.8.5", "rand_chacha 0.3.1", "rand_core 0.6.4", - "sha2 0.10.7", + "sha2 0.10.8", "sha3", "thiserror", "zeroize", @@ -22616,7 +22616,7 @@ dependencies = [ "log", "rustix 0.36.15", "serde", - "sha2 0.10.7", + "sha2 0.10.8", "toml 0.5.11", "windows-sys 0.45.0", "zstd 0.11.2+zstd.1.5.2", diff --git a/Cargo.toml b/Cargo.toml index 42a6bc8abe1e..b4de76e29b08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -646,3 +646,6 @@ wasmi = { opt-level = 3 } x25519-dalek = { opt-level = 3 } yamux = { opt-level = 3 } zeroize = { opt-level = 3 } + +[patch."https://github.com/paritytech/litep2p"] +litep2p = { git = "https://github.com/paritytech//litep2p", branch = "lenxv/expose-peer-records" } \ No newline at end of file diff --git a/substrate/client/network/src/litep2p/discovery.rs b/substrate/client/network/src/litep2p/discovery.rs index e4d22c91400d..589bb135e194 100644 --- a/substrate/client/network/src/litep2p/discovery.rs +++ b/substrate/client/network/src/litep2p/discovery.rs @@ -36,7 +36,7 @@ use litep2p::{ identify::{Config as IdentifyConfig, IdentifyEvent}, kademlia::{ Config as KademliaConfig, ConfigBuilder as KademliaConfigBuilder, KademliaEvent, - KademliaHandle, PeerRecord, QueryId, Quorum, Record, RecordKey, + KademliaHandle, QueryId, Quorum, Record, RecordKey, RecordsType, }, ping::{Config as PingConfig, PingEvent}, }, @@ -128,8 +128,8 @@ pub enum DiscoveryEvent { /// Query ID. query_id: QueryId, - /// Record. - record: PeerRecord, + /// Records. + records: RecordsType, }, /// Record was successfully stored on the DHT. 
@@ -348,9 +348,14 @@ impl Discovery { &mut self, record: Record, peers: Vec, + update_local_storage: bool, ) -> QueryId { self.kademlia_handle - .put_record_to_peers(record, peers.into_iter().map(|peer| peer.into()).collect()) + .put_record_to_peers( + record, + peers.into_iter().map(|peer| peer.into()).collect(), + update_local_storage, + ) .await } @@ -474,13 +479,13 @@ impl Stream for Discovery { peers: peers.into_iter().collect(), })) }, - Poll::Ready(Some(KademliaEvent::GetRecordSuccess { query_id, record })) => { + Poll::Ready(Some(KademliaEvent::GetRecordSuccess { query_id, records })) => { log::trace!( target: LOG_TARGET, - "`GET_RECORD` succeeded for {query_id:?}: {record:?}", + "`GET_RECORD` succeeded for {query_id:?}: {records:?}", ); - return Poll::Ready(Some(DiscoveryEvent::GetRecordSuccess { query_id, record })); + return Poll::Ready(Some(DiscoveryEvent::GetRecordSuccess { query_id, records })); }, Poll::Ready(Some(KademliaEvent::PutRecordSucess { query_id, key: _ })) => return Poll::Ready(Some(DiscoveryEvent::PutRecordSuccess { query_id })), diff --git a/substrate/client/network/src/litep2p/mod.rs b/substrate/client/network/src/litep2p/mod.rs index 1be74a0e4e4f..0fce58458c54 100644 --- a/substrate/client/network/src/litep2p/mod.rs +++ b/substrate/client/network/src/litep2p/mod.rs @@ -697,9 +697,9 @@ impl NetworkBackend for Litep2pNetworkBac self.pending_put_values.insert(query_id, (key, Instant::now())); } - NetworkServiceCommand::PutValueTo { record, peers, update_local_storage: _ } => { + NetworkServiceCommand::PutValueTo { record, peers, update_local_storage} => { let kademlia_key = record.key.to_vec().into(); - let query_id = self.discovery.put_value_to_peers(record, peers).await; + let query_id = self.discovery.put_value_to_peers(record, peers, update_local_storage).await; self.pending_put_values.insert(query_id, (kademlia_key, Instant::now())); } @@ -802,40 +802,62 @@ impl NetworkBackend for Litep2pNetworkBac 
self.peerstore_handle.add_known_peer(peer.into()); } } - Some(DiscoveryEvent::GetRecordSuccess { query_id, record }) => { + Some(DiscoveryEvent::GetRecordSuccess { query_id, records }) => { match self.pending_get_values.remove(&query_id) { None => log::warn!( target: LOG_TARGET, "`GET_VALUE` succeeded for a non-existent query", ), - Some((_key, started)) => { + Some((key, started)) => { log::trace!( target: LOG_TARGET, "`GET_VALUE` for {:?} ({query_id:?}) succeeded", - record.record.key, + key, ); - - self.event_streams.send( - Event::Dht( - DhtEvent::ValueFound( + let received_records = match records { + litep2p::protocol::libp2p::kademlia::RecordsType::LocalStore(record) => { + vec![ PeerRecord { record: Record { - key: record.record.key.to_vec().into(), - value: record.record.value, - publisher: record.record.publisher.map(|peer_id| { + key: record.key.to_vec().into(), + value: record.value, + publisher: record.publisher.map(|peer_id| { let peer_id: sc_network_types::PeerId = peer_id.into(); peer_id.into() }), - expires: record.record.expires, + expires: record.expires, }, - peer: record.peer.map(|peer_id| { + peer: None, + } + ] + }, + litep2p::protocol::libp2p::kademlia::RecordsType::Network(records) => records.into_iter().map(|record| { + let peer_id: sc_network_types::PeerId = record.peer.into(); + + PeerRecord { + record: Record { + key: record.record.key.to_vec().into(), + value: record.record.value, + publisher: record.record.publisher.map(|peer_id| { let peer_id: sc_network_types::PeerId = peer_id.into(); peer_id.into() }), - } + expires: record.record.expires, + }, + peer: Some(peer_id.into()), + } + }).collect::>(), + }; + + for record in received_records { + self.event_streams.send( + Event::Dht( + DhtEvent::ValueFound( + record + ) ) - ) - ); + ); + } if let Some(ref metrics) = self.metrics { metrics From 20e351e0e39c06396166b842b1d1fef8b20e8479 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 1 May 2024 16:07:49 +0300 Subject: [PATCH 
30/47] Fix warning on quorum failed Signed-off-by: Alexandru Gheorghe --- substrate/client/network/src/discovery.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index 420f872c4cae..dfcd99c0ba92 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -427,7 +427,14 @@ impl DiscoveryBehaviour { warn!(target: "sub-libp2p", "Failed to update local starage"); } } - kad.put_record_to(record, peers.into_iter().map(|peer_id| peer_id.into()), Quorum::All); + + if !peers.is_empty() { + kad.put_record_to( + record, + peers.into_iter().map(|peer_id| peer_id.into()), + Quorum::All, + ); + } } } From 98398e309f1e05ef6e7b052a59c7d8f947a96352 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 1 May 2024 18:03:06 +0300 Subject: [PATCH 31/47] Reconnect only if new peer ids pop-up Signed-off-by: Alexandru Gheorghe --- polkadot/node/network/gossip-support/src/lib.rs | 16 +++++++++++++--- substrate/client/network/src/discovery.rs | 7 +++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/polkadot/node/network/gossip-support/src/lib.rs b/polkadot/node/network/gossip-support/src/lib.rs index 0d8f6545f75c..cd327c11e408 100644 --- a/polkadot/node/network/gossip-support/src/lib.rs +++ b/polkadot/node/network/gossip-support/src/lib.rs @@ -393,11 +393,21 @@ where let mut changed = Vec::new(); for (authority, new_addresses) in &resolved { + let new_peer_ids = new_addresses + .iter() + .flat_map(|addr| parse_addr(addr.clone()).ok().map(|(p, _)| p)) + .collect::>(); match self.resolved_authorities.get(authority) { - Some(old_addresses) if !old_addresses.is_superset(new_addresses) => - changed.push(new_addresses.clone()), + Some(old_addresses) => { + let old_peer_ids = old_addresses + .iter() + .flat_map(|addr| parse_addr(addr.clone()).ok().map(|(p, _)| p)) + .collect::>(); + if 
!old_peer_ids.is_superset(&new_peer_ids) { + changed.push(new_addresses.clone()); + } + }, None => changed.push(new_addresses.clone()), - _ => {}, } } gum::debug!( diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index dfcd99c0ba92..b1959881532b 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -646,6 +646,13 @@ impl NetworkBehaviour for DiscoveryBehaviour { { for (addr, _error) in errors { entry.get_mut().retain(|a| a != addr); + if let Some(k) = self.kademlia.as_mut() { + // Remove addresses that can't be reached from kademlia as well, + // otherwise it is going to try to continously reconnect to + // peers that go away if they are not removed from the routing + // table. + k.remove_address(&peer_id, &addr); + } } if entry.get().is_empty() { entry.remove(); From 806604475a35df5fb4847c48a30530ff5643684e Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Thu, 2 May 2024 15:28:25 +0300 Subject: [PATCH 32/47] Revert kademlia removal Signed-off-by: Alexandru Gheorghe --- substrate/client/network/src/discovery.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/substrate/client/network/src/discovery.rs b/substrate/client/network/src/discovery.rs index b1959881532b..dfcd99c0ba92 100644 --- a/substrate/client/network/src/discovery.rs +++ b/substrate/client/network/src/discovery.rs @@ -646,13 +646,6 @@ impl NetworkBehaviour for DiscoveryBehaviour { { for (addr, _error) in errors { entry.get_mut().retain(|a| a != addr); - if let Some(k) = self.kademlia.as_mut() { - // Remove addresses that can't be reached from kademlia as well, - // otherwise it is going to try to continously reconnect to - // peers that go away if they are not removed from the routing - // table. 
- k.remove_address(&peer_id, &addr); - } } if entry.get().is_empty() { entry.remove(); From b4fe357767da858ac66cc15ed571a3b9011f00a3 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Thu, 2 May 2024 15:56:01 +0300 Subject: [PATCH 33/47] Update cargo.lock Signed-off-by: Alexandru Gheorghe --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index f5b2ee855f5d..88134c1f4bc3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8116,7 +8116,7 @@ dependencies = [ [[package]] name = "litep2p" version = "0.3.0" -source = "git+https://github.com/paritytech/litep2p?rev=e03a6023882db111beeb24d8c0ceaac0721d3f0f#e03a6023882db111beeb24d8c0ceaac0721d3f0f" +source = "git+https://github.com/paritytech//litep2p?branch=lenxv/expose-peer-records#b6fb4c0c77f211006997e118c11f8d2aadea1f58" dependencies = [ "async-trait", "bs58 0.4.0", From 09b63062f505ba3850939956f3aa14e880b63102 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com> Date: Sun, 12 May 2024 12:53:25 +0300 Subject: [PATCH 34/47] Update substrate/client/network/src/litep2p/mod.rs Co-authored-by: Dmitry Markin --- substrate/client/network/src/litep2p/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/substrate/client/network/src/litep2p/mod.rs b/substrate/client/network/src/litep2p/mod.rs index 0fce58458c54..add97aa981db 100644 --- a/substrate/client/network/src/litep2p/mod.rs +++ b/substrate/client/network/src/litep2p/mod.rs @@ -696,7 +696,6 @@ impl NetworkBackend for Litep2pNetworkBac let query_id = self.discovery.put_value(key.clone(), value).await; self.pending_put_values.insert(query_id, (key, Instant::now())); } - NetworkServiceCommand::PutValueTo { record, peers, update_local_storage} => { let kademlia_key = record.key.to_vec().into(); let query_id = self.discovery.put_value_to_peers(record, peers, update_local_storage).await; From fb534b5ebbfaa07df72efbb9b4e35818b8a582c7 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe 
<49718502+alexggh@users.noreply.github.com> Date: Sun, 12 May 2024 12:54:43 +0300 Subject: [PATCH 35/47] Update polkadot/node/network/gossip-support/src/tests.rs Co-authored-by: Dmitry Markin --- polkadot/node/network/gossip-support/src/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/network/gossip-support/src/tests.rs b/polkadot/node/network/gossip-support/src/tests.rs index 3889bea4ce62..2fda9be47682 100644 --- a/polkadot/node/network/gossip-support/src/tests.rs +++ b/polkadot/node/network/gossip-support/src/tests.rs @@ -818,7 +818,7 @@ fn issues_update_authorities_after_session() { // Test we connect to authorities that changed their address `TRY_RERESOLVE_AUTHORITIES` rate // and that is is no-op if no authority changed. #[test] -fn test_quickly_connect_to_authorithies_that_changed_address() { +fn test_quickly_connect_to_authorities_that_changed_address() { let hash = Hash::repeat_byte(0xAA); let authorities = PAST_PRESENT_FUTURE_AUTHORITIES.clone(); From 7399dc554d2c12078b0fcecc6e5444975fdd67d2 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com> Date: Sun, 12 May 2024 12:55:12 +0300 Subject: [PATCH 36/47] Update substrate/client/authority-discovery/src/worker/tests.rs Co-authored-by: Dmitry Markin --- substrate/client/authority-discovery/src/worker/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/substrate/client/authority-discovery/src/worker/tests.rs b/substrate/client/authority-discovery/src/worker/tests.rs index 92b65ac16052..aa6d6f215755 100644 --- a/substrate/client/authority-discovery/src/worker/tests.rs +++ b/substrate/client/authority-discovery/src/worker/tests.rs @@ -800,7 +800,7 @@ fn records_with_incorrectly_signed_creation_time_are_ignored() { assert_eq!( Some(HashSet::from([addr])), cached_remote_addresses, - "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + "Expect `Multiaddr` to remain the same", ); assert!(network .as_ref() 
From 17df838b665fb7f1cf8973646b9a8240187a359e Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Sun, 12 May 2024 13:03:29 +0300 Subject: [PATCH 37/47] Update assert messages Signed-off-by: Alexandru Gheorghe --- .../authority-discovery/src/worker/tests.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker/tests.rs b/substrate/client/authority-discovery/src/worker/tests.rs index aa6d6f215755..4096fe9698eb 100644 --- a/substrate/client/authority-discovery/src/worker/tests.rs +++ b/substrate/client/authority-discovery/src/worker/tests.rs @@ -703,7 +703,7 @@ fn strict_accept_address_without_creation_time() { assert_eq!( Some(HashSet::from([addr])), cached_remote_addresses, - "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + "Expect worker to cache address without creation time", ); } @@ -724,7 +724,7 @@ fn keep_last_received_if_no_creation_time() { assert_eq!( Some(HashSet::from([addr])), cached_remote_addresses, - "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + "Expect worker to cache address without creation time", ); assert!(network @@ -746,7 +746,7 @@ fn keep_last_received_if_no_creation_time() { assert_eq!( Some(HashSet::from([addr2])), cached_remote_addresses, - "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + "Expect worker to cache last received when no creation time", ); assert!(network .as_ref() @@ -771,7 +771,7 @@ fn records_with_incorrectly_signed_creation_time_are_ignored() { assert_eq!( Some(HashSet::from([addr.clone()])), cached_remote_addresses, - "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + "Expect worker to cache record with creation time", ); assert!(network .as_ref() @@ -825,7 +825,7 @@ fn newer_records_overwrite_older_ones() { assert_eq!( Some(HashSet::from([old_record])), cached_remote_addresses, - "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + "Expect worker to cache record with creation 
time", ); let nothing_updated = network @@ -848,7 +848,7 @@ fn newer_records_overwrite_older_ones() { assert_eq!( Some(HashSet::from([new_record])), cached_remote_addresses, - "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + "Expect worker to store the newest recrod", ); let result = network @@ -885,7 +885,7 @@ fn older_records_dont_affect_newer_ones() { assert_eq!( Some(HashSet::from([new_record.clone()])), cached_remote_addresses, - "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + "Expect worker to store new record", ); let nothing_updated = network @@ -899,7 +899,7 @@ fn older_records_dont_affect_newer_ones() { assert_eq!( Some(HashSet::from([new_record])), cached_remote_addresses, - "Expect worker to only cache `Multiaddr`s with `PeerId`s.", + "Expect worker to not update stored record", ); let update_peers_info = network From 3313fd7e33c99ac2a02fd67cae5a490097c59b83 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Sun, 12 May 2024 13:29:57 +0300 Subject: [PATCH 38/47] Address review feedback Signed-off-by: Alexandru Gheorghe --- substrate/client/network/src/litep2p/mod.rs | 81 +++++++++++---------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/substrate/client/network/src/litep2p/mod.rs b/substrate/client/network/src/litep2p/mod.rs index add97aa981db..1f450430d7d2 100644 --- a/substrate/client/network/src/litep2p/mod.rs +++ b/substrate/client/network/src/litep2p/mod.rs @@ -57,7 +57,10 @@ use litep2p::{ crypto::ed25519::{Keypair, SecretKey}, executor::Executor, protocol::{ - libp2p::{bitswap::Config as BitswapConfig, kademlia::QueryId}, + libp2p::{ + bitswap::Config as BitswapConfig, + kademlia::{QueryId, RecordsType}, + }, request_response::ConfigBuilder as RequestResponseConfigBuilder, }, transport::{ @@ -813,42 +816,7 @@ impl NetworkBackend for Litep2pNetworkBac "`GET_VALUE` for {:?} ({query_id:?}) succeeded", key, ); - let received_records = match records { - 
litep2p::protocol::libp2p::kademlia::RecordsType::LocalStore(record) => { - vec![ - PeerRecord { - record: Record { - key: record.key.to_vec().into(), - value: record.value, - publisher: record.publisher.map(|peer_id| { - let peer_id: sc_network_types::PeerId = peer_id.into(); - peer_id.into() - }), - expires: record.expires, - }, - peer: None, - } - ] - }, - litep2p::protocol::libp2p::kademlia::RecordsType::Network(records) => records.into_iter().map(|record| { - let peer_id: sc_network_types::PeerId = record.peer.into(); - - PeerRecord { - record: Record { - key: record.record.key.to_vec().into(), - value: record.record.value, - publisher: record.record.publisher.map(|peer_id| { - let peer_id: sc_network_types::PeerId = peer_id.into(); - peer_id.into() - }), - expires: record.record.expires, - }, - peer: Some(peer_id.into()), - } - }).collect::>(), - }; - - for record in received_records { + for record in litep2p_to_libp2p_peer_record(records) { self.event_streams.send( Event::Dht( DhtEvent::ValueFound( @@ -1032,3 +1000,42 @@ impl NetworkBackend for Litep2pNetworkBac } } } + +// Glue code to convert from a litep2p records type to a libp2p2 PeerRecord. 
+fn litep2p_to_libp2p_peer_record(records: RecordsType) -> Vec { + match records { + litep2p::protocol::libp2p::kademlia::RecordsType::LocalStore(record) => { + vec![PeerRecord { + record: Record { + key: record.key.to_vec().into(), + value: record.value, + publisher: record.publisher.map(|peer_id| { + let peer_id: sc_network_types::PeerId = peer_id.into(); + peer_id.into() + }), + expires: record.expires, + }, + peer: None, + }] + }, + litep2p::protocol::libp2p::kademlia::RecordsType::Network(records) => records + .into_iter() + .map(|record| { + let peer_id: sc_network_types::PeerId = record.peer.into(); + + PeerRecord { + record: Record { + key: record.record.key.to_vec().into(), + value: record.record.value, + publisher: record.record.publisher.map(|peer_id| { + let peer_id: sc_network_types::PeerId = peer_id.into(); + peer_id.into() + }), + expires: record.record.expires, + }, + peer: Some(peer_id.into()), + } + }) + .collect::>(), + } +} From 999a710395273eb09f4b1c75be28fabdc0914857 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 29 May 2024 18:50:24 +0300 Subject: [PATCH 39/47] Use a single signature Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/worker.rs | 72 +++++----------- .../src/worker/schema/dht-v3.proto | 6 +- .../src/worker/schema/tests.rs | 86 ++++++++++++++++++- .../authority-discovery/src/worker/tests.rs | 40 ++++----- 4 files changed, 128 insertions(+), 76 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index a5ae4d5ce447..faa15ff83ae2 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -460,7 +460,7 @@ where .set(addresses.len().try_into().unwrap_or(std::u64::MAX)); } - let serialized_record = serialize_authority_record(addresses)?; + let serialized_record = serialize_authority_record(addresses, Some(build_creation_time()))?; let peer_signature = 
sign_record_with_peer_id(&serialized_record, &self.network)?; let keys_vec = keys.iter().cloned().collect::>(); @@ -470,7 +470,6 @@ where Some(peer_signature), key_store.as_ref(), keys_vec, - true, )?; self.latest_published_kad_keys = kv_pairs.iter().map(|(k, _)| k.clone()).collect(); @@ -622,13 +621,9 @@ where let local_peer_id = self.network.local_peer_id(); - let schema::SignedAuthorityRecord { - record, - auth_signature, - peer_signature, - creation_time: creation_time_info, - } = schema::SignedAuthorityRecord::decode(peer_record.record.value.as_slice()) - .map_err(Error::DecodingProto)?; + let schema::SignedAuthorityRecord { record, auth_signature, peer_signature } = + schema::SignedAuthorityRecord::decode(peer_record.record.value.as_slice()) + .map_err(Error::DecodingProto)?; let auth_signature = AuthoritySignature::decode(&mut &auth_signature[..]) .map_err(Error::EncodingDecodingScale)?; @@ -637,27 +632,18 @@ where return Err(Error::VerifyingDhtPayload) } - if let Some(creation_time_info) = creation_time_info.as_ref() { - let creation_time_signature = - AuthoritySignature::decode(&mut &creation_time_info.signature[..]) - .map_err(Error::EncodingDecodingScale)?; - if !AuthorityPair::verify( - &creation_time_signature, - &creation_time_info.timestamp, - &authority_id, - ) { - return Err(Error::VerifyingDhtPayloadCreationTime) - } - } + let authority_record = schema::AuthorityRecord::decode(record.as_slice()); - let records_creation_time: u128 = creation_time_info + let records_creation_time: u128 = authority_record .as_ref() + .ok() + .and_then(|a| a.creation_time.as_ref()) .map(|creation_time| { u128::decode(&mut &creation_time.timestamp[..]).unwrap_or_default() }) .unwrap_or_default(); // 0 is a sane default for records that do not have creation time present. - let addresses: Vec = schema::AuthorityRecord::decode(record.as_slice()) + let addresses: Vec = authority_record .map(|a| a.addresses) .map_err(Error::DecodingProto)? 
.into_iter() @@ -854,9 +840,21 @@ fn serialize_addresses(addresses: impl Iterator) -> Vec>) -> Result> { +fn build_creation_time() -> schema::TimestampInfo { + let creation_time = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|time| time.as_nanos()) + .unwrap_or_default(); + schema::TimestampInfo { timestamp: creation_time.encode() } +} + +fn serialize_authority_record( + addresses: Vec>, + creation_time: Option, +) -> Result> { let mut serialized_record = vec![]; - schema::AuthorityRecord { addresses } + + schema::AuthorityRecord { addresses, creation_time } .encode(&mut serialized_record) .map_err(Error::EncodingProto)?; Ok(serialized_record) @@ -879,7 +877,6 @@ fn sign_record_with_authority_ids( peer_signature: Option, key_store: &dyn Keystore, keys: Vec, - include_creation_time: bool, ) -> Result)>> { let mut result = Vec::with_capacity(keys.len()); @@ -893,33 +890,10 @@ fn sign_record_with_authority_ids( // Scale encode let auth_signature = auth_signature.encode(); - let creation_time = if include_creation_time { - let creation_time = SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|time| time.as_nanos()) - .unwrap_or_default(); - debug!(target: LOG_TARGET, "Publish address with creation time {:?}", creation_time); - let creation_time = creation_time.encode(); - let creation_time_signature = key_store - .sr25519_sign(key_types::AUTHORITY_DISCOVERY, key.as_ref(), &creation_time) - .map_err(|e| Error::CannotSign(format!("{}. Key: {:?}", e, key)))? - .ok_or_else(|| { - Error::CannotSign(format!("Could not find key in keystore. 
Key: {:?}", key)) - })?; - - let creation_time = schema::TimestampInfo { - timestamp: creation_time, - signature: creation_time_signature.encode(), - }; - Some(creation_time) - } else { - None - }; let signed_record = schema::SignedAuthorityRecord { record: serialized_record.clone(), auth_signature, peer_signature: peer_signature.clone(), - creation_time, } .encode_to_vec(); diff --git a/substrate/client/authority-discovery/src/worker/schema/dht-v3.proto b/substrate/client/authority-discovery/src/worker/schema/dht-v3.proto index dc925c51553d..547237573af2 100644 --- a/substrate/client/authority-discovery/src/worker/schema/dht-v3.proto +++ b/substrate/client/authority-discovery/src/worker/schema/dht-v3.proto @@ -6,6 +6,8 @@ package authority_discovery_v3; message AuthorityRecord { // Possibly multiple `MultiAddress`es through which the node can be reached. repeated bytes addresses = 1; + // Information about the creation time of the record + TimestampInfo creation_time = 2; } message PeerSignature { @@ -17,8 +19,6 @@ message PeerSignature { message TimestampInfo { // Time since UNIX_EPOCH in nanoseconds, scale encoded bytes timestamp = 1; - // A signature of the creation time. - bytes signature = 2; } // Then we need to serialize the authority record and signature to send them over the wire. @@ -28,6 +28,4 @@ message SignedAuthorityRecord { // Even if there are multiple `record.addresses`, all of them have the same peer id. // Old versions are missing this field. It is optional in order to provide compatibility both ways. PeerSignature peer_signature = 3; - // Information regarding the creation data of this record. 
- TimestampInfo creation_time = 4; } diff --git a/substrate/client/authority-discovery/src/worker/schema/tests.rs b/substrate/client/authority-discovery/src/worker/schema/tests.rs index 5532624b9ef2..557fa9641f97 100644 --- a/substrate/client/authority-discovery/src/worker/schema/tests.rs +++ b/substrate/client/authority-discovery/src/worker/schema/tests.rs @@ -20,7 +20,12 @@ mod schema_v1 { include!(concat!(env!("OUT_DIR"), "/authority_discovery_v1.rs")); } +mod schema_v2 { + include!(concat!(env!("OUT_DIR"), "/authority_discovery_v2.rs")); +} + use super::*; +use codec::Encode; use libp2p::identity::Keypair; use prost::Message; use sc_network::{Multiaddr, PeerId}; @@ -65,7 +70,7 @@ fn v1_decodes_v2() { let vec_auth_signature = b"Totally valid signature, I promise!".to_vec(); let vec_peer_signature = b"Surprisingly hard to crack crypto".to_vec(); - let record_v2 = AuthorityRecord { addresses: vec_addresses.clone() }; + let record_v2 = schema_v2::AuthorityRecord { addresses: vec_addresses.clone() }; let mut vec_record_v2 = vec![]; record_v2.encode(&mut vec_record_v2).unwrap(); let vec_peer_public = peer_public.encode_protobuf(); @@ -75,7 +80,6 @@ fn v1_decodes_v2() { record: vec_record_v2.clone(), auth_signature: vec_auth_signature.clone(), peer_signature: Some(peer_signature_v2.clone()), - creation_time: None, }; let mut vec_signed_record_v2 = vec![]; signed_record_v2.encode(&mut vec_signed_record_v2).unwrap(); @@ -86,6 +90,82 @@ fn v1_decodes_v2() { assert_eq!(&signed_addresses_v1_decoded.addresses, &vec_record_v2); assert_eq!(&signed_addresses_v1_decoded.signature, &vec_auth_signature); - let addresses_v2_decoded = AuthorityRecord::decode(vec_record_v2.as_slice()).unwrap(); + let addresses_v2_decoded = + schema_v2::AuthorityRecord::decode(vec_record_v2.as_slice()).unwrap(); + assert_eq!(&addresses_v2_decoded.addresses, &vec_addresses); +} + +#[test] +fn v1_decodes_v3() { + let peer_secret = Keypair::generate_ed25519(); + let peer_public = peer_secret.public(); + 
let peer_id = peer_public.to_peer_id(); + let multiaddress: Multiaddr = + format!("/ip4/127.0.0.1/tcp/3003/p2p/{}", peer_id).parse().unwrap(); + let vec_addresses = vec![multiaddress.to_vec()]; + let vec_auth_signature = b"Totally valid signature, I promise!".to_vec(); + let vec_peer_signature = b"Surprisingly hard to crack crypto".to_vec(); + + let record_v3 = AuthorityRecord { + addresses: vec_addresses.clone(), + creation_time: Some(TimestampInfo { timestamp: Encode::encode(&55) }), + }; + let mut vec_record_v3 = vec![]; + record_v3.encode(&mut vec_record_v3).unwrap(); + let vec_peer_public = peer_public.encode_protobuf(); + let peer_signature_v3 = + PeerSignature { public_key: vec_peer_public, signature: vec_peer_signature }; + let signed_record_v3 = SignedAuthorityRecord { + record: vec_record_v3.clone(), + auth_signature: vec_auth_signature.clone(), + peer_signature: Some(peer_signature_v3.clone()), + }; + let mut vec_signed_record_v3 = vec![]; + signed_record_v3.encode(&mut vec_signed_record_v3).unwrap(); + + let signed_addresses_v1_decoded = + schema_v1::SignedAuthorityAddresses::decode(vec_signed_record_v3.as_slice()).unwrap(); + + assert_eq!(&signed_addresses_v1_decoded.addresses, &vec_record_v3); + assert_eq!(&signed_addresses_v1_decoded.signature, &vec_auth_signature); + + let addresses_v2_decoded = + schema_v2::AuthorityRecord::decode(vec_record_v3.as_slice()).unwrap(); assert_eq!(&addresses_v2_decoded.addresses, &vec_addresses); } + +#[test] +fn v3_decodes_v2() { + let peer_secret = Keypair::generate_ed25519(); + let peer_public = peer_secret.public(); + let peer_id = peer_public.to_peer_id(); + let multiaddress: Multiaddr = + format!("/ip4/127.0.0.1/tcp/3003/p2p/{}", peer_id).parse().unwrap(); + let vec_addresses = vec![multiaddress.to_vec()]; + let vec_auth_signature = b"Totally valid signature, I promise!".to_vec(); + let vec_peer_signature = b"Surprisingly hard to crack crypto".to_vec(); + + let record_v2 = schema_v2::AuthorityRecord { addresses: 
vec_addresses.clone() }; + let mut vec_record_v2 = vec![]; + record_v2.encode(&mut vec_record_v2).unwrap(); + let vec_peer_public = peer_public.encode_protobuf(); + let peer_signature_v2 = + schema_v2::PeerSignature { public_key: vec_peer_public, signature: vec_peer_signature }; + let signed_record_v2 = schema_v2::SignedAuthorityRecord { + record: vec_record_v2.clone(), + auth_signature: vec_auth_signature.clone(), + peer_signature: Some(peer_signature_v2.clone()), + }; + let mut vec_signed_record_v2 = vec![]; + signed_record_v2.encode(&mut vec_signed_record_v2).unwrap(); + + let signed_addresses_v3_decoded = + SignedAuthorityRecord::decode(vec_signed_record_v2.as_slice()).unwrap(); + + assert_eq!(&signed_addresses_v3_decoded.record, &vec_record_v2); + assert_eq!(&signed_addresses_v3_decoded.auth_signature, &vec_auth_signature); + + let addresses_v3_decoded = AuthorityRecord::decode(vec_record_v2.as_slice()).unwrap(); + assert_eq!(&addresses_v3_decoded.addresses, &vec_addresses); + assert_eq!(&addresses_v3_decoded.creation_time, &None); +} diff --git a/substrate/client/authority-discovery/src/worker/tests.rs b/substrate/client/authority-discovery/src/worker/tests.rs index 4096fe9698eb..112082c47935 100644 --- a/substrate/client/authority-discovery/src/worker/tests.rs +++ b/substrate/client/authority-discovery/src/worker/tests.rs @@ -258,10 +258,11 @@ fn build_dht_event( public_key: AuthorityId, key_store: &MemoryKeystore, network: Option<&Signer>, - include_creation_time: bool, + creation_time: Option, ) -> Vec<(KademliaKey, Vec)> { let serialized_record = - serialize_authority_record(serialize_addresses(addresses.into_iter())).unwrap(); + serialize_authority_record(serialize_addresses(addresses.into_iter()), creation_time) + .unwrap(); let peer_signature = network.map(|n| sign_record_with_peer_id(&serialized_record, n).unwrap()); let kv_pairs = sign_record_with_authority_ids( @@ -269,7 +270,6 @@ fn build_dht_event( peer_signature, key_store, 
vec![public_key.into()], - include_creation_time, ) .unwrap(); // There is always a single item in it, because we signed it with a single key @@ -534,7 +534,7 @@ fn dont_stop_polling_dht_event_stream_after_bogus_event() { remote_public_key.clone(), &remote_key_store, None, - true, + Some(build_creation_time()), ) .into_iter() .map(|(key, value)| PeerRecord { @@ -658,7 +658,7 @@ fn limit_number_of_addresses_added_to_cache_per_authority() { tester.remote_authority_public.into(), &tester.remote_key_store, None, - true, + Some(build_creation_time()), ); let cached_remote_addresses = tester.process_value_found(false, kv_pairs).0; @@ -674,7 +674,7 @@ fn strict_accept_address_with_peer_signature() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), - true, + Some(build_creation_time()), ); let cached_remote_addresses = tester.process_value_found(true, kv_pairs).0; @@ -695,7 +695,7 @@ fn strict_accept_address_without_creation_time() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), - false, + None, ); let cached_remote_addresses = tester.process_value_found(true, kv_pairs).0; @@ -716,7 +716,7 @@ fn keep_last_received_if_no_creation_time() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), - false, + None, ); let (cached_remote_addresses, network) = tester.process_value_found(true, kv_pairs); @@ -738,7 +738,7 @@ fn keep_last_received_if_no_creation_time() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), - false, + None, ); let cached_remote_addresses = tester.process_value_found(true, kv_pairs).0; @@ -763,7 +763,7 @@ fn records_with_incorrectly_signed_creation_time_are_ignored() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: 
&tester.remote_node_key }), - true, + Some(build_creation_time()), ); let (cached_remote_addresses, network) = tester.process_value_found(true, kv_pairs); @@ -789,7 +789,7 @@ fn records_with_incorrectly_signed_creation_time_are_ignored() { alternative_key.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), - true, + Some(build_creation_time()), ); let kademlia_key = hash_authority_id(tester.remote_authority_public.as_slice()); for key in kv_pairs.iter_mut() { @@ -817,7 +817,7 @@ fn newer_records_overwrite_older_ones() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), - true, + Some(build_creation_time()), ); let (cached_remote_addresses, network) = tester.process_value_found(true, kv_pairs); @@ -840,7 +840,7 @@ fn newer_records_overwrite_older_ones() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), - true, + Some(build_creation_time()), ); let cached_remote_addresses = tester.process_value_found(true, kv_pairs).0; @@ -868,7 +868,7 @@ fn older_records_dont_affect_newer_ones() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), - true, + Some(build_creation_time()), ); let new_record = tester.multiaddr_with_peer_id(2); @@ -877,7 +877,7 @@ fn older_records_dont_affect_newer_ones() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), - true, + Some(build_creation_time()), ); let (cached_remote_addresses, network) = tester.process_value_found(true, kv_pairs); @@ -919,7 +919,7 @@ fn reject_address_with_rogue_peer_signature() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &rogue_remote_node_key }), - true, + Some(build_creation_time()), ); let cached_remote_addresses = tester.process_value_found(false, 
kv_pairs).0; @@ -938,7 +938,7 @@ fn reject_address_with_invalid_peer_signature() { tester.remote_authority_public.into(), &tester.remote_key_store, Some(&TestSigner { keypair: &tester.remote_node_key }), - true, + Some(build_creation_time()), ); // tamper with the signature let mut record = schema::SignedAuthorityRecord::decode(kv_pairs[0].1.as_slice()).unwrap(); @@ -961,7 +961,7 @@ fn reject_address_without_peer_signature() { tester.remote_authority_public.into(), &tester.remote_key_store, None, - true, + Some(build_creation_time()), ); let cached_remote_addresses = tester.process_value_found(true, kv_pairs).0; @@ -980,7 +980,7 @@ fn do_not_cache_addresses_without_peer_id() { tester.remote_authority_public.into(), &tester.remote_key_store, None, - true, + Some(build_creation_time()), ); let cached_remote_addresses = tester.process_value_found(false, kv_pairs).0; @@ -1129,7 +1129,7 @@ fn lookup_throttling() { remote_key, &remote_key_store, None, - true, + Some(build_creation_time()), ) .into_iter() .map(|(key, value)| PeerRecord { From 7e81ca2d8a17d40de8f224d3787385472bc5d87e Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Mon, 15 Jul 2024 14:29:46 +0300 Subject: [PATCH 40/47] Remove unused Signed-off-by: Alexandru Gheorghe --- substrate/client/authority-discovery/src/error.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/substrate/client/authority-discovery/src/error.rs b/substrate/client/authority-discovery/src/error.rs index b9ea5655cdba..3f395e47922e 100644 --- a/substrate/client/authority-discovery/src/error.rs +++ b/substrate/client/authority-discovery/src/error.rs @@ -76,14 +76,6 @@ pub enum Error { #[error("Unable to fetch best block.")] BestBlockFetchingError, - #[error("Received dht value found event with records received from different peers.")] - ReceivingDhtValueFoundFromDifferentPeerIds, - - #[error("Failed to verify a dht creation time signature.")] - VerifyingDhtPayloadCreationTime, - - #[error("Received dht value found event with 
different record creation time.")] - ReceivingDhtValueFoundWithDifferentRecordCreationTime, #[error("Publisher not present.")] MissingPublisher, From f083318ccfd48445a3e7194321fb55970dc369cb Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com> Date: Thu, 18 Jul 2024 10:45:03 +0300 Subject: [PATCH 41/47] Update substrate/client/network/src/litep2p/mod.rs Co-authored-by: Dmitry Markin --- substrate/client/network/src/litep2p/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/substrate/client/network/src/litep2p/mod.rs b/substrate/client/network/src/litep2p/mod.rs index 808cb5264e9a..418713b85c8a 100644 --- a/substrate/client/network/src/litep2p/mod.rs +++ b/substrate/client/network/src/litep2p/mod.rs @@ -838,7 +838,6 @@ impl NetworkBackend for Litep2pNetworkBac .with_label_values(&["value-get"]) .observe(started.elapsed().as_secs_f64()); } - } } } From a5fcc6bf52f3f170b9ebc5a80abcd1ec452810be Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com> Date: Thu, 18 Jul 2024 10:45:13 +0300 Subject: [PATCH 42/47] Update substrate/client/authority-discovery/src/worker.rs Co-authored-by: Dmitry Markin --- substrate/client/authority-discovery/src/worker.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 39c40fda85d6..bc29656e8b00 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -160,7 +160,7 @@ pub struct Worker { /// Set of in-flight lookups. in_flight_lookups: HashMap, - /// Set of lookups we can still received records. + /// Set of lookups we can still receive records. /// These are the entries in the `in_flight_lookups` for which /// we got at least one successfull result. 
known_lookups: HashMap, From e54c96dc3f63ff52760084370c4f8e3733624384 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com> Date: Thu, 18 Jul 2024 10:45:25 +0300 Subject: [PATCH 43/47] Update substrate/client/authority-discovery/src/worker.rs Co-authored-by: Dmitry Markin --- substrate/client/authority-discovery/src/worker.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index bc29656e8b00..d70009fd8f09 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -873,7 +873,7 @@ where self.last_known_records.insert(kademlia_key, new_record); true } else if new_record.creation_time == current_record_info.creation_time { - // Same record just update in case this is a record from old nods that don't have + // Same record just update in case this is a record from old nodes that don't have // timestamp. debug!( target: LOG_TARGET, From 66959dd1def6bcceca3ec502ba8c92de9a396ec8 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Thu, 18 Jul 2024 10:59:51 +0300 Subject: [PATCH 44/47] Update documentation Signed-off-by: Alexandru Gheorghe --- substrate/client/authority-discovery/src/worker.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index d70009fd8f09..be51115f0a93 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -846,6 +846,8 @@ where Ok(()) } + // Handles receiving a new DHT record for the authority. + // Returns true if the record was new, false if the record was older than the current one.
fn handle_new_record( &mut self, authority_id: &AuthorityId, From 06285ef586f8eb79102ade7371984aae9aeb7c91 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com> Date: Thu, 18 Jul 2024 13:51:07 +0300 Subject: [PATCH 45/47] Update substrate/client/authority-discovery/src/worker.rs Co-authored-by: Alexandru Vasile <60601340+lexnv@users.noreply.github.com> --- substrate/client/authority-discovery/src/worker.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index be51115f0a93..16494bacc0d3 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -181,11 +181,11 @@ pub struct Worker { #[derive(Debug, Clone)] struct RecordInfo { - // Time since UNIX_EPOCH in nanoseconds. + /// Time since UNIX_EPOCH in nanoseconds. creation_time: u128, - // Peers that we know have this record. + /// Peers that we know have this record. peers_with_record: HashSet, - // The record itself. + /// The record itself. 
record: Record, } From 3a23fbd1b0ca38799760e2b4c2f9f03772cb9fb8 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Thu, 18 Jul 2024 15:20:44 +0300 Subject: [PATCH 46/47] Address review feedback Signed-off-by: Alexandru Gheorghe --- .../client/authority-discovery/src/worker.rs | 68 ++++++++++--------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/substrate/client/authority-discovery/src/worker.rs b/substrate/client/authority-discovery/src/worker.rs index 16494bacc0d3..42994bbc7ea8 100644 --- a/substrate/client/authority-discovery/src/worker.rs +++ b/substrate/client/authority-discovery/src/worker.rs @@ -37,14 +37,14 @@ use ip_network::IpNetwork; use libp2p::kad::{PeerRecord, Record}; use linked_hash_set::LinkedHashSet; -use log::{debug, error, log_enabled}; +use log::{debug, error}; use prometheus_endpoint::{register, Counter, CounterVec, Gauge, Opts, U64}; use prost::Message; use rand::{seq::SliceRandom, thread_rng}; use sc_network::{ - event::DhtEvent, multiaddr, KademliaKey, Multiaddr, NetworkDHTProvider, NetworkSigner, - NetworkStateInfo, + config::DEFAULT_KADEMLIA_REPLICATION_FACTOR, event::DhtEvent, multiaddr, KademliaKey, + Multiaddr, NetworkDHTProvider, NetworkSigner, NetworkStateInfo, }; use sc_network_types::{multihash::Code, PeerId}; use schema::PeerSignature; @@ -183,7 +183,8 @@ pub struct Worker { struct RecordInfo { /// Time since UNIX_EPOCH in nanoseconds. creation_time: u128, - /// Peers that we know have this record. + /// Peers that we know have this record, bounded to no more than + /// DEFAULT_KADEMLIA_REPLICATION_FACTOR(20). peers_with_record: HashSet, /// The record itself. 
record: Record, @@ -566,9 +567,7 @@ where metrics.dht_event_received.with_label_values(&["value_found"]).inc(); } - if log_enabled!(log::Level::Debug) { - debug!(target: LOG_TARGET, "Value for hash '{:?}' found on Dht.", v.record.key); - } + debug!(target: LOG_TARGET, "Value for hash '{:?}' found on Dht.", v.record.key); if let Err(e) = self.handle_dht_value_found_event(v) { if let Some(metrics) = &self.metrics { @@ -758,6 +757,7 @@ where let authority_id: AuthorityId = if let Some(authority_id) = self.in_flight_lookups.remove(&remote_key) { + self.known_lookups.insert(remote_key.clone(), authority_id.clone()); authority_id } else if let Some(authority_id) = self.known_lookups.get(&remote_key) { authority_id.clone() @@ -765,8 +765,6 @@ where return Err(Error::ReceivingUnexpectedRecord); }; - self.known_lookups.insert(remote_key.clone(), authority_id.clone()); - let local_peer_id = self.network.local_peer_id(); let schema::SignedAuthorityRecord { record, peer_signature, .. } = @@ -775,20 +773,19 @@ where &authority_id, )?; - let authority_record = schema::AuthorityRecord::decode(record.as_slice()); + let authority_record = + schema::AuthorityRecord::decode(record.as_slice()).map_err(Error::DecodingProto)?; let records_creation_time: u128 = authority_record + .creation_time .as_ref() - .ok() - .and_then(|a| a.creation_time.as_ref()) .map(|creation_time| { u128::decode(&mut &creation_time.timestamp[..]).unwrap_or_default() }) .unwrap_or_default(); // 0 is a sane default for records that do not have creation time present. let addresses: Vec = authority_record - .map(|a| a.addresses) - .map_err(Error::DecodingProto)? 
+ .addresses .into_iter() .map(|a| a.try_into()) .collect::>() @@ -858,6 +855,7 @@ where .last_known_records .entry(kademlia_key.clone()) .or_insert_with(|| new_record.clone()); + if new_record.creation_time > current_record_info.creation_time { let peers_that_need_updating = current_record_info.peers_with_record.clone(); self.network.put_record_to( @@ -873,8 +871,10 @@ where authority_id, new_record.creation_time, current_record_info.creation_time ); self.last_known_records.insert(kademlia_key, new_record); - true - } else if new_record.creation_time == current_record_info.creation_time { + return true + } + + if new_record.creation_time == current_record_info.creation_time { // Same record just update in case this is a record from old nodes that don't have // timestamp. debug!( @@ -882,23 +882,27 @@ where "Found same record for {:?} record creation time {:?}", authority_id, new_record.creation_time ); - current_record_info.peers_with_record.extend(new_record.peers_with_record); - true - } else { - debug!( - target: LOG_TARGET, - "Found old record for {:?} received record creation time {:?} current record creation time {:?}", - authority_id, new_record.creation_time, current_record_info.creation_time, - ); - self.network.put_record_to( - current_record_info.record.clone(), - new_record.peers_with_record.clone(), - // If this is empty it means we received the answer from our node local - // storage, so we need to update that as well. 
- new_record.peers_with_record.is_empty(), - ); - false + if current_record_info.peers_with_record.len() + new_record.peers_with_record.len() <= + DEFAULT_KADEMLIA_REPLICATION_FACTOR + { + current_record_info.peers_with_record.extend(new_record.peers_with_record); + } + return true } + + debug!( + target: LOG_TARGET, + "Found old record for {:?} received record creation time {:?} current record creation time {:?}", + authority_id, new_record.creation_time, current_record_info.creation_time, + ); + self.network.put_record_to( + current_record_info.record.clone(), + new_record.peers_with_record.clone(), + // If this is empty it means we received the answer from our node local + // storage, so we need to update that as well. + new_record.peers_with_record.is_empty(), + ); + return false } /// Retrieve our public keys within the current and next authority set. From 0256b3c7466583979649da36f5b47101bfce1031 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Tue, 23 Jul 2024 12:31:49 +0300 Subject: [PATCH 47/47] Add PrDoc Signed-off-by: Alexandru Gheorghe --- prdoc/pr_3786.prdoc | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 prdoc/pr_3786.prdoc diff --git a/prdoc/pr_3786.prdoc b/prdoc/pr_3786.prdoc new file mode 100644 index 000000000000..0bb9e6c23f75 --- /dev/null +++ b/prdoc/pr_3786.prdoc @@ -0,0 +1,22 @@ +title: Make changing of peer-id while active a bit more robust + +doc: + - audience: Node Dev + description: | + Implementation of https://github.com/polkadot-fellows/RFCs/pull/91, to use `creation_time` field to determine + the newest DHT record and to update nodes known to have the old record. + + Gossip-support is modified to try to re-resolve new address authorities every 5 minutes instead of each session, + so that we pick authorities that changed their address faster and try to connect to them.
+ +crates: +- name: sc-authority-discovery + bump: major +- name: polkadot-gossip-support + bump: major +- name: polkadot-network-bridge + bump: major +- name: polkadot-node-subsystem-types + bump: major +- name: sc-network + bump: minor \ No newline at end of file