From cdc601f1ee730b50304124f3e09e05151ab61e53 Mon Sep 17 00:00:00 2001 From: Kamil Braun Date: Wed, 9 Nov 2022 13:47:27 +0100 Subject: [PATCH] message: messaging_service: fix topology_ignored for pending endpoints in get_rpc_client `get_rpc_client` calculates a `topology_ignored` field when creating a client which says whether the client's endpoint had topology information when topology was created. This is later used to check if that client needs to be dropped and replaced with a new client which uses the correct topology information. The `topology_ignored` field was incorrectly calculated as `true` for pending endpoints even though we had topology information for them. This would lead to unnecessary drops of RPC clients later. Fix this. Remove the default parameter for `with_pending` from `topology::has_endpoint` to avoid similar bugs in the future. Apparently this fixes #11780. The verbs used by decommission operation use RPC client index 1 (see `do_get_rpc_client_idx` in message/messaging_service.cc). From local testing with additional logging I found that by the time this client is created (i.e. the first verb in this group is used), we already know the topology. The node is pending at that point - hence the bug would cause us to assume we don't know the topology, leading us to dropping the RPC client later, possibly in the middle of a decommission operation. Fixes: #11780 --- locator/token_metadata.cc | 2 +- locator/token_metadata.hh | 5 +++-- message/messaging_service.cc | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/locator/token_metadata.cc b/locator/token_metadata.cc index b1533c6a1728..ca4969128f83 100644 --- a/locator/token_metadata.cc +++ b/locator/token_metadata.cc @@ -562,7 +562,7 @@ const std::unordered_map& token_metadata_impl::get_endpoi } bool token_metadata_impl::is_member(inet_address endpoint) const { - return _topology.has_endpoint(endpoint); + return _topology.has_endpoint(endpoint, topology::pending::no); } void token_metadata_impl::add_bootstrap_token(token t, inet_address endpoint) { diff --git a/locator/token_metadata.hh b/locator/token_metadata.hh index a3cfc43064c0..68e8680b777f 100644 --- a/locator/token_metadata.hh +++ b/locator/token_metadata.hh @@ -67,9 +67,10 @@ public: void remove_endpoint(inet_address ep); /** - * Returns true iff contains given endpoint + * Returns true iff contains given endpoint. + * Excludes pending endpoints if `with_pending == pending::no`. */ - bool has_endpoint(inet_address, pending with_pending = pending::no) const; + bool has_endpoint(inet_address, pending with_pending) const; const std::unordered_map>& diff --git a/message/messaging_service.cc b/message/messaging_service.cc index f9982c09c4e7..8b0632fb95ce 100644 --- a/message/messaging_service.cc +++ b/message/messaging_service.cc @@ -710,7 +710,9 @@ shared_ptr messaging_service::ge std::optional topology_status; auto has_topology = [&] { if (!topology_status.has_value()) { - topology_status = _token_metadata ? _token_metadata->get()->get_topology().has_endpoint(id.addr) : false; + topology_status = _token_metadata + ? _token_metadata->get()->get_topology().has_endpoint(id.addr, locator::topology::pending::yes) + : false; } return *topology_status; };