From bbcf8305bb2098e62c9c2a602bc72d8312db404d Mon Sep 17 00:00:00 2001
From: Kamil Braun
Date: Tue, 4 Jul 2023 09:41:24 +0200
Subject: [PATCH 1/8] storage_service: don't calculate `ignore_nodes` before
 waiting for normal handlers

Before this commit, `wait_for_normal_state_handled_on_boot` would wait
for a static set of nodes (`sync_nodes`), calculated using the
`get_nodes_to_sync_with` function and `parse_node_list`; the latter was
used to obtain a list of "nodes to ignore" (for the replace operation)
and translate them, using `token_metadata`, from IP addresses to Host
IDs and vice versa.

`sync_nodes` was also used in the `_gossiper.wait_alive` call which we
do after `wait_for_normal_state_handled_on_boot`.

Recently we started doing these calculations and this wait very early
in the boot procedure - immediately after we start gossiping
(50e8ec77c6332b759a1e13125771524d26b7af34).

Unfortunately, as always with gossiper, there are complications. In
#14468 and #14487 two problems were detected:
- Gossiper may contain obsolete entries for nodes which were recently
  replaced or changed their IPs. These entries are still using status
  `NORMAL` or `shutdown` (which is treated like `NORMAL`, e.g.
  `handle_state_normal` is also called for it). The
  `_gossiper.wait_alive` call would wait for those entries too and
  eventually time out.
- Furthermore, by the time we call `parse_node_list`, `token_metadata`
  may not be populated yet, which is required to do the IP<->Host ID
  translations -- and populating `token_metadata` happens inside
  `handle_state_normal`, so we have a chicken-and-egg problem here.

The `parse_node_list` problem is solved in this commit. It turns out
that we don't need to calculate `sync_nodes` (and hence `ignore_nodes`)
in order to wait for NORMAL state handlers. We can wait for handlers to
finish for *any* `NORMAL`/`shutdown` entries appearing in gossiper,
even those that correspond to dead/ignored nodes and obsolete IPs.
`handle_state_normal` is called, and eventually finishes, for all of
them.

`wait_for_normal_state_handled_on_boot` no longer receives a set of
nodes as a parameter and is modified appropriately: it now recalculates
the necessary set of nodes on each retry (the set may shrink while
we're waiting, e.g. because an entry corresponding to a node that was
replaced is garbage-collected from gossiper state).

Thanks to this, we can now put the `sync_nodes` calculation (which is
still necessary for `_gossiper.wait_alive`), and hence the
`parse_node_list` call, *after* we wait for NORMAL state handlers,
solving the chicken-and-egg problem.

This addresses the immediate failure described in #14487, but the test
will still fail. That's because `_gossiper.wait_alive` may still
receive too large a set of nodes -- we may still include obsolete IPs
or entries corresponding to replaced nodes in the `sync_nodes` set. We
fix this in the following commit, which solves both issues.
---
 service/storage_service.cc | 57 +++++++++++++++++++++++++++-----------
 service/storage_service.hh |  2 +-
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/service/storage_service.cc b/service/storage_service.cc
index 6018a7d49ff2..95fe9ffa9eb3 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -78,6 +78,7 @@
 #include
 #include
 #include
+#include
 
 using token = dht::token;
 using UUID = utils::UUID;
@@ -1953,6 +1954,10 @@ future<> storage_service::join_token_ring(cdc::generation_service& cdc_gen_servi
         // may trivially finish without waiting for anyone.
         co_await _gossiper.wait_for_live_nodes_to_show_up(2);
+        // Note: in Raft topology mode this is unnecessary.
+        // Node state changes are propagated to the cluster through explicit global barriers.
+        co_await wait_for_normal_state_handled_on_boot();
+
         auto ignore_nodes = ri
             ? parse_node_list(_db.local().get_config().ignore_dead_nodes_for_replace(), get_token_metadata())
             // TODO: specify ignore_nodes for bootstrap
             : std::unordered_set<gms::inet_address>{};
@@ -1962,10 +1967,6 @@ future<> storage_service::join_token_ring(cdc::generation_service& cdc_gen_servi
             sync_nodes.erase(ri->address);
         }
 
-        // Note: in Raft topology mode this is unnecessary.
-        // Node state changes are propagated to the cluster through explicit global barriers.
-        co_await wait_for_normal_state_handled_on_boot(sync_nodes);
-
         // NORMAL doesn't necessarily mean UP (#14042). Wait for these nodes to be UP as well
         // to reduce flakiness (we need them to be UP to perform CDC generation write and for repair/streaming).
         //
@@ -5706,21 +5707,45 @@ bool storage_service::is_normal_state_handled_on_boot(gms::inet_address node) {
     return _normal_state_handled_on_boot.contains(node);
 }
 
-// Wait for normal state handler to finish on boot
-future<> storage_service::wait_for_normal_state_handled_on_boot(const std::unordered_set<gms::inet_address>& nodes) {
-    slogger.info("Started waiting for normal state handler for nodes {}", nodes);
+// Wait for normal state handlers to finish on boot
+future<> storage_service::wait_for_normal_state_handled_on_boot() {
+    static logger::rate_limit rate_limit{std::chrono::seconds{5}};
+    static auto fmt_nodes_with_statuses = [this] (const auto& eps) {
+        return boost::algorithm::join(
+                eps | boost::adaptors::transformed([this] (const auto& ep) {
+                    return ::format("({}, status={})", ep, _gossiper.get_gossip_status(ep));
+                }), ", ");
+    };
+
+    slogger.info("Started waiting for normal state handlers to finish");
     auto start_time = std::chrono::steady_clock::now();
-    for (auto& node: nodes) {
-        while (!is_normal_state_handled_on_boot(node)) {
-            slogger.debug("Waiting for normal state handler for node {}", node);
-            co_await sleep_abortable(std::chrono::milliseconds(100), _abort_source);
-            if (std::chrono::steady_clock::now() > start_time + std::chrono::seconds(60)) {
-                throw std::runtime_error(::format("Node {} did not finish normal state handler, reject the node ops", node));
-            }
+    std::vector<gms::inet_address> eps;
+    while (true) {
+        eps = _gossiper.get_endpoints();
+        auto it = std::partition(eps.begin(), eps.end(),
+                [this, me = get_broadcast_address()] (const gms::inet_address& ep) {
+                    return ep == me || !_gossiper.is_normal_ring_member(ep) || is_normal_state_handled_on_boot(ep);
+                });
+
+        if (it == eps.end()) {
+            break;
+        }
+
+        if (std::chrono::steady_clock::now() > start_time + std::chrono::seconds(60)) {
+            auto err = ::format("Timed out waiting for normal state handlers to finish for nodes {}",
+                    fmt_nodes_with_statuses(boost::make_iterator_range(it, eps.end())));
+            slogger.error("{}", err);
+            throw std::runtime_error{std::move(err)};
         }
+
+        slogger.log(log_level::info, rate_limit, "Normal state handlers not yet finished for nodes {}",
+                fmt_nodes_with_statuses(boost::make_iterator_range(it, eps.end())));
+
+        co_await sleep_abortable(std::chrono::milliseconds{100}, _abort_source);
     }
-    slogger.info("Finished waiting for normal state handler for nodes {}", nodes);
-    co_return;
+
+    slogger.info("Finished waiting for normal state handlers; endpoints observed in gossip: {}",
+            fmt_nodes_with_statuses(eps));
 }
 
 future<bool> storage_service::is_cleanup_allowed(sstring keyspace) {
diff --git a/service/storage_service.hh b/service/storage_service.hh
index c0ba08ac6733..c5eaf5448b08 100644
--- a/service/storage_service.hh
+++ b/service/storage_service.hh
@@ -749,7 +749,7 @@ public:
 private:
     std::unordered_set<gms::inet_address> _normal_state_handled_on_boot;
     bool is_normal_state_handled_on_boot(gms::inet_address);
-    future<> wait_for_normal_state_handled_on_boot(const std::unordered_set<gms::inet_address>& nodes);
+    future<> wait_for_normal_state_handled_on_boot();
 
     friend class group0_state_machine;
     bool _raft_topology_change_enabled = false;

From 96278a09d4e8b2306170b648c134cc4dc957b763 Mon Sep 17 00:00:00 2001
From: Kamil Braun
Date: Tue, 4 Jul 2023 15:03:29 +0200
Subject: [PATCH 2/8] storage_service: use `token_metadata` to calculate nodes
 waited for to be UP

At bootstrap, after we start gossiping, we calculate a set of nodes
(`sync_nodes`) which we need to "synchronize" with, waiting for them to
be UP before proceeding; these nodes are required for streaming/repair
and CDC generation data write, and generally are supposed to constitute
the current set of cluster members.

In #14468 and #14487 we observed that this set may contain entries
corresponding to nodes that were just replaced or changed their IPs
(but the old-IP entry is still there). We pass them to
`_gossiper.wait_alive` and the call eventually times out.

We need a better way to calculate `sync_nodes`, one which ignores
obsolete IPs and nodes that are already gone but just weren't
garbage-collected from gossiper state yet. In fact such a method was
already introduced in the past
(ca61d88764f234851c95a26123dbe72abd28ed4b) but it wasn't used
everywhere. There, we use `token_metadata`, in which collisions between
Host IDs and tokens are resolved, so it contains only entries that
correspond to the "real" current set of NORMAL nodes.

We use this method to calculate the set of nodes passed to
`_gossiper.wait_alive`.

Fixes #14468
Fixes #14487
---
 service/storage_service.cc | 37 ++++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/service/storage_service.cc b/service/storage_service.cc
index 95fe9ffa9eb3..72d74a7476d2 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -1958,15 +1958,6 @@ future<> storage_service::join_token_ring(cdc::generation_service& cdc_gen_servi
         // Node state changes are propagated to the cluster through explicit global barriers.
         co_await wait_for_normal_state_handled_on_boot();
 
-        auto ignore_nodes = ri
-            ? parse_node_list(_db.local().get_config().ignore_dead_nodes_for_replace(), get_token_metadata())
-            // TODO: specify ignore_nodes for bootstrap
-            : std::unordered_set<gms::inet_address>{};
-        auto sync_nodes = co_await get_nodes_to_sync_with(ignore_nodes);
-        if (ri) {
-            sync_nodes.erase(ri->address);
-        }
-
         // NORMAL doesn't necessarily mean UP (#14042). Wait for these nodes to be UP as well
         // to reduce flakiness (we need them to be UP to perform CDC generation write and for repair/streaming).
         //
@@ -1974,10 +1965,30 @@ future<> storage_service::join_token_ring(cdc::generation_service& cdc_gen_servi
         // has to be done based on topology state machine instead of gossiper as it is here;
         // furthermore, the place in the code where we do this has to be different (it has to be coordinated
         // by the topology coordinator after it joins the node to the cluster).
-        std::vector<gms::inet_address> sync_nodes_vec{sync_nodes.begin(), sync_nodes.end()};
-        slogger.info("Waiting for nodes {} to be alive", sync_nodes_vec);
-        co_await _gossiper.wait_alive(sync_nodes_vec, std::chrono::seconds{30});
-        slogger.info("Nodes {} are alive", sync_nodes_vec);
+        //
+        // We calculate nodes to wait for based on token_metadata. Previously we would use gossiper
+        // directly for this, but gossiper may still contain obsolete entries from 1. replaced nodes
+        // and 2. nodes that have changed their IPs; these entries are eventually garbage-collected,
+        // but here they may still be present if we're performing topology changes in quick succession.
+        // `token_metadata` has all host ID / token collisions resolved so in particular it doesn't contain
+        // these obsolete IPs. Refs: #14487, #14468
+        auto& tm = get_token_metadata();
+        auto ignore_nodes = ri
+            ? parse_node_list(_db.local().get_config().ignore_dead_nodes_for_replace(), tm)
+            // TODO: specify ignore_nodes for bootstrap
+            : std::unordered_set<gms::inet_address>{};
+
+        std::vector<gms::inet_address> sync_nodes;
+        tm.get_topology().for_each_node([&] (const locator::node* np) {
+            auto ep = np->endpoint();
+            if (!ignore_nodes.contains(ep) && (!ri || ep != ri->address)) {
+                sync_nodes.push_back(ep);
+            }
+        });
+
+        slogger.info("Waiting for nodes {} to be alive", sync_nodes);
+        co_await _gossiper.wait_alive(sync_nodes, std::chrono::seconds{30});
+        slogger.info("Nodes {} are alive", sync_nodes);
     }
 
     assert(_group0);

From 9b8e5550b14b1690bcff11da42f316708fd40198 Mon Sep 17 00:00:00 2001
From: Kamil Braun
Date: Tue, 4 Jul 2023 18:59:03 +0200
Subject: [PATCH 3/8] storage_service: remove `get_nodes_to_sync_with`

It's no longer used.
---
 service/storage_service.cc | 15 ---------------
 service/storage_service.hh |  2 --
 2 files changed, 17 deletions(-)

diff --git a/service/storage_service.cc b/service/storage_service.cc
index 72d74a7476d2..8aa38ad25ab5 100644
--- a/service/storage_service.cc
+++ b/service/storage_service.cc
@@ -2251,21 +2251,6 @@ std::unordered_set<gms::inet_address> storage_service::parse_node_list(sstring c
     return ignore_nodes;
 }
 
-future<std::unordered_set<gms::inet_address>> storage_service::get_nodes_to_sync_with(
-        const std::unordered_set<gms::inet_address>& ignore_nodes) {
-    std::unordered_set<gms::inet_address> result;
-    for (const auto& node :_gossiper.get_endpoints()) {
-        co_await coroutine::maybe_yield();
-        slogger.info("Check node={}, status={}", node, _gossiper.get_gossip_status(node));
-        if (node != get_broadcast_address() &&
-            _gossiper.is_normal_ring_member(node) &&
-            !ignore_nodes.contains(node)) {
-            result.insert(node);
-        }
-    }
-    co_return result;
-}
-
 // Runs inside seastar::async context
 future<> storage_service::bootstrap(cdc::generation_service& cdc_gen_service, std::unordered_set<token>& bootstrap_tokens, std::optional<cdc::generation_id>& cdc_gen_id, const std::optional<replacement_info>& replacement_info) {
     return seastar::async([this, &bootstrap_tokens, &cdc_gen_id, &cdc_gen_service, &replacement_info] {
diff --git a/service/storage_service.hh b/service/storage_service.hh
index c5eaf5448b08..f38ec6d1413c 100644
--- a/service/storage_service.hh
+++ b/service/storage_service.hh
@@ -293,8 +293,6 @@ private:
     void run_replace_ops(std::unordered_set<token>& bootstrap_tokens, replacement_info replace_info);
     void run_bootstrap_ops(std::unordered_set<token>& bootstrap_tokens);
-    future<std::unordered_set<gms::inet_address>> get_nodes_to_sync_with(
-        const std::unordered_set<gms::inet_address>& ignore_dead_nodes);
     future<> wait_for_ring_to_settle();
 
 public:

From 9b136ee574700a4c878bc6604389469c05b9622a Mon Sep 17 00:00:00 2001
From: Kamil Braun
Date: Tue, 4 Jul 2023 09:39:44 +0200
Subject: [PATCH 4/8] test: scylla_cluster: accept `ignore_dead_nodes` in
 `ReplaceConfig`

---
 test/pylib/scylla_cluster.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test/pylib/scylla_cluster.py b/test/pylib/scylla_cluster.py
index 8ec18cde1b2b..ae716e7eb699 100644
--- a/test/pylib/scylla_cluster.py
+++ b/test/pylib/scylla_cluster.py
@@ -47,6 +47,7 @@ class ReplaceConfig(NamedTuple):
     replaced_id: ServerNum
     reuse_ip_addr: bool
     use_host_id: bool
+    ignore_dead_nodes: list[IPAddress | HostID] = []
 
 
 def make_scylla_conf(workdir: pathlib.Path, host_addr: str, seed_addrs: List[str], cluster_name: str) -> dict[str, object]:
@@ -679,6 +680,9 @@ async def add_server(self, replace_cfg: Optional[ReplaceConfig] = None, cmdline:
             else:
                 extra_config['replace_address_first_boot'] = replaced_srv.ip_addr
 
+            if replace_cfg.ignore_dead_nodes:
+                extra_config['ignore_dead_nodes_for_replace'] = ','.join(replace_cfg.ignore_dead_nodes)
+
             assert replaced_id not in self.removed, \
                 f"add_server: cannot replace removed server {replaced_srv}"
             assert replaced_id in self.stopped, \

From 00f51ea753b2561d7e0b296a36da6897fa358632 Mon Sep 17 00:00:00 2001
From: Kamil Braun
Date: Tue, 4 Jul 2023 09:40:13 +0200
Subject: [PATCH 5/8] test: node replace with `ignore_dead_nodes` test

Regression test for #14487 on steroids. It performs 3 consecutive node
replace operations, starting with 3 dead nodes. In order to have a Raft
majority, we have to boot a 7-node cluster, so we enable this test only
in one mode; the choice was between `dev` and `release`; I picked `dev`
because it compiles faster and I develop on it.
---
 test/topology_custom/suite.yaml  |  2 +
 .../test_replace_ignore_nodes.py | 56 +++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 test/topology_custom/test_replace_ignore_nodes.py

diff --git a/test/topology_custom/suite.yaml b/test/topology_custom/suite.yaml
index 60fd05ad684e..e0c287df2833 100644
--- a/test/topology_custom/suite.yaml
+++ b/test/topology_custom/suite.yaml
@@ -7,5 +7,7 @@ extra_scylla_config_options:
   authorizer: AllowAllAuthorizer
 skip_in_release:
   - test_shutdown_hang
+  - test_replace_ignore_nodes
 skip_in_debug:
   - test_shutdown_hang
+  - test_replace_ignore_nodes
diff --git a/test/topology_custom/test_replace_ignore_nodes.py b/test/topology_custom/test_replace_ignore_nodes.py
new file mode 100644
index 000000000000..c91278648d63
--- /dev/null
+++ b/test/topology_custom/test_replace_ignore_nodes.py
@@ -0,0 +1,56 @@
+#
+# Copyright (C) 2023-present ScyllaDB
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+#
+import time
+import pytest
+import logging
+
+from test.pylib.internal_types import IPAddress, HostID
+from test.pylib.scylla_cluster import ReplaceConfig
+from test.pylib.manager_client import ManagerClient
+from test.topology.util import wait_for_token_ring_and_group0_consistency
+
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.asyncio
+async def test_replace_ignore_nodes(manager: ManagerClient) -> None:
+    """Replace a node in presence of multiple dead nodes.
+    Regression test for #14487. Does not apply to Raft-topology mode.
+
+    This is a slow test with a 7 node cluster and 3 replace operations;
+    we don't want to run it in debug mode.
+    Preferably run it only in one mode, e.g. dev.
+    """
+    cfg = {'experimental_features': list[str]()}
+    logger.info(f"Booting initial cluster")
+    servers = [await manager.server_add(config=cfg) for _ in range(7)]
+    s2_id = await manager.get_host_id(servers[2].server_id)
+    logger.info(f"Stopping servers {servers[:3]}")
+    await manager.server_stop(servers[0].server_id)
+    await manager.server_stop(servers[1].server_id)
+    await manager.server_stop_gracefully(servers[2].server_id)
+
+    # The parameter accepts both IP addrs and host IDs.
+    # We must be able to resolve them in both ways.
+    ignore_dead: list[IPAddress | HostID] = [servers[1].ip_addr, s2_id]
+    logger.info(f"Replacing {servers[0]}, ignore_dead_nodes = {ignore_dead}")
+    replace_cfg = ReplaceConfig(replaced_id = servers[0].server_id, reuse_ip_addr = False, use_host_id = False,
+                                ignore_dead_nodes = ignore_dead)
+    await manager.server_add(replace_cfg=replace_cfg, config=cfg)
+    await wait_for_token_ring_and_group0_consistency(manager, time.time() + 30)
+
+    ignore_dead = [servers[2].ip_addr]
+    logger.info(f"Replacing {servers[1]}, ignore_dead_nodes = {ignore_dead}")
+    replace_cfg = ReplaceConfig(replaced_id = servers[1].server_id, reuse_ip_addr = False, use_host_id = False,
+                                ignore_dead_nodes = ignore_dead)
+    await manager.server_add(replace_cfg=replace_cfg, config=cfg)
+    await wait_for_token_ring_and_group0_consistency(manager, time.time() + 30)
+
+    logger.info(f"Replacing {servers[2]}")
+    replace_cfg = ReplaceConfig(replaced_id = servers[2].server_id, reuse_ip_addr = False, use_host_id = False)
+    await manager.server_add(replace_cfg=replace_cfg, config=cfg)
+    await wait_for_token_ring_and_group0_consistency(manager, time.time() + 30)

From 2032d7dbe40f648b016a1355d46753d53e7c4608 Mon Sep 17 00:00:00 2001
From: Kamil Braun
Date: Tue, 4 Jul 2023 15:29:18 +0200
Subject: [PATCH 6/8] test: scylla_cluster: return the new IP from `change_ip` API

Also simplify the API by getting rid of `ActionReturn` and returning
errors through exceptions (which have been correctly forwarded to the
client for some time already).
---
 test/pylib/manager_client.py |  6 ++++--
 test/pylib/scylla_cluster.py | 15 ++++++---------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/test/pylib/manager_client.py b/test/pylib/manager_client.py
index 9cdd56716e81..425ffac43ede 100644
--- a/test/pylib/manager_client.py
+++ b/test/pylib/manager_client.py
@@ -226,9 +226,11 @@ async def server_update_config(self, server_id: ServerNum, key: str, value: obje
         await self.client.put_json(f"/cluster/server/{server_id}/update_config",
                                    {"key": key, "value": value})
 
-    async def server_change_ip(self, server_id: ServerNum) -> None:
+    async def server_change_ip(self, server_id: ServerNum) -> IPAddress:
         """Change server IP address.
            Applicable only to a stopped server"""
-        await self.client.put_json(f"/cluster/server/{server_id}/change_ip", {})
+        ret = await self.client.put_json(f"/cluster/server/{server_id}/change_ip", {},
+                                         response_type="json")
+        return IPAddress(ret["ip_addr"])
 
     async def wait_for_host_known(self, dst_server_id: str, expect_host_id: str,
                                   deadline: Optional[float] = None) -> None:
diff --git a/test/pylib/scylla_cluster.py b/test/pylib/scylla_cluster.py
index ae716e7eb699..9422014144e1 100644
--- a/test/pylib/scylla_cluster.py
+++ b/test/pylib/scylla_cluster.py
@@ -906,24 +906,23 @@ def setLogger(self, logger: logging.LoggerAdapter):
         for srv in self.servers.values():
             srv.setLogger(self.logger)
 
-    async def change_ip(self, server_id: ServerNum) -> ActionReturn:
+    async def change_ip(self, server_id: ServerNum) -> IPAddress:
         """Lease a new IP address and update conf/scylla.yaml with it. The original IP
            is released at the end of the test to avoid an immediate recycle within the
            same cluster. The server must be stopped before its ip is changed."""
         if server_id not in self.servers:
-            return ScyllaCluster.ActionReturn(success=False, msg=f"Server {server_id} unknown")
+            raise RuntimeError(f"Server {server_id} unknown")
         server = self.servers[server_id]
         if server.is_running:
-            msg = f"Server {server_id} is running: stop it first and then change its ip"
-            return ScyllaCluster.ActionReturn(success=False, msg=msg)
+            raise RuntimeError(f"Server {server_id} is running: stop it first and then change its ip")
         self.is_dirty = True
         ip_addr = IPAddress(await self.host_registry.lease_host())
         self.leased_ips.add(ip_addr)
         logging.info("Cluster %s changed server %s IP from %s to %s", self.name,
                      server_id, server.ip_addr, ip_addr)
         server.change_ip(ip_addr)
-        return ScyllaCluster.ActionReturn(success=True)
+        return ip_addr
 
 
 class ScyllaClusterManager:
@@ -1235,10 +1234,8 @@ async def _server_change_ip(self, request: aiohttp.web.Request) -> aiohttp.web.R
         """Pass change_ip command for the given server to the cluster"""
         assert self.cluster
         server_id = ServerNum(int(request.match_info["server_id"]))
-        ret = await self.cluster.change_ip(server_id)
-        if not ret.success:
-            return aiohttp.web.Response(status=404, text=ret.msg)
-        return aiohttp.web.Response()
+        ip_addr = await self.cluster.change_ip(server_id)
+        return aiohttp.web.json_response({"ip_addr": ip_addr})
 
 
 @asynccontextmanager

From 452d9a3c77089aa26c62222e3e4c8b1256c602a1 Mon Sep 17 00:00:00 2001
From: Kamil Braun
Date: Tue, 4 Jul 2023 15:41:19 +0200
Subject: [PATCH 7/8] test: test bootstrap after IP change

Regression test for #14468.
---
 .../test_boot_after_ip_change.py | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 test/topology_custom/test_boot_after_ip_change.py

diff --git a/test/topology_custom/test_boot_after_ip_change.py b/test/topology_custom/test_boot_after_ip_change.py
new file mode 100644
index 000000000000..e3f3d07d5d70
--- /dev/null
+++ b/test/topology_custom/test_boot_after_ip_change.py
@@ -0,0 +1,52 @@
+#
+# Copyright (C) 2023-present ScyllaDB
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+#
+import time
+import pytest
+import logging
+
+from test.pylib.internal_types import IPAddress, HostID
+from test.pylib.scylla_cluster import ReplaceConfig
+from test.pylib.manager_client import ManagerClient
+from test.topology.util import wait_for_token_ring_and_group0_consistency
+
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.asyncio
+async def test_boot_after_ip_change(manager: ManagerClient) -> None:
+    """Bootstrap a new node after existing one changed its IP.
+    Regression test for #14468. Does not apply to Raft-topology mode.
+    """
+    cfg = {'experimental_features': list[str]()}
+    logger.info(f"Booting initial cluster")
+    servers = [await manager.server_add(config=cfg) for _ in range(2)]
+    await wait_for_token_ring_and_group0_consistency(manager, time.time() + 30)
+
+    logger.info(f"Stopping server {servers[1]}")
+    await manager.server_stop_gracefully(servers[1].server_id)
+
+    logger.info(f"Changing IP of server {servers[1]}")
+    new_ip = await manager.server_change_ip(servers[1].server_id)
+    servers[1] = servers[1]._replace(ip_addr = new_ip)
+    logger.info(f"New IP: {new_ip}")
+
+    logger.info(f"Restarting server {servers[1]}")
+    await manager.server_start(servers[1].server_id)
+
+    # We need to do this wait before we boot a new node.
+    # Otherwise the newly booting node may contact servers[0] even before servers[0]
+    # saw the new IP of servers[1], and then the booting node will try to wait
+    # for servers[1] to be alive using its old IP (and eventually time out).
+    #
+    # Note that this still acts as a regression test for #14468.
+    # In #14468, the problem was that a booting node would try to wait for the old IP
+    # of servers[0] even after all existing servers saw the IP change.
+    logger.info(f"Wait until {servers[0]} sees the new IP of {servers[1]}")
+    await manager.server_sees_other_server(servers[0].ip_addr, servers[1].ip_addr)
+
+    logger.info(f"Booting new node")
+    await manager.server_add(config=cfg)

From 431a8f85911841e47b5b937f57160e0340d970b4 Mon Sep 17 00:00:00 2001
From: Kamil Braun
Date: Mon, 3 Jul 2023 15:17:54 +0200
Subject: [PATCH 8/8] test: rename `test_topology_ip.py` to `test_replace.py`

No idea why it was named like that before.
---
 test/topology/{test_topology_ip.py => test_replace.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test/topology/{test_topology_ip.py => test_replace.py} (100%)

diff --git a/test/topology/test_topology_ip.py b/test/topology/test_replace.py
similarity index 100%
rename from test/topology/test_topology_ip.py
rename to test/topology/test_replace.py