From 3737bf8fa29faedce1cdf030e3f85406d8491d4b Mon Sep 17 00:00:00 2001 From: Petr Gusev Date: Wed, 12 Jul 2023 21:03:19 +0400 Subject: [PATCH] topology.cc: unindex_node: _dc_racks removal fix The eps reference was reused to manipulate the racks dictionary. This resulted in assigning a set of nodes from the racks dictionary to an element of the _dc_endpoints dictionary. The problem was demonstrated by the dtest test_decommission_last_node_in_rack (scylladb/scylla-dtest#3299). The test set up four nodes, three on one rack and one on another, all within a single data center (dc). It then switched to a 'network_topology_strategy' for one keyspace and tried to decommission the single node on the second rack. This decomission command with error message 'zero replica after the removal.' This happened because unindex_node assigned the empty list from the second rack as a value for the single dc in _dc_endpoints dictionary. As a result, we got empty nodes list for single dc in natural_endpoints_tracker::_all_endpoints, node_count == 0 in data_center_endpoints, _rf_left == 0, so network_topology_strategy::calculate_natural_endpoints rejected all the endpoints and returned an empty endpoint_set. In repair_service::do_decommission_removenode_with_repair this caused the 'zero replica after the removal' error. With this fix the test passes both with --consistent-cluster-management option and without it. The specific unit test for this problem was added. Fixes: #14184 Closes #14673 --- locator/topology.cc | 6 ++-- test/boost/locator_topology_test.cc | 45 +++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/locator/topology.cc b/locator/topology.cc index 7cee6e6e4d22..e4d6bd9b0b1e 100644 --- a/locator/topology.cc +++ b/locator/topology.cc @@ -362,9 +362,9 @@ void topology::unindex_node(const node* node) { _dc_rack_nodes[dc][rack].erase(node); auto& racks = _dc_racks[dc]; if (auto rit = racks.find(rack); rit != racks.end()) { - eps = rit->second; - eps.erase(ep); - if (eps.empty()) { + auto& rack_eps = rit->second; + rack_eps.erase(ep); + if (rack_eps.empty()) { racks.erase(rit); } } diff --git a/test/boost/locator_topology_test.cc b/test/boost/locator_topology_test.cc index fbf7f41ea4d5..efa1bae3f634 100644 --- a/test/boost/locator_topology_test.cc +++ b/test/boost/locator_topology_test.cc @@ -178,6 +178,51 @@ SEASTAR_THREAD_TEST_CASE(test_update_node) { BOOST_REQUIRE_EQUAL(node->get_state(), locator::node::state::left); } +SEASTAR_THREAD_TEST_CASE(test_remove_endpoint) { + using dc_endpoints_t = std::unordered_map>; + using dc_racks_t = std::unordered_map>>; + using dcs_t = std::unordered_set; + + const auto id1 = host_id::create_random_id(); + const auto ep1 = gms::inet_address("127.0.0.1"); + const auto id2 = host_id::create_random_id(); + const auto ep2 = gms::inet_address("127.0.0.2"); + const auto dc_rack1 = endpoint_dc_rack { + .dc = "dc1", + .rack = "rack1" + }; + const auto dc_rack2 = endpoint_dc_rack { + .dc = "dc1", + .rack = "rack2" + }; + + utils::fb_utilities::set_broadcast_address(ep1); + topology::config cfg = { + .this_host_id = id1, + .this_endpoint = ep1, + .local_dc_rack = dc_rack1 + }; + + auto topo = topology(cfg); + + topo.add_node(id1, ep1, dc_rack1, node::state::normal); + topo.add_node(id2, ep2, dc_rack2, node::state::normal); + + BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {ep1, ep2}}})); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {ep1}}, {"rack2", {ep2}}}}})); + BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"})); + + topo.remove_endpoint(ep2); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{{"dc1", {ep1}}})); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{{"dc1", {{"rack1", {ep1}}}}})); + BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"})); + + topo.remove_endpoint(ep1); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_endpoints(), (dc_endpoints_t{})); + BOOST_REQUIRE_EQUAL(topo.get_datacenter_racks(), (dc_racks_t{})); + BOOST_REQUIRE_EQUAL(topo.get_datacenters(), (dcs_t{"dc1"})); +} + SEASTAR_THREAD_TEST_CASE(test_load_sketch) { inet_address ip1("192.168.0.1"); inet_address ip2("192.168.0.2");