Merge 'storage_service: Enable Repair Based Node Operations (RBNO) by default for all node ops' from Asias He

Since commit 97bb2e4 (storage_service: Enable Repair Based Node Operations (RBNO) by default for replace), RBNO has been enabled by default for replace operations.

After more testing, we decided to enable repair based node operations by default for all node operations.

Closes #12173

* github.com:scylladb/scylladb:
  storage_service: Enable Repair Based Node Operations (RBNO) by default for all node ops
  test: Increase START_TIMEOUT
  test: Increase max-networking-io-control-blocks
  storage_service: Check node has left in node_ops_cmd::decommission_done
  repair: Use remote dc neighbors for everywhere strategy
avikivity committed Feb 6, 2023
2 parents 511c012 + e7d5e50 · commit 1e6cc9c
Showing 7 changed files with 32 additions and 9 deletions.
db/config.cc (2 changes: 1 addition & 1 deletion)
@@ -794,7 +794,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
     , ignore_dead_nodes_for_replace(this, "ignore_dead_nodes_for_replace", value_status::Used, "", "List dead nodes to ignore for replace operation using a comma-separated list of host IDs. E.g., scylla --ignore-dead-nodes-for-replace 8d5ed9f4-7764-4dbd-bad8-43fddce94b7c,125ed9f4-7777-1dbn-mac8-43fddce9123e")
     , override_decommission(this, "override_decommission", value_status::Used, false, "Set true to force a decommissioned node to join the cluster")
     , enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to enable repair based node operations instead of streaming based")
-    , allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild")
+    , allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace,removenode,rebuild,bootstrap,decommission", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild")
     , ring_delay_ms(this, "ring_delay_ms", value_status::Used, 30 * 1000, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.")
     , shadow_round_ms(this, "shadow_round_ms", value_status::Used, 300 * 1000, "The maximum gossip shadow round time. Can be used to reduce the gossip feature check time during node boot up.")
     , fd_max_interval_ms(this, "fd_max_interval_ms", value_status::Used, 2 * 1000, "The maximum failure_detector interval time in milliseconds. Interval larger than the maximum will be ignored. Larger clusters may need to increase the default.")
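Both options touched here are LiveUpdate, so the new default can still be narrowed per deployment. As a minimal sketch (not part of this commit), a test could pin RBNO back to the old replace-only behavior through the server_add() cmdline hook shown further down in test/pylib/manager_client.py; the hyphenated flag spelling and the ManagerClient fixture are assumptions, modeled on the --ignore-dead-nodes-for-replace example in the help text above:

```python
from test.pylib.manager_client import ManagerClient

# Hedged sketch, not part of this commit: start one test server with repair
# based node operations allowed only for replace, overriding the new default.
# The hyphenated flag names are assumed, mirroring the style of the
# --ignore-dead-nodes-for-replace example in db/config.cc's help text.
async def add_replace_only_server(manager: ManagerClient):
    return await manager.server_add(cmdline=[
        '--enable-repair-based-node-ops', 'true',
        '--allowed-repair-based-node-ops', 'replace',
    ])
```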
repair/repair.cc (4 changes: 3 additions & 1 deletion)
@@ -1445,7 +1445,9 @@ future<> repair_service::bootstrap_with_repair(locator::token_metadata_ptr tmptr
     auto old_endpoints_in_local_dc = get_old_endpoints_in_local_dc();
     auto rf_in_local_dc = get_rf_in_local_dc();
     if (everywhere_topology) {
-        neighbors = old_endpoints_in_local_dc;
+        neighbors = old_endpoints_in_local_dc.empty() ? old_endpoints : old_endpoints_in_local_dc;
+        rlogger.debug("bootstrap_with_repair: keyspace={}, range={}, old_endpoints={}, new_endpoints={}, old_endpoints_in_local_dc={}, neighbors={}",
+                keyspace_name, desired_range, old_endpoints, new_endpoints, old_endpoints_in_local_dc, neighbors);
     } else if (old_endpoints.size() == replication_factor) {
         // For example, with RF = 3 and 3 nodes n1, n2, n3
         // in the cluster, n4 is bootstrapped, old_replicas
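The ternary above is easy to misread in diff form. A hedged Python restatement of the new neighbor-selection rule for everywhere-topology keyspaces (the function and parameter names are illustrative, not the C++ API):

```python
# Prefer replicas in the bootstrapping node's own DC; fall back to the full
# replica set (i.e. remote-DC replicas) when the local DC has none yet, e.g.
# when the first node of a brand-new DC bootstraps. Previously the local-DC
# set was used unconditionally, leaving the neighbor set empty in that case.
def pick_neighbors(old_endpoints: set[str], old_endpoints_in_local_dc: set[str]) -> set[str]:
    return old_endpoints_in_local_dc or old_endpoints
```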
service/storage_service.cc (20 changes: 20 additions & 0 deletions)
@@ -2721,6 +2721,26 @@ future<node_ops_cmd_response> storage_service::node_ops_cmd_handler(gms::inet_ad
         slogger.debug("decommission[{}]: Updated heartbeat from coordinator={}", req.ops_uuid, coordinator);
         node_ops_update_heartbeat(ops_uuid).get();
     } else if (req.cmd == node_ops_cmd::decommission_done) {
+        bool check_again = false;
+        auto start_time = std::chrono::steady_clock::now();
+        slogger.info("decommission[{}]: Started to check if nodes={} have left the cluster, coordinator={}", req.ops_uuid, req.leaving_nodes, coordinator);
+        do {
+            check_again = false;
+            for (auto& node : req.leaving_nodes) {
+                auto tmptr = get_token_metadata_ptr();
+                if (tmptr->is_normal_token_owner(node)) {
+                    check_again = true;
+                    if (std::chrono::steady_clock::now() > start_time + std::chrono::seconds(60)) {
+                        auto msg = format("decommission[{}]: Node {} is still in the cluster", req.ops_uuid, node);
+                        throw std::runtime_error(msg);
+                    }
+                    slogger.warn("decommission[{}]: Node {} is still in the cluster, sleep and check again", req.ops_uuid, node);
+                    sleep_abortable(std::chrono::milliseconds(500), _abort_source).get();
+                    break;
+                }
+            }
+        } while (check_again);
+        slogger.info("decommission[{}]: Finished to check if nodes={} have left the cluster, coordinator={}", req.ops_uuid, req.leaving_nodes, coordinator);
         slogger.info("decommission[{}]: Marked ops done from coordinator={}", req.ops_uuid, coordinator);
         slogger.debug("Triggering off-strategy compaction for all non-system tables on decommission completion");
         _db.invoke_on_all([](replica::database &db) {
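The new decommission_done branch is a bounded poll: re-check token ownership every 500 ms until every leaving node has dropped out of the ring, giving up after 60 seconds (the real code sleeps with sleep_abortable so shutdown can interrupt the wait). A minimal synchronous Python sketch of the same pattern, with illustrative names:

```python
import time
from typing import Callable, Iterable

def wait_until_left(leaving_nodes: Iterable[str],
                    is_normal_token_owner: Callable[[str], bool],
                    deadline_s: float = 60.0, poll_s: float = 0.5) -> None:
    # Poll until no leaving node still owns tokens, or the deadline passes.
    nodes = list(leaving_nodes)
    start = time.monotonic()
    while any(is_normal_token_owner(n) for n in nodes):
        if time.monotonic() - start > deadline_s:
            raise RuntimeError("node is still in the cluster after decommission")
        time.sleep(poll_s)
```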
test/cql-pytest/run.py (2 changes: 1 addition & 1 deletion)
@@ -202,7 +202,7 @@ def run_scylla_cmd(pid, dir):
     '--smp', '2',
     '-m', '1G',
     '--overprovisioned',
-    '--max-networking-io-control-blocks', '100',
+    '--max-networking-io-control-blocks', '1000',
     '--unsafe-bypass-fsync', '1',
     '--kernel-page-cache', '1',
     '--commitlog-use-o-dsync', '0',
test/pylib/manager_client.py (5 changes: 3 additions & 2 deletions)
@@ -15,7 +15,7 @@
 from test.pylib.rest_client import UnixRESTClient, ScyllaRESTAPIClient
 from test.pylib.util import wait_for
 from test.pylib.internal_types import ServerNum, IPAddress, HostID, ServerInfo
-from test.pylib.scylla_cluster import ReplaceConfig
+from test.pylib.scylla_cluster import ReplaceConfig, ScyllaServer
 from cassandra.cluster import Session as CassandraSession # type: ignore # pylint: disable=no-name-in-module
 from cassandra.cluster import Cluster as CassandraCluster # type: ignore # pylint: disable=no-name-in-module
 import aiohttp
@@ -156,7 +156,8 @@ async def server_add(self, replace_cfg: Optional[ReplaceConfig] = None, cmdline:
                 data['replace_cfg'] = replace_cfg._asdict()
             if cmdline:
                 data['cmdline'] = cmdline
-            server_info = await self.client.put_json("/cluster/addserver", data, response_type="json")
+            server_info = await self.client.put_json("/cluster/addserver", data, response_type="json",
+                                                     timeout=ScyllaServer.START_TIMEOUT)
         except Exception as exc:
             raise Exception("Failed to add server") from exc
         try:
test/pylib/rest_client.py (4 changes: 2 additions & 2 deletions)
@@ -100,9 +100,9 @@ async def post(self, resource_uri: str, host: Optional[str] = None,

     async def put_json(self, resource_uri: str, data: Mapping, host: Optional[str] = None,
                        port: Optional[int] = None, params: Optional[dict[str, str]] = None,
-                       response_type: Optional[str] = None) -> Any:
+                       response_type: Optional[str] = None, timeout: Optional[float] = None) -> Any:
         ret = await self._fetch("PUT", resource_uri, response_type = response_type, host = host,
-                                port = port, params = params, json = data)
+                                port = port, params = params, json = data, timeout = timeout)
         return ret

     async def delete(self, resource_uri: str, host: Optional[str] = None,
test/pylib/scylla_cluster.py (4 changes: 2 additions & 2 deletions)
@@ -115,7 +115,7 @@ def make_scylla_conf(workdir: pathlib.Path, host_addr: str, seed_addrs: List[str
     '-m', '1G',
     '--collectd', '0',
     '--overprovisioned',
-    '--max-networking-io-control-blocks', '100',
+    '--max-networking-io-control-blocks', '1000',
     '--unsafe-bypass-fsync', '1',
     '--kernel-page-cache', '1',
     '--commitlog-use-o-dsync', '0',
@@ -190,7 +190,7 @@ class ScyllaServer:
"""Starts and handles a single Scylla server, managing logs, checking if responsive,
and cleanup when finished."""
# pylint: disable=too-many-instance-attributes
START_TIMEOUT = 300 # seconds
START_TIMEOUT = 1000 # seconds
start_time: float
sleep_interval: float
log_file: BufferedWriter
