Skip to content

Commit

Permalink
test: node replace with ignore_dead_nodes test
Browse files Browse the repository at this point in the history
Regression test for scylladb#14487 on steroids. It performs 3 consecutive node
replace operations, starting with 3 dead nodes.

In order to have a Raft majority, we have to boot a 7-node cluster, so
we enable this test only in one mode; the choice was between `dev` and
`release`, I picked `dev` because it compiles faster and I develop on
it.
  • Loading branch information
kbr-scylla committed Jul 6, 2023
1 parent 9b136ee commit 00f51ea
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 0 deletions.
2 changes: 2 additions & 0 deletions test/topology_custom/suite.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ extra_scylla_config_options:
authorizer: AllowAllAuthorizer
skip_in_release:
- test_shutdown_hang
- test_replace_ignore_nodes
skip_in_debug:
- test_shutdown_hang
- test_replace_ignore_nodes
56 changes: 56 additions & 0 deletions test/topology_custom/test_replace_ignore_nodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#
# Copyright (C) 2023-present ScyllaDB
#
# SPDX-License-Identifier: AGPL-3.0-or-later
#
import time
import pytest
import logging

from test.pylib.internal_types import IPAddress, HostID
from test.pylib.scylla_cluster import ReplaceConfig
from test.pylib.manager_client import ManagerClient
from test.topology.util import wait_for_token_ring_and_group0_consistency


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


@pytest.mark.asyncio
async def test_replace_ignore_nodes(manager: ManagerClient) -> None:
    """Replace nodes one at a time while several other nodes are dead.

    Regression test for #14487. Does not apply to Raft-topology mode.

    The test boots a 7-node cluster, stops 3 of the nodes and then performs
    3 consecutive replace operations. The first two replaces list the nodes
    that are still dead in ``ignore_dead_nodes``; the last one does not pass
    that option at all.

    This is a slow test (7-node cluster and 3 replace operations), so we
    don't want to run it in debug mode. Preferably run it in only one mode,
    e.g. dev.
    """
    # Every node, original or replacing, is started with an empty list of
    # experimental features.
    node_config = {'experimental_features': list[str]()}

    async def replace_and_wait(replace_cfg: ReplaceConfig) -> None:
        # Start the replacing node, then wait for the token ring and group 0
        # to become consistent. The second argument is "now + 30 seconds",
        # presumably used as the deadline by the helper.
        await manager.server_add(replace_cfg=replace_cfg, config=node_config)
        await wait_for_token_ring_and_group0_consistency(manager, time.time() + 30)

    logger.info("Booting initial cluster")
    servers = []
    for _ in range(7):
        servers.append(await manager.server_add(config=node_config))

    dead = servers[:3]
    first, second, third = dead

    # The host ID is fetched while the node is still running.
    third_host_id = await manager.get_host_id(third.server_id)

    logger.info(f"Stopping servers {dead}")
    for srv in (first, second):
        await manager.server_stop(srv.server_id)
    await manager.server_stop_gracefully(third.server_id)

    # The parameter accepts both IP addresses and host IDs.
    # We must be able to resolve them in both ways, so mix the two forms.
    ignore_dead: list[IPAddress | HostID] = [second.ip_addr, third_host_id]
    logger.info(f"Replacing {first}, ignore_dead_nodes = {ignore_dead}")
    await replace_and_wait(ReplaceConfig(
        replaced_id=first.server_id,
        reuse_ip_addr=False,
        use_host_id=False,
        ignore_dead_nodes=ignore_dead,
    ))

    ignore_dead = [third.ip_addr]
    logger.info(f"Replacing {second}, ignore_dead_nodes = {ignore_dead}")
    await replace_and_wait(ReplaceConfig(
        replaced_id=second.server_id,
        reuse_ip_addr=False,
        use_host_id=False,
        ignore_dead_nodes=ignore_dead,
    ))

    # No other dead nodes remain, so ignore_dead_nodes is left at its default.
    logger.info(f"Replacing {third}")
    await replace_and_wait(ReplaceConfig(
        replaced_id=third.server_id,
        reuse_ip_addr=False,
        use_host_id=False,
    ))

0 comments on commit 00f51ea

Please sign in to comment.