-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
mv: handle different ERMs for base and view table
When calculating the base-view mapping while the topology is changing, we may encounter a situation where the base table noticed the change in its effective replication map while the view table hasn't, or vice-versa. This can happen because the ERM update may be performed during the preemption between taking the base ERM and view ERM, or, due to f2ff701, the update may have just been performed partially when we are taking the ERMs. Until now, we assumed that the ERMs are synchronized while finding the base-view endpoint mapping, so in particular, we were using the topology from the base's ERM to check the datacenters of all endpoints. Now that the ERMs are more likely to not be the same, we may try to get the datacenter of a view endpoint that doesn't exist in the base's topology, causing us to crash. This is fixed in this patch by using the view table's topology for endpoints coming from the view ERM. The mapping resulting from the call might now be a temporary mapping between endpoints in different topologies, but it still maps base and view replicas 1-to-1. Fixes: #17786 Fixes: #18709
- Loading branch information
Showing
3 changed files
with
98 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# | ||
# Copyright (C) 2024-present ScyllaDB | ||
# | ||
# SPDX-License-Identifier: AGPL-3.0-or-later | ||
# | ||
import asyncio | ||
import pytest | ||
import time | ||
import logging | ||
|
||
from cassandra.cluster import ConnectionException, NoHostAvailable # type: ignore | ||
|
||
from test.pylib.manager_client import ManagerClient | ||
from test.topology.conftest import skip_mode | ||
|
||
|
||
# Module-level logger named after this test module (standard logging convention).
logger = logging.getLogger(__name__)
|
||
# This test reproduces issues #17786 and #18709
# In the test, we create a keyspace with a table and a materialized view.
# We then start writing to the table, causing the materialized view to be updated.
# While the writes are in progress, we add then decommission a node in the cluster.
# The test verifies that no node crashes as a result of the topology change combined
# with the writes.
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_mv_topology_change(manager: ManagerClient):
    """Verify base/view write paths survive a topology change (issues #17786, #18709).

    Starts a 3-node cluster with a base table and a materialized view, runs
    concurrent writes while a node is added and then decommissioned, and
    relies on error injections to widen the window where the base and view
    effective replication maps differ. The test passes if no write fails with
    an unexpected error (i.e. no node crashes).
    """
    cfg = {'force_gossip_topology_changes': True,
           'error_injections_at_startup': ['view_update_generator_max_concurrent_updates']}

    servers = [await manager.server_add(config=cfg, timeout=60) for _ in range(3)]

    cql = manager.get_cql()
    await cql.run_async("CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3};")
    await cql.run_async("CREATE TABLE ks.t (pk int primary key, v int)")
    await cql.run_async("CREATE materialized view ks.t_view AS select pk, v from ks.t where v is not null primary key (v, pk)")

    stop_event = asyncio.Event()
    concurrency = 10

    async def do_writes(start_it, repeat) -> int:
        """Insert rows with keys start_it, start_it+concurrency, ... until
        stop_event is set (or after one write when repeat is False).

        Tolerates ConnectionException-backed NoHostAvailable errors, which are
        expected while a node is shutting down; any other failure is fatal.
        Returns the next unused iteration value.
        """
        iteration = start_it
        while not stop_event.is_set():
            start_time = time.time()
            try:
                await cql.run_async(f"insert into ks.t (pk, v) values ({iteration}, {iteration})")
            except NoHostAvailable as e:
                for _, err in e.errors.items():
                    # ConnectionException can be raised when the node is shutting down.
                    if not isinstance(err, ConnectionException):
                        # Lazy %-style args avoid formatting when the log level filters this out.
                        logger.error("Write started %ss ago failed: %s", time.time() - start_time, e)
                        raise
            except Exception as e:
                logger.error("Write started %ss ago failed: %s", time.time() - start_time, e)
                raise
            iteration += concurrency
            if not repeat:
                break
            await asyncio.sleep(0.01)
        return iteration

    # to hit the issue #18709 it's enough to start one batch of writes, the effective
    # replication maps for base and view will change after the writes start but before they finish
    tasks = [asyncio.create_task(do_writes(i, repeat=False)) for i in range(concurrency)]

    server = await manager.server_add(config=cfg)

    await asyncio.gather(*tasks)

    # Plain for-loops (not list comprehensions) for these purely side-effecting calls.
    for s in servers:
        await manager.api.disable_injection(s.ip_addr, "delay_before_get_view_natural_endpoint")
    for s in servers:
        await manager.api.enable_injection(s.ip_addr, "delay_after_erm_update", False,
                                           parameters={'ks_name': 'ks', 'cf_name': 't'})

    # to hit the issue #17786 we need to run multiple batches of writes, so that some write is
    # processed while the effective replication maps for base and view are different
    tasks = [asyncio.create_task(do_writes(i, repeat=True)) for i in range(concurrency)]
    await manager.decommission_node(server.server_id)

    stop_event.set()
    await asyncio.gather(*tasks)
|