Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge 'repair: streaming: handle no_such_column_family from remote node' from Aleksandra Martyniuk

RPC calls lose information about the type of the returned exception. Thus, if a table is dropped on the receiver node while it still exists on the sender node, and the sender node streams the table's data, the whole operation fails.

To prevent that, add a method which synchronizes the schema and then checks whether the exception was caused by a table drop. If so, the exception is swallowed. Use the method in streaming and repair so that they continue when the table is dropped in the meantime.

Fixes: #17028. Fixes: #15370. Fixes: #15598.

Closes #17231

* github.com:scylladb/scylladb:
  - repair: handle no_such_column_family from remote node gracefully
  - test: test drop table on receiver side during streaming
  - streaming: fix indentation
  - streaming: handle no_such_column_family from remote node gracefully
  - repair: add methods to skip dropped table
- Loading branch information
Showing 9 changed files with 180 additions and 55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* Copyright (C) 2024-present ScyllaDB | ||
*/ | ||
|
||
/* | ||
* SPDX-License-Identifier: AGPL-3.0-or-later | ||
*/ | ||
|
||
#include "replica/database.hh" | ||
#include "repair/table_check.hh" | ||
#include "service/migration_manager.hh" | ||
|
||
namespace repair { | ||
|
||
// Synchronizes the schema (when Raft is in use) and then reports whether the
// table identified by `uuid` is absent from `db`.
//
// If `mm.use_raft()` is true, a group 0 read barrier is triggered before the
// check. The barrier is handed an abort source owned by an abort_on_expiry
// armed with a deadline 10 seconds from now; the same abort source is also
// requested to abort when the migration manager's own abort source fires.
// If Raft is not in use, only the local existence check is performed.
//
// Returns a table_dropped built from the negated result of
// `db.column_family_exists(uuid)`.
future<table_dropped> table_sync_and_check(replica::database& db, service::migration_manager& mm, const table_id& uuid) {
    if (mm.use_raft()) {
        // Deadline applied to the read barrier below.
        constexpr std::chrono::seconds barrier_timeout{10};
        abort_on_expiry barrier_deadline(lowres_clock::now() + barrier_timeout);
        auto& barrier_abort = barrier_deadline.abort_source();

        // Keep the subscription alive for the duration of the barrier so that an
        // abort of the migration manager is forwarded to the barrier's abort source.
        auto mm_abort_link = mm.get_abort_source().subscribe([&barrier_abort] () noexcept {
            if (barrier_abort.abort_requested()) {
                return;
            }
            barrier_abort.request_abort();
        });

        // Trigger read barrier to synchronize schema.
        co_await mm.get_group0_barrier().trigger(barrier_abort);
    }

    const auto table_present = db.column_family_exists(uuid);
    co_return !table_present;
}
|
||
// Invokes `f(uuid)` and converts failures caused by the table having been dropped
// into a return value instead of an exception.
//
// Returns:
//   table_dropped::no  - `f` completed without throwing.
//   table_dropped::yes - `f` threw replica::no_such_column_family, or `f` threw
//                        something else and table_sync_and_check() afterwards
//                        reported the table as dropped.
// If `f` throws something else and the table is not reported as dropped, the
// original exception is propagated to the caller.
future<table_dropped> with_table_drop_silenced(replica::database& db, service::migration_manager& mm, const table_id& uuid,
        std::function<future<>(const table_id&)> f) {
    // Remains null unless `f` fails with something other than no_such_column_family.
    std::exception_ptr pending_failure;
    try {
        co_await f(uuid);
        co_return table_dropped::no;
    } catch (replica::no_such_column_family&) {
        // The table is already known to be dropped, so no synchronization is needed.
    } catch (...) {
        // The remote node may already have dropped a table that this node still
        // sees, in which case the remote node answers with an error. Such a table
        // should be skipped so the operation can carry on.
        //
        // RPC does not carry the exception type, so the reason for the failure is
        // unknown at this point. Remember the exception; the schema is synchronized
        // below to find out whether the table was in fact dropped.
        pending_failure = std::current_exception();
    }

    // The check is done here rather than in the handler above because co_await
    // cannot be used inside a catch block.
    if (pending_failure) {
        const auto dropped = co_await table_sync_and_check(db, mm, uuid);
        if (!dropped) {
            co_await coroutine::return_exception_ptr(std::move(pending_failure));
        }
    }
    co_return table_dropped::yes;
}
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
* Copyright (C) 2024-present ScyllaDB | ||
*/ | ||
|
||
/* | ||
* SPDX-License-Identifier: AGPL-3.0-or-later | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <seastar/core/future.hh> | ||
#include <seastar/util/bool_class.hh> | ||
|
||
#include "schema/schema_fwd.hh" | ||
|
||
// Tagged boolean used as the result of the table checks declared in this header:
// table_dropped::yes means the table was found to be dropped, table_dropped::no
// means it was not.
using table_dropped = bool_class<class table_dropped_tag>;

// Forward declarations, so this header does not need the full definitions.
// NOTE(review): raft::server is not referenced by any declaration visible in
// this header - confirm whether it is still needed.
namespace raft { class server; }
namespace replica { class database; }
namespace service { class migration_manager; }
|
||
namespace repair { | ||
|
||
class database; | ||
|
||
future<table_dropped> table_sync_and_check(replica::database& db, service::migration_manager& mm, const table_id& uuid); | ||
|
||
// Runs function f on given table. If f throws and the table is dropped, the exception is swallowed. | ||
// Function is aimed to handle no_such_column_family on remote node or different shard, as it synchronizes | ||
// schema before checking the table. Prefer standard error handling whenever possible. | ||
future<table_dropped> with_table_drop_silenced(replica::database& db, service::migration_manager& mm, const table_id& uuid, | ||
std::function<future<>(const table_id&)> f); | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from test.pylib.manager_client import ManagerClient | ||
import pytest | ||
|
||
@pytest.mark.asyncio
async def test_drop_table_during_streaming_receiver_side(manager: ManagerClient):
    """Add two servers that start with the
    ``stream_mutation_fragments_table_dropped`` error injection enabled.

    Repair-based node operations are disabled in the server config, which
    presumably makes the second server join via streaming -- TODO confirm.
    The test contains no explicit assertions; it passes when both
    ``manager.server_add`` calls complete without raising.
    """
    servers = []
    for _ in range(2):
        # A fresh config dict is built for every server, one after another.
        node_config = {
            'error_injections_at_startup': ['stream_mutation_fragments_table_dropped'],
            'enable_repair_based_node_ops': False,
            'enable_user_defined_functions': False,
            'experimental_features': [],
        }
        servers.append(await manager.server_add(config=node_config))