Skip to content

Commit

Permalink
db: schema_tables: Make table creation shadow earlier concurrent changes
Browse files Browse the repository at this point in the history
Issuing two CREATE TABLE statements with a different name for one of
the partition key columns leads to the following assertion failure on
all replicas:

scylla: schema.cc:363: schema::schema(const schema::raw_schema&, std::optional<raw_view_info>): Assertion `!def.id || def.id == id - column_offset(def.kind)' failed.

The reason is that once the create table mutations are merged, the
columns table contains two entries for the same position in the
partition key tuple.

If the schemas were the same, or not conflicting in a way which leads
to abort, the current behavior would be to drop the older table as if
the last CREATE TABLE was preceded by a DROP TABLE.

The proposed fix is to make CREATE TABLE mutation include a tombstone
for all older schema changes of this table, effectively overriding
them. The behavior will be the same as if the schemas were not
different, older table will be dropped.

Fixes #11396
  • Loading branch information
tgrabiec committed Aug 29, 2022
1 parent 661db27 commit ae8d2a5
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 1 deletion.
47 changes: 46 additions & 1 deletion db/schema_tables.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2222,11 +2222,54 @@ std::vector<mutation> make_drop_aggregate_mutations(schema_features features, sh
* Table metadata serialization/deserialization.
*/

/// Returns mutations which when applied to the database will cause all schema changes
/// which create or alter the table with a given name, made with timestamps smaller than t,
/// have no effect. Used when overriding schema to shadow concurrent conflicting schema changes.
/// Shouldn't be needed if schema changes are serialized with RAFT.
static schema_mutations make_table_deleting_mutations(const sstring& ks, const sstring& table, bool is_view, api::timestamp_type t) {
tombstone tomb;

// Generate neutral mutations if t == api::min_timestamp
if (t > api::min_timestamp) {
tomb = tombstone(t - 1, gc_clock::now());
}

auto tables_m_s = is_view ? views() : tables();
mutation tables_m{tables_m_s, partition_key::from_singular(*tables_m_s, ks)};
{
auto ckey = clustering_key::from_singular(*tables_m_s, table);
tables_m.partition().apply_delete(*tables_m_s, ckey, tomb);
}

mutation scylla_tables_m{scylla_tables(), partition_key::from_singular(*scylla_tables(), ks)};
{
auto ckey = clustering_key::from_singular(*scylla_tables(), table);
scylla_tables_m.partition().apply_delete(*scylla_tables(), ckey, tomb);
}

auto make_drop_columns = [&] (const schema_ptr& s) {
mutation m{s, partition_key::from_singular(*s, ks)};
auto ckey = clustering_key::from_exploded(*s, {utf8_type->decompose(table)});
m.partition().apply_delete(*s, ckey, tomb);
return m;
};

return schema_mutations(std::move(tables_m),
make_drop_columns(columns()),
make_drop_columns(view_virtual_columns()),
make_drop_columns(computed_columns()),
mutation(indexes(), partition_key::from_singular(*indexes(), ks)),

This comment has been minimized.

Copy link
@avikivity

avikivity Aug 30, 2022

Member

Why isn't this a tombstone?

make_drop_columns(dropped_columns()),
std::move(scylla_tables_m)
);
}

std::vector<mutation> make_create_table_mutations(schema_ptr table, api::timestamp_type timestamp)
{
std::vector<mutation> mutations;
add_table_or_view_to_schema_mutation(table, timestamp, true, mutations);

make_table_deleting_mutations(table->ks_name(), table->cf_name(), table->is_view(), timestamp)
.copy_to(mutations);
return mutations;
}

Expand Down Expand Up @@ -3223,6 +3266,8 @@ std::vector<mutation> make_create_view_mutations(lw_shared_ptr<keyspace_metadata
auto base = keyspace->cf_meta_data().at(view->view_info()->base_name());
add_table_or_view_to_schema_mutation(base, timestamp, true, mutations);
add_table_or_view_to_schema_mutation(view, timestamp, true, mutations);
make_table_deleting_mutations(view->ks_name(), view->cf_name(), view->is_view(), timestamp)
.copy_to(mutations);
return mutations;
}

Expand Down
44 changes: 44 additions & 0 deletions test/boost/schema_change_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,50 @@ SEASTAR_TEST_CASE(test_combined_column_add_and_drop) {
});
}

// Tests behavior when incompatible CREATE TABLE statements are
// issued concurrently in the cluster.
// Relevant only for the pre-raft schema management, with raft the scenario should not happen.
SEASTAR_TEST_CASE(test_concurrent_table_creation_with_different_schema) {
return do_with_cql_env([](cql_test_env& e) {
return seastar::async([&] {
service::migration_manager& mm = e.migration_manager().local();

e.execute_cql("create keyspace tests with replication = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };").get();

auto s1 = schema_builder("ks", "table1")
.with_column("pk1", bytes_type, column_kind::partition_key)
.with_column("pk2", bytes_type, column_kind::partition_key)
.with_column("v1", bytes_type)
.build();

auto s2 = schema_builder("ks", "table1")
.with_column("pk1", bytes_type, column_kind::partition_key)
.with_column("pk3", bytes_type, column_kind::partition_key)
.with_column("v1", bytes_type)
.build();

auto ann1 = mm.prepare_new_column_family_announcement(s1, api::new_timestamp()).get();
auto ann2 = mm.prepare_new_column_family_announcement(s2, api::new_timestamp()).get();

{
auto group0_guard = mm.start_group0_operation().get();
mm.announce(std::move(ann1), std::move(group0_guard)).get();
}

{
auto group0_guard = mm.start_group0_operation().get();
mm.announce(std::move(ann2), std::move(group0_guard)).get();
}

auto&& keyspace = e.db().local().find_keyspace(s1->ks_name()).metadata();

// s2 should win because it has higher timestamp
auto new_schema = e.db().local().find_schema(s2->id());
BOOST_REQUIRE(*new_schema == *s2);
});
});
}

SEASTAR_TEST_CASE(test_merging_does_not_alter_tables_which_didnt_change) {
return do_with_cql_env([](cql_test_env& e) {
return seastar::async([&] {
Expand Down

0 comments on commit ae8d2a5

Please sign in to comment.