schema: Stop using deprecated constructor

This is another boring patch. One of schema constructors has been deprecated for many years now but was used in several places anyway. Usage of this constructor could lead to data corruption when using MX sstables because this constructor does not set schema version. MX reading/writing code depends on schema version. This patch replaces all the places the deprecated constructor is used with schema_builder equivalent. The schema_builder sets the schema version correctly. Fixes #8507 Test: unit(dev) Signed-off-by: Piotr Jastrzebski <piotr@scylladb.com> Message-Id: <4beabc8c942ebf2c1f9b09cfab7668777ce5b384.1622357125.git.piotr@scylladb.com>
scylladb · May 30, 2021 · 76d7c76 · 76d7c76
1 parent 1507bbb
commit 76d7c76
Show file tree

Hide file tree

Showing 10 changed files with 160 additions and 174 deletions.
diff --git a/db/schema_tables.cc b/db/schema_tables.cc
@@ -645,30 +645,17 @@ schema_ptr aggregates() {
 
 schema_ptr scylla_table_schema_history() {
     static thread_local auto s = [] {
-        schema_builder builder(make_lw_shared(schema(
-            generate_legacy_id(db::system_keyspace::NAME, SCYLLA_TABLE_SCHEMA_HISTORY), db::system_keyspace::NAME, SCYLLA_TABLE_SCHEMA_HISTORY,
-            // partition key
-            {{"cf_id", uuid_type}},
-            // clustering key
-            {{"schema_version", uuid_type}, {"column_name", utf8_type}},
-            // regular columns
-            // mirrors the structure of the "columns" table which is essentially
-            // needed to represent a column mapping in serialized form
-            {
-                {"clustering_order", utf8_type},
-                {"column_name_bytes", bytes_type},
-                {"kind", utf8_type},
-                {"position", int32_type},
-                {"type", utf8_type},
-            },
-            // static columns
-            {},
-            // regular column name type
-            utf8_type,
-            // comment
-            "Scylla specific table to store a history of column mappings "
-            "for each table schema version upon an CREATE TABLE/ALTER TABLE operations"
-        )));
+        schema_builder builder(db::system_keyspace::NAME, SCYLLA_TABLE_SCHEMA_HISTORY, generate_legacy_id(db::system_keyspace::NAME, SCYLLA_TABLE_SCHEMA_HISTORY));
+        builder.with_column("cf_id", uuid_type, column_kind::partition_key);
+        builder.with_column("schema_version", uuid_type, column_kind::clustering_key);
+        builder.with_column("column_name", utf8_type, column_kind::clustering_key);
+        builder.with_column("clustering_order", utf8_type);
+        builder.with_column("column_name_bytes", bytes_type);
+        builder.with_column("kind", utf8_type);
+        builder.with_column("position", int32_type);
+        builder.with_column("type", utf8_type);
+        builder.set_comment("Scylla specific table to store a history of column mappings "
+            "for each table schema version upon an CREATE TABLE/ALTER TABLE operations");
         builder.with_version(generate_schema_version(builder.uuid()));
         builder.with_null_sharder();
         return builder.build(schema_builder::compact_storage::no);

diff --git a/schema.cc b/schema.cc
@@ -415,41 +415,6 @@ schema::schema(const raw_schema& raw, std::optional<raw_view_info> raw_view_info
     }
 }
 
-schema::schema(std::optional<utils::UUID> id,
-    std::string_view ks_name,
-    std::string_view cf_name,
-    std::vector<column> partition_key,
-    std::vector<column> clustering_key,
-    std::vector<column> regular_columns,
-    std::vector<column> static_columns,
-    data_type regular_column_name_type,
-    std::string_view comment)
-    : schema([&] {
-        raw_schema raw(id ? *id : utils::UUID_gen::get_time_UUID());
-
-        raw._comment = sstring(comment);
-        raw._ks_name = sstring(ks_name);
-        raw._cf_name = sstring(cf_name);
-        raw._regular_column_name_type = regular_column_name_type;
-
-        auto build_columns = [&raw](std::vector<column>& columns, column_kind kind) {
-            for (auto& sc : columns) {
-                if (sc.type->is_multi_cell()) {
-                    raw._collections.emplace(sc.name, sc.type);
-                }
-                raw._columns.emplace_back(std::move(sc.name), std::move(sc.type), kind);
-            }
-        };
-
-        build_columns(partition_key, column_kind::partition_key);
-        build_columns(clustering_key, column_kind::clustering_key);
-        build_columns(static_columns, column_kind::static_column);
-        build_columns(regular_columns, column_kind::regular_column);
-
-        return raw;
-    }(), std::nullopt)
-{}
-
 schema::schema(const schema& o)
     : _raw(o._raw)
     , _offsets(o._offsets)
@@ -463,13 +428,25 @@ schema::schema(const schema& o)
     }
 }
 
-lw_shared_ptr<schema> make_shared_schema(std::optional<utils::UUID> id, std::string_view ks_name,
+lw_shared_ptr<const schema> make_shared_schema(std::optional<utils::UUID> id, std::string_view ks_name,
     std::string_view cf_name, std::vector<schema::column> partition_key, std::vector<schema::column> clustering_key,
     std::vector<schema::column> regular_columns, std::vector<schema::column> static_columns,
-    data_type regular_column_name_type, std::string_view comment) {
-    return make_lw_shared<schema>(std::move(id), std::move(ks_name), std::move(cf_name), std::move(partition_key),
-        std::move(clustering_key), std::move(regular_columns), std::move(static_columns),
-        std::move(regular_column_name_type), std::move(comment));
+    data_type regular_column_name_type, sstring comment) {
+    schema_builder builder(std::move(ks_name), std::move(cf_name), std::move(id), std::move(regular_column_name_type));
+    for (auto&& column : partition_key) {
+        builder.with_column(std::move(column.name), std::move(column.type), column_kind::partition_key);
+    }
+    for (auto&& column : clustering_key) {
+        builder.with_column(std::move(column.name), std::move(column.type), column_kind::clustering_key);
+    }
+    for (auto&& column : regular_columns) {
+        builder.with_column(std::move(column.name), std::move(column.type));
+    }
+    for (auto&& column : static_columns) {
+        builder.with_column(std::move(column.name), std::move(column.type), column_kind::static_column);
+    }
+    builder.set_comment(comment);
+    return builder.build();
 }
 
 schema::~schema() {

diff --git a/schema.hh b/schema.hh
@@ -699,16 +699,6 @@ private:
     void rebuild();
     schema(const raw_schema&, std::optional<raw_view_info>);
 public:
-    // deprecated, use schema_builder.
-    schema(std::optional<utils::UUID> id,
-        std::string_view ks_name,
-        std::string_view cf_name,
-        std::vector<column> partition_key,
-        std::vector<column> clustering_key,
-        std::vector<column> regular_columns,
-        std::vector<column> static_columns,
-        data_type regular_column_name_type,
-        std::string_view comment = {});
     schema(const schema&);
     ~schema();
     table_schema_version version() const {
@@ -978,9 +968,9 @@ public:
     }
 };
 
-lw_shared_ptr<schema> make_shared_schema(std::optional<utils::UUID> id, std::string_view ks_name, std::string_view cf_name,
+lw_shared_ptr<const schema> make_shared_schema(std::optional<utils::UUID> id, std::string_view ks_name, std::string_view cf_name,
     std::vector<schema::column> partition_key, std::vector<schema::column> clustering_key, std::vector<schema::column> regular_columns,
-    std::vector<schema::column> static_columns, data_type regular_column_name_type, std::string_view comment = {});
+    std::vector<schema::column> static_columns, data_type regular_column_name_type, sstring comment = "");
 
 bool operator==(const schema&, const schema&);
 

diff --git a/test/boost/cql_functions_test.cc b/test/boost/cql_functions_test.cc
@@ -48,8 +48,11 @@ SEASTAR_TEST_CASE(test_functions) {
     return do_with_cql_env([] (cql_test_env& e) {
         return e.create_table([](std::string_view ks_name) {
             // CQL: create table cf (p1 varchar primary key, u uuid, tu timeuuid);
-            return schema({}, ks_name, "cf",
-                {{"p1", utf8_type}}, {{"c1", int32_type}}, {{"tu", timeuuid_type}}, {}, utf8_type);
+            return *schema_builder(ks_name, "cf")
+                    .with_column("p1", utf8_type, column_kind::partition_key)
+                    .with_column("c1", int32_type, column_kind::clustering_key)
+                    .with_column("tu", timeuuid_type)
+                    .build();
         }).then([&e] {
             return e.execute_cql("insert into cf (p1, c1, tu) values ('key1', 1, now());").discard_result();
         }).then([&e] {
@@ -152,8 +155,11 @@ struct aggregate_function_test {
     {
         const auto cf_name = table_name();
         _e.create_table([column_type, cf_name] (std::string_view ks_name) {
-            return schema({}, ks_name, cf_name,
-                {{"pk", utf8_type}}, {{"ck", int32_type}}, {{"value", column_type}}, {}, utf8_type);
+            return *schema_builder(ks_name, cf_name)
+                    .with_column("pk", utf8_type, column_kind::partition_key)
+                    .with_column("ck", int32_type, column_kind::clustering_key)
+                    .with_column("value", column_type)
+                    .build();
         }).get();
 
         auto prepared = _e.prepare(format("insert into {} (pk, ck, value) values ('key1', ?, ?)", cf_name)).get0();

diff --git a/test/boost/cql_query_large_test.cc b/test/boost/cql_query_large_test.cc
@@ -147,16 +147,12 @@ SEASTAR_TEST_CASE(test_insert_large_collection_values) {
             auto list_type = list_type_impl::get_instance(utf8_type, true);
             e.create_table([map_type, set_type, list_type] (std::string_view ks_name) {
                 // CQL: CREATE TABLE tbl (pk text PRIMARY KEY, m map<text, text>, s set<text>, l list<text>);
-                return schema({}, ks_name, "tbl",
-                    {{"pk", utf8_type}},
-                    {},
-                    {
-                        {"m", map_type},
-                        {"s", set_type},
-                        {"l", list_type}
-                    },
-                    {},
-                    utf8_type);
+                return *schema_builder(ks_name, "tbl")
+                        .with_column("pk", utf8_type, column_kind::partition_key)
+                        .with_column("m", map_type)
+                        .with_column("s", set_type)
+                        .with_column("l", list_type)
+                        .build();
             }).get();
             sstring long_value(std::numeric_limits<uint16_t>::max() + 10, 'x');
             e.execute_cql(format("INSERT INTO tbl (pk, l) VALUES ('Zamyatin', ['{}']);", long_value)).get();

diff --git a/test/boost/cql_query_test.cc b/test/boost/cql_query_test.cc
@@ -413,12 +413,12 @@ SEASTAR_TEST_CASE(test_select_statement) {
    return do_with_cql_env([] (cql_test_env& e) {
         return e.create_table([](std::string_view ks_name) {
             // CQL: create table cf (p1 varchar, c1 int, c2 int, r1 int, PRIMARY KEY (p1, c1, c2));
-            return schema({}, ks_name, "cf",
-                {{"p1", utf8_type}},
-                {{"c1", int32_type}, {"c2", int32_type}},
-                {{"r1", int32_type}},
-                {},
-                utf8_type);
+            return *schema_builder(ks_name, "cf")
+                    .with_column("p1", utf8_type, column_kind::partition_key)
+                    .with_column("c1", int32_type, column_kind::clustering_key)
+                    .with_column("c2", int32_type, column_kind::clustering_key)
+                    .with_column("r1", int32_type)
+                    .build();
         }).then([&e] {
             return e.execute_cql("insert into cf (p1, c1, c2, r1) values ('key1', 1, 2, 3);").discard_result();
         }).then([&e] {
@@ -505,16 +505,14 @@ SEASTAR_TEST_CASE(test_cassandra_stress_like_write_and_read) {
         };
 
         return e.create_table([](std::string_view ks_name) {
-            return schema({}, ks_name, "cf",
-                          {{"KEY", bytes_type}},
-                          {},
-                          {{"C0", bytes_type},
-                           {"C1", bytes_type},
-                           {"C2", bytes_type},
-                           {"C3", bytes_type},
-                           {"C4", bytes_type}},
-                          {},
-                          utf8_type);
+            return *schema_builder(ks_name, "cf")
+                    .with_column("KEY", bytes_type, column_kind::partition_key)
+                    .with_column("C0", bytes_type)
+                    .with_column("C1", bytes_type)
+                    .with_column("C2", bytes_type)
+                    .with_column("C3", bytes_type)
+                    .with_column("C4", bytes_type)
+                    .build();
         }).then([execute_update_for_key, verify_row_for_key] {
             static auto make_key = [](int suffix) { return format("0xdeadbeefcafebabe{:02d}", suffix); };
             auto suffixes = boost::irange(0, 10);
@@ -532,12 +530,12 @@ SEASTAR_TEST_CASE(test_cassandra_stress_like_write_and_read) {
 SEASTAR_TEST_CASE(test_range_queries) {
    return do_with_cql_env([] (cql_test_env& e) {
         return e.create_table([](std::string_view ks_name) {
-            return schema({}, ks_name, "cf",
-                {{"k", bytes_type}},
-                {{"c0", bytes_type}, {"c1", bytes_type}},
-                {{"v", bytes_type}},
-                {},
-                utf8_type);
+            return *schema_builder(ks_name, "cf")
+                    .with_column("k", bytes_type, column_kind::partition_key)
+                    .with_column("c0", bytes_type, column_kind::clustering_key)
+                    .with_column("c1", bytes_type, column_kind::clustering_key)
+                    .with_column("v", bytes_type)
+                    .build();
         }).then([&e] {
             return e.execute_cql("update cf set v = 0x01 where k = 0x00 and c0 = 0x01 and c1 = 0x01;").discard_result();
         }).then([&e] {
@@ -629,13 +627,13 @@ SEASTAR_TEST_CASE(test_range_queries) {
 SEASTAR_TEST_CASE(test_ordering_of_composites_with_variable_length_components) {
     return do_with_cql_env([] (cql_test_env& e) {
         return e.create_table([](std::string_view ks) {
-            return schema({}, ks, "cf",
-                {{"k", bytes_type}},
-                // We need more than one clustering column so that the single-element tuple format optimisation doesn't kick in
-                {{"c0", bytes_type}, {"c1", bytes_type}},
-                {{"v", bytes_type}},
-                {},
-                utf8_type);
+            return *schema_builder(ks, "cf")
+                    .with_column("k", bytes_type, column_kind::partition_key)
+                    // We need more than one clustering column so that the single-element tuple format optimisation doesn't kick in
+                    .with_column("c0", bytes_type, column_kind::clustering_key)
+                    .with_column("c1", bytes_type, column_kind::clustering_key)
+                    .with_column("v", bytes_type)
+                    .build();
         }).then([&e] {
             return e.execute_cql("update cf set v = 0x01 where k = 0x00 and c0 = 0x0001 and c1 = 0x00;").discard_result();
         }).then([&e] {
@@ -977,8 +975,11 @@ SEASTAR_TEST_CASE(test_deletion_scenarios) {
     return do_with_cql_env([] (cql_test_env& e) {
         return e.create_table([](std::string_view ks) {
             // CQL: create table cf (k bytes, c bytes, v bytes, primary key (k, c));
-            return schema({}, ks, "cf",
-                {{"k", bytes_type}}, {{"c", bytes_type}}, {{"v", bytes_type}}, {}, utf8_type);
+            return *schema_builder(ks, "cf")
+                    .with_column("k", bytes_type, column_kind::partition_key)
+                    .with_column("c", bytes_type, column_kind::clustering_key)
+                    .with_column("v", bytes_type)
+                    .build();
         }).then([&e] {
             return e.execute_cql("insert into cf (k, c, v) values (0x00, 0x05, 0x01) using timestamp 1;").discard_result();
         }).then([&e] {
@@ -1149,8 +1150,10 @@ SEASTAR_TEST_CASE(test_map_insert_update) {
         auto my_map_type = make_my_map_type();
         return e.create_table([make_my_map_type] (std::string_view ks_name) {
             // CQL: create table cf (p1 varchar primary key, map1 map<int, int>);
-            return schema({}, ks_name, "cf",
-                          {{"p1", utf8_type}}, {}, {{"map1", make_my_map_type()}}, {}, utf8_type);
+            return *schema_builder(ks_name, "cf")
+                    .with_column("p1", utf8_type, column_kind::partition_key)
+                    .with_column("map1", make_my_map_type())
+                    .build();
         }).then([&e] {
             return e.execute_cql("insert into cf (p1, map1) values ('key1', { 1001: 2001 });").discard_result();
         }).then([&e, my_map_type] {
@@ -1234,8 +1237,10 @@ SEASTAR_TEST_CASE(test_set_insert_update) {
         auto my_set_type = make_my_set_type();
         return e.create_table([make_my_set_type](std::string_view ks_name) {
             // CQL: create table cf (p1 varchar primary key, set1 set<int>);
-            return schema({}, ks_name, "cf",
-                          {{"p1", utf8_type}}, {}, {{"set1", make_my_set_type()}}, {}, utf8_type);
+            return *schema_builder(ks_name, "cf")
+                    .with_column("p1", utf8_type, column_kind::partition_key)
+                    .with_column("set1", make_my_set_type())
+                    .build();
         }).then([&e] {
             return e.execute_cql("insert into cf (p1, set1) values ('key1', { 1001 });").discard_result();
         }).then([&e, my_set_type] {
@@ -1470,8 +1475,10 @@ SEASTAR_TEST_CASE(test_tuples) {
             // this runs on all cores, so create a local tt for each core:
             auto tt = make_tt();
             // CQL: "create table cf (id int primary key, t tuple<int, bigint, text>);
-            return schema({}, ks_name, "cf",
-                {{"id", int32_type}}, {}, {{"t", tt}}, {}, utf8_type);
+            return *schema_builder(ks_name, "cf")
+                    .with_column("id", int32_type, column_kind::partition_key)
+                    .with_column("t", tt)
+                    .build();
         }).then([&e] {
             return e.execute_cql("insert into cf (id, t) values (1, (1001, 2001, 'abc1'));").discard_result();
         }).then([&e] {
@@ -1719,8 +1726,12 @@ SEASTAR_TEST_CASE(test_ttl) {
         auto make_my_list_type = [] { return list_type_impl::get_instance(utf8_type, true); };
         auto my_list_type = make_my_list_type();
         return e.create_table([make_my_list_type] (std::string_view ks_name) {
-            return schema({}, ks_name, "cf",
-                {{"p1", utf8_type}}, {}, {{"r1", utf8_type}, {"r2", utf8_type}, {"r3", make_my_list_type()}}, {}, utf8_type);
+            return *schema_builder(ks_name, "cf")
+                    .with_column("p1", utf8_type, column_kind::partition_key)
+                    .with_column("r1", utf8_type)
+                    .with_column("r2", utf8_type)
+                    .with_column("r3", make_my_list_type())
+                    .build();
         }).then([&e] {
             return e.execute_cql(
                 "update cf using ttl 100000 set r1 = 'value1_1', r3 = ['a', 'b', 'c'] where p1 = 'key1';").discard_result();

diff --git a/test/boost/database_test.cc b/test/boost/database_test.cc
@@ -356,12 +356,12 @@ future<> do_with_some_data(std::function<future<> (cql_test_env& env)> func) {
         db_cfg_ptr->data_file_directories(std::vector<sstring>({ tmpdir_for_data.path().string() }));
         do_with_cql_env_thread([func = std::move(func)] (cql_test_env& e) {
             e.create_table([](std::string_view ks_name) {
-                return schema({}, ks_name, "cf",
-                              {{"p1", utf8_type}},
-                              {{"c1", int32_type}, {"c2", int32_type}},
-                              {{"r1", int32_type}},
-                              {},
-                              utf8_type);
+                return *schema_builder(ks_name, "cf")
+                        .with_column("p1", utf8_type, column_kind::partition_key)
+                        .with_column("c1", int32_type, column_kind::clustering_key)
+                        .with_column("c2", int32_type, column_kind::clustering_key)
+                        .with_column("r1", int32_type)
+                        .build();
             }).get();
             e.execute_cql("insert into cf (p1, c1, c2, r1) values ('key1', 1, 2, 3);").get();
             e.execute_cql("insert into cf (p1, c1, c2, r1) values ('key1', 2, 2, 3);").get();