Skip to content

Commit

Permalink
Merge 'treewide: add uuid_sstable_identifier_enabled support' from Ke…
Browse files Browse the repository at this point in the history
…fu Chai

This series adds an option named "uuid_sstable_identifiers_enabled" and the related cluster feature bit, which is set once all nodes in the cluster have set this option to "true". Once it is set, the sstable subsystem starts using a timeuuid instead of a plain integer as the identifier of sstables. A timeuuid is a better choice for identifiers because we no longer need to worry about id conflicts. We still have quite a few tests that use static sstables with integers in their names; these tests are not changed in this series. We will create some tests to exercise the sstable subsystem with this option set.

A very simple inter-op test with Cassandra 4.1.1 was also performed to verify that the generated sstables can be read by Cassandra:

1. start Scylla, connect to it with cqlsh, run the following commands, and then stop it
    ```
    cqlsh> CREATE  KEYSPACE ks WITH REPLICATION = { 'class' : 'SimpleStrategy','replication_factor':1} ;
    cqlsh> CREATE TABLE ks.cf ( name text primary key, value text );
    cqlsh> INSERT INTO ks.cf (name, value) VALUES ('1', 'one');
    cqlsh> SELECT * FROM ks.cf;
    ```
2. enable Cassandra's `uuid_sstable_identifiers_enabled`, start Cassandra 4.1.1, connect to it with cqlsh, run the following commands, and then stop it
    ```
    cqlsh> CREATE  KEYSPACE ks WITH REPLICATION = { 'class' : 'SimpleStrategy','replication_factor':1} ;
    cqlsh> CREATE TABLE ks.cf ( name text primary key, value text );
    cqlsh> INSERT INTO ks.cf (name, value) VALUES ('1', 'one');
    cqlsh> SELECT * FROM ks.cf;
    ```
3. move the sstables generated by Cassandra out of the way, and replace them with the sstables generated by ScyllaDB:
    ```console
    $ mv ~/cassandra/data/data/ks/cf-b29d23a009d911eeb5fed163c4d0af49 /tmp
    $ mv ~/scylla/ks/cf-db47a12009d611eea6b8b179df3a2d5d ~/cassandra/data/data/ks/cf-b29d23a009d911eeb5fed163c4d0af49
    ```
4. start Cassandra 4.1.1 again, connect to it with cqlsh, and run the following command:
    ```
    cqlsh> SELECT * FROM ks.cf;
     name | value
    ------+-------
        1 |   one
    ```

Fixes #10459

Closes #13932

* github.com:scylladb/scylladb:
  replica,sstable: introduce invalid generation id
  sstables, replica: pass uuid_sstable_identifiers to generation generator
  gms/feature_service: introduce UUID_SSTABLE_IDENTIFIERS cluster feature
  db: config: add uuid_sstable_identifiers_enabled option
  sstables, replica: support UUID in generation_type
  • Loading branch information
denesb committed Jun 15, 2023
2 parents 3a73048 + 2d265e8 commit d1dc579
Show file tree
Hide file tree
Showing 19 changed files with 275 additions and 66 deletions.
1 change: 1 addition & 0 deletions configure.py
Expand Up @@ -462,6 +462,7 @@ def find_headers(repodir, excluded_dirs):
'test/boost/snitch_reset_test',
'test/boost/sstable_3_x_test',
'test/boost/sstable_datafile_test',
'test/boost/sstable_generation_test',
'test/boost/sstable_mutation_test',
'test/boost/sstable_partition_index_cache_test',
'test/boost/schema_changes_test',
Expand Down
3 changes: 3 additions & 0 deletions db/config.cc
Expand Up @@ -871,6 +871,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, enable_sstables_mc_format(this, "enable_sstables_mc_format", value_status::Unused, true, "Enable SSTables 'mc' format to be used as the default file format. Deprecated, please use \"sstable_format\" instead.")
, enable_sstables_md_format(this, "enable_sstables_md_format", value_status::Unused, true, "Enable SSTables 'md' format to be used as the default file format. Deprecated, please use \"sstable_format\" instead.")
, sstable_format(this, "sstable_format", value_status::Used, "me", "Default sstable file format", {"md", "me"})
, uuid_sstable_identifiers_enabled(this,
"uuid_sstable_identifiers_enabled", liveness::LiveUpdate, value_status::Used, true, "If set to true, each newly created sstable will have a UUID "
"based generation identifier, and such files are not readable by previous Scylla versions.")
, enable_dangerous_direct_import_of_cassandra_counters(this, "enable_dangerous_direct_import_of_cassandra_counters", value_status::Used, false, "Only turn this option on if you want to import tables from Cassandra containing counters, and you are SURE that no counters in that table were created in a version earlier than Cassandra 2.1."
" It is not enough to have ever since upgraded to newer versions of Cassandra. If you EVER used a version earlier than 2.1 in the cluster where these SSTables come from, DO NOT TURN ON THIS OPTION! You will corrupt your data. You have been warned.")
, enable_shard_aware_drivers(this, "enable_shard_aware_drivers", value_status::Used, true, "Enable native transport drivers to use connection-per-shard for better performance")
Expand Down
1 change: 1 addition & 0 deletions db/config.hh
Expand Up @@ -356,6 +356,7 @@ public:
named_value<bool> enable_sstables_mc_format;
named_value<bool> enable_sstables_md_format;
named_value<sstring> sstable_format;
named_value<bool> uuid_sstable_identifiers_enabled;
named_value<bool> enable_dangerous_direct_import_of_cassandra_counters;
named_value<bool> enable_shard_aware_drivers;
named_value<bool> enable_ipv6_dns_lookup;
Expand Down
2 changes: 1 addition & 1 deletion db/system_keyspace.cc
Expand Up @@ -3729,7 +3729,7 @@ future<> system_keyspace::sstables_registry_list(sstring location, sstable_regis
co_await _qp.local().query_internal(req, db::consistency_level::ONE, { location }, 1000, [ consumer = std::move(consumer) ] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
auto uuid = row.get_as<utils::UUID>("uuid");
auto status = row.get_as<sstring>("status");
auto gen = sstables::generation_type::from_uuid(row.get_as<utils::UUID>("generation"));
auto gen = sstables::generation_type(row.get_as<utils::UUID>("generation"));
auto ver = sstables::version_from_string(row.get_as<sstring>("version"));
auto fmt = sstables::format_from_string(row.get_as<sstring>("format"));
sstables::entry_descriptor desc("", "", "", gen, ver, fmt, sstables::component_type::TOC);
Expand Down
3 changes: 3 additions & 0 deletions gms/feature_service.cc
Expand Up @@ -72,6 +72,9 @@ feature_config feature_config_from_db_config(const db::config& cfg, std::set<sst
if (!cfg.check_experimental(db::experimental_features_t::feature::TABLETS)) {
fcfg._disabled_features.insert("TABLETS"s);
}
if (!cfg.uuid_sstable_identifiers_enabled()) {
fcfg._disabled_features.insert("UUID_SSTABLE_IDENTIFIERS"s);
}

if (!utils::get_local_injector().enter("features_enable_test_feature")) {
fcfg._disabled_features.insert("TEST_ONLY_FEATURE"s);
Expand Down
1 change: 1 addition & 0 deletions gms/feature_service.hh
Expand Up @@ -117,6 +117,7 @@ public:
gms::feature large_collection_detection { *this, "LARGE_COLLECTION_DETECTION"sv };
gms::feature secondary_indexes_on_static_columns { *this, "SECONDARY_INDEXES_ON_STATIC_COLUMNS"sv };
gms::feature tablets { *this, "TABLETS"sv };
gms::feature uuid_sstable_identifiers { *this, "UUID_SSTABLE_IDENTIFIERS"sv };

// A feature just for use in tests. It must not be advertised unless
// the "features_enable_test_feature" injection is enabled.
Expand Down
2 changes: 1 addition & 1 deletion replica/database.hh
Expand Up @@ -571,7 +571,7 @@ private:

// update the sstable generation, making sure (in calculate_generation_for_new_table)
// that new sstables don't overwrite this one.
void update_sstables_known_generation(std::optional<sstables::generation_type> generation);
void update_sstables_known_generation(sstables::generation_type generation);

sstables::generation_type calculate_generation_for_new_table();
private:
Expand Down
18 changes: 8 additions & 10 deletions replica/distributed_loader.cc
Expand Up @@ -453,14 +453,16 @@ distributed_loader::process_upload_dir(distributed<replica::database>& db, distr
process_sstable_dir(directory, flags).get();

sharded<sstables::sstable_generation_generator> sharded_gen;
auto highest_generation = highest_generation_seen(directory).get0().value_or(
sstables::generation_type{0});
sharded_gen.start(highest_generation.as_int()).get();
auto highest_generation = highest_generation_seen(directory).get0();
sharded_gen.start(highest_generation ? highest_generation.as_int() : 0).get();
auto stop_generator = deferred_stop(sharded_gen);

auto make_sstable = [&] (shard_id shard) {
auto& sstm = global_table->get_sstables_manager();
auto generation = sharded_gen.invoke_on(shard, [] (auto& gen) { return gen(); }).get();
bool uuid_sstable_identifiers = db.local().features().uuid_sstable_identifiers;
auto generation = sharded_gen.invoke_on(shard, [uuid_sstable_identifiers] (auto& gen) {
return gen(sstables::uuid_identifiers{uuid_sstable_identifiers});
}).get();
return sstm.make_sstable(global_table->schema(), global_table->get_storage_options(),
upload.native(), generation, sstm.get_highest_supported_format(),
sstables::sstable_format_types::big, gc_clock::now(), &error_handler_gen_for_upload_dir);
Expand Down Expand Up @@ -534,7 +536,7 @@ class table_populator {
fs::path _base_path;
std::unordered_map<sstring, lw_shared_ptr<sharded<sstables::sstable_directory>>> _sstable_directories;
sstables::sstable_version_types _highest_version = sstables::oldest_writable_sstable_format;
std::optional<sstables::generation_type> _highest_generation;
sstables::generation_type _highest_generation;

public:
table_populator(global_table_ptr ptr, distributed<replica::database>& db, sstring ks, sstring cf)
Expand Down Expand Up @@ -626,11 +628,7 @@ future<> table_populator::start_subdir(sstring subdir) {
auto generation = co_await highest_generation_seen(directory);

_highest_version = std::max(sst_version, _highest_version);
if (generation) {
_highest_generation = _highest_generation ?
std::max(*generation, *_highest_generation) :
*generation;
}
_highest_generation = std::max(generation, _highest_generation);
}

sstables::shared_sstable make_sstable(replica::table& table, fs::path dir, sstables::generation_type generation, sstables::sstable_version_types v) {
Expand Down
8 changes: 5 additions & 3 deletions replica/table.cc
Expand Up @@ -41,6 +41,7 @@
#include "utils/fb_utilities.hh"
#include "mutation/mutation_source_metadata.hh"
#include "gms/gossiper.hh"
#include "gms/feature_service.hh"
#include "db/config.hh"
#include "db/commitlog/commitlog.hh"
#include "utils/lister.hh"
Expand All @@ -66,8 +67,8 @@ static seastar::metrics::label keyspace_label("ks");

using namespace std::chrono_literals;

void table::update_sstables_known_generation(std::optional<sstables::generation_type> generation) {
auto gen = generation.value_or(sstables::generation_type(0)).as_int();
void table::update_sstables_known_generation(sstables::generation_type generation) {
auto gen = generation ? generation.as_int() : 0;
if (_sstable_generation_generator) {
_sstable_generation_generator->update_known_generation(gen);
} else {
Expand All @@ -82,7 +83,8 @@ sstables::generation_type table::calculate_generation_for_new_table() {
// See https://github.com/scylladb/scylladb/issues/10459
// for uuid-based sstable generation
assert(_sstable_generation_generator);
auto ret = std::invoke(*_sstable_generation_generator);
auto ret = std::invoke(*_sstable_generation_generator,
uuid_identifiers{_sstables_manager.uuid_sstable_identifiers()});
tlogger.debug("{}.{} new sstable generation {}", schema()->ks_name(), schema()->cf_name(), ret);
return ret;
}
Expand Down
132 changes: 115 additions & 17 deletions sstables/generation_type.hh
Expand Up @@ -15,9 +15,11 @@
#include <compare>
#include <limits>
#include <iostream>
#include <stdexcept>
#include <type_traits>
#include <boost/range/adaptors.hpp>
#include <seastar/core/on_internal_error.hh>
#include <boost/regex.hpp>
#include <seastar/core/smp.hh>
#include <seastar/core/sstring.hh>
#include "types/types.hh"
Expand All @@ -35,18 +37,24 @@ public:
private:
utils::UUID _value;

explicit constexpr generation_type(utils::UUID value) noexcept
: _value(value) {}

public:
generation_type() = delete;
// create an invalid sstable identifier
generation_type() = default;

// use zero as the timestamp to differentiate from the regular timeuuid,
// and use the least_sig_bits to encode the value of generation identifier.
explicit constexpr generation_type(int_t value) noexcept
: _value(utils::UUID_gen::create_time(std::chrono::milliseconds::zero()), value) {}
explicit constexpr generation_type(utils::UUID value) noexcept
: _value(value) {}
constexpr utils::UUID as_uuid() const noexcept {
if (_value.is_null() || _value.timestamp() == 0) {
on_internal_error(sstlog, "int generation used as a UUID ");
}
return _value;
}
constexpr int_t as_int() const noexcept {
if (_value.timestamp() != 0) {
if (_value.is_null() || _value.timestamp() != 0) {
on_internal_error(sstlog, "UUID generation used as an int");
}
return _value.get_least_significant_bits();
Expand All @@ -57,9 +65,31 @@ public:
ec == std::errc() && ptr == s.data() + s.size()) {
return generation_type(int_value);
} else {
throw std::invalid_argument(fmt::format("invalid UUID: {}", s));
static const boost::regex pattern("([0-9a-z]{4})_([0-9a-z]{4})_([0-9a-z]{5})([0-9a-z]{13})");
boost::smatch match;
if (!boost::regex_match(s, match, pattern)) {
throw std::invalid_argument(fmt::format("invalid UUID: {}", s));
}
utils::UUID_gen::decimicroseconds timestamp = {};
auto decode_base36 = [](const std::string& s) {
std::size_t pos{};
auto n = std::stoull(s, &pos, 36);
if (pos != s.size()) {
throw std::invalid_argument(fmt::format("invalid part in UUID: {}", s));
}
return n;
};
timestamp += std::chrono::days{decode_base36(match[1])};
timestamp += std::chrono::seconds{decode_base36(match[2])};
timestamp += ::utils::UUID_gen::decimicroseconds{decode_base36(match[3])};
int64_t lsb = decode_base36(match[4]);
return generation_type{utils::UUID_gen::get_time_UUID_raw(timestamp, lsb)};
}
}
// return true if the generation holds a valid id
explicit operator bool() const noexcept {
return bool(_value);
}
// convert to data_value
//
// this function is used when performing queries to SSTABLES_REGISTRY in
Expand All @@ -75,14 +105,27 @@ public:
explicit operator data_value() const noexcept {
return _value;
}
static generation_type from_uuid(utils::UUID value) {
// if the encoded value is an int64_t, the UUID's timestamp must be
// zero, and the least significant bits is used to encode the value
// of the int64_t.
assert(value.timestamp() == 0);
return generation_type(value);
constexpr bool is_uuid_based() const noexcept {
// if the value of generation_type is an int64_t, its timestamp
// must be zero, and the least significant bits are used to encode the
// value of the int64_t.
return _value.timestamp() != 0;
}
std::strong_ordering operator<=>(const generation_type& other) const noexcept {
if (bool(*this) && is_uuid_based() &&
bool(other) && other.is_uuid_based()) {
return this->_value <=> other._value;
}
int_t lhs = 0, rhs = 0;
if (bool(*this) && !is_uuid_based()) {
lhs = this->_value.get_least_significant_bits();
}
if (bool(other) && !other.is_uuid_based()) {
rhs = other._value.get_least_significant_bits();
}
return lhs <=> rhs;
}
std::strong_ordering operator<=>(const generation_type& other) const noexcept = default;
bool operator==(const generation_type& other) const noexcept = default;
};

constexpr generation_type generation_from_value(generation_type::int_t value) {
Expand All @@ -103,6 +146,7 @@ Target generations_from_values(std::initializer_list<generation_type::int_t> val
}));
}

using uuid_identifiers = bool_class<struct uuid_identifiers_tag>;
class sstable_generation_generator {
// We still want to do our best to keep the generation numbers shard-friendly.
// Each destination shard will manage its own generation counter.
Expand All @@ -124,7 +168,10 @@ public:
_last_generation = generation;
}
}
sstables::generation_type operator()() {
generation_type operator()(uuid_identifiers use_uuid = uuid_identifiers::no) {
if (use_uuid) {
return generation_type(utils::UUID_gen::get_time_UUID());
}
// each shard has its own "namespace", so we increment the generation id
// by smp::count to avoid sstable name conflicts
_last_generation += seastar::smp::count;
Expand All @@ -134,7 +181,14 @@ public:
/// way to determine that is overlapping its partition-ranges with the shard's
/// owned ranges.
static bool maybe_owned_by_this_shard(const sstables::generation_type& gen) {
return gen.as_int() % smp::count == seastar::this_shard_id();
assert(bool(gen));
int64_t hint = 0;
if (gen.is_uuid_based()) {
hint = std::hash<utils::UUID>{}(gen.as_uuid());
} else {
hint = gen.as_int();
}
return hint % smp::count == seastar::this_shard_id();
}
};

Expand All @@ -144,7 +198,11 @@ namespace std {
template <>
struct hash<sstables::generation_type> {
size_t operator()(const sstables::generation_type& generation) const noexcept {
return hash<sstables::generation_type::int_t>{}(generation.as_int());
if (generation.is_uuid_based()) {
return hash<utils::UUID>{}(generation.as_uuid());
} else {
return hash<int64_t>{}(generation.as_int());
}
}
};

Expand All @@ -164,6 +222,46 @@ template <>
struct fmt::formatter<sstables::generation_type> : fmt::formatter<std::string_view> {
template <typename FormatContext>
auto format(const sstables::generation_type& generation, FormatContext& ctx) const {
return fmt::format_to(ctx.out(), "{}", generation.as_int());
if (!generation) {
return fmt::format_to(ctx.out(), "-");
} else if (generation.is_uuid_based()) {
// format the uuid as 4 parts, each encoded as base36 chars. the first three
// parts are joined with "_", and the fourth is appended directly to the third.
//
// This matches the way Cassandra formats UUIDBasedSSTableId, although we
// don't have to. We just don't want to use "-" as the delimiter in the UUID, as
// "-" is already used to separate the different parts of an SSTable filename like
// "nb-1-big-Data.db".
const auto uuid = generation.as_uuid();
auto timestamp = ::utils::UUID_gen::decimicroseconds(uuid.timestamp());

char days_buf[4] = {};
auto days = std::chrono::duration_cast<std::chrono::days>(timestamp);
timestamp -= days;
char* days_end = std::to_chars(std::begin(days_buf), std::end(days_buf),
days.count(), 36).ptr;

char secs_buf[4] = {};
auto secs = std::chrono::duration_cast<std::chrono::seconds>(timestamp);
timestamp -= secs;
char* secs_end = std::to_chars(std::begin(secs_buf), std::end(secs_buf),
secs.count(), 36).ptr;

char decimicro_buf[5] = {};
char* decimicro_end = std::to_chars(std::begin(decimicro_buf), std::end(decimicro_buf),
timestamp.count(), 36).ptr;

char lsb_buf[13] = {};
char* lsb_end = std::to_chars(std::begin(lsb_buf), std::end(lsb_buf),
static_cast<uint64_t>(uuid.get_least_significant_bits()), 36).ptr;

return fmt::format_to(ctx.out(), "{:0>4}_{:0>4}_{:0>5}{:0>13}",
std::string_view(days_buf, days_end),
std::string_view(secs_buf, secs_end),
std::string_view(decimicro_buf, decimicro_end),
std::string_view(lsb_buf, lsb_end));
} else {
return fmt::format_to(ctx.out(), "{}", generation.as_int());
}
}
};

0 comments on commit d1dc579

Please sign in to comment.