diff --git a/docs/operating-scylla/admin-tools/scylla-sstable.rst b/docs/operating-scylla/admin-tools/scylla-sstable.rst index 95b26920a45c..e54410349e3a 100644 --- a/docs/operating-scylla/admin-tools/scylla-sstable.rst +++ b/docs/operating-scylla/admin-tools/scylla-sstable.rst @@ -512,6 +512,8 @@ The content is dumped in JSON, using the following schema: "above_threshold": Uint } +.. _scylla-sstable-validate-operation: + validate ^^^^^^^^ @@ -527,6 +529,25 @@ The following things are validated: Any errors found will be logged with error level to ``stderr``. +scrub +^^^^^ + +Rewrites the SStable, skipping or fixing corrupt parts. Not all kinds of corruption can be skipped or fixed by scrub. +It is limited to ordering issues on the partition, row, or mutation-fragment level. See `sstable content <scylla-sstable-sstable-content_>`_ for more details. + +Scrub has several modes: + +* **abort** - Aborts the scrub as soon as any error is found (recognized or not). This mode is only included for the sake of completeness. We recommend using the **validate** mode so that all errors are reported. +* **skip** - Skips over any corruptions found, thus omitting them from the output. Note that this mode can result in omitting more than is strictly necessary, but it guarantees that all detectable corruptions will be omitted. +* **segregate** - Fixes partition/row/mutation-fragment out-of-order errors by segregating the output into as many SStables as required so that the content of each output SStable is properly ordered. +* **validate** - Validates the content of the SStable, reporting any corruptions found. Writes no output SStables. In this mode, scrub has the same outcome as the `validate operation <scylla-sstable-validate-operation_>`_ - and the validate operation is recommended over scrub. + +Output SStables are written to the directory specified via ``--output-dir``. They will be written with the ``BIG`` format and the highest supported SStable version, with generations chosen by scylla-sstable. 
Generations are chosen such +that they are unique among the SStables written by the current scrub. + +The output directory must be empty; otherwise, scylla-sstable will abort scrub. You can allow writing to a non-empty directory by setting the ``--unsafe-accept-nonempty-output-dir`` command line flag. +Note that scrub will be aborted if an SStable cannot be written because its generation clashes with a pre-existing SStable in the output directory. + validate-checksums ^^^^^^^^^^^^^^^^^^ diff --git a/test/cql-pytest/conftest.py b/test/cql-pytest/conftest.py index 2106d596b2f6..53459b788745 100644 --- a/test/cql-pytest/conftest.py +++ b/test/cql-pytest/conftest.py @@ -36,6 +36,11 @@ def pytest_addoption(parser): help='CQL server port to connect to') parser.addoption('--ssl', action='store_true', help='Connect to CQL via an encrypted TLSv1.2 connection') + # Used by the wrapper script only, not by pytest, added here so it appears + # in --help output and so that pytest's argparser won't protest against its + # presence. + parser.addoption('--omit-scylla-output', action='store_true', + help='Omit scylla\'s output from the test output') # "cql" fixture: set up client object for communicating with the CQL API. 
# The host/port combination of the server are determined by the --host and diff --git a/test/cql-pytest/run b/test/cql-pytest/run index 9a9e031c9867..7e4391e51028 100755 --- a/test/cql-pytest/run +++ b/test/cql-pytest/run @@ -26,6 +26,12 @@ if '--raft' in sys.argv: run_with_raft.orig_cmd = cmd cmd = run_with_raft +if "-h" in sys.argv or "--help" in sys.argv: + run.run_pytest(sys.path[0], sys.argv) + exit(0) + +run.omit_scylla_output = "--omit-scylla-output" in sys.argv + pid = run.run_with_temporary_dir(cmd) ip = run.pid_to_ip(pid) diff --git a/test/cql-pytest/run.py b/test/cql-pytest/run.py index e69f1b09aa1b..44134e58cb80 100755 --- a/test/cql-pytest/run.py +++ b/test/cql-pytest/run.py @@ -118,10 +118,12 @@ def abort_run_with_dir(pid, tmpdir): def abort_run_with_temporary_dir(pid): return abort_run_with_dir(pid, pid_to_dir(pid)) +omit_scylla_output = False summary='' run_pytest_pids = set() def cleanup_all(): + global omit_scylla_output global summary global run_with_temporary_dir_pids global run_pytest_pids @@ -136,9 +138,10 @@ def cleanup_all(): pass for pid in run_with_temporary_dir_pids: f = abort_run_with_temporary_dir(pid) - print('\nSubprocess output:\n') - sys.stdout.flush() - shutil.copyfileobj(f, sys.stdout.buffer) + if not omit_scylla_output: + print('\nSubprocess output:\n') + sys.stdout.flush() + shutil.copyfileobj(f, sys.stdout.buffer) scylla_set = set() print(summary) diff --git a/test/cql-pytest/test_tools.py b/test/cql-pytest/test_tools.py index 6bf210a00db0..998916b051a7 100644 --- a/test/cql-pytest/test_tools.py +++ b/test/cql-pytest/test_tools.py @@ -15,6 +15,7 @@ import subprocess import tempfile import random +import re import shutil import util @@ -707,3 +708,142 @@ def test_external_dir_autodetect_conf_dir_home_env(self, scylla_path, system_scy ext_sstable, system_scylla_local_reference_dump, env={"SCYLLA_HOME": scylla_home_dir}) + + +@pytest.fixture(scope="module") +def scrub_schema_file(): + """Create a schema.cql for the scrub tests""" 
+ with tempfile.NamedTemporaryFile("w+t") as f: + f.write("CREATE TABLE ks.tbl (pk int, ck int, v text, PRIMARY KEY (pk, ck))") + f.flush() + yield f.name + + +@pytest.fixture(scope="module") +def scrub_good_sstable(scylla_path, scrub_schema_file): + """A good sstable used by the scrub tests.""" + with tempfile.TemporaryDirectory() as tmp_dir: + sst_json_path = os.path.join(tmp_dir, "sst.json") + with open(sst_json_path, "w") as f: + sst_json = [ + { + "key": { "raw": "0004000000c8" }, + "clustering_elements": [ + { "type": "clustering-row", "key": { "raw": "000400000001" }, "columns": { "v": { "is_live": True, "type": "regular", "timestamp": 1686815362417553, "value": "vv" } } } + ] + } + ] + json.dump(sst_json, f) + subprocess.check_call([scylla_path, "sstable", "write", "--schema-file", scrub_schema_file, "--output-dir", tmp_dir, "--generation", "1", "--input-file", sst_json_path]) + ssts = glob.glob(os.path.join(tmp_dir, "*-Data.db")) + assert len(ssts) == 1 + yield ssts[0] + + +@pytest.fixture(scope="module") +def scrub_bad_sstable(scylla_path, scrub_schema_file): + """A bad sstable (out-of-order rows) used by the scrub tests.""" + with tempfile.TemporaryDirectory() as tmp_dir: + sst_json_path = os.path.join(tmp_dir, "sst.json") + with open(sst_json_path, "w") as f: + # rows are out-of-order + sst_json = [ + { + "key": { "raw": "0004000000c8" }, + "clustering_elements": [ + { "type": "clustering-row", "key": { "raw": "000400000002" }, "columns": { "v": { "is_live": True, "type": "regular", "timestamp": 1686815362417553, "value": "vv" } } }, + { "type": "clustering-row", "key": { "raw": "000400000001" }, "columns": { "v": { "is_live": True, "type": "regular", "timestamp": 1686815362417553, "value": "vv" } } } + ] + } + ] + json.dump(sst_json, f) + subprocess.check_call([scylla_path, "sstable", "write", "--schema-file", scrub_schema_file, "--output-dir", tmp_dir, "--generation", "1", "--input-file", sst_json_path, "--validation-level", "none"]) + ssts = 
glob.glob(os.path.join(tmp_dir, "*-Data.db")) + assert len(ssts) == 1 + yield ssts[0] + + +def subprocess_check_error(args, pattern): + """Invoke subprocess.run() with the provided args and check that it fails with stderr matching the provided pattern.""" + res = subprocess.run(args, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + assert res.returncode != 0 + err = res.stderr.decode('utf-8') + assert re.search(pattern, err) is not None + + +def check_scrub_output_dir(sst_dir, num_sstables): + assert len(glob.glob(os.path.join(sst_dir, "*-Data.db"))) == num_sstables + + +def test_scrub_no_sstables(scylla_path, scrub_schema_file): + subprocess_check_error([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "validate"], "error processing arguments: no sstables specified on the command line") + + +def test_scrub_missing_scrub_mode_cli_arg(scylla_path, scrub_schema_file, scrub_bad_sstable, scrub_good_sstable): + subprocess_check_error([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, scrub_good_sstable], "error processing arguments: missing mandatory command-line argument --scrub-mode") + + +def test_scrub_output_dir(scylla_path, scrub_schema_file, scrub_good_sstable): + with tempfile.TemporaryDirectory() as tmp_dir: + # Empty output directory is accepted. + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "abort", "--output-dir", tmp_dir, scrub_good_sstable]) + + with tempfile.TemporaryDirectory() as tmp_dir: + with open(os.path.join(tmp_dir, "dummy.txt"), "w") as f: + f.write("dummy") + f.flush() + + # Non-empty output directory is rejected. 
+ subprocess_check_error([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "abort", "--output-dir", tmp_dir, scrub_good_sstable], "error processing arguments: output-directory is not empty, pass --unsafe-accept-nonempty-output-dir if you are sure you want to write into this directory\n") + + # Validate doesn't write output sstables, so it doesn't care if output dir is non-empty. + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "validate", "--output-dir", tmp_dir, scrub_good_sstable]) + + # Check that overriding with --unsafe-accept-nonempty-output-dir works. + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "abort", "--output-dir", tmp_dir, "--unsafe-accept-nonempty-output-dir", scrub_good_sstable]) + + +def test_scrub_output_dir_sstable_clash(scylla_path, scrub_schema_file, scrub_good_sstable): + with tempfile.TemporaryDirectory() as tmp_dir: + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "abort", "--output-dir", tmp_dir, "--unsafe-accept-nonempty-output-dir", scrub_good_sstable]) + check_scrub_output_dir(tmp_dir, 1) + subprocess_check_error([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "abort", "--output-dir", tmp_dir, "--unsafe-accept-nonempty-output-dir", scrub_good_sstable], "cannot create output sstable .*, file already exists") + + +def test_scrub_abort_mode(scylla_path, scrub_schema_file, scrub_good_sstable, scrub_bad_sstable): + with tempfile.TemporaryDirectory() as tmp_dir: + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "abort", "--output-dir", tmp_dir, scrub_good_sstable]) + check_scrub_output_dir(tmp_dir, 1) + + with tempfile.TemporaryDirectory() as tmp_dir: + subprocess_check_error([scylla_path, "sstable", "scrub", "--schema-file", 
scrub_schema_file, "--scrub-mode", "abort", "--output-dir", tmp_dir, scrub_bad_sstable], "compaction_aborted_exception \\(Compaction for ks/tbl was aborted due to: scrub compaction found invalid data\\)") + check_scrub_output_dir(tmp_dir, 0) + + +def test_scrub_skip_mode(scylla_path, scrub_schema_file, scrub_good_sstable, scrub_bad_sstable): + with tempfile.TemporaryDirectory() as tmp_dir: + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "skip", "--output-dir", tmp_dir, scrub_good_sstable]) + check_scrub_output_dir(tmp_dir, 1) + + with tempfile.TemporaryDirectory() as tmp_dir: + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "skip", "--output-dir", tmp_dir, scrub_bad_sstable]) + check_scrub_output_dir(tmp_dir, 1) + + +def test_scrub_segregate_mode(scylla_path, scrub_schema_file, scrub_good_sstable, scrub_bad_sstable): + with tempfile.TemporaryDirectory() as tmp_dir: + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "segregate", "--output-dir", tmp_dir, scrub_good_sstable]) + check_scrub_output_dir(tmp_dir, 1) + + with tempfile.TemporaryDirectory() as tmp_dir: + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "segregate", "--output-dir", tmp_dir, scrub_bad_sstable]) + check_scrub_output_dir(tmp_dir, 2) + + +def test_scrub_validate_mode(scylla_path, scrub_schema_file, scrub_good_sstable, scrub_bad_sstable): + with tempfile.TemporaryDirectory() as tmp_dir: + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "validate", "--output-dir", tmp_dir, scrub_good_sstable]) + check_scrub_output_dir(tmp_dir, 0) + + subprocess.check_call([scylla_path, "sstable", "scrub", "--schema-file", scrub_schema_file, "--scrub-mode", "validate", "--output-dir", tmp_dir, scrub_bad_sstable]) + 
check_scrub_output_dir(tmp_dir, 0) diff --git a/tools/scylla-sstable.cc b/tools/scylla-sstable.cc index f1292fd9bc11..b75a0caf4c9d 100644 --- a/tools/scylla-sstable.cc +++ b/tools/scylla-sstable.cc @@ -16,6 +16,8 @@ #include #include "compaction/compaction.hh" +#include "compaction/compaction_strategy.hh" +#include "compaction/compaction_strategy_state.hh" #include "db/config.hh" #include "db/large_data_handler.hh" #include "gms/feature_service.hh" @@ -134,7 +136,7 @@ std::pair get_keyspace_and_table_options(const bpo::variables_ auto k_it = app_config.find("keyspace"); auto t_it = app_config.find("table"); if (k_it == app_config.end() || t_it == app_config.end()) { - throw std::runtime_error("don't know which schema to load: --keyspace and/or --table are not provided"); + throw std::invalid_argument("don't know which schema to load: --keyspace and/or --table are not provided"); } return std::pair(k_it->second.as(), t_it->second.as()); } @@ -163,11 +165,11 @@ schema_ptr try_load_schema_from_user_provided_source(const bpo::variables_map& a return tools::load_schema_from_schema_tables(std::filesystem::path(cfg.data_file_directories()[0]), keyspace_name, table_name).get(); } } catch (...) { - fmt::print(std::cerr, "error: could not load schema via {}: {}\n", schema_source_opt, std::current_exception()); + fmt::print(std::cerr, "error processing arguments: could not load schema via {}: {}\n", schema_source_opt, std::current_exception()); return nullptr; } // Should not happen, but if it does (we all know it will), let's at least have a message printed. 
- fmt::print(std::cerr, "error: could not load schema from known schema sources: unknown error\n"); + fmt::print(std::cerr, "error processing arguments: could not load schema from known schema sources: unknown error\n"); return nullptr; } @@ -235,10 +237,10 @@ const std::vector load_sstables(schema_ptr schema, sst if (const auto ftype_opt = co_await file_type(sst_path.c_str(), follow_symlink::yes)) { if (!ftype_opt) { - throw std::invalid_argument(fmt::format("error: failed to determine type of file pointed to by provided sstable path {}", sst_path.c_str())); + throw std::invalid_argument(fmt::format("failed to determine type of file pointed to by provided sstable path {}", sst_path.c_str())); } if (*ftype_opt != directory_entry_type::regular) { - throw std::invalid_argument(fmt::format("error: file pointed to by provided sstable path {} is not a regular file", sst_path.c_str())); + throw std::invalid_argument(fmt::format("file pointed to by provided sstable path {} is not a regular file", sst_path.c_str())); } } @@ -293,7 +295,7 @@ output_format get_output_format_from_options(const bpo::variables_map& opts, out } else if (value == "json") { return output_format::json; } else { - throw std::invalid_argument(fmt::format("error: invalid value for dump option output-format: {}", value)); + throw std::invalid_argument(fmt::format("invalid value for dump option output-format: {}", value)); } } return default_format; @@ -766,7 +768,7 @@ class writetime_histogram_collecting_consumer : public sstable_consumer { } else if (value == "hours") { _bucket = bucket::hours; } else { - throw std::invalid_argument(fmt::format("error: invalid value for writetime-histogram option bucket: {}", value)); + throw std::invalid_argument(fmt::format("invalid value for writetime-histogram option bucket: {}", value)); } } } @@ -912,6 +914,94 @@ void consume_sstables(schema_ptr schema, reader_permit permit, std::vector old_ssts, std::vector new_ssts) override { } + virtual double backlog(const 
compaction_backlog_tracker::ongoing_writes& ow, const compaction_backlog_tracker::ongoing_compactions& oc) const override { return 0.0; } + }; + +private: + schema_ptr _schema; + reader_permit _permit; + sstables::sstables_manager& _sst_man; + std::string _output_dir; + sstables::sstable_set _main_set; + sstables::sstable_set _maintenance_set; + std::vector _compacted_undeleted_sstables; + mutable sstables::compaction_strategy _compaction_strategy; + compaction_strategy_state _compaction_strategy_state; + tombstone_gc_state _tombstone_gc_state; + compaction_backlog_tracker _backlog_tracker; + std::string _group_id; + condition_variable _staging_done_condition; + mutable sstable_generation_generator _generation_generator; + +private: + sstables::shared_sstable do_make_sstable() const { + const auto format = sstables::sstable_format_types::big; + const auto version = sstables::get_highest_sstable_version(); + auto generation = _generation_generator(); + auto sst_name = sstables::sstable::filename(_output_dir, _schema->ks_name(), _schema->cf_name(), version, generation, format, component_type::Data); + if (file_exists(sst_name).get()) { + throw std::runtime_error(fmt::format("cannot create output sstable {}, file already exists", sst_name)); + } + data_dictionary::storage_options local; + return _sst_man.make_sstable(_schema, local, _output_dir, generation, version, format); + } + sstables::sstable_writer_config do_configure_writer(sstring origin) const { + return _sst_man.configure_writer(std::move(origin)); + } + +public: + scylla_sstable_table_state(schema_ptr schema, reader_permit permit, sstables::sstables_manager& sst_man, std::string output_dir) + : _schema(std::move(schema)) + , _permit(std::move(permit)) + , _sst_man(sst_man) + , _output_dir(std::move(output_dir)) + , _main_set(sstables::make_partitioned_sstable_set(_schema, false)) + , _maintenance_set(sstables::make_partitioned_sstable_set(_schema, false)) + , 
_compaction_strategy(compaction::make_compaction_strategy(_schema->compaction_strategy(), _schema->compaction_strategy_options())) + , _compaction_strategy_state(compaction::compaction_strategy_state::make(_compaction_strategy)) + , _tombstone_gc_state(nullptr) + , _backlog_tracker(std::make_unique()) + , _group_id("dummy-group") + , _generation_generator(0) + { } + virtual const schema_ptr& schema() const noexcept override { return _schema; } + virtual unsigned min_compaction_threshold() const noexcept override { return _schema->min_compaction_threshold(); } + virtual bool compaction_enforce_min_threshold() const noexcept override { return false; } + virtual const sstables::sstable_set& main_sstable_set() const override { return _main_set; } + virtual const sstables::sstable_set& maintenance_sstable_set() const override { return _maintenance_set; } + virtual std::unordered_set fully_expired_sstables(const std::vector& sstables, gc_clock::time_point compaction_time) const override { return {}; } + virtual const std::vector& compacted_undeleted_sstables() const noexcept override { return _compacted_undeleted_sstables; } + virtual sstables::compaction_strategy& get_compaction_strategy() const noexcept override { return _compaction_strategy; } + virtual compaction_strategy_state& get_compaction_strategy_state() noexcept override { return _compaction_strategy_state; } + virtual reader_permit make_compaction_reader_permit() const override { return _permit; } + virtual sstables::sstables_manager& get_sstables_manager() noexcept override { return _sst_man; } + virtual sstables::shared_sstable make_sstable() const override { return do_make_sstable(); } + virtual sstables::sstable_writer_config configure_writer(sstring origin) const override { return do_configure_writer(std::move(origin)); } + virtual api::timestamp_type min_memtable_timestamp() const override { return api::min_timestamp; } + virtual future<> on_compaction_completion(sstables::compaction_completion_desc 
desc, sstables::offstrategy offstrategy) override { return make_ready_future<>(); } + virtual bool is_auto_compaction_disabled_by_user() const noexcept override { return false; } + virtual bool tombstone_gc_enabled() const noexcept override { return false; } + virtual const tombstone_gc_state& get_tombstone_gc_state() const noexcept override { return _tombstone_gc_state; } + virtual compaction_backlog_tracker& get_backlog_tracker() override { return _backlog_tracker; } + virtual const std::string& get_group_id() const noexcept override { return _group_id; } + virtual seastar::condition_variable& get_staging_done_condition() noexcept override { return _staging_done_condition; } +}; + +void validate_output_dir(std::filesystem::path output_dir, bool accept_nonempty_output_dir) { + auto fd = open_file_dma(output_dir.native(), open_flags::ro).get(); + unsigned entries = 0; + fd.list_directory([&entries] (directory_entry) { + ++entries; + return make_ready_future<>(); + }).done().get(); + if (entries && !accept_nonempty_output_dir) { + throw std::invalid_argument("output-directory is not empty, pass --unsafe-accept-nonempty-output-dir if you are sure you want to write into this directory"); + } +} + using operation_func = void(*)(schema_ptr, reader_permit, const std::vector&, sstables::sstables_manager&, const bpo::variables_map&); class operation { @@ -943,7 +1033,7 @@ class operation { void validate_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& sst_man, const bpo::variables_map& vm) { if (sstables.empty()) { - throw std::runtime_error("error: no sstables specified on the command line"); + throw std::invalid_argument("no sstables specified on the command line"); } abort_source abort; @@ -953,10 +1043,53 @@ void validate_operation(schema_ptr schema, reader_permit permit, const std::vect } } +void scrub_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, + sstables::sstables_manager& 
sst_man, const bpo::variables_map& vm) { + static const std::vector> scrub_modes{ + {"abort", compaction_type_options::scrub::mode::abort}, + {"skip", compaction_type_options::scrub::mode::skip}, + {"segregate", compaction_type_options::scrub::mode::segregate}, + {"validate", compaction_type_options::scrub::mode::validate}, + }; + + if (sstables.empty()) { + throw std::invalid_argument("no sstables specified on the command line"); + } + compaction_type_options::scrub::mode scrub_mode; + { + if (!vm.count("scrub-mode")) { + throw std::invalid_argument("missing mandatory command-line argument --scrub-mode"); + } + const auto mode_name = vm["scrub-mode"].as(); + auto mode_it = boost::find_if(scrub_modes, [&mode_name] (const std::pair& v) { + return v.first == mode_name; + }); + if (mode_it == scrub_modes.end()) { + throw std::invalid_argument(fmt::format("invalid scrub-mode: {}", mode_name)); + } + scrub_mode = mode_it->second; + } + auto output_dir = vm["output-dir"].as(); + if (scrub_mode != compaction_type_options::scrub::mode::validate) { + validate_output_dir(output_dir, vm.count("unsafe-accept-nonempty-output-dir")); + } + + scylla_sstable_table_state table_state(schema, permit, sst_man, output_dir); + + auto compaction_descriptor = sstables::compaction_descriptor(std::move(sstables)); + compaction_descriptor.options = sstables::compaction_type_options::make_scrub(scrub_mode); + compaction_descriptor.creator = [&table_state] (shard_id) { return table_state.make_sstable(); }; + compaction_descriptor.replacer = [] (sstables::compaction_completion_desc) { }; + + auto compaction_data = sstables::compaction_data{}; + + sstables::compact_sstables(std::move(compaction_descriptor), compaction_data, table_state).get(); +} + void dump_index_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& sst_man, const bpo::variables_map&) { if (sstables.empty()) { - throw std::runtime_error("error: no sstables specified on the 
command line"); + throw std::invalid_argument("no sstables specified on the command line"); } json_writer writer; @@ -995,7 +1128,7 @@ sstring disk_string_to_string(const sstables::disk_string& ds) { void dump_compression_info_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& sst_man, const bpo::variables_map&) { if (sstables.empty()) { - throw std::runtime_error("error: no sstables specified on the command line"); + throw std::invalid_argument("no sstables specified on the command line"); } json_writer writer; @@ -1033,7 +1166,7 @@ void dump_compression_info_operation(schema_ptr schema, reader_permit permit, co void dump_summary_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& sst_man, const bpo::variables_map&) { if (sstables.empty()) { - throw std::runtime_error("error: no sstables specified on the command line"); + throw std::invalid_argument("no sstables specified on the command line"); } json_writer writer; @@ -1304,7 +1437,7 @@ void dump_serialization_header(json_writer& writer, sstables::sstable_version_ty void dump_statistics_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& sst_man, const bpo::variables_map&) { if (sstables.empty()) { - throw std::runtime_error("error: no sstables specified on the command line"); + throw std::invalid_argument("no sstables specified on the command line"); } auto to_string = [] (sstables::metadata_type t) { @@ -1476,7 +1609,7 @@ class scylla_metadata_visitor : public boost::static_visitor<> { void dump_scylla_metadata_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& sst_man, const bpo::variables_map&) { if (sstables.empty()) { - throw std::runtime_error("error: no sstables specified on the command line"); + throw std::invalid_argument("no sstables specified on the command line"); } json_writer 
writer; @@ -1500,7 +1633,7 @@ void dump_scylla_metadata_operation(schema_ptr schema, reader_permit permit, con void validate_checksums_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& sst_man, const bpo::variables_map&) { if (sstables.empty()) { - throw std::runtime_error("error: no sstables specified on the command line"); + throw std::invalid_argument("no sstables specified on the command line"); } for (auto& sst : sstables) { @@ -1512,7 +1645,7 @@ void validate_checksums_operation(schema_ptr schema, reader_permit permit, const void decompress_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& sst_man, const bpo::variables_map& vm) { if (sstables.empty()) { - throw std::runtime_error("error: no sstables specified on the command line"); + throw std::invalid_argument("no sstables specified on the command line"); } for (const auto& sst : sstables) { @@ -2413,16 +2546,17 @@ class json_mutation_stream_parser { void write_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& manager, const bpo::variables_map& vm) { static const std::vector> valid_validation_levels{ + {"none", mutation_fragment_stream_validation_level::none}, {"partition_region", mutation_fragment_stream_validation_level::partition_region}, {"token", mutation_fragment_stream_validation_level::token}, {"partition_key", mutation_fragment_stream_validation_level::partition_key}, {"clustering_key", mutation_fragment_stream_validation_level::clustering_key}, }; if (!sstables.empty()) { - throw std::invalid_argument("error: write operation does not operate on input sstables"); + throw std::invalid_argument("write operation does not operate on input sstables"); } if (!vm.count("input-file")) { - throw std::invalid_argument("error: missing required option '--input-file'"); + throw std::invalid_argument("missing required option '--input-file'"); 
} mutation_fragment_stream_validation_level validation_level; { @@ -2431,14 +2565,14 @@ void write_operation(schema_ptr schema, reader_permit permit, const std::vector< return v.first == vl_name; }); if (vl_it == valid_validation_levels.end()) { - throw std::invalid_argument(fmt::format("error: invalid validation-level {}", vl_name)); + throw std::invalid_argument(fmt::format("invalid validation-level {}", vl_name)); } validation_level = vl_it->second; } auto input_file = vm["input-file"].as(); auto output_dir = vm["output-dir"].as(); if (!vm.count("generation")) { - throw std::invalid_argument("error: missing required option '--generation'"); + throw std::invalid_argument("missing required option '--generation'"); } auto generation = sstables::generation_type(vm["generation"].as()); auto format = sstables::sstable_format_types::big; @@ -2447,7 +2581,7 @@ void write_operation(schema_ptr schema, reader_permit permit, const std::vector< { auto sst_name = sstables::sstable::filename(output_dir, schema->ks_name(), schema->cf_name(), version, generation, format, component_type::Data); if (file_exists(sst_name).get()) { - throw std::runtime_error(fmt::format("error: cannot create output sstable {}, file already exists", sst_name)); + throw std::invalid_argument(fmt::format("cannot create output sstable {}, file already exists", sst_name)); } } @@ -2466,12 +2600,12 @@ void write_operation(schema_ptr schema, reader_permit permit, const std::vector< void script_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& manager, const bpo::variables_map& vm) { if (sstables.empty()) { - throw std::runtime_error("error: no sstables specified on the command line"); + throw std::invalid_argument("no sstables specified on the command line"); } const auto merge = vm.count("merge"); const auto partitions = partition_set(0, {}, decorated_key_equal(*schema)); if (!vm.count("script-file")) { - throw std::invalid_argument("error: missing 
required option '--script-file'"); + throw std::invalid_argument("missing required option '--script-file'"); } const auto script_file = vm["script-file"].as(); auto script_params = vm["script-arg"].as(); @@ -2487,7 +2621,7 @@ template void sstable_consumer_operation(schema_ptr schema, reader_permit permit, const std::vector& sstables, sstables::sstables_manager& sst_man, const bpo::variables_map& vm) { if (sstables.empty()) { - throw std::runtime_error("error: no sstables specified on the command line"); + throw std::invalid_argument("no sstables specified on the command line"); } const auto merge = vm.count("merge"); const auto no_skips = vm.count("no-skips"); @@ -2538,19 +2672,19 @@ class typed_option : public basic_option { typed_option(const char* name, const char* description) : basic_option(name, description) { } }; -class option { +class operation_option { shared_ptr _opt; // need copy to support convenient range declaration of std::vector