diff --git a/modules/util/dump/ddl_dumper_options.cc b/modules/util/dump/ddl_dumper_options.cc index aca6d5e6d..95aad0743 100644 --- a/modules/util/dump/ddl_dumper_options.cc +++ b/modules/util/dump/ddl_dumper_options.cc @@ -90,6 +90,10 @@ const shcore::Option_pack_def .optional("checksum", &Ddl_dumper_options::m_checksum) .optional("lakehouseTarget", &Ddl_dumper_options::set_lakehouse_target) + .optional("maxKeyPrefixLength", + &Ddl_dumper_options::set_max_key_prefix_len) + .optional("adaptiveStepStrategy", + &Ddl_dumper_options::set_adaptive_step_strategy) .on_done(&Ddl_dumper_options::on_unpacked_options); return opts; @@ -207,6 +211,42 @@ void Ddl_dumper_options::set_threads(uint64_t threads) { m_worker_threads = threads; } +void Ddl_dumper_options::set_max_key_prefix_len(const size_t &value) { + if (!split()) { + throw std::invalid_argument( + "The option 'maxKeyPrefixLength' cannot be used if the 'chunking' " + "option is set to false."); + } + + m_max_key_prefix_len = value; +} + +static AdaptiveStepStrategy to_adaptive_step_strategy( + const std::string &option) { + if (option == "enhanced") { + return AdaptiveStepStrategy::ENHANCED; + } else if (option == "original") { + return AdaptiveStepStrategy::ORIGINAL; + } + throw std::invalid_argument( + "Invalid value for 'adaptiveStepStrategy' option: " + option); +} + +void Ddl_dumper_options::set_adaptive_step_strategy(const std::string &value) { + if (value.empty()) { + throw std::invalid_argument( + "The option 'adaptiveStepStrategy' cannot be set to an empty string."); + } + + if (!split()) { + throw std::invalid_argument( + "The option 'adaptiveStepStrategy' cannot be used if the 'chunking' " + "option is set to false."); + } + + m_adaptive_step_strategy = to_adaptive_step_strategy(value); +} + void Ddl_dumper_options::on_set_url( const std::string &url, Storage_type storage, const mysqlshdk::storage::Config_ptr &config) { diff --git a/modules/util/dump/ddl_dumper_options.h b/modules/util/dump/ddl_dumper_options.h index 8b185b19d..302ed7378 100644 --- a/modules/util/dump/ddl_dumper_options.h +++ b/modules/util/dump/ddl_dumper_options.h @@ -81,6 +81,12 @@ class Ddl_dumper_options : public Dump_options { bool checksum() const override { return m_checksum; } + AdaptiveStepStrategy adaptive_step_strategy() const override { + return m_adaptive_step_strategy; + } + + size_t max_key_prefix_length() const override { return m_max_key_prefix_len; } + void enable_mds_compatibility_checks(); using Dump_options::set_compatibility_option; @@ -106,6 +112,8 @@ class Ddl_dumper_options : public Dump_options { void set_target_version_str(const std::string &value); void set_dry_run(bool dry_run); void set_threads(uint64_t threads); + void set_max_key_prefix_len(const size_t &value); + void set_adaptive_step_strategy(const std::string &value); bool m_split = true; uint64_t m_bytes_per_chunk; @@ -126,6 +134,12 @@ class Ddl_dumper_options : public Dump_options { bool m_skip_consistency_checks = false; bool m_skip_upgrade_checks = false; bool m_checksum = false; + + // max nesting depth for composite keys + size_t m_max_key_prefix_len = 1; + + AdaptiveStepStrategy m_adaptive_step_strategy = + AdaptiveStepStrategy::ORIGINAL; }; } // namespace dump diff --git a/modules/util/dump/decimal.cc b/modules/util/dump/decimal.cc index 967bcd38d..407b5b0eb 100644 --- a/modules/util/dump/decimal.cc +++ b/modules/util/dump/decimal.cc @@ -48,6 +48,8 @@ Decimal::Decimal(const std::string &d) { } } +Decimal::Decimal(std::int64_t value) : m_decimal(value) {} + std::string Decimal::to_string() const { auto str = m_decimal.to_string(); diff --git a/modules/util/dump/decimal.h b/modules/util/dump/decimal.h index 2d521d6db..de16af4ac 100644 --- a/modules/util/dump/decimal.h +++ b/modules/util/dump/decimal.h @@ -51,6 +51,9 @@ class Decimal final { public: explicit Decimal(const std::string &d); + /** Integer value with no fractional digits (e.g. generic chunking code). */ + explicit Decimal(std::int64_t value); + Decimal(const Decimal &other) = default; Decimal(Decimal &&other) = default; @@ -77,6 +80,11 @@ class Decimal final { Decimal &operator*=(const Decimal &rhs); Decimal &operator/=(const Decimal &rhs); + inline Decimal &operator=(shcore::Bignum rhs) { + m_decimal = shcore::Bignum(rhs); + return *this; + } + inline Decimal &operator+=(shcore::Bignum rhs) { *this += convert(std::move(rhs)); return *this; diff --git a/modules/util/dump/dump_options.cc b/modules/util/dump/dump_options.cc index 40af58caa..0f09007be 100644 --- a/modules/util/dump/dump_options.cc +++ b/modules/util/dump/dump_options.cc @@ -87,7 +87,6 @@ void Dump_options::set_string_option(const std::string &option, throw std::invalid_argument( "The option 'compression' cannot be set to an empty string."); } - m_compression = mysqlshdk::storage::to_compression(value, &m_compression_options); } else { diff --git a/modules/util/dump/dump_options.h b/modules/util/dump/dump_options.h index edd54cb97..fa6799c42 100644 --- a/modules/util/dump/dump_options.h +++ b/modules/util/dump/dump_options.h @@ -57,6 +57,11 @@ enum class Dry_run { DONT_WRITE_ANY_FILES, }; +enum class AdaptiveStepStrategy { + ORIGINAL, + ENHANCED, +}; + class Dump_options : public mysqlsh::common::Common_options { public: using Filtering_options = mysqlshdk::db::Filtering_options; @@ -158,6 +163,10 @@ class Dump_options : public mysqlsh::common::Common_options { void set_report_dump_option(bool value) { m_report_dump_option = value; } + virtual AdaptiveStepStrategy adaptive_step_strategy() const = 0; + + virtual size_t max_key_prefix_length() const = 0; + virtual bool split() const = 0; virtual uint64_t bytes_per_chunk() const = 0; diff --git a/modules/util/dump/dumper.cc b/modules/util/dump/dumper.cc index 39e3e7a35..d1eef6169 100644 --- a/modules/util/dump/dumper.cc +++ b/modules/util/dump/dumper.cc @@ -44,6 +44,7 @@ #include #include +#include "dump_options.h" #include "mysqlshdk/include/scripting/shexcept.h" #include "mysqlshdk/include/shellcore/console.h" #include "mysqlshdk/include/shellcore/shell_init.h" @@ -189,6 +190,19 @@ std::string quote(const std::string &schema, const std::string &table) { shcore::quote_identifier(table); } +// Returns a `FORCE INDEX (...)` clause (with leading space) that pins the query +// to the index used by the chunker, or an empty string when no index is in use. +// Used by the EXPLAIN COUNT(*) probes in `adaptive_step_v2` for the integer +// chunking path, to stabilize optimizer row-count estimates - without it, the +// optimizer may pick a different access path (e.g. a `ref` lookup on a shorter +// index that ignores the BETWEEN predicate on a later key part), making the +// EXPLAIN row counts non-monotonic in the range width and breaking the binary +// chop loop. +std::string force_index_clause(const Instance_cache::Index *index) { + if (!index) return {}; + return " FORCE INDEX (" + index->quoted_name() + ")"; +} + Row fetch_row(const mysqlshdk::db::IRow *row) { Row result; @@ -782,6 +796,14 @@ void Dumper::Output_config::create_directory() { void Dumper::Output_config::fini() { dir.reset(); } +template +std::string to_string(const T &value) { + return std::to_string(value); +} +std::string to_string(const Decimal &value) { return value.to_string(); } + +#define V2S(x) to_string(x).c_str() + class Dumper::Table_worker final { public: enum class Exception_strategy { ABORT, CONTINUE }; @@ -1236,6 +1258,9 @@ class Dumper::Table_worker final { std::string order_by; std::string order_by_desc; std::size_t index_column; + std::size_t &ranges_counter; + + Chunking_info(std::size_t &counter) : ranges_counter(counter) {} }; static std::string compare(const Chunking_info &info, const Row &value, @@ -1388,14 +1413,24 @@ class Dumper::Table_worker final { } template - static T constant_step(const T & /* from */, const T &step) { - return step; + static T mul(uint64_t value1, const T &value2) { + if (value1 == 0 || value2 == 0) return T(0); + + using R = std::common_type_t; + + R v1 = static_cast(value1); + R v2 = static_cast(value2); + R max = static_cast(std::numeric_limits::max()); + + if (v1 > max / v2) return std::numeric_limits::max(); + + return static_cast(v1 * v2); } template T adaptive_step(const T &from, const T &step, const T &max, - const Chunking_info &info, - const std::string &chunk_id) const { + const Chunking_info &info, const std::string &chunk_id, + uint64_t *rows_cnt, bool *use_returned_cnt) const { static constexpr int k_chunker_retries = 10; static constexpr int k_chunker_iterations = 20; @@ -1408,6 +1443,8 @@ class Dumper::Table_worker final { int retry = 0; uint64_t delta = info.accuracy + 1; + *use_returned_cnt = false; + const auto row_count = [&info, &comment, this](const auto begin, const auto end) { return row_count_from_explain( @@ -1424,6 +1461,7 @@ class Dumper::Table_worker final { if (max - retry * double_step <= from) { // if left boundary is greater than max, stop here middle = max; + *use_returned_cnt = true; break; } @@ -1465,6 +1503,7 @@ class Dumper::Table_worker final { if (delta <= info.accuracy) { // we're close enough + *use_returned_cnt = true; break; } } @@ -1486,13 +1525,331 @@ class Dumper::Table_worker final { } } + *rows_cnt = rows; return ensure_not_zero(middle - from); } + template + static T constant_step(const T &, const T &step_hint, const T & /*max*/, + const Chunking_info &info, const std::string &, + uint64_t *rows_cnt, bool *) { + *rows_cnt = info.rows_per_chunk; + return step_hint; + } + +#define DBG_STEP(x) // x +#define DBG(x) // x +#define DBG_GLUE(x) // x + + template + class IGluer { + public: + struct GlueResult { + T begin; + T end; + uint64_t rows_cnt; + bool empty; + bool flushed; + + GlueResult(T b, T e, uint64_t r, bool em, bool f) + : begin(b), end(e), rows_cnt(r), empty(em), flushed(f) {} + GlueResult() + : begin(0), end(0), rows_cnt(0), empty(true), flushed(false) {} + }; + + virtual ~IGluer() {} + virtual GlueResult flush() = 0; + virtual GlueResult glue(T begin, T end, uint64_t rows_cnt, bool last) = 0; + }; + + template + class DummyGluer : public IGluer { + public: + DummyGluer() {} + + virtual IGluer::GlueResult flush() override { + typename IGluer::GlueResult res; + return res; + } + + virtual IGluer::GlueResult glue(T begin, T end, uint64_t rows_cnt, + bool) override { + typename IGluer::GlueResult res(begin, end, rows_cnt, false, true); + return res; + } + }; + + template + class Gluer : public IGluer { + public: + Gluer(uint64_t max_rows_cnt) + : begin_(0), + end_(0), + rows_cnt_(0), + max_rows_cnt_(max_rows_cnt), + zero_result_(), + empty_(true) {} + + virtual ~Gluer() { + DBG_GLUE(if (!empty_ || rows_cnt_ > 0 || begin_ > 0 || end_ > 0) { + log_debug( + "~Gluer() destroying nonepty Gluer! (%s - %s) rows: %ld, " + "empty?: %d", + V2S(begin_), V2S(end_), rows_cnt_, empty_); + }) + + // Before Gluer deletion, it has to be empty (and flushed). + assert(empty_ && rows_cnt_ == 0 && begin_ == 0 && end_ == 0); + } + + virtual typename IGluer::GlueResult flush() override { + DBG_GLUE(log_debug("Gluer::flush() (%s - %s) rows: %ld, empty?: %d", + V2S(begin_), V2S(end_), rows_cnt_, empty_);) + typename IGluer::GlueResult res(begin_, end_, rows_cnt_, empty_, true); + begin_ = end_ = 0; + rows_cnt_ = 0; + empty_ = true; + return res; + } + + virtual typename IGluer::GlueResult glue(T begin, T end, + uint64_t rows_cnt, + bool last) override { + DBG_GLUE( + log_debug("Gluer::glue() %s - %s, rows: %ld, last?: %d (current: " + "%s - %s, %ld)", + V2S(begin), V2S(end), rows_cnt, last, V2S(begin_), + V2S(end_), rows_cnt_);) + if (empty_) { + // Whatever is the chunk, wait for the next one to decide + DBG_GLUE(log_debug("Gluer::glue() - Gluer empty, starting new glue");) + begin_ = begin; + end_ = end; + rows_cnt_ += rows_cnt; + empty_ = false; + } else { + // We have some rows accumulated + + // If we decided to glue, the next chunk needs to follow what we already + // accumulated + assert(begin == end_ + 1); + DBG_GLUE(if (begin != end_ + 1) { + log_debug("Gluer::glue() inconsistency detected! end_: %s, begin: %s", + V2S(end_), V2S(begin)); + }) + + if (rows_cnt_ > max_rows_cnt_) { + // we are ready to flush, just wait for good conditions + if (rows_cnt > max_rows_cnt_ / 2) { + // if the upcoming chunk is a good candidate to start new glue, + // return the current glue and remember this chunk + DBG_GLUE(log_debug("Gluer::glue() - return current, start new");) + typename IGluer::GlueResult res(begin_, end_, rows_cnt_, empty_, + false); + begin_ = begin; + end_ = end; + rows_cnt_ = rows_cnt; + return res; + } else { + // glue it + DBG_GLUE(log_debug("Gluer::glue() - ready to flush but decided to " + "glue. rows: %ld", + rows_cnt_);) + end_ = end; + rows_cnt_ += rows_cnt; + } + } else { + // we didn't accumulate enough rows yet + // glue it + DBG_GLUE(log_debug("Gluer::glue() - gluing");) + end_ = end; + rows_cnt_ += rows_cnt; + } + + static constexpr uint64_t flush_threshold_multiplier = 3; + if (rows_cnt_ > flush_threshold_multiplier * max_rows_cnt_ || last) { + // flush gluer if we accumulated a lot of chunks or this is the last + // one + DBG_GLUE(log_debug("Gluer::glue() - Gluer full. rows: %ld, last?: %d", + rows_cnt_, last);) + return flush(); + } + } + + if (last) { + // flush last chunk + DBG_GLUE( + log_debug( + "Gluer::glue() - Last chunk. Flushing. rows: %ld, last?: %d", + rows_cnt_, last);) + return flush(); + } + + // The chunk was accumulated + DBG_GLUE(log_debug("Gluer::glue() - accumulated %s - %s, rows: %ld", + V2S(begin_), V2S(end_), rows_cnt_);) + return zero_result_; + } + + private: + T begin_; + T end_; + uint64_t rows_cnt_; + uint64_t max_rows_cnt_; + IGluer::GlueResult zero_result_; + bool empty_; + }; + + template + T adaptive_step_v2(const T &from, const T &step_hint, const T &max, + const Chunking_info &info, const std::string &chunk_id, + uint64_t *rows_cnt, bool *use_returned_cnt) const { + auto rows = info.rows_per_chunk; + const auto comment = this->get_query_comment(*info.table, chunk_id); + + uint64_t retry = 0; + uint64_t delta = info.accuracy + 1; + + const auto row_count = [&info, &comment, this](const auto begin, + const auto end) { + return row_count_from_explain( + query("EXPLAIN FORMAT=JSON SELECT " + + m_dumper->optimizer_hints(info.table->info) + "COUNT(*) FROM " + + info.table->quoted_name + info.partition + + force_index_clause(info.table->index.info) + + where(*info.table, between(info, begin, end)) + info.order_by + + comment) + ->fetch_one_or_throw() + ->get_as_string(0)); + }; + + auto right = from; + + *use_returned_cnt = false; + + /* Phase 1: Linear expansion. + Expand the range until we have enough rows or we reach the maximum range. + */ + while (true) { + // Calculate the current search range + right = sum(from, mul((retry + 1), step_hint)); + right = std::min(right, max); + + DBG_STEP(log_debug( + "Nest level: %ld, retry: %ld searching range: %s - %s (%s), " + "step_hint: %s", + info.index_column, retry, V2S(from), V2S(right), + V2S(right - from), V2S(step_hint));) + + // check if there is enough rows in currently checked range + rows = row_count(from, right); + DBG_STEP(log_debug("Nest level: %ld, rows: %ld in range: %s - %s", + info.index_column, rows, V2S(from), V2S(right));) + if (rows >= info.rows_per_chunk) { + // We have enough rows, no need to expand the range, move to the next + // phase + break; + } + if (right >= max) { + // We have reached the maximum range, stop here. + break; + } + // We didn't find enough rows in this range, move farther to the right + retry++; + } + + // Phase 2: Binary chop - shrinking (if needed) + if (rows > info.rows_per_chunk + info.accuracy) { + /* We have too much rows. + The previous step produced the range that contains too much rows. + Here we will try to shrink it by doing a binary search in therange. + We may eventually end up with the range of 1 and still too much rows. + We will return this range and the caller will try to chunk by the next + keypart if possible. */ + + DBG_STEP(log_debug("Nest level: %ld, rows: %ld Trying to chop the last " + "range: %s - %s", + info.index_column, rows, V2S(from), V2S(right));) + + auto left = from; + right = from + (right - left) / 2; + while (1) { + DBG_STEP( + log_debug("Nest level: %ld, checking range: %s - %s (left: %s)", + info.index_column, V2S(from), V2S(right), V2S(left));) + rows = row_count(from, right); + + delta = rows > info.rows_per_chunk ? rows - info.rows_per_chunk + : info.rows_per_chunk - rows; + if (delta <= info.accuracy) { + DBG_STEP(log_debug("Nest level: %ld, close enough: rows: %ld, " + "rows_per_chunk: %ld, delta: %ld, range: %s - %s", + info.index_column, rows, info.rows_per_chunk, + delta, V2S(from), V2S(right));) + *use_returned_cnt = true; + break; + } + + if (right == from) { + // We shrinked the range to 1. No possibility to shrink more. + // It means that for this keypart there are more rows than requested. + // Return the current result, the caller will try to chunk by the next + // keypart if possible. + log_debug( + " Nest level: %ld, reached range 1, rows: %ld, range: %s " + "- %s", + info.index_column, rows, V2S(from), V2S(right)); + break; + } + + if (left >= right) { + // No way to chop it more. Use the current estimate. + DBG_STEP(log_debug("Nest level: %ld, No way to chop it more. Will " + "use the current estimate. rows: %ld, " + "rows_per_chunk: %ld, delta: %ld, range: %s - %s", + info.index_column, rows, info.rows_per_chunk, + delta, V2S(from), V2S(right));) + *use_returned_cnt = true; + break; + } + + if (rows > info.rows_per_chunk) { + // Shrink the range + DBG_STEP(log_debug("Nest level: %ld, (shrink) > rows: %ld, " + "rows_per_chunk: %ld, range: %s - %s", + info.index_column, rows, info.rows_per_chunk, + V2S(from), V2S(right));) + right = right - ensure_not_zero((right - left) / 2); + } else { + // Expand the range + DBG_STEP(log_debug("Nest level: %ld, (expand) <= rows: %ld, " + "rows_per_chunk: %ld, range: %s - %s", + info.index_column, rows, info.rows_per_chunk, + V2S(from), V2S(right));) + auto left_tmp = right; + right = right + (right - left) / 2; + left = left_tmp; + } + } + } + + *rows_cnt = rows; + DBG_STEP(log_debug("Nest level: %ld, adaptive_step() returning %ld rows " + "(left: %s, right: %s, ret: %s)", + info.index_column, rows, V2S(from), V2S(right), + V2S(ensure_not_zero(right - from)));) + return ensure_not_zero(right - from); + } + + bool optimized_chunking_possible(mysqlshdk::db::Type type) const { + return (type == mysqlshdk::db::Type::Integer || + type == mysqlshdk::db::Type::UInteger || + type == mysqlshdk::db::Type::Decimal); + } + template std::size_t chunk_integer_column(const Chunking_info &info, const T &min, const T &max) const { - std::size_t ranges_count = 0; // if rows_per_chunk <= 1 it may mean that the rows are bigger than chunk // size, which means we # chunks ~= # rows @@ -1514,52 +1871,323 @@ class Dumper::Table_worker final { ? index_range - info.row_count : info.row_count - index_range) <= row_count_accuracy; + DBG(log_debug( + "Nest level: %ld, chunk_integer_column(). min: %s, max: %s, " + "row_count: " + "%ld, estimated_chunks: %ld, estimated_step: %s, constant?: %d", + info.index_column, V2S(min), V2S(max), info.row_count, + estimated_chunks, V2S(estimated_step), use_constant_step);) + std::string chunk_id; - const auto next_step = - use_constant_step - ? std::function( - constant_step) - // using the default capture [&] below results in problems with - // GCC 5.4.0 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80543) - : [&info, &max, &chunk_id, this](const auto &from, - const auto &step) { - return this->adaptive_step(from, step, max, info, chunk_id); - }; - - auto current = min; - const auto step = estimated_step; - - log_info("%sChunking %s using integer algorithm with %s step", - m_log_id.c_str(), info.table->task_name.c_str(), - use_constant_step ? "constant" : "adaptive"); + using Fn = std::function; + + const Fn adaptive_step_fn = + (m_dumper->m_options.adaptive_step_strategy() == + mysqlsh::dump::AdaptiveStepStrategy::ORIGINAL) + ? Fn{[&info, &max, &chunk_id, this]( + const auto &from, const auto &step, uint64_t *rows_cnt, + bool *use_returned_cnt) { + return this->adaptive_step(from, step, max, info, chunk_id, + rows_cnt, use_returned_cnt); + }} + : Fn{[&info, &max, &chunk_id, this]( + const auto &from, const auto &step, uint64_t *rows_cnt, + bool *use_returned_cnt) { + return this->adaptive_step_v2(from, step, max, info, chunk_id, + rows_cnt, use_returned_cnt); + }}; + + const Fn next_step = + use_constant_step ? Fn{[&info, &max, &chunk_id, this]( + const auto &from, const auto &step, + uint64_t *rows_cnt, bool *use_returned_cnt) { + return this->constant_step(from, step, max, info, chunk_id, rows_cnt, + use_returned_cnt); + }} + : adaptive_step_fn; + + auto current_chunk_begin = min; - bool last_chunk = false; + log_debug( + "%sChunking %s using integer algorithm with %s step. step: " + "%s, alg: %s, depth: %ld", + m_log_id.c_str(), info.table->task_name.c_str(), + use_constant_step ? "constant" : "adaptive", V2S(estimated_step), + (m_dumper->m_options.adaptive_step_strategy() == + mysqlsh::dump::AdaptiveStepStrategy::ORIGINAL + ? "original" + : "enhanced"), + m_dumper->m_options.max_key_prefix_length()); + + DBG(log_debug( + "Nest level: %ld, trying to chunk by %ld, rows to chunk: %ld, rows " + "per chunk: %ld, index columns cnt: %ld", + info.index_column, info.index_column, info.row_count, + info.rows_per_chunk, info.table->index.info->columns().size());) + + bool last_chunk_in_dump = false; + bool last_chunk_on_this_level = false; + bool gluing_to_next_chunk = false; + + std::shared_ptr> gluer; + if (m_dumper->m_options.adaptive_step_strategy() == + mysqlsh::dump::AdaptiveStepStrategy::ORIGINAL) { + gluer = std::make_shared>(); + } else { + gluer = std::make_shared>(info.rows_per_chunk); + } - while (!last_chunk) { + while (!last_chunk_in_dump && !last_chunk_on_this_level) { if (m_dumper->m_worker_interrupt.test()) { - return ranges_count; + return info.ranges_counter; + } + + chunk_id = std::to_string(info.ranges_counter); + uint64_t rows_cnt = 0; + bool use_returned_cnt = false; + bool processed_by_deep_chunking = false; + + auto new_step = next_step(current_chunk_begin, estimated_step, &rows_cnt, + &use_returned_cnt); + + size_t idx_columns_cnt = info.table->index.info->columns().size(); + bool possible_to_chunk_next_column = + info.index_column < idx_columns_cnt - 1 && + (info.index_column < + m_dumper->m_options.max_key_prefix_length() - 1 || + m_dumper->m_options.max_key_prefix_length() == 0); + + if (new_step == 1 && !possible_to_chunk_next_column) { + DBG(log_debug("Nest level: %ld, Not possible to chunk deeper. Use the " + "current result", + info.index_column);) + use_returned_cnt = true; } - chunk_id = std::to_string(ranges_count); - const auto begin = current; - auto new_step = next_step(current, step); + if (!use_returned_cnt && new_step == 1 && possible_to_chunk_next_column) { + /* We reached the range of 1. It is possible that: + 1. There is still too much rows + 2. Rows count is OK, or too small */ + if (rows_cnt > info.rows_per_chunk + info.accuracy) { + /* For this keypart, there is too much rows. + We will try to use the next keypart for chunking */ + const auto type = + info.table->index.info->columns()[info.index_column + 1]->type; + if (!optimized_chunking_possible(type)) { + /* The next column is not INT-like type, so it is not possible to + use optimized chunking when using it. + TODO: Maybe we could mix chunk_integer_column and + chunk_non_integer_column approaches in the future? In sucha case + it would be possible to deep chunk all kind PKs. On some levels + (non-INT) it would do the full range scan (not necessarily + the full table scan) and on other level (INT) it would use the + optimized approach */ + DBG(log_debug("Nest level: %ld, range: %s, but rows_cnt: %ld. Next " + "PK column " + "not compatible with deep chunking. Dumping as is.", + info.index_column, V2S(new_step), rows_cnt);) + use_returned_cnt = true; + gluing_to_next_chunk = false; + } else { + /* The next column is compatible with deep chunking. */ + Chunking_info new_info = info; + if (!new_info.boundary.empty()) { + new_info.boundary += " AND "; + } + new_info.boundary += + info.table->index.info->columns()[info.index_column] + ->quoted_name + + "=" + to_string(current_chunk_begin); + new_info.index_column++; + new_info.row_count = rows_cnt; + DBG(log_debug( + "Nest level: %ld, too much rows (tried chunk by %ld), " + "trying to " + "chunk by %ld, rows to chunk: %ld, rows per chunk: %ld, " + "rows_cnt (from previous): %ld, new_step: %s", + info.index_column, new_info.index_column - 1, + new_info.index_column, new_info.row_count, + new_info.rows_per_chunk, rows_cnt, V2S(new_step));) + + // If we have anything in gluer accumulated before going to the next + // level - flush it + typename Gluer::GlueResult glue_res = gluer->flush(); + if (!glue_res.empty) { + DBG(log_debug( + "Nest level: %ld, will chunk deeper. creating dump task " + "for chunk: " + "%s, rows_cnt: %ld (r: %2f, rpc: %ld, acc: %ld), " + "new_step: " + "%s, last?: %d, idx_column: %ld, cond: %s", + info.index_column, chunk_id.c_str(), glue_res.rows_cnt, + (double)glue_res.rows_cnt / (double)info.rows_per_chunk, + info.rows_per_chunk, info.accuracy, V2S((new_step + 1)), + last_chunk_in_dump, info.index_column, + between(info, glue_res.begin, glue_res.end).c_str());) + create_and_push_table_data_chunk_task( + *info.table, between(info, glue_res.begin, glue_res.end), + chunk_id, info.ranges_counter++, last_chunk_in_dump); + } + + chunk_column(new_info); + processed_by_deep_chunking = true; + } // optimized chunking possible + } else { + // We have some rows. For sure not too much. + if (gluing_to_next_chunk) { + /* We already tried to glue the previous chunk to this one, + but still not enough rows. It might happen that the chunker logic + produced the same chunk again. This is the case when small chunk + precedes a huge one. Most probably in the next iteration we will + go into the nested chunking. Just use this chunk. */ + DBG(log_debug("Nest level: %ld, range: %s, but rows_cnt: %ld. No " + "possibility to " + "glue. Using it.", + info.index_column, V2S(new_step), rows_cnt);) + use_returned_cnt = true; + gluing_to_next_chunk = false; + } else { + /* Try to glue this chunk to the next one */ + DBG(log_debug("Nest level: %ld, range: %s, but rows_cnt: %ld. " + "Trying to glue.", + info.index_column, V2S(new_step), rows_cnt);) + gluing_to_next_chunk = true; + } + } + } // nested chunking // ensure that there's no integer overflow --new_step; - current = (current > max - new_step ? max : current + new_step); - - const auto end = current; + auto current_chunk_end = (current_chunk_begin > max - new_step + ? max + : current_chunk_begin + new_step); + + if (current_chunk_end >= max) { + DBG(log_debug("Nest level: %ld, End of range. rows (%ld). No more rows " + "in range.", + info.index_column, rows_cnt);) + DBG(log_debug("Nest level: %ld, This was the last chunk on this level. " + "Need to " + "dump it as it is", + info.index_column);) + last_chunk_on_this_level = true; + } - last_chunk = (current >= max); + last_chunk_in_dump = last_chunk_on_this_level && info.index_column == 0; + + // If the current chunk was processed by nested chunking, we have + // nothing to dump. Just go to the next chunk on this level. + // In other case, we need to glue. + auto current_chunk_begin_for_glue = current_chunk_begin; + + current_chunk_begin = current_chunk_end; + ++current_chunk_begin; + if (processed_by_deep_chunking) { + if (!last_chunk_in_dump) continue; + + // This is the last chunk in the dump and it was processed by nested + // chunking. Nothing else to dump. We need to create an empty chunk + // to mark the end of dump. + DBG(log_debug("Nest level: %ld, this is the last chunk in the dump. It " + "was processed by nested chunking. Creating an empty " + "chunk to mark the end of dump.", + info.index_column);) + // The following condition will evaluate to 'false' always, causing + // the empty chunk to be created. + const_cast(info).boundary = "1=0"; + chunk_id = std::to_string(info.ranges_counter); + rows_cnt = 0; + } - create_and_push_table_data_chunk_task(*info.table, - between(info, begin, end), chunk_id, - ranges_count++, last_chunk); + // Put the chunk through gluer logic + typename Gluer::GlueResult glue_res = + gluer->glue(current_chunk_begin_for_glue, current_chunk_end, rows_cnt, + last_chunk_on_this_level || last_chunk_in_dump); + if (!glue_res.empty) { + DBG( + if (last_chunk_in_dump && glue_res.flushed) { + log_debug( + "Nest level: %ld, this is the last chunk in the dump. Was " + "flushed " + "during last glue.", + info.index_column); + } + if (last_chunk_on_this_level && !last_chunk_in_dump && + glue_res.flushed) { + log_debug( + "Nest level: %ld, this is the last chunk on this nested " + "level. Was " + "flushed during last glue.", + info.index_column); + } + log_info("Nest level: %ld) creating dump task for chunk: %s, " + "rows_cnt: " + "%ld (r: %2f, rpc: %ld, acc: %ld), new_step: %s, last?: " + "%d, idx_column: %ld, cond: %s", + info.index_column, chunk_id.c_str(), glue_res.rows_cnt, + (double)glue_res.rows_cnt / (double)info.rows_per_chunk, + info.rows_per_chunk, info.accuracy, V2S(new_step + 1), + last_chunk_in_dump, info.index_column, + between(info, glue_res.begin, glue_res.end).c_str());) + create_and_push_table_data_chunk_task( + *info.table, between(info, glue_res.begin, glue_res.end), chunk_id, + info.ranges_counter++, (last_chunk_in_dump && glue_res.flushed)); + } - ++current; + if (!glue_res.flushed && + (last_chunk_on_this_level || last_chunk_in_dump)) { + // Gluer was not flushed during last glue, and this is the last chunk on + // this level or on top level + chunk_id = std::to_string(info.ranges_counter); + glue_res = gluer->flush(); + + if (last_chunk_in_dump) { + // top level. Create even an empty last chunk + DBG(log_debug( + "Nest level: %ld, this is the last chunk in the dump. Needed " + "additional flush after last glue.", + info.index_column); + log_debug( + "Nest level: %ld, creating dump task for chunk: %s, " + "rows_cnt: " + "%ld (r: %2f, rpc: %ld, acc: %ld), new_step: %s, last?: " + "%d, idx_column: %ld, cond: %s", + info.index_column, chunk_id.c_str(), glue_res.rows_cnt, + (double)glue_res.rows_cnt / (double)info.rows_per_chunk, + info.rows_per_chunk, info.accuracy, V2S(new_step + 1), + last_chunk_in_dump, info.index_column, + between(info, glue_res.begin, glue_res.end).c_str());) + create_and_push_table_data_chunk_task( + *info.table, between(info, glue_res.begin, glue_res.end), + chunk_id, info.ranges_counter++, true); + } else { + // Nested level. If there is something to dump, do it. + if (!glue_res.empty) { + DBG(log_debug("Nest level: %ld, this is the last chunk on this " + "nested level. " + "Needed " + "additional flush after last glue.", + info.index_column); + log_debug( + "Nest level: %ld, creating dump task for chunk: %s, " + "rows_cnt: " + "%ld (r: " + "%2f, rpc: %ld, acc: %ld), new_step: %s, last?: %d, " + "idx_column: %ld, cond: %s", + info.index_column, chunk_id.c_str(), glue_res.rows_cnt, + (double)glue_res.rows_cnt / (double)info.rows_per_chunk, + info.rows_per_chunk, info.accuracy, V2S(new_step + 1), + last_chunk_in_dump, info.index_column, + between(info, glue_res.begin, glue_res.end).c_str());) + create_and_push_table_data_chunk_task( + *info.table, between(info, glue_res.begin, glue_res.end), + chunk_id, info.ranges_counter++, false); + } + } + } } - return ranges_count; + return info.ranges_counter; } std::size_t chunk_integer_column(const Chunking_info &info, const Row &begin, @@ -1662,8 +2290,11 @@ class Dumper::Table_worker final { auto row = result->fetch_one(); const auto handle_empty_table = [&info, this]() { - create_and_push_table_data_chunk_task(*info.table, info.boundary, "0", 0, - true); + // If this is PK top level, it means the table is empty + if (info.index_column == 0) { + create_and_push_table_data_chunk_task(*info.table, info.boundary, "0", + 0, true); + } return 1; }; @@ -1750,9 +2381,10 @@ class Dumper::Table_worker final { current_console()->print_note(msg); } } + size_t ranges_counter = 0; + Chunking_info info(ranges_counter); - Chunking_info info; - + info.ranges_counter = ranges_counter; info.table = &table; info.row_count = partition ? partition->row_count : table.info->row_count; info.rows_per_chunk = @@ -1889,8 +2521,8 @@ class Dumper::Table_worker final { uint64_t row_count_from_explain(const std::string &explain) const { const auto error = [&explain](const std::string &msg) { - log_error("JSON output of malformed EXPLAIN statement:\n%s", - explain.c_str()); + log_error("JSON output of malformed EXPLAIN statement:\n%s\nmsg: %s", + explain.c_str(), msg.c_str()); return std::runtime_error(msg); }; @@ -1903,13 +2535,45 @@ class Dumper::Table_worker final { "Failed to parse JSON output of an EXPLAIN statement: %s", e.what())); } + // This function is fragile, as originally it expects info about rows + // count to be in a specific place in the JSON output of EXPLAIN statement. + // Moreover it expects it to be a number. However, in some cases + // (e.g. when there are no rows to process) the output may differ between + // MySQL versions. + // That's why I think it should return 0 when it is not able to find the row + // count in the expected place. + if (auto *v = rapidjson::Pointer("/query_block/message").Get(json)) { + if (v->IsString()) { + std::string msg = v->GetString(); + if (msg.find("no matching row") != std::string::npos || + msg.find("no rows") != std::string::npos) { + log_info( + "EXPLAIN statement returned message indicating that there are no " + "rows to process: %s", + msg.c_str()); + return 0; + } + } + } + if (auto *v = rapidjson::Pointer("/query_plan/access_type").Get(json)) { + if (v->IsString()) { + std::string msg = v->GetString(); + if (msg.find("zero_rows_aggregated") != std::string::npos) { + log_info( + "EXPLAIN statement returned message indicating that there are no " + "rows to process: %s", + msg.c_str()); + return 0; + } + } + } if (!m_json_path) { for (const auto path : { "/query_plan/inputs/0/estimated_rows", // v2 + BUG#35239659 "/inputs/0/estimated_rows", // JSON format ver. 2 "/query_block/table/rows_examined_per_scan", // 5.7+ (ver. 1) "/query_block/ordering_operation/table/rows", // 5.6 - "/query_block/nested_loop/0/table/rows", // MariaDB + "/query_block/nested_loop/0/table/rows" // MariaDB }) { if (rapidjson::Pointer(path).Get(json)) { m_json_path = path; @@ -1940,6 +2604,10 @@ class Dumper::Table_worker final { return value->GetUint64(); } } else { + log_debug( + "Value at path '%s' in JSON output of an EXPLAIN statement is not a " + "number: %s", + m_json_path, shcore::json::to_string(*value).c_str()); throw error( "The row count in JSON output of an EXPLAIN statement is not a " "number"); @@ -1965,6 +2633,10 @@ template <> Decimal Dumper::Table_worker::sum(const Decimal &value, const Decimal &delta) { return value + delta; } +template <> +Decimal Dumper::Table_worker::mul(uint64_t value1, const Decimal &value2) { + return value1 * value2; +} Dumper::Dumper(const Dump_options &options) : m_options(options), @@ -2249,6 +2921,21 @@ void Dumper::do_run() { current_console()->print_status(msg); } + // print chunking strategy information only if chunking is enabled + if (m_options.split()) { + std::string strategy = (m_options.adaptive_step_strategy() == + mysqlsh::dump::AdaptiveStepStrategy::ENHANCED) + ? "enhanced" + : "original"; + msg = "Using " + strategy + " adaptive step strategy."; + current_console()->print_status(msg); + msg = "Maximum chunking nesting depth: " + + (m_options.max_key_prefix_length() == 0 + ? "unlimited" + : std::to_string(m_options.max_key_prefix_length())); + current_console()->print_status(msg); + } + if (!m_options.is_dry_run() && m_options.show_progress() && m_options.dump_data()) { current_console()->print_note( diff --git a/modules/util/dump/export_table_options.h b/modules/util/dump/export_table_options.h index e43a382fd..14d445bcc 100644 --- a/modules/util/dump/export_table_options.h +++ b/modules/util/dump/export_table_options.h @@ -93,6 +93,12 @@ class Export_table_options : public Dump_options { bool checksum() const override { return false; } + AdaptiveStepStrategy adaptive_step_strategy() const override { + return AdaptiveStepStrategy::ORIGINAL; + } + + size_t max_key_prefix_length() const override { return 1; } + private: void on_set_session( const std::shared_ptr &session) override; diff --git a/modules/util/dump/instance_cache.cc b/modules/util/dump/instance_cache.cc index 116d40f62..4fe9ff9ae 100644 --- a/modules/util/dump/instance_cache.cc +++ b/modules/util/dump/instance_cache.cc @@ -133,6 +133,10 @@ void Instance_cache::Index::add_column(const Column *column) { m_columns_sql += column->quoted_name; } +void Instance_cache::Index::set_name(const std::string &name) { + m_quoted_name = shcore::quote_identifier(name); +} + Instance_cache_builder::Instance_cache_builder( const std::shared_ptr &session, const mysqlshdk::db::Filtering_options &filters, Instance_cache &&cache) @@ -839,6 +843,7 @@ void Instance_cache_builder::fetch_table_indexes() { } if (add_index) { + new_index.set_name(index.first); auto ptr = &t.indexes.emplace(index.first, std::move(new_index)) .first->second; diff --git a/modules/util/dump/instance_cache.h b/modules/util/dump/instance_cache.h index 470e6554e..a425dc7f7 100644 --- a/modules/util/dump/instance_cache.h +++ b/modules/util/dump/instance_cache.h @@ -86,9 +86,15 @@ struct Instance_cache { inline const std::string &columns_sql() const { return m_columns_sql; } + inline const std::string "ed_name() const { return m_quoted_name; } + + void set_name(const std::string &name); + void add_column(const Column *column); private: + std::string m_quoted_name; + std::vector m_columns; std::string m_columns_sql; diff --git a/modules/util/mod_util.cc b/modules/util/mod_util.cc index 900cc44db..87c9e8925 100644 --- a/modules/util/mod_util.cc +++ b/modules/util/mod_util.cc @@ -1806,6 +1806,12 @@ compatibility issues with MySQL HeatWave Service. number of bytes to be written to each chunk file, enables chunking. @li threads: int (default: 4) - Use N threads to dump data chunks from the server. +@li adaptiveStepStrategy: string (default: original) - Select which +algorithm to use for chunk boundary calculation. Set to “original” for +the legacy algorithm or “enhanced” for the new algorithm. +@li maxKeyPrefixLength: int (default: 1) - Define how many primary key +columns, starting from the left, are used for chunking. Set to 0 for no limit. +If the value exceeds the key’s length, the entire key is used. )*"); REGISTER_HELP_DETAIL_TEXT(TOPIC_UTIL_DUMP_DDL_COMPRESSION, R"*( @@ -2430,6 +2436,13 @@ number of bytes to be copied in each chunk, enables chunking. the source server and additional N threads to write the data to the target server. +@li adaptiveStepStrategy: string (default: original) - Select which +algorithm to use for chunk boundary calculation. Set to “original” for +the legacy algorithm or “enhanced” for the new algorithm. +@li maxKeyPrefixLength: int (default: 1) - Define how many primary key +columns, starting from the left, are used for chunking. Set to 0 for no limit. +If the value exceeds the key’s length, the entire key is used. + @li maxRate: string (default: "0") - Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. diff --git a/mysqlshdk/libs/storage/backend/object_storage_bucket.cc b/mysqlshdk/libs/storage/backend/object_storage_bucket.cc index 9409efc1c..1e5ab5786 100644 --- a/mysqlshdk/libs/storage/backend/object_storage_bucket.cc +++ b/mysqlshdk/libs/storage/backend/object_storage_bucket.cc @@ -32,6 +32,8 @@ #include "mysqlshdk/libs/utils/fault_injection.h" #include "mysqlshdk/libs/utils/logger.h" +#include "mysqlshdk/libs/utils/utils_encoding.h" +#include "mysqlshdk/libs/utils/utils_ssl.h" namespace mysqlshdk { namespace storage { @@ -157,6 +159,11 @@ void Container::put_object(const std::string &object_name, const char *data, size_t size) { Headers headers{{"content-type", "application/octet-stream"}}; + auto &md5 = headers["Content-MD5"]; + std::string_view data_view(data, size); + const auto hash = shcore::ssl::restricted::md5(data_view); + shcore::encode_base64(hash.data(), hash.size(), &md5); + auto request = put_object_request(object_name, std::move(headers)); request.body = data; request.size = size; diff --git a/unittest/scripts/auto/js_shell/validation/cli_help_norecord.js b/unittest/scripts/auto/js_shell/validation/cli_help_norecord.js index c5cbdbaf6..c26b443ff 100644 --- a/unittest/scripts/auto/js_shell/validation/cli_help_norecord.js +++ b/unittest/scripts/auto/js_shell/validation/cli_help_norecord.js @@ -344,6 +344,11 @@ OPTIONS with masking policies will result in an error. When enabled, downgrades this error to a warning. Default: false. +--adaptiveStepStrategy= + Select which algorithm to use for chunk boundary calculation. Set + to “original” for the legacy algorithm or “enhanced” for + the new algorithm. Default: original. + --analyzeTables= "off", "on", "histogram" (default: off) - If 'on', executes ANALYZE TABLE for all tables, once copied. If set to 'histogram', only @@ -498,6 +503,11 @@ OPTIONS (Megabytes), G (Gigabytes). Minimum value: 4096. Default: the value of bytesPerChunk. +--maxKeyPrefixLength= + Define how many primary key columns, starting from the left, are + used for chunking. Set to 0 for no limit. If the value exceeds the + key’s length, the entire key is used. Default: 1. + --maxRate= Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. Default: "0". @@ -578,6 +588,11 @@ OPTIONS with masking policies will result in an error. When enabled, downgrades this error to a warning. Default: false. +--adaptiveStepStrategy= + Select which algorithm to use for chunk boundary calculation. Set + to “original” for the legacy algorithm or “enhanced” for + the new algorithm. Default: original. + --analyzeTables= "off", "on", "histogram" (default: off) - If 'on', executes ANALYZE TABLE for all tables, once copied. If set to 'histogram', only @@ -712,6 +727,11 @@ OPTIONS (Megabytes), G (Gigabytes). Minimum value: 4096. Default: the value of bytesPerChunk. +--maxKeyPrefixLength= + Define how many primary key columns, starting from the left, are + used for chunking. Set to 0 for no limit. If the value exceeds the + key’s length, the entire key is used. Default: 1. + --maxRate= Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. Default: "0". @@ -785,6 +805,11 @@ WHERE establish a connection to the target instance. OPTIONS +--adaptiveStepStrategy= + Select which algorithm to use for chunk boundary calculation. Set + to “original” for the legacy algorithm or “enhanced” for + the new algorithm. Default: original. + --all= Copy all views and tables from the specified schema, requires the tables argument to be an empty list. Default: false. @@ -891,6 +916,11 @@ OPTIONS (Megabytes), G (Gigabytes). Minimum value: 4096. Default: the value of bytesPerChunk. +--maxKeyPrefixLength= + Define how many primary key columns, starting from the left, are + used for chunking. Set to 0 for no limit. If the value exceeds the + key’s length, the entire key is used. Default: 1. + --maxRate= Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. Default: "0". @@ -1066,6 +1096,11 @@ OPTIONS with masking policies will result in an error. When enabled, downgrades this error to a warning. Default: false. +--adaptiveStepStrategy= + Select which algorithm to use for chunk boundary calculation. Set + to “original” for the legacy algorithm or “enhanced” for + the new algorithm. Default: original. + --azureConfigFile= Use the specified Azure configuration file instead of the one at the default location. Default: not set. @@ -1230,6 +1265,11 @@ OPTIONS SELECT ... INTO OUTFILE. See Section 13.2.10.1, "SELECT ... INTO Statement". Default: "\n". +--maxKeyPrefixLength= + Define how many primary key columns, starting from the left, are + used for chunking. Set to 0 for no limit. If the value exceeds the + key’s length, the entire key is used. Default: 1. + --maxRate= Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. Default: "0". @@ -1344,6 +1384,11 @@ OPTIONS with masking policies will result in an error. When enabled, downgrades this error to a warning. Default: false. +--adaptiveStepStrategy= + Select which algorithm to use for chunk boundary calculation. Set + to “original” for the legacy algorithm or “enhanced” for + the new algorithm. Default: original. + --azureConfigFile= Use the specified Azure configuration file instead of the one at the default location. Default: not set. @@ -1488,6 +1533,11 @@ OPTIONS SELECT ... INTO OUTFILE. See Section 13.2.10.1, "SELECT ... INTO Statement". Default: "\n". +--maxKeyPrefixLength= + Define how many primary key columns, starting from the left, are + used for chunking. Set to 0 for no limit. If the value exceeds the + key’s length, the entire key is used. Default: 1. + --maxRate= Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. Default: "0". @@ -1597,6 +1647,11 @@ WHERE tables: List of tables/views to be dumped. OPTIONS +--adaptiveStepStrategy= + Select which algorithm to use for chunk boundary calculation. Set + to “original” for the legacy algorithm or “enhanced” for + the new algorithm. Default: original. + --all= Dump all views and tables from the specified schema. Default: false. @@ -1713,6 +1768,11 @@ OPTIONS SELECT ... INTO OUTFILE. See Section 13.2.10.1, "SELECT ... INTO Statement". Default: "\n". +--maxKeyPrefixLength= + Define how many primary key columns, starting from the left, are + used for chunking. Set to 0 for no limit. If the value exceeds the + key’s length, the entire key is used. Default: 1. + --maxRate= Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. Default: "0". @@ -1823,6 +1883,8 @@ OPTIONS with masking policies will result in an error. When enabled, downgrades this error to a warning. Default: false. +--adaptiveStepStrategy= + --azureConfigFile= Use the specified Azure configuration file instead of the one at the default location. Default: not set. @@ -1878,6 +1940,8 @@ OPTIONS SELECT ... INTO OUTFILE. See Section 13.2.10.1, "SELECT ... INTO Statement". Default: "\n". +--maxKeyPrefixLength= + --maxRate= Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. Default: "0". diff --git a/unittest/scripts/auto/js_shell/validation/util_help_norecord.js b/unittest/scripts/auto/js_shell/validation/util_help_norecord.js index 2a14f8aba..5ab8fbd95 100644 --- a/unittest/scripts/auto/js_shell/validation/util_help_norecord.js +++ b/unittest/scripts/auto/js_shell/validation/util_help_norecord.js @@ -298,6 +298,12 @@ DESCRIPTION - threads: int (default: 4) - Use N threads to read the data from the source server and additional N threads to write the data to the target server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - maxRate: string (default: "0") - Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. @@ -455,6 +461,12 @@ DESCRIPTION - threads: int (default: 4) - Use N threads to read the data from the source server and additional N threads to write the data to the target server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - maxRate: string (default: "0") - Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. @@ -595,6 +607,12 @@ DESCRIPTION - threads: int (default: 4) - Use N threads to read the data from the source server and additional N threads to write the data to the target server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - maxRate: string (default: "0") - Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. @@ -891,6 +909,12 @@ DESCRIPTION of bytes to be written to each chunk file, enables chunking. - threads: int (default: 4) - Use N threads to dump data chunks from the server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - fieldsTerminatedBy: string (default: "\t") - This option has the same meaning as the corresponding clause for SELECT ... INTO OUTFILE. - fieldsEnclosedBy: char (default: '') - This option has the same meaning @@ -1363,6 +1387,12 @@ DESCRIPTION of bytes to be written to each chunk file, enables chunking. - threads: int (default: 4) - Use N threads to dump data chunks from the server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - fieldsTerminatedBy: string (default: "\t") - This option has the same meaning as the corresponding clause for SELECT ... INTO OUTFILE. - fieldsEnclosedBy: char (default: '') - This option has the same meaning @@ -1806,6 +1836,12 @@ DESCRIPTION of bytes to be written to each chunk file, enables chunking. - threads: int (default: 4) - Use N threads to dump data chunks from the server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - fieldsTerminatedBy: string (default: "\t") - This option has the same meaning as the corresponding clause for SELECT ... INTO OUTFILE. - fieldsEnclosedBy: char (default: '') - This option has the same meaning diff --git a/unittest/scripts/auto/py_shell/validation/util_help_norecord.py b/unittest/scripts/auto/py_shell/validation/util_help_norecord.py index c672dd4f8..2250d6853 100644 --- a/unittest/scripts/auto/py_shell/validation/util_help_norecord.py +++ b/unittest/scripts/auto/py_shell/validation/util_help_norecord.py @@ -298,6 +298,12 @@ - threads: int (default: 4) - Use N threads to read the data from the source server and additional N threads to write the data to the target server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - maxRate: string (default: "0") - Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. @@ -455,6 +461,12 @@ - threads: int (default: 4) - Use N threads to read the data from the source server and additional N threads to write the data to the target server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - maxRate: string (default: "0") - Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. @@ -595,6 +607,12 @@ - threads: int (default: 4) - Use N threads to read the data from the source server and additional N threads to write the data to the target server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - maxRate: string (default: "0") - Limit data read throughput to maximum rate, measured in bytes per second per thread. Use maxRate="0" to set no limit. @@ -892,6 +910,12 @@ of bytes to be written to each chunk file, enables chunking. - threads: int (default: 4) - Use N threads to dump data chunks from the server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - fieldsTerminatedBy: string (default: "\t") - This option has the same meaning as the corresponding clause for SELECT ... INTO OUTFILE. - fieldsEnclosedBy: char (default: '') - This option has the same meaning @@ -1364,6 +1388,12 @@ of bytes to be written to each chunk file, enables chunking. - threads: int (default: 4) - Use N threads to dump data chunks from the server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - fieldsTerminatedBy: string (default: "\t") - This option has the same meaning as the corresponding clause for SELECT ... INTO OUTFILE. - fieldsEnclosedBy: char (default: '') - This option has the same meaning @@ -1807,6 +1837,12 @@ of bytes to be written to each chunk file, enables chunking. - threads: int (default: 4) - Use N threads to dump data chunks from the server. + - adaptiveStepStrategy: string (default: original) - Select which + algorithm to use for chunk boundary calculation. Set to “original” + for the legacy algorithm or “enhanced” for the new algorithm. + - maxKeyPrefixLength: int (default: 1) - Define how many primary key + columns, starting from the left, are used for chunking. Set to 0 for no + limit. If the value exceeds the key’s length, the entire key is used. - fieldsTerminatedBy: string (default: "\t") - This option has the same meaning as the corresponding clause for SELECT ... INTO OUTFILE. - fieldsEnclosedBy: char (default: '') - This option has the same meaning