From 8e45d7ed753d6c978721f6b1080df103ff106a5c Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Wed, 18 Dec 2019 16:30:44 +0100 Subject: [PATCH] Bug #30473261: CONVERT THE INDEX SUBQUERY ENGINES INTO USING THE ITERATOR EXECUTOR [patch 4/10, actual conversion] Convert the index subquery engines to use the iterator executor. These are the last pieces of code that explicitly depend on the internals of the pre-iterator executor (e.g., TABLE_LIST::materialize_derived). Rewrite them to use the iterator executor everywhere, which also cleans up a fair bit of unneeded complexity. Note that this assumes every query is executed using the iterator executor. Change-Id: I3dacfb6e1bf884e02690fb7076144d85f216c937 --- mysql-test/r/subquery_sj_mat_nosj.result | 30 +- sql/item_subselect.cc | 445 ++++++----------------- sql/item_subselect.h | 19 +- sql/opt_explain.cc | 3 +- sql/opt_explain.h | 6 +- sql/ref_row_iterators.h | 7 +- sql/sql_executor.cc | 232 ++++++------ sql/sql_executor.h | 2 + sql/sql_lex.h | 11 +- sql/sql_optimizer.cc | 1 + sql/sql_optimizer.h | 2 + sql/sql_union.cc | 48 ++- 12 files changed, 337 insertions(+), 469 deletions(-) diff --git a/mysql-test/r/subquery_sj_mat_nosj.result b/mysql-test/r/subquery_sj_mat_nosj.result index 074cca9a55be..79224167f346 100644 --- a/mysql-test/r/subquery_sj_mat_nosj.result +++ b/mysql-test/r/subquery_sj_mat_nosj.result @@ -4014,10 +4014,14 @@ EXPLAIN -> Filter: (t3.a,t3.a in (select #2)) (cost=10.54 rows=100) -> Table scan on t3 (cost=10.54 rows=100) -> Select #2 (subquery in condition; run only once) - -> Nested loop inner join (cost=38.66 rows=48) - -> Filter: ((t1.kp1 < 20) and (t1.c is not null)) (cost=21.86 rows=48) - -> Index range scan on t1 using kp1 (cost=21.86 rows=48) - -> Single-row index lookup on t4 using PRIMARY (pk=t1.c) (cost=0.25 rows=1) + -> Filter: ((t3.a = `materialized-subquery`.kp1)) + -> Limit: 1 row(s) + -> Index lookup on materialized-subquery using (kp1=t3.a) + -> Materialize with 
deduplication + -> Nested loop inner join (cost=38.66 rows=48) + -> Filter: ((t1.kp1 < 20) and (t1.c is not null)) (cost=21.86 rows=48) + -> Index range scan on t1 using kp1 (cost=21.86 rows=48) + -> Single-row index lookup on t4 using PRIMARY (pk=t1.c) (cost=0.25 rows=1) select * from t3 where a in (select t1.kp1 from t1,t4 where kp1<20 and t4.pk=t1.c); @@ -12495,13 +12499,17 @@ EXPLAIN -> Filter: (t4.col_int_key,t4.col_int_key in (select #2)) -> Table scan on t4 -> Select #2 (subquery in condition; run only once) - -> Filter: (t3.col_int is null) (cost=3.40 rows=7) - -> Nested loop left join (cost=3.40 rows=7) - -> Index scan on t1 using col_int_key (cost=0.95 rows=7) - -> Nested loop inner join (cost=1.21 rows=1) - -> Single-row index lookup on t2 using PRIMARY (pk=t1.pk) (cost=0.26 rows=1) - -> Filter: (t3.col_int_key = t2.col_int) (cost=0.09 rows=1) - -> Table scan on t3 (cost=0.09 rows=4) + -> Filter: ((t4.col_int_key = `materialized-subquery`.col_int_key)) + -> Limit: 1 row(s) + -> Index lookup on materialized-subquery using (col_int_key=t4.col_int_key) + -> Materialize with deduplication + -> Filter: (t3.col_int is null) (cost=3.40 rows=7) + -> Nested loop left join (cost=3.40 rows=7) + -> Index scan on t1 using col_int_key (cost=0.95 rows=7) + -> Nested loop inner join (cost=1.21 rows=1) + -> Single-row index lookup on t2 using PRIMARY (pk=t1.pk) (cost=0.26 rows=1) + -> Filter: (t3.col_int_key = t2.col_int) (cost=0.09 rows=1) + -> Table scan on t3 (cost=0.09 rows=4) SELECT * FROM t4 diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 1925b3cb0df6..017973a6a9ff 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -68,6 +68,7 @@ #include "sql/parse_tree_nodes.h" // PT_subquery #include "sql/query_options.h" #include "sql/query_result.h" +#include "sql/ref_row_iterators.h" #include "sql/sql_class.h" // THD #include "sql/sql_const.h" #include "sql/sql_error.h" @@ -86,6 +87,7 @@ #include "sql/temp_table_param.h" #include 
"sql/thd_raii.h" #include "sql/thr_malloc.h" +#include "sql/timing_iterator.h" #include "sql/window.h" #include "sql_string.h" #include "template_utils.h" @@ -299,6 +301,10 @@ void Item_subselect::accumulate_join_condition( } } +void Item_subselect::create_iterators(THD *thd) { + engine->create_iterators(thd); +} + void Item_subselect::cleanup() { DBUG_TRACE; Item_result_field::cleanup(); @@ -536,6 +542,15 @@ void Item_in_subselect::cleanup() { Item_subselect::cleanup(); } +RowIterator *Item_in_subselect::root_iterator() const { + // Only subselect_hash_sj_engine owns its own iterator; + // for subselect_indexsubquery_engine, the unit still has it, since it's a + // normally executed query block. Thus, we should never get called otherwise. + DBUG_ASSERT(exec_method == EXEC_MATERIALIZATION && + engine->engine_type() == subselect_engine::HASH_SJ_ENGINE); + return down_cast(engine)->root_iterator(); +} + Item_subselect::~Item_subselect() { destroy(engine); } bool Item_subselect::fix_fields(THD *thd, Item **ref) { @@ -1096,14 +1111,16 @@ enum Item_result Item_singlerow_subselect::result_type() const { return engine->type(); } -bool Item_singlerow_subselect::resolve_type(THD *) { +bool Item_singlerow_subselect::resolve_type(THD *thd) { if ((max_columns = engine->cols()) == 1) { engine->fix_length_and_dec(row = &value); } else { - if (!(row = (Item_cache **)(*THR_MALLOC) - ->Alloc(sizeof(Item_cache *) * max_columns))) + row = thd->mem_root->ArrayAlloc(max_columns); + if (row == nullptr) { return true; + } engine->fix_length_and_dec(row); + DBUG_ASSERT(*row != nullptr); value = *row; } set_data_type(engine->field_type()); @@ -2797,336 +2814,65 @@ bool subselect_iterator_engine::exec(THD *thd) { } /** - Search, using a table scan, for at least one row satisfying select - condition. - - The caller must set item's 'value' to 'false' before calling this - function. This function will set it to 'true' if it finds a matching row. 
+ Run a query to see if it returns at least one row (stops after the first + has been found, or on error). Unless there was an error, whether a row + was found is stored in "found". - @returns false if ok, true if read error. -*/ -bool subselect_indexsubquery_engine::scan_table() { - int error; - TABLE *table = tab->table(); - DBUG_TRACE; - - // We never need to do a table scan of the materialized table. - DBUG_ASSERT(engine_type() != HASH_SJ_ENGINE); + @retval true on error + */ +bool ExecuteExistsQuery(THD *thd, SELECT_LEX_UNIT *unit, RowIterator *iterator, + bool *found) { + Opt_trace_context *const trace = &thd->opt_trace; + Opt_trace_object trace_wrapper(trace); + Opt_trace_object trace_exec(trace, "join_execution"); + if (unit->is_simple()) { + trace_exec.add_select_number(unit->first_select()->select_number); + } + Opt_trace_array trace_steps(trace, "steps"); - if ((table->file->inited && (error = table->file->ha_index_end())) || - (error = table->file->ha_rnd_init(true))) { - (void)report_handler_error(table, error); + if (unit->ClearForExecution(thd)) { return true; } - for (;;) { - error = table->file->ha_rnd_next(table->record[0]); - if (error && error != HA_ERR_END_OF_FILE) { - error = report_handler_error(table, error); - break; - } - /* No more rows */ - if (!table->has_row()) break; - - if (!cond || cond->val_int()) { - static_cast(item)->value = true; - break; - } + unit->set_executed(); + thd->get_stmt_da()->reset_current_row_for_condition(); + if (iterator->Init()) { + return true; } - table->file->ha_rnd_end(); - return error != 0; -} - -/** - Copy ref key and check for null parts in it - - Construct a search tuple to be used for index lookup. If one of the - key parts have a NULL value, the following logic applies: - - For top level items, e.g. - - "WHERE IN (SELECT ...)" - - where one of the outer values are NULL, the IN predicate evaluates - to false/UNKNOWN (we don't care) and it's not necessary to evaluate - the subquery. 
That shortcut is taken in - Item_in_optimizer::val_int(). Thus, if a key part with a NULL value - is found here, the NULL is either not outer or this subquery is not - top level. Therefore we cannot shortcut subquery execution if a NULL - is found here. - - Thus, if one of the key parts have a NULL value there are two - possibilities: - - a) The NULL is from the outer_value_list. Since this is not a top - level item (see above) we need to check whether this predicate - evaluates to NULL or false. That is done by checking if the - subquery has a row if the conditions based on outer NULL values - are disabled. Index lookup cannot be used for this, so a table - scan must be done. - - b) The NULL is local to the subquery, e.g.: - - "WHERE ... IN (SELECT ... WHERE inner_col IS NULL)" - - In this case we're looking for rows with the exact inner_col - value of NULL, not rows that match if the "inner_col IS NULL" - condition is disabled. Index lookup can be used for this. - - @see subselect_indexsubquery_engine::exec() - @see Item_in_optimizer::val_int() - - @param[out] require_scan true if a NULL value is found that falls - into category a) above, false if index - lookup can be used. - @param[out] convert_error true if an error occurred during conversion - of values from one type to another, false - otherwise. 
- -*/ -void subselect_indexsubquery_engine::copy_ref_key(bool *require_scan, - bool *convert_error) { - DBUG_TRACE; - - *require_scan = false; - *convert_error = false; - for (uint part_no = 0; part_no < tab->ref().key_parts; part_no++) { - store_key *s_key = tab->ref().key_copy[part_no]; - if (s_key == nullptr) - continue; // key is const and does not need to be reevaluated - - const enum store_key::store_key_result store_res = s_key->copy(); - tab->ref().key_err = store_res; - - if (s_key->null_key) { - /* - If we have materialized the subquery (HASH_SJ_ENGINE): - - this NULL ref item cannot be local to the subquery (any such - equality condition is attached to the subquery's JOIN and is thus - handled during materialization (by join->exec() in - subselect_hash_sj_engine::exec()) - - The case of an outer NULL ref item is caught in - subselect_hash_sj_engine::exec() so shouldn't come here; but this is - not guaranteed if the outer expression is not deterministic: this - expression is evaluated early in Item_in_subselect::exec() (for - left_expr_cache) and then in s_key->copy() just above; so it is - possible that it is non-NULL (so, not caught) then NULL (so, coming - here). In such case, there is no meaningful value for IN, any value - will do. - */ - - /* - NULL value is from the outer_value_list if the key part has a - cond guard that deactivates the condition. @see - TABLE_REF::cond_guards - */ - if (tab->ref().cond_guards && tab->ref().cond_guards[part_no] && - !*tab->ref().cond_guards[part_no]) { - DBUG_ASSERT(!(down_cast(item)->abort_on_null)); - - *require_scan = true; - return; - } - } - - /* - Check if the error is equal to STORE_KEY_FATAL. This is not expressed - using the store_key::store_key_result enum because ref().key_err is a - boolean and we want to detect both true and STORE_KEY_FATAL from the - space of the union of the values of [TRUE, FALSE] and - store_key::store_key_result. - TODO: fix the variable an return types. 
- */ - if (store_res == store_key::STORE_KEY_FATAL) { - /* - Error converting the left IN operand to the column type of the right - IN operand. - */ - tab->table()->set_no_row(); - *convert_error = true; - return; - } + // See if we can get at least one row. + int error = iterator->Read(); + if (error == 1 || thd->is_error()) { + return true; } + + *found = (error == 0); + return false; } /* Index-lookup subselect 'engine' - run the subquery - SYNOPSIS - subselect_indexsubquery_engine:exec() - full_scan - DESCRIPTION The engine is used to resolve subqueries in form oe IN (SELECT key FROM tbl WHERE subq_where) - The value of the predicate is calculated as follows: - 1. If oe IS NULL, this is a special case, do a full table scan on - table tbl and search for row that satisfies subq_where. If such - row is found, return NULL, otherwise return false. - 2. Make an index lookup via key=oe, search for a row that satisfies - subq_where. If found, return true. - 3. If check_null==true, make another lookup via key=NULL, search for a - row that satisfies subq_where. If found, return NULL, otherwise - return false. - 4. If unique==true, there can be only one row with key=oe and only one row - with key=NULL, we use that fact to shorten the search process. - - TODO - The step #1 can be optimized further when the index has several key - parts. Consider a subquery: - - (oe1, oe2) IN (SELECT keypart1, keypart2 FROM tbl WHERE subq_where) - - and suppose we need to evaluate it for {oe1, oe2}=={const1, NULL}. - Current code will do a full table scan and obtain correct result. There - is a better option: instead of evaluating - - SELECT keypart1, keypart2 FROM tbl WHERE subq_where (1) - - and checking if it has produced any matching rows, evaluate - - SELECT keypart2 FROM tbl WHERE subq_where AND keypart1=const1 (2) - - If this query produces a row, the result is NULL (as we're evaluating - "(const1, NULL) IN { (const1, X), ... }", which has a value of UNKNOWN, - i.e. NULL). 
If the query produces no rows, the result is false. - - We currently evaluate (1) by doing a full table scan. (2) can be - evaluated by doing a "ref" scan on "keypart1=const1", which can be much - cheaper. We can use index statistics to quickly check whether "ref" scan - will be cheaper than full table scan. - - RETURN - 0 - 1 + by asking the iterator for the inner query for a single row, and then + immediately stopping. The iterator would usually do a simple ref lookup, + but could in theory be anything. */ -bool subselect_indexsubquery_engine::exec(THD *) { - DBUG_TRACE; - int error; - bool null_finding = false; - TABLE *const table = tab->table(); - uchar *key; - uint key_length; - key_part_map key_parts_map; - ulonglong tmp_hash; - const bool unique = tab->type() == JT_EQ_REF; - const bool check_null = tab->type() == JT_REF_OR_NULL; - - // 'tl' is NULL if this is a tmp table created by subselect_hash_sj_engine. - TABLE_LIST *const tl = tab->table_ref; - Item_in_subselect *const item_in = static_cast(item); - item_in->value = false; - - if (tl && tl->uses_materialization()) // A derived table with index - { - /* - Table cannot have lateral references (as it's the only table in this - query block) but it may have refs to outer queries. 
As execution of - subquery doesn't go through unit::execute() or JOIN::reset(), we have to - do manual clearing: - */ - item->unit->clear_correlated_query_blocks(); - if (!table->materialized) { - THD *const thd = table->in_use; - bool err = tl->create_materialized_table(thd); - if (!err) { - if (tl->is_table_function()) - err = tl->table_function->fill_result_table(); - else { - err = tl->materialize_derived(thd); - err |= tl->cleanup_derived(thd); - } - } - if (err) return true; /* purecov: inspected */ - } - } - - if (check_null) { - /* We need to check for NULL if there wasn't a matching value */ - *tab->ref().null_ref_key = 0; // Search first for not null - item_in->was_null = false; - } - - /* Copy the ref key and check for nulls... */ - bool require_scan, convert_error; - hash = 0; - copy_ref_key(&require_scan, &convert_error); - if (convert_error) return false; - - if (require_scan) { - const bool scan_result = scan_table(); - return scan_result; - } - - if (!table->file->inited && (error = table->file->ha_index_init( - tab->ref().key, !unique /* sorted */))) { - (void)report_handler_error(table, error); +bool subselect_indexsubquery_engine::exec(THD *thd) { + SELECT_LEX_UNIT *unit = item->unit; + bool found; + if (ExecuteExistsQuery(thd, unit, unit->root_iterator(), &found)) { return true; } - if (table->hash_field) { - /* - Create key of proper endianness, hash_field->ptr can't be use directly - as it will be overwritten during read. 
- */ - table->hash_field->store(hash, true); - memcpy(&tmp_hash, table->hash_field->ptr, sizeof(ulonglong)); - key = (uchar *)&tmp_hash; - key_length = sizeof(hash); - key_parts_map = 1; - } else { - key = tab->ref().key_buff; - key_length = tab->ref().key_length; - key_parts_map = make_prev_keypart_map(tab->ref().key_parts); - } - error = table->file->ha_index_read_map(table->record[0], key, key_parts_map, - HA_READ_KEY_EXACT); - if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) - error = report_handler_error(table, error); - else { - for (;;) { - error = 0; - if (table->has_row()) { - if ((!cond || cond->val_int()) && (!having || having->val_int())) { - item_in->value = true; - if (null_finding) { - /* - This is dead code; subqueries with check_null==true are always - transformed with IN-to-EXISTS and thus their artificial HAVING - rejects NULL values... - */ - DBUG_ASSERT(false); - item_in->was_null = true; - } - break; - } - if (unique) break; - error = - table->file->ha_index_next_same(table->record[0], key, key_length); - if (error && error != HA_ERR_END_OF_FILE) { - error = report_handler_error(table, error); - break; - } - } else { - if (!check_null || null_finding) - break; /* We don't need to check nulls */ - /* - Check if there exists a row with a null value in the index. We come - here only if ref_or_null, and ref_or_null is always on a single - column (first keypart of the index). 
So we have only one NULL bit to - turn on: - */ - *tab->ref().null_ref_key = 1; - null_finding = true; - if ((error = (safe_index_read(tab) == 1))) break; - } - } - } - item->unit->set_executed(); - return error != 0; + Item_in_subselect *item_in = down_cast(item); + item_in->value = found; + item_in->assigned(true); + return false; } uint subselect_iterator_engine::cols() const { @@ -3439,6 +3185,7 @@ bool subselect_hash_sj_engine::setup(THD *thd, List *tmp_columns) { } tab->ref().key_err = true; tab->ref().key_parts = tmp_key_parts; + tab->table_ref = tmp_table_ref; if (cond->fix_fields(thd, &cond)) return true; @@ -3447,12 +3194,57 @@ bool subselect_hash_sj_engine::setup(THD *thd, List *tmp_columns) { the subquery if not yet created. */ materialize_engine->prepare(thd); - /* Let our engine reuse this query plan for materialization. */ - materialize_engine->unit->change_query_result(thd, result, nullptr); - return false; } +void subselect_hash_sj_engine::create_iterators(THD *thd) { + if (materialize_engine->unit->root_iterator() == nullptr) { + m_iterator = NewIterator( + thd, "Not optimized, outer query is empty"); + return; + } + + // We're only ever reading one row from the iterator, and record[1] isn't + // properly set up at this point, so we're not using EQRefIterator. + // (As a microoptimization, we add a LIMIT 1 if there's a filter and the + // index is unique, so that any filter added doesn't try to read a second row + // if the condition fails -- there wouldn't be one anyway.) + // + // Also, note that we never need to worry about searching for NULLs + // (which would require the AlternativeIterator); subqueries with + // JT_REF_OR_NULL are always transformed with IN-to-EXISTS, and thus, + // their artificial HAVING rejects NULL values. 
+ DBUG_ASSERT(tab->type() != JT_REF_OR_NULL); + tab->iterator = + NewIterator>(thd, tab->table(), &tab->ref(), + /*use_order=*/false, tab, + /*examined_rows=*/nullptr); + + if (tab->type() == JT_EQ_REF && (cond != nullptr || having != nullptr)) { + tab->iterator = NewIterator( + thd, move(tab->iterator), /*limit=*/1, /*offset=*/0, + /*count_all_rows=*/false, /*skipped_rows=*/nullptr); + } + if (cond != nullptr) { + tab->iterator = NewIterator(thd, move(tab->iterator), cond); + } + if (having != nullptr) { + tab->iterator = + NewIterator(thd, move(tab->iterator), having); + } + + tab->table_ref->set_derived_unit(materialize_engine->unit); + unique_ptr_destroy_only iterator; + if (tab->table_ref->is_table_function()) { + iterator = NewIterator( + thd, tab->table_ref->table_function, tab->table(), move(tab->iterator)); + } else { + iterator = GetIteratorForDerivedTable(thd, tab); + } + + m_iterator = move(iterator); +} + subselect_hash_sj_engine::~subselect_hash_sj_engine() { /* Assure that cleanup has been called for this engine. */ DBUG_ASSERT(!tab); @@ -3474,6 +3266,7 @@ void subselect_hash_sj_engine::cleanup(THD *thd) { if (result != nullptr) result->cleanup(thd); /* Resets the temp table as well. */ DEBUG_SYNC(thd, "before_index_end_in_subselect"); + m_iterator.reset(); if (tab != nullptr) { TABLE *const table = tab->table(); if (table->file->inited) @@ -3510,15 +3303,11 @@ bool subselect_hash_sj_engine::exec(THD *thd) { DBUG_ASSERT( materialize_engine->single_select_lex()->master_unit()->is_optimized()); - bool error; - if (materialize_engine->unit->root_iterator() != nullptr) { - error = materialize_engine->unit->ExecuteIteratorQuery(thd); - } else { - JOIN *join = materialize_engine->single_select_lex()->join; - join->exec(); - error = join->error; - } - if (error || thd->is_fatal_error()) goto err; + // Init() triggers materialization. + // (It also triggers some unneeded setup of the RefIterator, but it is + // cheap.) 
+ bool error = m_iterator->Init(); + if (error || thd->is_fatal_error()) return true; /* TODO: @@ -3547,7 +3336,6 @@ bool subselect_hash_sj_engine::exec(THD *thd) { tmp_param = &(item_in->unit->outer_select()->join->tmp_table_param); if (tmp_param && tmp_param->copy_fields.empty()) tmp_param = nullptr; - err: thd->lex->set_current_select(save_select); if (error) return error; } // if (!is_materialized) @@ -3579,10 +3367,15 @@ bool subselect_hash_sj_engine::exec(THD *thd) { return false; } - if (subselect_indexsubquery_engine::exec(thd)) // Search with index + hash = 0; + bool found; + if (ExecuteExistsQuery(thd, item->unit, m_iterator.get(), &found)) { return true; + } + item_in->value = found; + item_in->assigned(true); - if (!item_in->value && // no exact match + if (!found && // no exact match mat_table_has_nulls != NEX_IRRELEVANT_OR_FALSE) { /* There is only one outer expression. It's not NULL. exec() above has set diff --git a/sql/item_subselect.h b/sql/item_subselect.h index daaa7481faec..a628b6750e14 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -38,6 +38,7 @@ #include "sql/enum_query_type.h" #include "sql/item.h" // Item_result_field #include "sql/parse_tree_node_base.h" +#include "sql/row_iterator.h" #include "sql/sql_const.h" #include "template_utils.h" @@ -112,6 +113,9 @@ class Item_subselect : public Item_result_field { /// EXPLAIN needs read-only access to the engine const subselect_engine *get_engine_for_explain() const { return engine; } + void create_iterators(THD *thd); + virtual RowIterator *root_iterator() const { return nullptr; } + protected: /* engine that perform execution of subselect (single select or union) */ subselect_engine *engine; @@ -197,7 +201,6 @@ class Item_subselect : public Item_result_field { void update_used_tables() override; void print(const THD *thd, String *str, enum_query_type query_type) const override; - virtual bool have_guarded_conds() { return false; } bool change_engine(subselect_engine *eng) { 
old_engine = engine; engine = eng; @@ -575,7 +578,6 @@ class Item_in_subselect : public Item_exists_subselect { void set_cond_guard_var(int i, bool v) { if (pushed_cond_guards) pushed_cond_guards[i] = v; } - bool have_guarded_conds() override { return pushed_cond_guards != nullptr; } Item_in_subselect(Item *left_expr, SELECT_LEX *select_lex); Item_in_subselect(const POS &pos, Item *left_expr, @@ -642,6 +644,8 @@ class Item_in_subselect : public Item_exists_subselect { */ bool finalize_materialization_transform(THD *thd, JOIN *join); + RowIterator *root_iterator() const override; + friend class Item_ref_null_helper; friend class Item_is_not_null_test; friend class Item_in_optimizer; @@ -695,12 +699,13 @@ class subselect_engine { res_type(STRING_RESULT), res_field_type(MYSQL_TYPE_VAR_STRING), maybe_null(false) {} - virtual ~subselect_engine() {} // to satisfy compiler + virtual ~subselect_engine() = default; /** Cleanup engine after complete query execution, free all resources. */ virtual void cleanup(THD *thd) = 0; + virtual void create_iterators(THD *) {} virtual bool prepare(THD *thd) = 0; virtual void fix_length_and_dec(Item_cache **row) = 0; /* @@ -790,8 +795,7 @@ class subselect_indexsubquery_engine : public subselect_engine { /// Table which is read, using one of eq_ref, ref, ref_or_null. QEP_TAB *tab; Item *cond; /* The WHERE condition of subselect */ - ulonglong hash; /* Hash value calculated by copy_ref_key, when needed. */ - private: + ulonglong hash; /* Hash value calculated by RefIterator, when needed. */ /* The "having" clause. This clause (further referred to as "artificial having") was inserted by subquery transformation code. 
It contains @@ -823,8 +827,6 @@ class subselect_indexsubquery_engine : public subselect_engine { table_map upper_select_const_tables() const override { return 0; } bool change_query_result(THD *thd, Item_subselect *si, Query_result_subquery *result) override; - bool scan_table(); - void copy_ref_key(bool *require_scan, bool *convert_error); }; /* @@ -871,6 +873,7 @@ class subselect_hash_sj_engine final : public subselect_indexsubquery_engine { subselect_iterator_engine::[prepare | cols]. */ subselect_iterator_engine *materialize_engine; + unique_ptr_destroy_only m_iterator; /* Temp table context of the outer select's JOIN. */ Temp_table_param *tmp_param; @@ -884,6 +887,7 @@ class subselect_hash_sj_engine final : public subselect_indexsubquery_engine { ~subselect_hash_sj_engine() override; bool setup(THD *thd, List *tmp_columns); + void create_iterators(THD *thd) override; void cleanup(THD *thd) override; bool prepare(THD *thd) override { return materialize_engine->prepare(thd); } bool exec(THD *thd) override; @@ -892,5 +896,6 @@ class subselect_hash_sj_engine final : public subselect_indexsubquery_engine { enum_engine_type engine_type() const override { return HASH_SJ_ENGINE; } const QEP_TAB *get_qep_tab() const { return tab; } + RowIterator *root_iterator() const { return m_iterator.get(); } }; #endif /* ITEM_SUBSELECT_INCLUDED */ diff --git a/sql/opt_explain.cc b/sql/opt_explain.cc index e509942fd7c2..59383441828e 100644 --- a/sql/opt_explain.cc +++ b/sql/opt_explain.cc @@ -2605,7 +2605,8 @@ void ForEachSubselect( callback(select_number, is_dependent, is_cacheable, subselect->unit->root_iterator()); } else { - callback(select_number, is_dependent, is_cacheable, nullptr); + callback(select_number, is_dependent, is_cacheable, + subselect->unit->item->root_iterator()); } } return false; diff --git a/sql/opt_explain.h b/sql/opt_explain.h index 5ea31a28910a..8f78e8bf8d93 100644 --- a/sql/opt_explain.h +++ b/sql/opt_explain.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2011, 
2018, Oracle and/or its affiliates. All rights reserved. +/* Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, @@ -202,9 +202,7 @@ class Sql_cmd_explain_other_thread final : public Sql_cmd { std::string PrintQueryPlan(int level, RowIterator *iterator); // For each subselect within the given item, call the given functor -// with its SELECT number, dependent/cacheable status and an iterator -// (or nullptr if none; this may happen if the query is not executable -// by the iterator executor). +// with its SELECT number, dependent/cacheable status and an iterator. void ForEachSubselect( Item *parent_item, const std::function ConnectJoins( table_map *conditions_depend_on_outer_tables); /// @endcond +unique_ptr_destroy_only GetIteratorForDerivedTable( + THD *thd, QEP_TAB *qep_tab) { + SELECT_LEX_UNIT *unit = qep_tab->table_ref->derived_unit(); + JOIN *subjoin = nullptr; + Temp_table_param *tmp_table_param; + int select_number; + + // If we have a single query block at the end of the QEP_TAB array, + // it may contain aggregation that have already set up fields and items + // to copy, and we need to pass those to MaterializeIterator, so reuse its + // tmp_table_param. If not, make a new object, so that we don't + // disturb the materialization going on inside our own query block. + if (unit->is_simple()) { + subjoin = unit->first_select()->join; + tmp_table_param = &unit->first_select()->join->tmp_table_param; + select_number = subjoin->select_lex->select_number; + } else if (unit->fake_select_lex != nullptr) { + // NOTE: subjoin here is never used, as ConvertItemsToCopy only uses it + // for ROLLUP, and fake_select_lex can't have ROLLUP. 
+ subjoin = unit->fake_select_lex->join; + tmp_table_param = &unit->fake_select_lex->join->tmp_table_param; + select_number = unit->fake_select_lex->select_number; + } else { + tmp_table_param = new (thd->mem_root) Temp_table_param; + select_number = unit->first_select()->select_number; + } + ConvertItemsToCopy(unit->get_field_list(), + qep_tab->table()->visible_field_ptr(), tmp_table_param, + subjoin); + bool copy_fields_and_items_in_materialize = true; + if (unit->is_simple()) { + // See if AggregateIterator already does this for us. + JOIN *join = unit->first_select()->join; + copy_fields_and_items_in_materialize = + !join->streaming_aggregation || + join->tmp_table_param.precomputed_group_by; + } + + MaterializeIterator *materialize = nullptr; + unique_ptr_destroy_only iterator; + + if (unit->unfinished_materialization()) { + // The unit is a UNION capable of materializing directly into our result + // table. This saves us from doing double materialization (first into + // a UNION result table, then from there into our own). + // + // We will already have set up a unique index on the table if + // required; see TABLE_LIST::setup_materialized_derived_tmp_table(). + iterator = NewIterator( + thd, unit->release_query_blocks_to_materialize(), qep_tab->table(), + move(qep_tab->iterator), qep_tab->table_ref->common_table_expr(), unit, + /*subjoin=*/nullptr, + /*ref_slice=*/-1, qep_tab->rematerialize, unit->select_limit_cnt); + materialize = down_cast(iterator->real_iterator()); + if (unit->offset_limit_cnt != 0) { + // LIMIT is handled inside MaterializeIterator, but OFFSET is not. + // SQL_CALC_FOUND_ROWS cannot occur in a derived table's definition. 
+ iterator = NewIterator( + thd, move(iterator), unit->select_limit_cnt, unit->offset_limit_cnt, + /*count_all_rows=*/false, + /*skipped_rows=*/nullptr); + } + } else if (qep_tab->table_ref->common_table_expr() == nullptr && + qep_tab->rematerialize && qep_tab->using_table_scan()) { + // We don't actually need the materialization for anything (we would + // just reading the rows straight out from the table, never to be used + // again), so we can just stream records directly over to the next + // iterator. This saves both CPU time and memory (for the temporary + // table). + // + // NOTE: Currently, qep_tab->rematerialize is true only for JSON_TABLE. + // We could extend this to other situations, such as the leftmost + // table of the join (assuming nested loop only). The test for CTEs is + // also conservative; if the CTEs is defined within this join and used + // only once, we could still stream without losing performance. + iterator = NewIterator( + thd, unit->release_root_iterator(), &subjoin->tmp_table_param, + qep_tab->table(), copy_fields_and_items_in_materialize); + } else { + iterator = NewIterator( + thd, unit->release_root_iterator(), tmp_table_param, qep_tab->table(), + move(qep_tab->iterator), qep_tab->table_ref->common_table_expr(), + select_number, unit, /*subjoin=*/nullptr, + /*ref_slice=*/-1, copy_fields_and_items_in_materialize, + qep_tab->rematerialize, tmp_table_param->end_write_records); + materialize = down_cast(iterator->real_iterator()); + } + + if (!qep_tab->rematerialize) { + if (qep_tab->invalidators != nullptr) { + for (const CacheInvalidatorIterator *invalidator : + *qep_tab->invalidators) { + materialize->AddInvalidator(invalidator); + } + } + } + + return iterator; +} + /** Get the RowIterator used for scanning the given table, with any required materialization operations done first. 
@@ -1892,101 +1992,7 @@ unique_ptr_destroy_only GetTableIterator(THD *thd, QEP_TAB *qep_tabs) { unique_ptr_destroy_only table_iterator; if (qep_tab->materialize_table == join_materialize_derived) { - SELECT_LEX_UNIT *unit = qep_tab->table_ref->derived_unit(); - JOIN *subjoin = nullptr; - Temp_table_param *tmp_table_param; - int select_number; - - // If we have a single query block at the end of the QEP_TAB array, - // it may contain aggregation that have already set up fields and items - // to copy, and we need to pass those to MaterializeIterator, so reuse its - // tmp_table_param. If not, make a new object, so that we don't - // disturb the materialization going on inside our own query block. - if (unit->is_simple()) { - subjoin = unit->first_select()->join; - tmp_table_param = &unit->first_select()->join->tmp_table_param; - select_number = subjoin->select_lex->select_number; - } else if (unit->fake_select_lex != nullptr) { - // NOTE: subjoin here is never used, as ConvertItemsToCopy only uses it - // for ROLLUP, and fake_select_lex can't have ROLLUP. - subjoin = unit->fake_select_lex->join; - tmp_table_param = &unit->fake_select_lex->join->tmp_table_param; - select_number = unit->fake_select_lex->select_number; - } else { - tmp_table_param = new (thd->mem_root) Temp_table_param; - select_number = unit->first_select()->select_number; - } - ConvertItemsToCopy(unit->get_field_list(), - qep_tab->table()->visible_field_ptr(), tmp_table_param, - subjoin); - bool copy_fields_and_items_in_materialize = true; - if (unit->is_simple()) { - // See if AggregateIterator already does this for us. - JOIN *join = unit->first_select()->join; - copy_fields_and_items_in_materialize = - !join->streaming_aggregation || - join->tmp_table_param.precomputed_group_by; - } - - MaterializeIterator *materialize = nullptr; - - if (unit->unfinished_materialization()) { - // The unit is a UNION capable of materializing directly into our result - // table. 
This saves us from doing double materialization (first into - // a UNION result table, then from there into our own). - // - // We will already have set up a unique index on the table if - // required; see TABLE_LIST::setup_materialized_derived_tmp_table(). - table_iterator = NewIterator( - thd, unit->release_query_blocks_to_materialize(), qep_tab->table(), - move(qep_tab->iterator), qep_tab->table_ref->common_table_expr(), - unit, /*subjoin=*/nullptr, - /*ref_slice=*/-1, qep_tab->rematerialize, unit->select_limit_cnt); - materialize = - down_cast(table_iterator->real_iterator()); - if (unit->offset_limit_cnt != 0) { - // LIMIT is handled inside MaterializeIterator, but OFFSET is not. - // SQL_CALC_FOUND_ROWS cannot occur in a derived table's definition. - table_iterator = NewIterator( - thd, move(table_iterator), unit->select_limit_cnt, - unit->offset_limit_cnt, /*count_all_rows=*/false, - /*skipped_rows=*/nullptr); - } - } else if (qep_tab->table_ref->common_table_expr() == nullptr && - qep_tab->rematerialize && qep_tab->using_table_scan()) { - // We don't actually need the materialization for anything (we would - // just reading the rows straight out from the table, never to be used - // again), so we can just stream records directly over to the next - // iterator. This saves both CPU time and memory (for the temporary - // table). - // - // NOTE: Currently, qep_tab->rematerialize is true only for JSON_TABLE. - // We could extend this to other situations, such as the leftmost - // table of the join (assuming nested loop only). The test for CTEs is - // also conservative; if the CTEs is defined within this join and used - // only once, we could still stream without losing performance. 
- table_iterator = NewIterator( - thd, unit->release_root_iterator(), &subjoin->tmp_table_param, - qep_tab->table(), copy_fields_and_items_in_materialize); - } else { - table_iterator = NewIterator( - thd, unit->release_root_iterator(), tmp_table_param, qep_tab->table(), - move(qep_tab->iterator), qep_tab->table_ref->common_table_expr(), - select_number, unit, /*subjoin=*/nullptr, - /*ref_slice=*/-1, copy_fields_and_items_in_materialize, - qep_tab->rematerialize, tmp_table_param->end_write_records); - materialize = - down_cast(table_iterator->real_iterator()); - } - - if (!qep_tab->rematerialize) { - if (qep_tab->invalidators != nullptr) { - for (const CacheInvalidatorIterator *iterator : - *qep_tab->invalidators) { - materialize->AddInvalidator(iterator); - } - } - } + table_iterator = GetIteratorForDerivedTable(thd, qep_tab); } else if (qep_tab->materialize_table == join_materialize_table_function) { table_iterator = NewIterator( thd, qep_tab->table_ref->table_function, qep_tab->table(), @@ -3489,6 +3495,32 @@ JOIN::attach_iterators_for_having_and_limit( return iterator; } +void JOIN::create_iterators_for_index_subquery() { + create_table_iterators(); + + QEP_TAB *first_qep_tab = &qep_tab[0]; + if (first_qep_tab->condition() != nullptr) { + first_qep_tab->iterator = NewIterator( + thd, move(first_qep_tab->iterator), first_qep_tab->condition()); + } + + TABLE_LIST *const tl = qep_tab->table_ref; + if (tl && tl->uses_materialization()) { + if (tl->is_table_function()) { + m_root_iterator = NewIterator( + thd, tl->table_function, first_qep_tab->table(), + move(first_qep_tab->iterator)); + } else { + m_root_iterator = GetIteratorForDerivedTable(thd, first_qep_tab); + } + } else { + m_root_iterator = move(first_qep_tab->iterator); + } + + m_root_iterator = + attach_iterators_for_having_and_limit(move(m_root_iterator)); +} + // Used only in the specific, odd case of a UNION between a non-iterator // and an iterator query block. 
static int ExecuteIteratorQuery(JOIN *join) { @@ -4981,16 +5013,6 @@ vector PushedJoinRefIterator::DebugString() const { table()->file->explain_extra()}; } -template -RefIterator::RefIterator(THD *thd, TABLE *table, TABLE_REF *ref, - bool use_order, QEP_TAB *qep_tab, - ha_rows *examined_rows) - : TableRowIterator(thd, table), - m_ref(ref), - m_use_order(use_order), - m_qep_tab(qep_tab), - m_examined_rows(examined_rows) {} - template bool RefIterator::Init() { m_first_record_since_init = true; diff --git a/sql/sql_executor.h b/sql/sql_executor.h index fd25f4dd6302..155c29cf6b66 100644 --- a/sql/sql_executor.h +++ b/sql/sql_executor.h @@ -866,6 +866,8 @@ bool bring_back_frame_row(THD *thd, Window *w, Temp_table_param *out_param, int64 rowno, Window_retrieve_cached_row_reason reason, int fno = 0); +unique_ptr_destroy_only GetIteratorForDerivedTable( + THD *thd, QEP_TAB *qep_tab); void ConvertItemsToCopy(List *items, Field **fields, Temp_table_param *param, JOIN *join); std::string RefToString(const TABLE_REF &ref, const KEY *key, diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 559a7a042dbe..032fbd880ec2 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -831,8 +831,8 @@ class SELECT_LEX_UNIT { unfinished materialize. This means that it will collect iterators for all the query blocks and prepare them for materializing into the given table, but not actually create a root iterator for this query expression; - the caller is responsible for calling release_tables_to_materialize() and - creating the iterator itself. + the caller is responsible for calling release_query_blocks_to_materialize() + and creating the iterator itself. Even if materialize_destination is non-nullptr, this function may choose to make a regular iterator. The caller is responsible for checking @@ -845,6 +845,13 @@ class SELECT_LEX_UNIT { */ bool optimize(THD *thd, TABLE *materialize_destination); + /** + Do everything that would be needed before running Init() on the root + iterator. 
In particular, clear out data from previous execution iterations, + if needed. + */ + bool ClearForExecution(THD *thd); + bool ExecuteIteratorQuery(THD *thd); bool execute(THD *thd); bool explain(THD *explain_thd, const THD *query_thd); diff --git a/sql/sql_optimizer.cc b/sql/sql_optimizer.cc index 1d2a9dcfd273..2d02799ab248 100644 --- a/sql/sql_optimizer.cc +++ b/sql/sql_optimizer.cc @@ -689,6 +689,7 @@ bool JOIN::optimize() { // See if this subquery can be evaluated with subselect_indexsubquery_engine if (const int ret = replace_index_subquery()) { + create_iterators_for_index_subquery(); set_plan_state(PLAN_READY); /* We leave optimize() because the rest of it is only about order/group diff --git a/sql/sql_optimizer.h b/sql/sql_optimizer.h index 348392c5a294..73cbf7869926 100644 --- a/sql/sql_optimizer.h +++ b/sql/sql_optimizer.h @@ -1025,6 +1025,8 @@ class JOIN { */ void create_iterators_for_zero_rows(); + void create_iterators_for_index_subquery(); + /** @{ Helpers for create_iterators. */ void create_table_iterators(); unique_ptr_destroy_only create_root_iterator_for_join(); diff --git a/sql/sql_union.cc b/sql/sql_union.cc index 0fba2aa1c1e1..7ddd852813de 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -853,6 +853,17 @@ bool SELECT_LEX_UNIT::optimize(THD *thd, TABLE *materialize_destination) { PrintQueryPlan(0, m_root_iterator.get()).c_str()); } set_optimized(); // All query blocks optimized, update the state + + if (item != nullptr) { + // If we're part of an IN subquery, the containing engine may want to + // add its own iterators on top, e.g. to materialize us. + // + // TODO(sgunders): See if we can do away with the engine concept + // altogether, now that there's much less execution logic in them. 
+ DBUG_ASSERT(!unfinished_materialization()); + item->create_iterators(thd); + } + return false; } @@ -1406,18 +1417,7 @@ class Recursive_executor { } }; -bool SELECT_LEX_UNIT::ExecuteIteratorQuery(THD *thd) { - THD_STAGE_INFO(thd, stage_executing); - DEBUG_SYNC(thd, "before_join_exec"); - - Opt_trace_context *const trace = &thd->opt_trace; - Opt_trace_object trace_wrapper(trace); - Opt_trace_object trace_exec(trace, "join_execution"); - if (is_simple()) { - trace_exec.add_select_number(first_select()->select_number); - } - Opt_trace_array trace_steps(trace, "steps"); - +bool SELECT_LEX_UNIT::ClearForExecution(THD *thd) { if (is_executed()) { if (clear_correlated_query_blocks()) return true; @@ -1449,6 +1449,24 @@ bool SELECT_LEX_UNIT::ExecuteIteratorQuery(THD *thd) { } } } + return false; +} + +bool SELECT_LEX_UNIT::ExecuteIteratorQuery(THD *thd) { + THD_STAGE_INFO(thd, stage_executing); + DEBUG_SYNC(thd, "before_join_exec"); + + Opt_trace_context *const trace = &thd->opt_trace; + Opt_trace_object trace_wrapper(trace); + Opt_trace_object trace_exec(trace, "join_execution"); + if (is_simple()) { + trace_exec.add_select_number(first_select()->select_number); + } + Opt_trace_array trace_steps(trace, "steps"); + + if (ClearForExecution(thd)) { + return true; + } List *fields = get_field_list(); Query_result *query_result = this->query_result(); @@ -1730,6 +1748,12 @@ bool SELECT_LEX_UNIT::cleanup(THD *thd, bool full) { error |= fake_select_lex->cleanup(thd, full); } + // subselect_hash_sj_engine may hold iterators that need to be cleaned up + // before the MEM_ROOT goes away. + if (item != nullptr) { + item->cleanup(); + } + // fake_select_lex's table depends on Temp_table_param inside union_result if (full && union_result) { union_result->cleanup(thd);