Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
mysql-server/sql/handler.cc
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
8605 lines (7367 sloc)
294 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Copyright (c) 2000, 2020, Oracle and/or its affiliates. | |
This program is free software; you can redistribute it and/or modify | |
it under the terms of the GNU General Public License, version 2.0, | |
as published by the Free Software Foundation. | |
This program is also distributed with certain software (including | |
but not limited to OpenSSL) that is licensed under separate terms, | |
as designated in a particular file or component or in included license | |
documentation. The authors of MySQL hereby grant you an additional | |
permission to link the program and your derivative works with the | |
separately licensed software that they have included with MySQL. | |
This program is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
GNU General Public License, version 2.0, for more details. | |
You should have received a copy of the GNU General Public License | |
along with this program; if not, write to the Free Software | |
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ | |
/** @file sql/handler.cc | |
@brief | |
Implements functions in the handler interface that are shared between all | |
storage engines. | |
*/ | |
#include "sql/handler.h" | |
#include <ctype.h> | |
#include <errno.h> | |
#include <limits.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <algorithm> | |
#include <atomic> | |
#include <cmath> | |
#include <list> | |
#include <random> // std::uniform_real_distribution | |
#include <string> | |
#include <vector> | |
#include "keycache.h" | |
#include "libbinlogevents/include/binlog_event.h" | |
#include "m_ctype.h" | |
#include "m_string.h" | |
#include "my_bit.h" // my_count_bits | |
#include "my_bitmap.h" // MY_BITMAP | |
#include "my_check_opt.h" | |
#include "my_dbug.h" | |
#include "my_loglevel.h" | |
#include "my_macros.h" | |
#include "my_pointer_arithmetic.h" | |
#include "my_psi_config.h" | |
#include "my_sqlcommand.h" | |
#include "my_sys.h" // MEM_DEFINED_IF_ADDRESSABLE() | |
#include "myisam.h" // TT_FOR_UPGRADE | |
#include "mysql/components/services/log_builtins.h" | |
#include "mysql/components/services/log_shared.h" | |
#include "mysql/plugin.h" | |
#include "mysql/psi/mysql_file.h" | |
#include "mysql/psi/mysql_mutex.h" | |
#include "mysql/psi/mysql_table.h" | |
#include "mysql/psi/mysql_transaction.h" | |
#include "mysql/psi/psi_base.h" | |
#include "mysql/psi/psi_table.h" | |
#include "mysql/service_mysql_alloc.h" | |
#include "mysql_com.h" | |
#include "mysql_version.h" // MYSQL_VERSION_ID | |
#include "mysqld_error.h" | |
#include "prealloced_array.h" | |
#include "sql/auth/auth_common.h" // check_readonly() and SUPER_ACL | |
#include "sql/binlog.h" // mysql_bin_log | |
#include "sql/check_stack.h" | |
#include "sql/clone_handler.h" | |
#include "sql/current_thd.h" | |
#include "sql/dd/cache/dictionary_client.h" // dd::cache::Dictionary_client | |
#include "sql/dd/dd.h" // dd::get_dictionary | |
#include "sql/dd/dictionary.h" // dd:acquire_shared_table_mdl | |
#include "sql/dd/types/table.h" // dd::Table | |
#include "sql/dd_table_share.h" // open_table_def | |
#include "sql/debug_sync.h" // DEBUG_SYNC | |
#include "sql/derror.h" // ER_DEFAULT | |
#include "sql/error_handler.h" // Internal_error_handler | |
#include "sql/field.h" | |
#include "sql/item.h" | |
#include "sql/lock.h" // MYSQL_LOCK | |
#include "sql/log.h" | |
#include "sql/log_event.h" // Write_rows_log_event | |
#include "sql/mdl.h" | |
#include "sql/mysqld.h" // global_system_variables heap_hton .. | |
#include "sql/opt_costconstantcache.h" // reload_optimizer_cost_constants | |
#include "sql/opt_costmodel.h" | |
#include "sql/opt_hints.h" | |
#include "sql/protocol.h" | |
#include "sql/psi_memory_key.h" | |
#include "sql/query_options.h" | |
#include "sql/record_buffer.h" // Record_buffer | |
#include "sql/rpl_filter.h" | |
#include "sql/rpl_gtid.h" | |
#include "sql/rpl_handler.h" // RUN_HOOK | |
#include "sql/rpl_rli.h" // is_atomic_ddl_commit_on_slave | |
#include "sql/rpl_slave_commit_order_manager.h" // Commit_order_manager | |
#include "sql/rpl_write_set_handler.h" // add_pke | |
#include "sql/sdi_utils.h" // import_serialized_meta_data | |
#include "sql/session_tracker.h" | |
#include "sql/sql_base.h" // free_io_cache | |
#include "sql/sql_bitmap.h" | |
#include "sql/sql_class.h" | |
#include "sql/sql_error.h" | |
#include "sql/sql_lex.h" | |
#include "sql/sql_parse.h" // check_stack_overrun | |
#include "sql/sql_plugin.h" // plugin_foreach | |
#include "sql/sql_select.h" // actual_key_parts | |
#include "sql/sql_table.h" // build_table_filename | |
#include "sql/strfunc.h" // strnncmp_nopads | |
#include "sql/system_variables.h" | |
#include "sql/table.h" | |
#include "sql/tc_log.h" | |
#include "sql/thr_malloc.h" | |
#include "sql/transaction.h" // trans_commit_implicit | |
#include "sql/transaction_info.h" | |
#include "sql/xa.h" | |
#include "sql_string.h" | |
#include "sql_tmp_table.h" // free_tmp_table | |
#include "template_utils.h" | |
#include "uniques.h" // Unique_on_insert | |
#include "varlen_sort.h" | |
/** | |
@def MYSQL_TABLE_IO_WAIT | |
Instrumentation helper for table io_waits. | |
Note that this helper is intended to be used from | |
within the handler class only, as it uses members | |
from @c handler | |
Performance schema events are instrumented as follows: | |
- in non batch mode, one event is generated per call | |
- in batch mode, the number of rows affected is saved | |
in @c m_psi_numrows, so that @c end_psi_batch_mode() | |
generates a single event for the batch. | |
@param OP the table operation to be performed | |
@param INDEX the table index used if any, or MAX_KEY. | |
@param RESULT the result of the table operation performed | |
@param PAYLOAD instrumented code to execute | |
@sa handler::end_psi_batch_mode. | |
*/ | |
#ifdef HAVE_PSI_TABLE_INTERFACE | |
#define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) \ | |
{ \ | |
if (m_psi != NULL) { \ | |
switch (m_psi_batch_mode) { \ | |
case PSI_BATCH_MODE_NONE: { \ | |
PSI_table_locker *sub_locker = NULL; \ | |
PSI_table_locker_state reentrant_safe_state; \ | |
sub_locker = PSI_TABLE_CALL(start_table_io_wait)( \ | |
&reentrant_safe_state, m_psi, OP, INDEX, __FILE__, __LINE__); \ | |
PAYLOAD \ | |
if (sub_locker != NULL) PSI_TABLE_CALL(end_table_io_wait) \ | |
(sub_locker, 1); \ | |
break; \ | |
} \ | |
case PSI_BATCH_MODE_STARTING: { \ | |
m_psi_locker = PSI_TABLE_CALL(start_table_io_wait)( \ | |
&m_psi_locker_state, m_psi, OP, INDEX, __FILE__, __LINE__); \ | |
PAYLOAD \ | |
if (!RESULT) m_psi_numrows++; \ | |
m_psi_batch_mode = PSI_BATCH_MODE_STARTED; \ | |
break; \ | |
} \ | |
case PSI_BATCH_MODE_STARTED: \ | |
default: { \ | |
DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_STARTED); \ | |
PAYLOAD \ | |
if (!RESULT) m_psi_numrows++; \ | |
break; \ | |
} \ | |
} \ | |
} else { \ | |
PAYLOAD \ | |
} \ | |
} | |
#else | |
#define MYSQL_TABLE_IO_WAIT(OP, INDEX, RESULT, PAYLOAD) PAYLOAD | |
#endif | |
/** | |
@def MYSQL_TABLE_LOCK_WAIT | |
Instrumentation helper for table lock_waits.
@param OP the table operation to be performed | |
@param FLAGS per table operation flags. | |
@param PAYLOAD the code to instrument. | |
@sa MYSQL_END_TABLE_WAIT. | |
*/ | |
#ifdef HAVE_PSI_TABLE_INTERFACE | |
#define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) \ | |
{ \ | |
if (m_psi != NULL) { \ | |
PSI_table_locker *locker; \ | |
PSI_table_locker_state state; \ | |
locker = PSI_TABLE_CALL(start_table_lock_wait)(&state, m_psi, OP, FLAGS, \ | |
__FILE__, __LINE__); \ | |
PAYLOAD \ | |
if (locker != NULL) PSI_TABLE_CALL(end_table_lock_wait)(locker); \ | |
} else { \ | |
PAYLOAD \ | |
} \ | |
} | |
#else | |
#define MYSQL_TABLE_LOCK_WAIT(OP, FLAGS, PAYLOAD) PAYLOAD | |
#endif | |
using std::list; | |
using std::log2; | |
using std::max; | |
using std::min; | |
/** | |
While we have legacy_db_type, we have this array to | |
check for dups and to find handlerton from legacy_db_type. | |
Remove when legacy_db_type is finally gone | |
*/ | |
static Prealloced_array<st_plugin_int *, PREALLOC_NUM_HA> se_plugin_array( | |
PSI_NOT_INSTRUMENTED); | |
/** | |
Array allowing to check if handlerton is builtin without | |
acquiring LOCK_plugin. | |
*/ | |
static Prealloced_array<bool, PREALLOC_NUM_HA> builtin_htons( | |
PSI_NOT_INSTRUMENTED); | |
// Map a handlerton slot number back to the plugin that registered it.
st_plugin_int *hton2plugin(uint slot) {
  return se_plugin_array[slot];
}
// Number of entries (used or freed) in the SE plugin slot array.
size_t num_hton2plugins() {
  return se_plugin_array.size();
}
// Register `plugin` in the given slot and mark the slot as builtin.
// Returns the stored plugin pointer, or nullptr when the array could not
// grow to hold the slot.
st_plugin_int *insert_hton2plugin(uint slot, st_plugin_int *plugin) {
  const bool assign_failed = se_plugin_array.assign_at(slot, plugin);
  if (assign_failed) return nullptr;
  builtin_htons.assign_at(slot, true);
  return se_plugin_array[slot];
}
// Clear the given slot (freeing it for reuse by a later install) and
// return the plugin that previously occupied it.
st_plugin_int *remove_hton2plugin(uint slot) {
  st_plugin_int *old_plugin = se_plugin_array[slot];
  se_plugin_array[slot] = nullptr;
  builtin_htons.assign_at(slot, false);
  return old_plugin;
}
const char *ha_resolve_storage_engine_name(const handlerton *db_type) { | |
return db_type == nullptr ? "UNKNOWN" : hton2plugin(db_type->slot)->name.str; | |
} | |
static handlerton *installed_htons[128]; | |
/* number of storage engines (from installed_htons[]) that support 2pc */ | |
ulong total_ha_2pc = 0; | |
/* size of savepoint storage area (see ha_init) */ | |
ulong savepoint_alloc_size = 0; | |
namespace {
// Canonical/legacy name pair for storage engines that were renamed over time.
struct Storage_engine_identifier {
  const LEX_CSTRING canonical;
  const LEX_CSTRING legacy;
};
// Known renames; used to resolve legacy aliases in ha_resolve_by_name() and
// to disable both spellings in set_externally_disabled_storage_engine_names().
const Storage_engine_identifier se_names[] = {
    {{STRING_WITH_LEN("INNODB")}, {STRING_WITH_LEN("INNOBASE")}},
    {{STRING_WITH_LEN("NDBCLUSTER")}, {STRING_WITH_LEN("NDB")}},
    {{STRING_WITH_LEN("MEMORY")}, {STRING_WITH_LEN("HEAP")}},
    {{STRING_WITH_LEN("MRG_MYISAM")}, {STRING_WITH_LEN("MERGE")}}};
const auto se_names_end = std::end(se_names);
// Names collected from the --disabled-storage-engines server option
// (both canonical and legacy forms are stored for matched renames).
std::vector<std::string> disabled_se_names;
}  // namespace
const char *ha_row_type[] = {"", | |
"FIXED", | |
"DYNAMIC", | |
"COMPRESSED", | |
"REDUNDANT", | |
"COMPACT", | |
/* Reserved to be "PAGE" in future versions */ "?", | |
"?", | |
"?", | |
"?"}; | |
const char *tx_isolation_names[] = {"READ-UNCOMMITTED", "READ-COMMITTED", | |
"REPEATABLE-READ", "SERIALIZABLE", NullS}; | |
TYPELIB tx_isolation_typelib = {array_elements(tx_isolation_names) - 1, "", | |
tx_isolation_names, nullptr}; | |
// Called for each SE to check if given db.table_name is a system table. | |
static bool check_engine_system_table_handlerton(THD *unused, plugin_ref plugin, | |
void *arg); | |
static int ha_discover(THD *thd, const char *db, const char *name, | |
uchar **frmblob, size_t *frmlen); | |
/** | |
Structure used by SE during check for system table. | |
This structure is passed to each SE handlerton and the status (OUT param) | |
is collected. | |
*/ | |
struct st_sys_tbl_chk_params {
  const char *db;                 // IN param
  const char *table_name;         // IN param
  bool is_sql_layer_system_table; // IN param
  legacy_db_type db_type;         // IN param
  // Aggregated verdict, upgraded by each SE that is asked (OUT param).
  enum enum_sys_tbl_chk_status {
    // db.table_name is not a supported system table.
    NOT_KNOWN_SYSTEM_TABLE,
    /*
      db.table_name is a system table,
      but may not be supported by SE.
    */
    KNOWN_SYSTEM_TABLE,
    /*
      db.table_name is a system table,
      and is supported by SE.
    */
    SUPPORTED_SYSTEM_TABLE
  } status;  // OUT param
};
// Default (non-temp) storage engine plugin for this session; the session
// override, when set, wins over the global default.
static plugin_ref ha_default_plugin(THD *thd) {
  plugin_ref session_plugin = thd->variables.table_plugin;
  if (session_plugin != nullptr) return session_plugin;
  return my_plugin_lock(thd, &global_system_variables.table_plugin);
}
/** @brief | |
Return the default storage engine handlerton used for non-temp tables | |
for thread | |
SYNOPSIS | |
ha_default_handlerton(thd) | |
thd current thread | |
RETURN | |
pointer to handlerton | |
*/ | |
/**
  Return the default storage engine handlerton used for non-temp tables
  for this thread.

  @param thd current thread
  @return pointer to handlerton (never nullptr)
*/
handlerton *ha_default_handlerton(THD *thd) {
  plugin_ref default_plugin = ha_default_plugin(thd);
  DBUG_ASSERT(default_plugin);
  handlerton *default_hton = plugin_data<handlerton *>(default_plugin);
  DBUG_ASSERT(default_hton);
  return default_hton;
}
// Default temporary-table storage engine plugin for this session; the
// session override, when set, wins over the global default.
static plugin_ref ha_default_temp_plugin(THD *thd) {
  plugin_ref session_plugin = thd->variables.temp_table_plugin;
  if (session_plugin != nullptr) return session_plugin;
  return my_plugin_lock(thd, &global_system_variables.temp_table_plugin);
}
/** @brief | |
Return the default storage engine handlerton used for explicitly | |
created temp tables for a thread | |
SYNOPSIS | |
ha_default_temp_handlerton(thd) | |
thd current thread | |
RETURN | |
pointer to handlerton | |
*/ | |
/**
  Return the default storage engine handlerton used for explicitly
  created temp tables for this thread.

  @param thd current thread
  @return pointer to handlerton (never nullptr)
*/
handlerton *ha_default_temp_handlerton(THD *thd) {
  plugin_ref default_plugin = ha_default_temp_plugin(thd);
  DBUG_ASSERT(default_plugin);
  handlerton *default_hton = plugin_data<handlerton *>(default_plugin);
  DBUG_ASSERT(default_hton);
  return default_hton;
}
/** | |
Resolve handlerton plugin by name, without checking for "DEFAULT" or | |
HTON_NOT_USER_SELECTABLE. | |
@param thd Thread context. | |
@param name Plugin name. | |
@return plugin or NULL if not found. | |
*/ | |
// Plain plugin lookup by name: no "DEFAULT" aliasing and no
// HTON_NOT_USER_SELECTABLE filtering (see ha_resolve_by_name() for those).
plugin_ref ha_resolve_by_name_raw(THD *thd, const LEX_CSTRING &name) {
  return plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN);
}
// Charset used for all storage engine name comparisons in this file.
static const CHARSET_INFO &hton_charset() {
  return *system_charset_info;
}
/** | |
Return the storage engine handlerton for the supplied name. | |
@param thd Current thread. May be nullptr, (e.g. during initialize). | |
@param name Name of storage engine. | |
@param is_temp_table true if table is a temporary table. | |
@return Pointer to storage engine plugin handle. | |
*/ | |
plugin_ref ha_resolve_by_name(THD *thd, const LEX_CSTRING *name,
                              bool is_temp_table) {
  if (thd && 0 == strnncmp_nopads(hton_charset(), *name,
                                  {STRING_WITH_LEN("DEFAULT")})) {
    /*
      Bug fix: the two branches were swapped, so "DEFAULT" for a temporary
      table resolved to the non-temporary default engine and vice versa.
      A temp table must get the session's default temp-table engine.
    */
    return is_temp_table ? ha_default_temp_plugin(thd) : ha_default_plugin(thd);
  }
  // Note that thd CAN be nullptr here - it is not actually needed by
  // ha_resolve_by_name_raw().
  plugin_ref plugin = ha_resolve_by_name_raw(thd, *name);
  if (plugin == nullptr) {
    // If we fail to resolve the name passed in, we try to see if it is a
    // historical alias (e.g. "INNOBASE" for "INNODB").
    auto match = std::find_if(
        std::begin(se_names), se_names_end,
        [&](const Storage_engine_identifier &sei) {
          return (0 == strnncmp_nopads(hton_charset(), *name, sei.legacy));
        });
    if (match != se_names_end) {
      // if it is, we resolve using the new name
      plugin = ha_resolve_by_name_raw(thd, match->canonical);
    }
  }
  if (plugin != nullptr) {
    handlerton *hton = plugin_data<handlerton *>(plugin);
    // Engines flagged HTON_NOT_USER_SELECTABLE must not be handed out here.
    if (hton && !(hton->flags & HTON_NOT_USER_SELECTABLE)) return plugin;
    /*
      unlocking plugin immediately after locking is relatively low cost.
    */
    plugin_unlock(thd, plugin);
  }
  return nullptr;
}
/** | |
Read a comma-separated list of storage engine names. Look up each in the | |
known list of canonical and legacy names. In case of a match; add both the | |
canonical and the legacy name to disabled_se_names, which is a static vector | |
of disabled storage engine names. | |
If there is no match, the unmodified name is added to the vector. | |
*/ | |
void set_externally_disabled_storage_engine_names(const char *disabled_list) {
  DBUG_ASSERT(disabled_list != nullptr);
  // Split the option value on commas; each piece is one engine name.
  myu::Split(
      disabled_list, disabled_list + strlen(disabled_list), myu::IsComma,
      [](const char *f, const char *l) {
        // Trim surrounding whitespace; skip empty list entries.
        auto tr = myu::FindTrimmedRange(f, l, myu::IsSpace);
        if (tr.first == tr.second) return;
        const LEX_CSTRING dse{tr.first,
                              static_cast<size_t>(tr.second - tr.first)};
        // Look the name up among known canonical/legacy rename pairs.
        auto match = std::find_if(
            std::begin(se_names), se_names_end,
            [&](const Storage_engine_identifier &seid) {
              return (
                  (0 == strnncmp_nopads(hton_charset(), dse, seid.canonical)) ||
                  (0 == strnncmp_nopads(hton_charset(), dse, seid.legacy)));
            });
        if (match == se_names_end) {
          // Unknown name: store it verbatim.
          disabled_se_names.emplace_back(dse.str, dse.length);
          return;
        }
        // Known rename: disable the engine under both of its names.
        disabled_se_names.emplace_back(match->canonical.str,
                                       match->canonical.length);
        disabled_se_names.emplace_back(match->legacy.str, match->legacy.length);
      });
}
static bool is_storage_engine_name_externally_disabled(const char *name) { | |
const LEX_CSTRING n{name, strlen(name)}; | |
return std::any_of( | |
disabled_se_names.begin(), disabled_se_names.end(), | |
[&](const std::string &dse) { | |
return (0 == strnncmp_nopads(hton_charset(), n, | |
{dse.c_str(), dse.length()})); | |
}); | |
} | |
/** | |
Returns true if the storage engine of the handlerton argument has | |
been listed in the disabled_storage_engines system variable. @note | |
that the SE may still be internally enabled, that is | |
HaIsInternallyEnabled may return true. | |
*/ | |
bool ha_is_externally_disabled(const handlerton &htnr) { | |
const char *se_name = ha_resolve_storage_engine_name(&htnr); | |
DBUG_ASSERT(se_name != nullptr); | |
return is_storage_engine_name_externally_disabled(se_name); | |
} | |
// Check if storage engine is disabled for table/tablespace creation. | |
// Pointer-based convenience wrapper used when checking whether an engine
// may be used for table/tablespace creation.
bool ha_is_storage_engine_disabled(handlerton *se_handle) {
  DBUG_ASSERT(se_handle != nullptr);
  return ha_is_externally_disabled(*se_handle);
}
// Pin the plugin implementing `hton` against unload and return a reference
// to it; returns nullptr when hton is nullptr.
plugin_ref ha_lock_engine(THD *thd, const handlerton *hton) {
  if (hton) {
    st_plugin_int **plugin = &se_plugin_array[hton->slot];
#ifdef DBUG_OFF
    /*
      Take a shortcut for builtin engines -- return pointer to plugin
      without acquiring LOCK_plugin mutex. This is safe since such
      plugins are not deleted until shutdown and we don't do reference
      counting in non-debug builds for them.
      Since we have reference to handlerton on our hands, this method
      can't be called concurrently to non-builtin handlerton initialization/
      deinitialization. So it is safe to access builtin_htons[] without
      additional locking.
    */
    if (builtin_htons[hton->slot]) return *plugin;
    return my_plugin_lock(thd, plugin);
#else
    /*
      We can't take shortcut in debug builds.
      At least assert that builtin_htons[slot] is set correctly.
    */
    DBUG_ASSERT(builtin_htons[hton->slot] == (plugin[0]->plugin_dl == nullptr));
    return my_plugin_lock(thd, &plugin);
#endif
  }
  return nullptr;
}
// Resolve a legacy numeric engine type code to its handlerton.
// Returns nullptr when the code is unknown or no engine is installed for it;
// DB_TYPE_DEFAULT resolves to the session default engine.
handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type) {
  plugin_ref plugin;
  switch (db_type) {
    case DB_TYPE_DEFAULT:
      return ha_default_handlerton(thd);
    default:
      // Codes strictly inside (DB_TYPE_UNKNOWN, DB_TYPE_DEFAULT) index
      // installed_htons[]; lock the engine installed under this code.
      if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
          (plugin = ha_lock_engine(thd, installed_htons[db_type])))
        return plugin_data<handlerton *>(plugin);
      /* fall through */
    case DB_TYPE_UNKNOWN:
      return nullptr;
  }
}
/** | |
Use another database handler if the requested one is not compiled in.
*/ | |
handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
                         bool no_substitute, bool report_error) {
  DBUG_TRACE;
  handlerton *hton = ha_resolve_by_legacy_type(thd, database_type);
  // An installed, enabled engine is usable as-is.
  if (ha_storage_engine_is_enabled(hton)) return hton;
  if (no_substitute) {
    // Caller insists on this exact engine: fail, optionally with an error.
    if (report_error) {
      const char *engine_name = ha_resolve_storage_engine_name(hton);
      my_error(ER_FEATURE_DISABLED, MYF(0), engine_name, engine_name);
    }
    return nullptr;
  }
  (void)RUN_HOOK(transaction, after_rollback, (thd, false));
  switch (database_type) {
    case DB_TYPE_MRG_ISAM:
      // Historical substitution: old MRG_ISAM tables map to MRG_MYISAM.
      return ha_resolve_by_legacy_type(thd, DB_TYPE_MRG_MYISAM);
    default:
      break;
  }
  // Otherwise substitute the session's default storage engine.
  return ha_default_handlerton(thd);
} /* ha_checktype */
/** | |
Create handler object for the table in the storage engine. | |
@param share TABLE_SHARE for the table, can be NULL if caller | |
didn't perform full-blown open of table definition. | |
@param partitioned Indicates whether table is partitioned. | |
@param alloc Memory root to be used for allocating handler object. | |
@param db_type Table's storage engine. | |
@note This function will try to use default storage engine if one which | |
was specified through db_type parameter is not available. | |
*/ | |
handler *get_new_handler(TABLE_SHARE *share, bool partitioned, MEM_ROOT *alloc,
                         handlerton *db_type) {
  DBUG_TRACE;
  DBUG_PRINT("enter", ("alloc: %p", alloc));
  const bool engine_usable = db_type != nullptr &&
                             db_type->state == SHOW_OPTION_YES &&
                             db_type->create != nullptr;
  if (engine_usable) {
    handler *new_handler = db_type->create(db_type, share, partitioned, alloc);
    if (new_handler != nullptr) new_handler->init();
    return new_handler;
  }
  /*
    Try the default table type.
    Here the call to current_thd() is ok as we call this function a lot of
    times but we enter this branch very seldom.
  */
  return get_new_handler(share, partitioned, alloc,
                         ha_default_handlerton(current_thd));
}
// Table of handler error message strings, indexed by (errno - HA_ERR_FIRST).
// Allocated in ha_init_errors(), freed in ha_end().
static const char **handler_errmsgs;
// Callback for my_error_register(): translate a handler error number into
// its registered message string.
static const char *get_handler_errmsg(int nr) {
  return handler_errmsgs[nr - HA_ERR_FIRST];
}
/** | |
Register handler error messages for use with my_error(). | |
@retval | |
0 OK | |
@retval | |
!=0 Error | |
*/ | |
int ha_init_errors(void) { | |
#define SETMSG(nr, msg) handler_errmsgs[(nr)-HA_ERR_FIRST] = (msg) | |
/* Allocate a pointer array for the error message strings. */ | |
/* Zerofill it to avoid uninitialized gaps. */ | |
if (!(handler_errmsgs = (const char **)my_malloc( | |
key_memory_handler_errmsgs, HA_ERR_ERRORS * sizeof(char *), | |
MYF(MY_WME | MY_ZEROFILL)))) | |
return 1; | |
/* Set the dedicated error messages. */ | |
SETMSG(HA_ERR_KEY_NOT_FOUND, ER_DEFAULT(ER_KEY_NOT_FOUND)); | |
SETMSG(HA_ERR_FOUND_DUPP_KEY, ER_DEFAULT(ER_DUP_KEY)); | |
SETMSG(HA_ERR_RECORD_CHANGED, "Update wich is recoverable"); | |
SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function"); | |
SETMSG(HA_ERR_CRASHED, ER_DEFAULT(ER_NOT_KEYFILE)); | |
SETMSG(HA_ERR_WRONG_IN_RECORD, ER_DEFAULT(ER_CRASHED_ON_USAGE)); | |
SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory"); | |
SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'"); | |
SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported"); | |
SETMSG(HA_ERR_OLD_FILE, ER_DEFAULT(ER_OLD_KEYFILE)); | |
SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update"); | |
SETMSG(HA_ERR_RECORD_DELETED, "Intern record deleted"); | |
SETMSG(HA_ERR_RECORD_FILE_FULL, ER_DEFAULT(ER_RECORD_FILE_FULL)); | |
SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'"); | |
SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last"); | |
SETMSG(HA_ERR_UNSUPPORTED, ER_DEFAULT(ER_ILLEGAL_HA)); | |
SETMSG(HA_ERR_TOO_BIG_ROW, "Too big row"); | |
SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option"); | |
SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER_DEFAULT(ER_DUP_UNIQUE)); | |
SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset"); | |
SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER_DEFAULT(ER_WRONG_MRG_TABLE)); | |
SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER_DEFAULT(ER_CRASHED_ON_REPAIR)); | |
SETMSG(HA_ERR_CRASHED_ON_USAGE, ER_DEFAULT(ER_CRASHED_ON_USAGE)); | |
SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER_DEFAULT(ER_LOCK_WAIT_TIMEOUT)); | |
SETMSG(HA_ERR_LOCK_TABLE_FULL, ER_DEFAULT(ER_LOCK_TABLE_FULL)); | |
SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER_DEFAULT(ER_READ_ONLY_TRANSACTION)); | |
SETMSG(HA_ERR_LOCK_DEADLOCK, ER_DEFAULT(ER_LOCK_DEADLOCK)); | |
SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER_DEFAULT(ER_CANNOT_ADD_FOREIGN)); | |
SETMSG(HA_ERR_NO_REFERENCED_ROW, ER_DEFAULT(ER_NO_REFERENCED_ROW_2)); | |
SETMSG(HA_ERR_ROW_IS_REFERENCED, ER_DEFAULT(ER_ROW_IS_REFERENCED_2)); | |
SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name"); | |
SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size"); | |
SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'"); | |
SETMSG(HA_ERR_TABLE_EXIST, ER_DEFAULT(ER_TABLE_EXISTS_ERROR)); | |
SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine"); | |
SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER_DEFAULT(ER_TABLE_DEF_CHANGED)); | |
SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, | |
"FK constraint would lead to duplicate key"); | |
SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER_DEFAULT(ER_TABLE_NEEDS_UPGRADE)); | |
SETMSG(HA_ERR_TABLE_READONLY, ER_DEFAULT(ER_OPEN_AS_READONLY)); | |
SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER_DEFAULT(ER_AUTOINC_READ_FAILED)); | |
SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE)); | |
SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, | |
ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS)); | |
SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG)); | |
SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT)); | |
SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID"); | |
SETMSG(HA_ERR_TABLE_IN_FK_CHECK, ER_DEFAULT(ER_TABLE_IN_FK_CHECK)); | |
SETMSG(HA_ERR_TABLESPACE_EXISTS, "Tablespace already exists"); | |
SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING)); | |
SETMSG(HA_ERR_FTS_EXCEED_RESULT_CACHE_LIMIT, | |
"FTS query exceeds result cache limit"); | |
SETMSG(HA_ERR_TEMP_FILE_WRITE_FAILURE, | |
ER_DEFAULT(ER_TEMP_FILE_WRITE_FAILURE)); | |
SETMSG(HA_ERR_INNODB_FORCED_RECOVERY, ER_DEFAULT(ER_INNODB_FORCED_RECOVERY)); | |
SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, | |
"Too many words in a FTS phrase or proximity search"); | |
SETMSG(HA_ERR_TABLE_CORRUPT, ER_DEFAULT(ER_TABLE_CORRUPT)); | |
SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING)); | |
SETMSG(HA_ERR_TABLESPACE_IS_NOT_EMPTY, | |
ER_DEFAULT(ER_TABLESPACE_IS_NOT_EMPTY)); | |
SETMSG(HA_ERR_WRONG_FILE_NAME, ER_DEFAULT(ER_WRONG_FILE_NAME)); | |
SETMSG(HA_ERR_NOT_ALLOWED_COMMAND, ER_DEFAULT(ER_NOT_ALLOWED_COMMAND)); | |
SETMSG(HA_ERR_COMPUTE_FAILED, "Compute virtual column value failed"); | |
SETMSG(HA_ERR_DISK_FULL_NOWAIT, ER_DEFAULT(ER_DISK_FULL_NOWAIT)); | |
SETMSG(HA_ERR_NO_SESSION_TEMP, ER_DEFAULT(ER_NO_SESSION_TEMP)); | |
SETMSG(HA_ERR_WRONG_TABLE_NAME, ER_DEFAULT(ER_WRONG_TABLE_NAME)); | |
SETMSG(HA_ERR_TOO_LONG_PATH, ER_DEFAULT(ER_TABLE_NAME_CAUSES_TOO_LONG_PATH)); | |
SETMSG(HA_ERR_FTS_TOO_MANY_NESTED_EXP, | |
"Too many nested sub-expressions in a full-text search"); | |
/* Register the error messages for use with my_error(). */ | |
return my_error_register(get_handler_errmsg, HA_ERR_FIRST, HA_ERR_LAST); | |
} | |
/**
  Tear down a storage engine's handlerton when its plugin is finalized.

  Releases the legacy db_type mapping, calls panic(HA_PANIC_CLOSE) and the
  plugin's deinit hook, frees the se_plugin_array slot so it can be reused
  by a later install, and finally frees the handlerton itself.

  @param plugin  storage engine plugin being finalized
  @return 0 (always)
*/
int ha_finalize_handlerton(st_plugin_int *plugin) {
  handlerton *hton = (handlerton *)plugin->data;
  DBUG_TRACE;
  /* hton can be NULL here, if ha_initialize_handlerton() failed. */
  if (!hton) goto end;
  switch (hton->state) {
    case SHOW_OPTION_NO:
    case SHOW_OPTION_DISABLED:
      break;
    case SHOW_OPTION_YES:
      // Drop the legacy type-code mapping only if it still points at us.
      if (installed_htons[hton->db_type] == hton)
        installed_htons[hton->db_type] = nullptr;
      break;
  };
  if (hton->panic) hton->panic(hton, HA_PANIC_CLOSE);
  if (plugin->plugin->deinit) {
    /*
      Today we have no defined/special behavior for uninstalling
      engine plugins.
    */
    DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
    if (plugin->plugin->deinit(nullptr)) {
      DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
                             plugin->name.str));
    }
  }
  /*
    In case a plugin is uninstalled and re-installed later, it should
    reuse an array slot. Otherwise the number of uninstall/install
    cycles would be limited.
  */
  if (hton->slot != HA_SLOT_UNDEF) {
    /* Make sure we are not unpluging another plugin */
    DBUG_ASSERT(se_plugin_array[hton->slot] == plugin);
    DBUG_ASSERT(hton->slot < se_plugin_array.size());
    se_plugin_array[hton->slot] = NULL;
    builtin_htons[hton->slot] = false; /* Extra correctness. */
  }
  my_free(hton);
  plugin->data = nullptr;
end:
  return 0;
}
/**
  Create and initialize the handlerton for a storage engine plugin.

  Allocates a zero-filled handlerton, runs the plugin's init hook, assigns
  a legacy db_type code (picking a free dynamic code on conflict) and a
  se_plugin_array slot (reusing freed slots), updates the savepoint storage
  size and the 2PC engine count, and records legacy shortcut pointers
  (heap_hton, temptable_hton, myisam_hton, innodb_hton).

  @param plugin  storage engine plugin to initialize
  @retval 0 success
  @retval 1 failure (plugin->data is reset to nullptr)
*/
int ha_initialize_handlerton(st_plugin_int *plugin) {
  handlerton *hton;
  DBUG_TRACE;
  DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
  hton = (handlerton *)my_malloc(key_memory_handlerton, sizeof(handlerton),
                                 MYF(MY_WME | MY_ZEROFILL));
  if (hton == nullptr) {
    LogErr(ERROR_LEVEL, ER_HANDLERTON_OOM, plugin->name.str);
    goto err_no_hton_memory;
  }
  hton->slot = HA_SLOT_UNDEF;
  /* Historical Requirement */
  plugin->data = hton;  // shortcut for the future
  if (plugin->plugin->init && plugin->plugin->init(hton)) {
    LogErr(ERROR_LEVEL, ER_PLUGIN_INIT_FAILED, plugin->name.str);
    goto err;
  }
  /*
    the switch below and hton->state should be removed when
    command-line options for plugins will be implemented
  */
  DBUG_PRINT("info", ("hton->state=%d", hton->state));
  switch (hton->state) {
    case SHOW_OPTION_NO:
      break;
    case SHOW_OPTION_YES: {
      uint tmp;
      ulong fslot;
      /* now check the db_type for conflict */
      if (hton->db_type <= DB_TYPE_UNKNOWN ||
          hton->db_type >= DB_TYPE_DEFAULT || installed_htons[hton->db_type]) {
        // Requested code is invalid or taken: pick a free dynamic code.
        int idx = (int)DB_TYPE_FIRST_DYNAMIC;
        while (idx < (int)DB_TYPE_DEFAULT && installed_htons[idx]) idx++;
        if (idx == (int)DB_TYPE_DEFAULT) {
          LogErr(WARNING_LEVEL, ER_TOO_MANY_STORAGE_ENGINES);
          goto err_deinit;
        }
        if (hton->db_type != DB_TYPE_UNKNOWN)
          LogErr(WARNING_LEVEL, ER_SE_TYPECODE_CONFLICT, plugin->plugin->name,
                 idx);
        hton->db_type = (enum legacy_db_type)idx;
      }
      /*
        In case a plugin is uninstalled and re-installed later, it should
        reuse an array slot. Otherwise the number of uninstall/install
        cycles would be limited. So look for a free slot.
      */
      DBUG_PRINT("plugin",
                 ("total_ha: %lu", static_cast<ulong>(se_plugin_array.size())));
      for (fslot = 0; fslot < se_plugin_array.size(); fslot++) {
        if (!se_plugin_array[fslot]) break;
      }
      if (fslot < se_plugin_array.size())
        hton->slot = fslot;
      else {
        // No freed slot available: append at the end.
        hton->slot = se_plugin_array.size();
      }
      if (se_plugin_array.assign_at(hton->slot, plugin) ||
          builtin_htons.assign_at(hton->slot, (plugin->plugin_dl == nullptr)))
        goto err_deinit;
      installed_htons[hton->db_type] = hton;
      // Reserve this engine's savepoint area within the shared SAVEPOINT
      // allocation; each engine gets an offset into it.
      tmp = hton->savepoint_offset;
      hton->savepoint_offset = savepoint_alloc_size;
      savepoint_alloc_size += tmp;
      // An engine with a prepare hook participates in two-phase commit.
      if (hton->prepare) total_ha_2pc++;
      break;
    }
    /* fall through */
    default:
      hton->state = SHOW_OPTION_DISABLED;
      break;
  }
  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition and myisammrg.
  */
  switch (hton->db_type) {
    case DB_TYPE_HEAP:
      heap_hton = hton;
      break;
    case DB_TYPE_TEMPTABLE:
      temptable_hton = hton;
      break;
    case DB_TYPE_MYISAM:
      myisam_hton = hton;
      break;
    case DB_TYPE_INNODB:
      innodb_hton = hton;
      break;
    default:
      break;
  };
  /*
    Re-load the optimizer cost constants since this storage engine can
    have non-default cost constants.
  */
  reload_optimizer_cost_constants();
  return 0;
err_deinit:
  /*
    Let plugin do its inner deinitialization as plugin->init()
    was successfully called before.
  */
  if (plugin->plugin->deinit) (void)plugin->plugin->deinit(nullptr);
err:
  my_free(hton);
err_no_hton_memory:
  plugin->data = nullptr;
  return 1;
}
// One-time handler-layer initialization; returns 0 (always succeeds).
int ha_init() {
  DBUG_TRACE;
  /*
    Transactions are considered available when at least one storage engine
    besides the binary log has been registered.
  */
  opt_using_transactions =
      se_plugin_array.size() > static_cast<ulong>(opt_bin_log);
  // Room for the SQL-layer SAVEPOINT bookkeeping on top of engine areas.
  savepoint_alloc_size += sizeof(SAVEPOINT);
  return 0;
}
// Handler-layer shutdown: drop the registered handler error range, then
// release the message table allocated in ha_init_errors().
void ha_end() {
  my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST);
  my_free(handler_errmsgs);
}
/**
  plugin_foreach() callback: ask one storage engine to drop a database.

  @param plugin  Storage engine plugin to notify.
  @param path    Database directory path, passed through plugin_foreach
                 as an opaque void* argument.

  @retval false  Always, so that iteration continues over all engines.
*/
static bool dropdb_handlerton(THD *, plugin_ref plugin, void *path) {
  handlerton *hton = plugin_data<handlerton *>(plugin);
  // Only enabled engines that implement the hook are notified.
  // Use static_cast instead of a C-style cast for the void* round-trip.
  if (hton->state == SHOW_OPTION_YES && hton->drop_database)
    hton->drop_database(hton, static_cast<char *>(path));
  return false;
}
/**
  Notify every storage engine that a database is being dropped, so each
  can remove any data it holds for it.

  @param path  Database directory path, forwarded to each engine's
               handlerton::drop_database hook via dropdb_handlerton.
*/
void ha_drop_database(char *path) {
  plugin_foreach(nullptr, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
}
/**
  plugin_foreach() callback: close one engine's per-connection state.

  There is no need to roll back here, as all transactions must already
  have been rolled back by the time the connection is closed.

  @retval false  Always, so that iteration continues over all engines.
*/
static bool closecon_handlerton(THD *thd, plugin_ref plugin, void *) {
  handlerton *hton = plugin_data<handlerton *>(plugin);
  // Nothing to do for disabled engines or engines without session data.
  if (hton->state != SHOW_OPTION_YES || thd_get_ha_data(thd, hton) == nullptr)
    return false;
  if (hton->close_connection) hton->close_connection(hton, thd);
  /* Make sure ha_data is reset and ha_data_lock is released. */
  thd_set_ha_data(thd, hton, nullptr);
  return false;
}
/**
  Tell every storage engine that holds session data for this connection
  to close it down.

  @note
  Don't bother to roll back here, it's done already.

  @param thd  The session being closed.
*/
void ha_close_connection(THD *thd) {
  plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
                 nullptr);
}
/**
  plugin_foreach() callback: forward a session-kill notification to one
  storage engine.

  @retval false  Always, so that iteration continues over all engines.
*/
static bool kill_handlerton(THD *thd, plugin_ref plugin, void *) {
  handlerton *hton = plugin_data<handlerton *>(plugin);
  // Notify only enabled engines that implement the hook, and only when
  // the session actually carries engine data for them.
  if (hton->state == SHOW_OPTION_YES && hton->kill_connection &&
      thd_get_ha_data(thd, hton) != nullptr)
    hton->kill_connection(hton, thd);
  return false;
}
/**
  Dispatch a kill notification for the session to every storage engine
  that holds data for it (via kill_handlerton).

  @param thd  The session being killed.
*/
void ha_kill_connection(THD *thd) {
  plugin_foreach(thd, kill_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, nullptr);
}
/** Invoke handlerton::pre_dd_shutdown() on a plugin. | |
@param plugin storage engine plugin | |
@retval false (always) */ | |
static bool pre_dd_shutdown_handlerton(THD *, plugin_ref plugin, void *) { | |
handlerton *hton = plugin_data<handlerton *>(plugin); | |
if (hton->state == SHOW_OPTION_YES && hton->pre_dd_shutdown) | |
hton->pre_dd_shutdown(hton); | |
return false; | |
} | |
/**
  Invoke handlerton::pre_dd_shutdown() on every storage engine plugin,
  letting engines do pre-shutdown work before the data dictionary goes
  away.
*/
void ha_pre_dd_shutdown(void) {
  plugin_foreach(nullptr, pre_dd_shutdown_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, nullptr);
}
/* ======================================================================== | |
======================= TRANSACTIONS ===================================*/ | |
/** | |
Transaction handling in the server | |
================================== | |
In each client connection, MySQL maintains two transactional | |
states: | |
- a statement transaction, | |
- a standard, also called normal transaction. | |
Historical note | |
--------------- | |
"Statement transaction" is a non-standard term that comes | |
from the times when MySQL supported BerkeleyDB storage engine. | |
First of all, it should be said that in BerkeleyDB auto-commit | |
mode auto-commits operations that are atomic to the storage | |
engine itself, such as a write of a record, and are too | |
high-granular to be atomic from the application perspective | |
(MySQL). One SQL statement could involve many BerkeleyDB | |
auto-committed operations and thus BerkeleyDB auto-commit was of | |
little use to MySQL. | |
Secondly, instead of SQL standard savepoints, BerkeleyDB | |
provided the concept of "nested transactions". In a nutshell, | |
transactions could be arbitrarily nested, but when the parent | |
transaction was committed or aborted, all its child (nested) | |
transactions were committed or aborted as well.
Commit of a nested transaction, in turn, made its changes | |
visible, but not durable: it destroyed the nested transaction, | |
all its changes would become available to the parent and | |
currently active nested transactions of this parent. | |
So the mechanism of nested transactions was employed to | |
provide "all or nothing" guarantee of SQL statements | |
required by the standard. | |
A nested transaction would be created at start of each SQL | |
statement, and destroyed (committed or aborted) at statement | |
end. Such nested transaction was internally referred to as | |
a "statement transaction" and gave birth to the term. | |
(Historical note ends) | |
Since then a statement transaction is started for each statement | |
that accesses transactional tables or uses the binary log. If | |
the statement succeeds, the statement transaction is committed. | |
If the statement fails, the transaction is rolled back. Commits | |
of statement transactions are not durable -- each such | |
transaction is nested in the normal transaction, and if the | |
normal transaction is rolled back, the effects of all enclosed | |
statement transactions are undone as well. Technically, | |
a statement transaction can be viewed as a savepoint which is | |
maintained automatically in order to make effects of one | |
statement atomic. | |
The normal transaction is started by the user and is ended | |
usually upon a user request as well. The normal transaction | |
encloses transactions of all statements issued between | |
its beginning and its end. | |
In autocommit mode, the normal transaction is equivalent | |
to the statement transaction. | |
Since MySQL supports PSEA (pluggable storage engine | |
architecture), more than one transactional engine can be | |
active at a time. Hence transactions, from the server | |
point of view, are always distributed. In particular, | |
transactional state is maintained independently for each | |
engine. In order to commit a transaction the two phase | |
commit protocol is employed. | |
Not all statements are executed in context of a transaction. | |
Administrative and status information statements do not modify | |
engine data, and thus do not start a statement transaction and | |
also have no effect on the normal transaction. Examples of such | |
statements are SHOW STATUS and RESET SLAVE. | |
Similarly DDL statements are not transactional, | |
and therefore a transaction is [almost] never started for a DDL | |
statement. The difference between a DDL statement and a purely | |
administrative statement though is that a DDL statement always | |
commits the current transaction before proceeding, if there is | |
any. | |
At last, SQL statements that work with non-transactional | |
engines also have no effect on the transaction state of the | |
connection. Even though they are written to the binary log, | |
and the binary log is, overall, transactional, the writes | |
are done in "write-through" mode, directly to the binlog | |
file, followed with a OS cache sync, in other words, | |
bypassing the binlog undo log (translog). | |
They do not commit the current normal transaction. | |
A failure of a statement that uses non-transactional tables would
cause a rollback of the statement transaction, but in case no
transactional tables are used, no statement transaction is started.
Data layout | |
----------- | |
The server stores its transaction-related data in | |
thd->transaction. This structure has two members of type | |
THD_TRANS. These members correspond to the statement and | |
normal transactions respectively: | |
- thd->transaction.stmt contains a list of engines | |
that are participating in the given statement | |
- thd->transaction.all contains a list of engines that | |
have participated in any of the statement transactions started | |
within the context of the normal transaction. | |
Each element of the list contains a pointer to the storage | |
engine, engine-specific transactional data, and engine-specific | |
transaction flags. | |
In autocommit mode thd->transaction.all is empty. | |
Instead, data of thd->transaction.stmt is | |
used to commit/rollback the normal transaction. | |
The list of registered engines has a few important properties: | |
- no engine is registered in the list twice | |
- engines are present in the list in reverse temporal order --
new participants are always added to the beginning of the list. | |
Transaction life cycle | |
---------------------- | |
When a new connection is established, thd->transaction | |
members are initialized to an empty state. | |
If a statement uses any tables, all affected engines | |
are registered in the statement engine list. In | |
non-autocommit mode, the same engines are registered in | |
the normal transaction list. | |
At the end of the statement, the server issues a commit | |
or a roll back for all engines in the statement list. | |
At this point transaction flags of an engine, if any, are | |
propagated from the statement list to the list of the normal | |
transaction. | |
When commit/rollback is finished, the statement list is | |
cleared. It will be filled in again by the next statement, | |
and emptied again at the next statement's end. | |
The normal transaction is committed in a similar way | |
(by going over all engines in thd->transaction.all list) | |
but at different times: | |
- upon COMMIT SQL statement is issued by the user | |
- implicitly, by the server, at the beginning of a DDL statement | |
or SET AUTOCOMMIT={0|1} statement. | |
The normal transaction can be rolled back as well: | |
- if the user has requested so, by issuing ROLLBACK SQL | |
statement | |
- if one of the storage engines requested a rollback | |
by setting thd->transaction_rollback_request. This may | |
happen in case, e.g., when the transaction in the engine was | |
chosen a victim of the internal deadlock resolution algorithm | |
and rolled back internally. When such a situation happens, there | |
is little the server can do and the only option is to rollback | |
transactions in all other participating engines. In this case | |
the rollback is accompanied by an error sent to the user. | |
As follows from the use cases above, the normal transaction | |
is never committed when there is an outstanding statement | |
transaction. In most cases there is no conflict, since | |
commits of the normal transaction are issued by a stand-alone | |
administrative or DDL statement, thus no outstanding statement | |
transaction of the previous statement exists. Besides, | |
all statements that manipulate the normal transaction
are prohibited in stored functions and triggers, therefore | |
no conflicting situation can occur in a sub-statement either. | |
The remaining rare cases when the server explicitly has | |
to commit the statement transaction prior to committing the normal | |
one cover error-handling scenarios (see for example | |
SQLCOM_LOCK_TABLES). | |
When committing a statement or a normal transaction, the server | |
either uses the two-phase commit protocol, or issues a commit | |
in each engine independently. The two-phase commit protocol | |
is used only if: | |
- all participating engines support two-phase commit (provide | |
handlerton::prepare PSEA API call) and | |
- transactions in at least two engines modify data (i.e. are | |
not read-only). | |
Note that the two phase commit is used for | |
statement transactions, even though they are not durable anyway. | |
This is done to ensure logical consistency of data in a multiple- | |
engine transaction. | |
For example, imagine that some day MySQL supports unique | |
constraint checks deferred till the end of statement. In such | |
case a commit in one of the engines may yield ER_DUP_KEY, | |
and MySQL should be able to gracefully abort statement | |
transactions of other participants. | |
After the normal transaction has been committed, | |
thd->transaction.all list is cleared. | |
When a connection is closed, the current normal transaction, if | |
any, is rolled back. | |
Roles and responsibilities | |
-------------------------- | |
The server has no way to know that an engine participates in | |
the statement and a transaction has been started | |
in it unless the engine says so. Thus, in order to be | |
a part of a transaction, the engine must "register" itself. | |
This is done by invoking trans_register_ha() server call. | |
Normally the engine registers itself whenever handler::external_lock() | |
is called. trans_register_ha() can be invoked many times: if | |
an engine is already registered, the call does nothing. | |
In case autocommit is not set, the engine must register itself | |
twice -- both in the statement list and in the normal transaction | |
list. | |
In which list to register is a parameter of trans_register_ha(). | |
Note, that although the registration interface in itself is | |
fairly clear, the current usage practice often leads to undesired | |
effects. E.g. since a call to trans_register_ha() in most engines | |
is embedded into implementation of handler::external_lock(), some | |
DDL statements start a transaction (at least from the server | |
point of view) even though they are not expected to. E.g. | |
CREATE TABLE does not start a transaction, since | |
handler::external_lock() is never called during CREATE TABLE. But | |
CREATE TABLE ... SELECT does, since handler::external_lock() is | |
called for the table that is being selected from. This has no | |
practical effects currently, but must be kept in mind | |
nevertheless. | |
Once an engine is registered, the server will do the rest | |
of the work. | |
During statement execution, whenever any of data-modifying | |
PSEA API methods is used, e.g. handler::write_row() or | |
handler::update_row(), the read-write flag is raised in the | |
statement transaction for the involved engine. | |
Currently All PSEA calls are "traced", and the data can not be | |
changed in a way other than issuing a PSEA call. Important: | |
unless this invariant is preserved the server will not know that | |
a transaction in a given engine is read-write and will not | |
involve the two-phase commit protocol! | |
At the end of a statement, the trans_commit_stmt server call is
invoked. This call in turn invokes handlerton::prepare() | |
for every involved engine. Prepare is followed by a call | |
to handlerton::commit_one_phase(). If a one-phase commit
will suffice, handlerton::prepare() is not invoked and | |
the server only calls handlerton::commit_one_phase(). | |
At statement commit, the statement-related read-write | |
engine flag is propagated to the corresponding flag in the | |
normal transaction. When the commit is complete, the list | |
of registered engines is cleared. | |
Rollback is handled in a similar fashion. | |
Additional notes on DDL and the normal transaction. | |
--------------------------------------------------- | |
DDLs and operations with non-transactional engines | |
do not "register" in thd->transaction lists, and thus do not | |
modify the transaction state. Besides, each DDL in | |
MySQL is prefixed with an implicit normal transaction commit | |
(a call to trans_commit_implicit()), and thus leaves nothing | |
to modify. | |
However, as it has been pointed out with CREATE TABLE .. SELECT, | |
some DDL statements can start a *new* transaction. | |
Behaviour of the server in this case is currently badly | |
defined. | |
DDL statements use a form of "semantic" logging | |
to maintain atomicity: if CREATE TABLE .. SELECT failed, | |
the newly created table is deleted. | |
In addition, some DDL statements issue interim transaction | |
commits: e.g. ALTER TABLE issues a commit after data is copied | |
from the original table to the internal temporary table. Other | |
statements, e.g. CREATE TABLE ... SELECT do not always commit | |
after itself. | |
And finally there is a group of DDL statements such as | |
RENAME/DROP TABLE that doesn't start a new transaction | |
and doesn't commit. | |
This diversity makes it hard to say what will happen if | |
by chance a stored function is invoked during a DDL -- | |
whether any modifications it makes will be committed or not | |
is not clear. Fortunately, SQL grammar of few DDLs allows | |
invocation of a stored function. | |
A consistent behaviour is perhaps to always commit the normal | |
transaction after all DDLs, just like the statement transaction | |
is always committed at the end of all statements. | |
*/ | |
/**
  Register a storage engine for a transaction.

  Every storage engine MUST call this function when it starts
  a transaction or a statement (that is it must be called both for the
  "beginning of transaction" and "beginning of statement").
  Only storage engines registered for the transaction/statement
  will know when to commit/rollback it.

  @param thd     Session whose transaction the engine joins.
  @param all     true: register in the session (normal) transaction scope;
                 false: register in the statement transaction scope.
  @param ht_arg  Handlerton of the registering storage engine.
  @param trxid   Engine transaction id; only consumed by the performance
                 schema instrumentation below (unused otherwise).

  @note
  trans_register_ha is idempotent - storage engine may register many
  times per transaction.
*/
void trans_register_ha(THD *thd, bool all, handlerton *ht_arg,
                       const ulonglong *trxid MY_ATTRIBUTE((unused))) {
  Ha_trx_info *ha_info;
  Transaction_ctx *trn_ctx = thd->get_transaction();
  // Registration goes to either the statement list or the session list.
  Transaction_ctx::enum_trx_scope trx_scope =
      all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  DBUG_TRACE;
  DBUG_PRINT("enter", ("%s", all ? "all" : "stmt"));
  if (all) {
    /*
      Ensure no active backup engine data exists, unless the current
      transaction is from replication and in active xa state.
    */
    DBUG_ASSERT(
        thd->get_ha_data(ht_arg->slot)->ha_ptr_backup == nullptr ||
        (thd->get_transaction()->xid_state()->has_state(XID_STATE::XA_ACTIVE)));
    DBUG_ASSERT(thd->get_ha_data(ht_arg->slot)->ha_ptr_backup == nullptr ||
                (thd->is_binlog_applier() || thd->slave_thread));
    // Joining the session scope marks the connection as being inside a
    // transaction for the client protocol status flags.
    thd->server_status |= SERVER_STATUS_IN_TRANS;
    if (thd->tx_read_only)
      thd->server_status |= SERVER_STATUS_IN_TRANS_READONLY;
    DBUG_PRINT("info", ("setting SERVER_STATUS_IN_TRANS"));
  }
  // Per-engine slot: ha_info[0] is the statement scope, ha_info[1] the
  // session scope.
  ha_info = thd->get_ha_data(ht_arg->slot)->ha_info + (all ? 1 : 0);
  if (ha_info->is_started()) {
    DBUG_ASSERT(trn_ctx->ha_trx_info(trx_scope));
    return; /* already registered, return */
  }
  trn_ctx->register_ha(trx_scope, ha_info, ht_arg);
  trn_ctx->set_ha_trx_info(trx_scope, ha_info);
  // An engine without a prepare hook rules out two-phase commit for
  // this scope.
  if (ht_arg->prepare == nullptr) trn_ctx->set_no_2pc(trx_scope, true);
  trn_ctx->xid_state()->set_query_id(thd->query_id);
  /*
    Register transaction start in performance schema if not done already.
    By doing this, we handle cases when the transaction is started implicitly in
    autocommit=0 mode, and cases when we are in normal autocommit=1 mode and the
    executed statement is a single-statement transaction.
    Explicitly started transactions are handled in trans_begin().
    Do not register transactions in which binary log is the only participating
    transactional storage engine.
  */
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (thd->m_transaction_psi == nullptr && ht_arg->db_type != DB_TYPE_BINLOG &&
      !thd->is_attachable_transaction_active()) {
    const XID *xid = trn_ctx->xid_state()->get_xid();
    bool autocommit = !thd->in_multi_stmt_transaction_mode();
    thd->m_transaction_psi = MYSQL_START_TRANSACTION(
        &thd->m_transaction_state, xid, trxid, thd->tx_isolation,
        thd->tx_read_only, autocommit);
    DEBUG_SYNC(thd, "after_set_transaction_psi_before_set_transaction_gtid");
    gtid_set_performance_schema_values(thd);
  }
#endif
}
/** XA Prepare one SE. | |
@param[in] thd Session THD | |
@param[in] ht SE handlerton | |
@return 0 for success, 1 for error - entire transaction is rolled back. */ | |
static int prepare_one_ht(THD *thd, handlerton *ht) { | |
DBUG_TRACE; | |
DBUG_ASSERT(!thd->status_var_aggregated); | |
thd->status_var.ha_prepare_count++; | |
if (ht->prepare) { | |
DBUG_EXECUTE_IF("simulate_xa_failure_prepare", { | |
ha_rollback_trans(thd, true); | |
return 1; | |
}); | |
if (ht->prepare(ht, thd, true)) { | |
ha_rollback_trans(thd, true); | |
return 1; | |
} | |
} else { | |
push_warning_printf(thd, Sql_condition::SL_WARNING, ER_ILLEGAL_HA, | |
ER_THD(thd, ER_ILLEGAL_HA), | |
ha_resolve_storage_engine_name(ht)); | |
} | |
return 0; | |
} | |
/** | |
@retval | |
0 ok | |
@retval | |
1 error, transaction was rolled back | |
*/ | |
int ha_xa_prepare(THD *thd) { | |
int error = 0; | |
Transaction_ctx *trn_ctx = thd->get_transaction(); | |
DBUG_TRACE; | |
if (trn_ctx->is_active(Transaction_ctx::SESSION)) { | |
const Ha_trx_info *ha_info = trn_ctx->ha_trx_info(Transaction_ctx::SESSION); | |
bool gtid_error = false; | |
bool need_clear_owned_gtid = false; | |
std::tie(gtid_error, need_clear_owned_gtid) = commit_owned_gtids(thd, true); | |
if (gtid_error) { | |
DBUG_ASSERT(need_clear_owned_gtid); | |
ha_rollback_trans(thd, true); | |
error = 1; | |
goto err; | |
} | |
/* | |
Ensure externalization order for applier threads. | |
Note: the calls to Commit_order_manager::wait/wait_and_finish() will be | |
no-op for threads other than replication applier threads. | |
*/ | |
if (Commit_order_manager::wait(thd)) { | |
thd->commit_error = THD::CE_NONE; | |
ha_rollback_trans(thd, true); | |
error = 1; | |
gtid_error = true; | |
goto err; | |
} | |
/* Allow GTID to be read by SE for XA prepare. */ | |
{ | |
Clone_handler::XA_Operation xa_guard(thd); | |
/* Prepare binlog SE first, if there. */ | |
while (ha_info != nullptr && error == 0) { | |
auto ht = ha_info->ht(); | |
if (ht->db_type == DB_TYPE_BINLOG) { | |
error = prepare_one_ht(thd, ht); | |
break; | |
} | |
ha_info = ha_info->next(); | |
} | |
/* Prepare all SE other than binlog. */ | |
ha_info = trn_ctx->ha_trx_info(Transaction_ctx::SESSION); | |
while (ha_info != nullptr && error == 0) { | |
auto ht = ha_info->ht(); | |
error = prepare_one_ht(thd, ht); | |
if (error != 0) { | |
break; | |
} | |
ha_info = ha_info->next(); | |
} | |
} | |
DBUG_ASSERT(error != 0 || thd->get_transaction()->xid_state()->has_state( | |
XID_STATE::XA_IDLE)); | |
err: | |
/* | |
After ensuring externalization order for applier thread, remove it | |
from waiting (Commit Order Queue) and allow next applier thread to | |
be ordered. | |
Note: the calls to Commit_order_manager::wait_and_finish() will be | |
no-op for threads other than replication applier threads. | |
*/ | |
Commit_order_manager::wait_and_finish(thd, error); | |
gtid_state_commit_or_rollback(thd, need_clear_owned_gtid, !gtid_error); | |
} | |
return error; | |
} | |
/** | |
Check if we can skip the two-phase commit. | |
A helper function to evaluate if two-phase commit is mandatory. | |
As a side effect, propagates the read-only/read-write flags | |
of the statement transaction to its enclosing normal transaction. | |
If we have at least two engines with read-write changes we must | |
run a two-phase commit. Otherwise we can run several independent | |
commits as the only transactional engine has read-write changes | |
and others are read-only. | |
@retval 0 All engines are read-only. | |
@retval 1 We have the only engine with read-write changes. | |
@retval >1 More than one engine have read-write changes. | |
Note: return value might NOT be the exact number of | |
engines with read-write changes. | |
*/ | |
static uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, | |
bool all) { | |
/* The number of storage engines that have actual changes. */ | |
unsigned rw_ha_count = 0; | |
Ha_trx_info *ha_info; | |
for (ha_info = ha_list; ha_info; ha_info = ha_info->next()) { | |
if (ha_info->is_trx_read_write()) ++rw_ha_count; | |
if (!all) { | |
Ha_trx_info *ha_info_all = | |
&thd->get_ha_data(ha_info->ht()->slot)->ha_info[1]; | |
DBUG_ASSERT(ha_info != ha_info_all); | |
/* | |
Merge read-only/read-write information about statement | |
transaction to its enclosing normal transaction. Do this | |
only if in a real transaction -- that is, if we know | |
that ha_info_all is registered in thd->transaction.all. | |
Since otherwise we only clutter the normal transaction flags. | |
*/ | |
if (ha_info_all->is_started()) /* false if autocommit. */ | |
ha_info_all->coalesce_trx_with(ha_info); | |
} else if (rw_ha_count > 1) { | |
/* | |
It is a normal transaction, so we don't need to merge read/write | |
information up, and the need for two-phase commit has been | |
already established. Break the loop prematurely. | |
*/ | |
break; | |
} | |
} | |
return rw_ha_count; | |
} | |
/** | |
The function computes condition to call gtid persistor wrapper, | |
and executes it. | |
It is invoked at committing a statement or transaction, including XA, | |
and also at XA prepare handling. | |
@param thd Thread context. | |
@param all The execution scope, true for the transaction one, false | |
for the statement one. | |
@return std::pair containing: Error and Owned GTID release status | |
Error | |
@retval 0 Ok | |
@retval !0 Error | |
Owned GTID release status | |
@retval true remove the GTID owned by thread from owned GTIDs | |
@retval false removal of the GTID owned by thread from owned GTIDs | |
is not required | |
*/ | |
std::pair<int, bool> commit_owned_gtids(THD *thd, bool all) { | |
DBUG_TRACE; | |
int error = 0; | |
bool need_clear_owned_gtid = false; | |
/* | |
If the binary log is disabled for this thread (either by | |
log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a | |
slave thread), then the statement will not be written to | |
the binary log. In this case, we should save its GTID into | |
mysql.gtid_executed table and @@GLOBAL.GTID_EXECUTED as it | |
did when binlog is enabled. | |
We also skip saving GTID into mysql.gtid_executed table and | |
@@GLOBAL.GTID_EXECUTED when slave-preserve-commit-order is enabled. We skip | |
as GTID will be saved in | |
Commit_order_manager::flush_engine_and_signal_threads (invoked from | |
Commit_order_manager::wait_and_finish). In particular, there is the | |
following call stack under ha_commit_low which save GTID in case its skipped | |
here: | |
ha_commit_low -> | |
Commit_order_manager::wait_and_finish -> | |
Commit_order_manager::finish -> | |
Commit_order_manager::flush_engine_and_signal_threads -> | |
Gtid_state::update_commit_group | |
We also skip saving GTID for intermediate commits i.e. when | |
thd->is_operating_substatement_implicitly is enabled. | |
*/ | |
if (thd->is_current_stmt_binlog_log_slave_updates_disabled() && | |
ending_trans(thd, all) && !thd->is_operating_gtid_table_implicitly && | |
!thd->is_operating_substatement_implicitly) { | |
if (!has_commit_order_manager(thd) && | |
(thd->owned_gtid.sidno > 0 || | |
thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS)) { | |
need_clear_owned_gtid = true; | |
} | |
/* | |
If GTID is not persisted by SE, write it to | |
mysql.gtid_executed table. | |
*/ | |
if (thd->owned_gtid.sidno > 0 && !thd->se_persists_gtid()) { | |
error = gtid_state->save(thd); | |
} | |
} | |
return std::make_pair(error, need_clear_owned_gtid); | |
} | |
/** | |
@param[in] thd Thread handle. | |
@param[in] all Session transaction if true, statement | |
otherwise. | |
@param[in] ignore_global_read_lock Allow commit to complete even if a | |
global read lock is active. This can be | |
used to allow changes to internal tables | |
(e.g. slave status tables). | |
@retval | |
0 ok | |
@retval | |
1 transaction was rolled back | |
@retval | |
2 error during commit, data may be inconsistent | |
@todo | |
Since we don't support nested statement transactions in 5.0, | |
we can't commit or rollback stmt transactions while we are inside | |
stored functions or triggers. So we simply do nothing now. | |
TODO: This should be fixed in later ( >= 5.1) releases. | |
*/ | |
int ha_commit_trans(THD *thd, bool all, bool ignore_global_read_lock) { | |
int error = 0; | |
THD_STAGE_INFO(thd, stage_waiting_for_handler_commit); | |
bool run_slave_post_commit = false; | |
bool need_clear_owned_gtid = false; | |
/* | |
Save transaction owned gtid into table before transaction prepare | |
if binlog is disabled, or binlog is enabled and log_slave_updates | |
is disabled with slave SQL thread or slave worker thread. | |
*/ | |
std::tie(error, need_clear_owned_gtid) = commit_owned_gtids(thd, all); | |
/* | |
'all' means that this is either an explicit commit issued by | |
user, or an implicit commit issued by a DDL. | |
*/ | |
Transaction_ctx *trn_ctx = thd->get_transaction(); | |
Transaction_ctx::enum_trx_scope trx_scope = | |
all ? Transaction_ctx::SESSION : Transaction_ctx::STMT; | |
/* | |
"real" is a nick name for a transaction for which a commit will | |
make persistent changes. E.g. a 'stmt' transaction inside a 'all' | |
transaction is not 'real': even though it's possible to commit it,
the changes are not durable as they might be rolled back if the | |
enclosing 'all' transaction is rolled back. | |
*/ | |
bool is_real_trans = all || !trn_ctx->is_active(Transaction_ctx::SESSION); | |
Ha_trx_info *ha_info = trn_ctx->ha_trx_info(trx_scope); | |
XID_STATE *xid_state = trn_ctx->xid_state(); | |
DBUG_TRACE; | |
DBUG_PRINT("info", ("all=%d thd->in_sub_stmt=%d ha_info=%p is_real_trans=%d", | |
all, thd->in_sub_stmt, ha_info, is_real_trans)); | |
/* | |
We must not commit the normal transaction if a statement | |
transaction is pending. Otherwise statement transaction | |
flags will not get propagated to its normal transaction's | |
counterpart. | |
*/ | |
DBUG_ASSERT(!trn_ctx->is_active(Transaction_ctx::STMT) || !all); | |
DBUG_EXECUTE_IF("pre_commit_error", { | |
error = true; | |
my_error(ER_UNKNOWN_ERROR, MYF(0)); | |
}); | |
/* | |
When atomic DDL is executed on the slave, we would like to | |
to update slave applier state as part of DDL's transaction. | |
Call Relay_log_info::pre_commit() hook to do this before DDL | |
gets committed in the following block. | |
Failed atomic DDL statements should've been marked as executed/committed | |
during statement rollback, though some like GRANT may continue until | |
this point. | |
When applying a DDL statement on a slave and the statement is filtered | |
out by a table filter, we report an error "ER_SLAVE_IGNORED_TABLE" to | |
warn slave applier thread. We need to save the DDL statement's gtid | |
into mysql.gtid_executed system table if the binary log is disabled | |
on the slave and gtids are enabled. | |
*/ | |
if (is_real_trans && is_atomic_ddl_commit_on_slave(thd) && | |
(!thd->is_error() || | |
(thd->is_operating_gtid_table_implicitly && | |
thd->get_stmt_da()->mysql_errno() == ER_SLAVE_IGNORED_TABLE))) { | |
run_slave_post_commit = true; | |
error = error || thd->rli_slave->pre_commit(); | |
DBUG_EXECUTE_IF("rli_pre_commit_error", { | |
error = true; | |
my_error(ER_UNKNOWN_ERROR, MYF(0)); | |
}); | |
DBUG_EXECUTE_IF("slave_crash_before_commit", { | |
/* This pre-commit crash aims solely at atomic DDL */ | |
DBUG_SUICIDE(); | |
}); | |
} | |
if (thd->in_sub_stmt) { | |
DBUG_ASSERT(0); | |
/* | |
Since we don't support nested statement transactions in 5.0, | |
we can't commit or rollback stmt transactions while we are inside | |
stored functions or triggers. So we simply do nothing now. | |
TODO: This should be fixed in later ( >= 5.1) releases. | |
*/ | |
if (!all) return 0; | |
/* | |
We assume that all statements which commit or rollback main transaction | |
are prohibited inside of stored functions or triggers. So they should | |
bail out with error even before ha_commit_trans() call. To be 100% safe | |
let us throw error in non-debug builds. | |
*/ | |
my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0)); | |
return 2; | |
} | |
MDL_request mdl_request; | |
bool release_mdl = false; | |
if (ha_info && !error) { | |
uint rw_ha_count = 0; | |
bool rw_trans; | |
DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE();); | |
/* | |
skip 2PC if the transaction is empty and it is not marked as started (which | |
can happen when the slave's binlog is disabled) | |
*/ | |
if (ha_info->is_started()) | |
rw_ha_count = ha_check_and_coalesce_trx_read_only(thd, ha_info, all); | |
trn_ctx->set_rw_ha_count(trx_scope, rw_ha_count); | |
/* rw_trans is true when we in a transaction changing data */ | |
rw_trans = is_real_trans && (rw_ha_count > 0); | |
DBUG_EXECUTE_IF("dbug.enabled_commit", { | |
const char act[] = "now signal Reached wait_for signal.commit_continue"; | |
DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); | |
};); | |
DEBUG_SYNC(thd, "ha_commit_trans_before_acquire_commit_lock"); | |
if (rw_trans && !ignore_global_read_lock) { | |
/* | |
Acquire a metadata lock which will ensure that COMMIT is blocked | |
by an active FLUSH TABLES WITH READ LOCK (and vice versa: | |
COMMIT in progress blocks FTWRL). | |
We allow the owner of FTWRL to COMMIT; we assume that it knows | |
what it does. | |
*/ | |
MDL_REQUEST_INIT(&mdl_request, MDL_key::COMMIT, "", "", | |
MDL_INTENTION_EXCLUSIVE, MDL_EXPLICIT); | |
DBUG_PRINT("debug", ("Acquire MDL commit lock")); | |
if (thd->mdl_context.acquire_lock(&mdl_request, | |
thd->variables.lock_wait_timeout)) { | |
ha_rollback_trans(thd, all); | |
return 1; | |
} | |
release_mdl = true; | |
DEBUG_SYNC(thd, "ha_commit_trans_after_acquire_commit_lock"); | |
} | |
if (rw_trans && stmt_has_updated_trans_table(ha_info) && | |
check_readonly(thd, true)) { | |
ha_rollback_trans(thd, all); | |
error = 1; | |
goto end; | |
} | |
if (!trn_ctx->no_2pc(trx_scope) && (trn_ctx->rw_ha_count(trx_scope) > 1)) | |
error = tc_log->prepare(thd, all); | |
} | |
/* | |
The state of XA transaction is changed to Prepared, intermediately. | |
It's going to change to the regular NOTR at the end. | |
The fact of the Prepared state is of interest to binary logger. | |
*/ | |
if (!error && all && xid_state->has_state(XID_STATE::XA_IDLE)) { | |
DBUG_ASSERT( | |
thd->lex->sql_command == SQLCOM_XA_COMMIT && | |
static_cast<Sql_cmd_xa_commit *>(thd->lex->m_sql_cmd)->get_xa_opt() == | |
XA_ONE_PHASE); | |
xid_state->set_state(XID_STATE::XA_PREPARED); | |
} | |
if (error || (error = tc_log->commit(thd, all))) { | |
ha_rollback_trans(thd, all); | |
error = 1; | |
goto end; | |
} | |
/* | |
Mark multi-statement (any autocommit mode) or single-statement | |
(autocommit=1) transaction as rolled back | |
*/ | |
#ifdef HAVE_PSI_TRANSACTION_INTERFACE | |
if (is_real_trans && thd->m_transaction_psi != nullptr) { | |
MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi); | |
thd->m_transaction_psi = nullptr; | |
} | |
#endif | |
DBUG_EXECUTE_IF("crash_commit_after", | |
if (!thd->is_operating_gtid_table_implicitly) | |
DBUG_SUICIDE();); | |
end: | |
if (release_mdl && mdl_request.ticket) { | |
/* | |
We do not always immediately release transactional locks | |
after ha_commit_trans() (see uses of ha_enable_transaction()), | |
thus we release the commit blocker lock as soon as it's | |
not needed. | |
*/ | |
DBUG_PRINT("debug", ("Releasing MDL commit lock")); | |
thd->mdl_context.release_lock(mdl_request.ticket); | |
} | |
/* Free resources and perform other cleanup even for 'empty' transactions. */ | |
if (is_real_trans) { | |
trn_ctx->cleanup(); | |
thd->tx_priority = 0; | |
} | |
if (need_clear_owned_gtid) { | |
thd->server_status &= ~SERVER_STATUS_IN_TRANS; | |
/* | |
Release the owned GTID when binlog is disabled, or binlog is | |
enabled and log_slave_updates is disabled with slave SQL thread | |
or slave worker thread. | |
*/ | |
if (error) | |
gtid_state->update_on_rollback(thd); | |
else | |
gtid_state->update_on_commit(thd); | |
} else { | |
if (has_commit_order_manager(thd) && error) { | |
gtid_state->update_on_rollback(thd); | |
} | |
} | |
if (run_slave_post_commit) { | |
DBUG_EXECUTE_IF("slave_crash_after_commit", DBUG_SUICIDE();); | |
thd->rli_slave->post_commit(error != 0); | |
/* | |
SERVER_STATUS_IN_TRANS may've been gained by pre_commit alone | |
when the main DDL transaction is filtered out of execution. | |
In such case the status has to be reset now. | |
TODO: move/refactor this handling onto trans_commit/commit_implicit() | |
the caller level. | |
*/ | |
thd->server_status &= ~SERVER_STATUS_IN_TRANS; | |
} else { | |
DBUG_EXECUTE_IF("slave_crash_after_commit", { | |
if (thd->slave_thread && thd->rli_slave && | |
thd->rli_slave->current_event && | |
thd->rli_slave->current_event->get_type_code() == | |
binary_log::XID_EVENT && | |
!thd->is_operating_substatement_implicitly && | |
!thd->is_operating_gtid_table_implicitly) | |
DBUG_SUICIDE(); | |
}); | |
} | |
return error; | |
} | |
/**
  Commit the sessions outstanding transaction.

  Calls the commit method of every storage engine participating in the
  statement- or session-level transaction, preserving replication applier
  commit order where required, and finally runs the after_commit hook.

  @pre thd->transaction.flags.commit_low == true
  @post thd->transaction.flags.commit_low == false

  @note This function does not care about global read lock; the caller
  should.

  @param[in]  thd  Thread handle.
  @param[in]  all  Is set in case of explicit commit
                   (COMMIT statement), or implicit commit
                   issued by DDL. Is not set when called
                   at the end of statement, even if
                   autocommit=1.
  @param[in]  run_after_commit
                   True by default, otherwise, does not execute
                   the after_commit hook in the function.

  @return 0 on success; 1 if an engine commit failed or the commit-order
          wait was interrupted.
*/
int ha_commit_low(THD *thd, bool all, bool run_after_commit) {
  int error = 0;
  Transaction_ctx *trn_ctx = thd->get_transaction();
  /* Commit either the statement-level or the session-level transaction. */
  Transaction_ctx::enum_trx_scope trx_scope =
      all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  Ha_trx_info *ha_info = trn_ctx->ha_trx_info(trx_scope), *ha_info_next;
  DBUG_TRACE;
  if (ha_info) {
    bool restore_backup_ha_data = false;
    /*
      At execution of XA COMMIT ONE PHASE binlog or slave applier
      reattaches the engine ha_data to THD, previously saved at XA START.
    */
    if (all && thd->rpl_unflag_detached_engine_ha_data()) {
      DBUG_PRINT("info", ("query='%s'", thd->query().str));
      DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_COMMIT);
      DBUG_ASSERT(
          static_cast<Sql_cmd_xa_commit *>(thd->lex->m_sql_cmd)->get_xa_opt() ==
          XA_ONE_PHASE);
      restore_backup_ha_data = true;
    }
    bool is_applier_wait_enabled = false;
    /*
      Preserve externalization and persistence order for applier threads.

      The conditions should be understood as follows:

      - When the binlog is enabled, this will be done from
        MYSQL_BIN_LOG::ordered_commit and should not be done here.
        Therefore, we have the condition
        thd->is_current_stmt_binlog_disabled().

      - This function is usually called once per statement, with
        all=false.  We should not preserve the commit order when this
        function is called in that context.  Therefore, we have the
        condition ending_trans(thd, all).

      - Statements such as ANALYZE/OPTIMIZE/REPAIR TABLE will call
        ha_commit_low multiple times with all=true from within
        mysql_admin_table, mysql_recreate_table, and
        handle_histogram_command. After returing to
        mysql_execute_command, it will call ha_commit_low a final
        time.  It is only in this final call that we should preserve
        the commit order.  Therefore, we set the flag
        thd->is_operating_substatement_implicitly while executing
        mysql_admin_table, mysql_recreate_table, and
        handle_histogram_command, clear it when returning from those
        functions, and check the flag here in ha_commit_low().

      - In all the above cases, we should make the current transaction
        fail early in case a previous transaction has rolled back.
        Therefore, we also invoke the commit order manager in case
        get_rollback_status returns true.

      Note: the calls to Commit_order_manager::wait/wait_and_finish() will be
            no-op for threads other than replication applier threads.
    */
    if ((!thd->is_operating_substatement_implicitly &&
         !thd->is_operating_gtid_table_implicitly &&
         thd->is_current_stmt_binlog_log_slave_updates_disabled() &&
         ending_trans(thd, all)) ||
        Commit_order_manager::get_rollback_status(thd)) {
      if (Commit_order_manager::wait(thd)) {
        error = 1;
        /*
          Remove applier thread from waiting in Commit Order Queue and
          allow next applier thread to be ordered.
        */
        Commit_order_manager::wait_and_finish(thd, error);
        goto err;
      }
      is_applier_wait_enabled = true;
    }
    /* Commit in each engine; keep going even if one of them fails. */
    for (; ha_info; ha_info = ha_info_next) {
      int err;
      handlerton *ht = ha_info->ht();
      if ((err = ht->commit(ht, thd, all))) {
        char errbuf[MYSQL_ERRMSG_SIZE];
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err,
                 my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
        error = 1;
      }
      DBUG_ASSERT(!thd->status_var_aggregated);
      thd->status_var.ha_commit_count++;
      ha_info_next = ha_info->next();
      if (restore_backup_ha_data) reattach_engine_ha_data_to_thd(thd, ht);
      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trn_ctx->reset_scope(trx_scope);
    /*
      After ensuring externalization order for applier thread, remove it
      from waiting (Commit Order Queue) and allow next applier thread to
      be ordered.

      Note: the calls to Commit_order_manager::wait_and_finish() will be
            no-op for threads other than replication applier threads.
    */
    if (is_applier_wait_enabled) {
      Commit_order_manager::wait_and_finish(thd, error);
    }
  }
err:
  /* Free resources and perform other cleanup even for 'empty' transactions. */
  if (all) trn_ctx->cleanup();
  /*
    When the transaction has been committed, we clear the commit_low
    flag. This allow other parts of the system to check if commit_low
    was called.
  */
  trn_ctx->m_flags.commit_low = false;
  if (run_after_commit && thd->get_transaction()->m_flags.run_hooks) {
    /*
      If commit succeeded, we call the after_commit hook.

      TODO: Investigate if this can be refactored so that there is
            only one invocation of this hook in the code (in
            MYSQL_LOG_BIN::finish_commit).
    */
    if (!error) (void)RUN_HOOK(transaction, after_commit, (thd, all));
    trn_ctx->m_flags.run_hooks = false;
  }
  return error;
}
/**
  Roll back the statement- or session-level transaction in every
  participating storage engine, running the before/after rollback hooks.

  @param thd  Thread handle.
  @param all  True to roll back the session (whole) transaction,
              false to roll back only the statement transaction.

  @return 0 on success, 1 if any engine reported an error during rollback.
*/
int ha_rollback_low(THD *thd, bool all) {
  Transaction_ctx *trn_ctx = thd->get_transaction();
  int error = 0;
  Transaction_ctx::enum_trx_scope trx_scope =
      all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  Ha_trx_info *ha_info = trn_ctx->ha_trx_info(trx_scope), *ha_info_next;
  (void)RUN_HOOK(transaction, before_rollback, (thd, all));
  if (ha_info) {
    bool restore_backup_ha_data = false;
    /*
      Similarly to the commit case, the binlog or slave applier
      reattaches the engine ha_data to THD.
    */
    if (all && thd->rpl_unflag_detached_engine_ha_data()) {
      DBUG_ASSERT(trn_ctx->xid_state()->get_state() != XID_STATE::XA_NOTR ||
                  thd->killed == THD::KILL_CONNECTION);
      restore_backup_ha_data = true;
    }
    /* Roll back in each engine; keep going even if one of them fails. */
    for (; ha_info; ha_info = ha_info_next) {
      int err;
      handlerton *ht = ha_info->ht();
      if ((err = ht->rollback(ht, thd, all))) {  // cannot happen
        char errbuf[MYSQL_ERRMSG_SIZE];
        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err,
                 my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
        error = 1;
      }
      DBUG_ASSERT(!thd->status_var_aggregated);
      thd->status_var.ha_rollback_count++;
      ha_info_next = ha_info->next();
      if (restore_backup_ha_data) reattach_engine_ha_data_to_thd(thd, ht);
      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trn_ctx->reset_scope(trx_scope);
  }
  /*
    Thanks to possibility of MDL deadlock rollback request can come even if
    transaction hasn't been started in any transactional storage engine.

    It is possible to have a call of ha_rollback_low() while handling
    failure from ha_xa_prepare() and an error in Diagnostics_area still
    wasn't set. Therefore it is required to check that an error in
    Diagnostics_area is set before calling the method XID_STATE::set_error().

    If it wasn't done it would lead to failure of the assertion
    DBUG_ASSERT(m_status == DA_ERROR)
    in the method Diagnostics_area::mysql_errno().

    In case ha_xa_prepare is failed and an error wasn't set in Diagnostics_area
    the error ER_XA_RBROLLBACK is set in the Diagnostics_area from
    the method Sql_cmd_xa_prepare::trans_xa_prepare() when non-zero result code
    returned by ha_xa_prepare() is handled.
  */
  if (all && thd->transaction_rollback_request && thd->is_error())
    trn_ctx->xid_state()->set_error(thd);
  (void)RUN_HOOK(transaction, after_rollback, (thd, all));
  return error;
}
/**
  Roll back the current transaction, going through the transaction
  coordinator log (tc_log) and updating GTID state as needed.

  @param thd  Thread handle.
  @param all  True for rolling back the whole (session) transaction,
              false for the statement transaction only.

  @return 0 on success, non-zero on error.
*/
int ha_rollback_trans(THD *thd, bool all) {
  int error = 0;
  Transaction_ctx *trn_ctx = thd->get_transaction();
  /*
    For a prepared XA transaction the GTID state is updated via the
    tc_log rollback stack, so we must not update it again below.
  */
  bool is_xa_rollback = trn_ctx->xid_state()->has_state(XID_STATE::XA_PREPARED);
  /*
    "real" is a nick name for a transaction for which a commit will
    make persistent changes. E.g. a 'stmt' transaction inside a 'all'
    transaction is not 'real': even though it's possible to commit it,
    the changes are not durable as they might be rolled back if the
    enclosing 'all' transaction is rolled back.
    We establish the value of 'is_real_trans' by checking
    if it's an explicit COMMIT or BEGIN statement, or implicit
    commit issued by DDL (in these cases all == true),
    or if we're running in autocommit mode (it's only in the autocommit mode
    ha_commit_one_phase() is called with an empty
    transaction.all.ha_list, see why in trans_register_ha()).
  */
  bool is_real_trans = all || !trn_ctx->is_active(Transaction_ctx::SESSION);
  DBUG_TRACE;
  /*
    We must not rollback the normal transaction if a statement
    transaction is pending.
  */
  DBUG_ASSERT(!trn_ctx->is_active(Transaction_ctx::STMT) || !all);
  if (thd->in_sub_stmt) {
    DBUG_ASSERT(0);
    /*
      If we are inside stored function or trigger we should not commit or
      rollback current statement transaction. See comment in ha_commit_trans()
      call for more information.
    */
    if (!all) return 0;
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    return 1;
  }
  if (tc_log) error = tc_log->rollback(thd, all);
  /*
    Mark multi-statement (any autocommit mode) or single-statement
    (autocommit=1) transaction as rolled back
  */
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (all || !thd->in_active_multi_stmt_transaction()) {
    MYSQL_ROLLBACK_TRANSACTION(thd->m_transaction_psi);
    thd->m_transaction_psi = nullptr;
  }
#endif
  /* Always cleanup. Even if nht==0. There may be savepoints. */
  if (is_real_trans) {
    trn_ctx->cleanup();
    thd->tx_priority = 0;
  }
  if (all) thd->transaction_rollback_request = false;
  /*
    Only call gtid_rollback(THD*), which will purge thd->owned_gtid, if
    complete transaction is being rollback or autocommit=1.
    Notice, XA rollback has just invoked update_on_commit() through
    tc_log->*rollback* stack.
  */
  if (is_real_trans && !is_xa_rollback) gtid_state->update_on_rollback(thd);
  /*
    If the transaction cannot be rolled back safely, warn; don't warn if this
    is a slave thread (because when a slave thread executes a ROLLBACK, it has
    been read from the binary log, so it's 100% sure and normal to produce
    error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
    slave SQL thread, it would not stop the thread but just be printed in
    the error log; but we don't want users to wonder why they have this
    message in the error log, so we don't send it.
  */
  if (is_real_trans &&
      trn_ctx->cannot_safely_rollback(Transaction_ctx::SESSION) &&
      !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
    trn_ctx->push_unsafe_rollback_warnings(thd);
  return error;
}
/**
  Commit the attachable transaction in storage engines.

  @note This is slimmed down version of ha_commit_trans()/ha_commit_low()
        which commits attachable transaction but skips code which is
        unnecessary and unsafe for them (like dealing with GTIDs).
        Since attachable transactions are read-only their commit only
        needs to release resources and cleanup state in SE.

  @param thd     Current thread

  @retval 0      - Success
  @retval non-0  - Failure
*/
int ha_commit_attachable(THD *thd) {
  int error = 0;
  Transaction_ctx *trn_ctx = thd->get_transaction();
  Ha_trx_info *ha_info = trn_ctx->ha_trx_info(Transaction_ctx::STMT);
  Ha_trx_info *ha_info_next;
  /* This function only handles attachable transactions. */
  DBUG_ASSERT(thd->is_attachable_ro_transaction_active());
  /*
    Since the attachable transaction is AUTOCOMMIT we only need
    to care about statement transaction.
  */
  DBUG_ASSERT(!trn_ctx->is_active(Transaction_ctx::SESSION));
  if (ha_info) {
    /* Commit in each participating engine and reset its registration. */
    for (; ha_info; ha_info = ha_info_next) {
      /* Attachable transaction is not supposed to modify anything. */
      DBUG_ASSERT(!ha_info->is_trx_read_write());
      handlerton *ht = ha_info->ht();
      if (ht->commit(ht, thd, false)) {
        /*
          In theory this should not happen since attachable transactions
          are read only and therefore commit is supposed to only release
          resources/cleanup state. Even if this happens we will simply
          continue committing attachable transaction in other SEs.
        */
        DBUG_ASSERT(false);
        error = 1;
      }
      DBUG_ASSERT(!thd->status_var_aggregated);
      thd->status_var.ha_commit_count++;
      ha_info_next = ha_info->next();
      ha_info->reset(); /* keep it conveniently zero-filled */
    }
    trn_ctx->reset_scope(Transaction_ctx::STMT);
  }
  /*
    Mark transaction as committed in PSI.
  */
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (thd->m_transaction_psi != nullptr) {
    MYSQL_COMMIT_TRANSACTION(thd->m_transaction_psi);
    thd->m_transaction_psi = nullptr;
  }
#endif
  /* Free resources and perform other cleanup even for 'empty' transactions. */
  trn_ctx->cleanup();
  return (error);
}
/** | |
Check if all storage engines used in transaction agree that after | |
rollback to savepoint it is safe to release MDL locks acquired after | |
savepoint creation. | |
@param thd The client thread that executes the transaction. | |
@return true - It is safe to release MDL locks. | |
false - If it is not. | |
*/ | |
bool ha_rollback_to_savepoint_can_release_mdl(THD *thd) { | |
Ha_trx_info *ha_info; | |
Transaction_ctx *trn_ctx = thd->get_transaction(); | |
Transaction_ctx::enum_trx_scope trx_scope = | |
thd->in_sub_stmt ? Transaction_ctx::STMT : Transaction_ctx::SESSION; | |
DBUG_TRACE; | |
/** | |
Checking whether it is safe to release metadata locks after rollback to | |
savepoint in all the storage engines that are part of the transaction. | |
*/ | |
for (ha_info = trn_ctx->ha_trx_info(trx_scope); ha_info; | |
ha_info = ha_info->next()) { | |
handlerton *ht = ha_info->ht(); | |
DBUG_ASSERT(ht); | |
if (ht->savepoint_rollback_can_release_mdl == nullptr || | |
ht->savepoint_rollback_can_release_mdl(ht, thd) == false) | |
return false; | |
} | |
return true; | |
} | |
/**
  Roll back the transaction to a savepoint.

  Engines that were registered in the transaction when the savepoint was
  set are rolled back to the savepoint; engines that joined the
  transaction afterwards get their whole (statement or session)
  transaction rolled back and are removed from the registration list.

  @param thd  Thread handle.
  @param sv   Savepoint to roll back to.

  @return 0 on success, 1 if any engine reported an error.
*/
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) {
  int error = 0;
  Transaction_ctx *trn_ctx = thd->get_transaction();
  Transaction_ctx::enum_trx_scope trx_scope =
      !thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  Ha_trx_info *ha_info, *ha_info_next;
  DBUG_TRACE;
  /* Recompute the read-write count and 2PC capability below. */
  trn_ctx->set_rw_ha_count(trx_scope, 0);
  trn_ctx->set_no_2pc(trx_scope, false);
  /*
    rolling back to savepoint in all storage engines that were part of the
    transaction when the savepoint was set
  */
  for (ha_info = sv->ha_list; ha_info; ha_info = ha_info->next()) {
    int err;
    handlerton *ht = ha_info->ht();
    DBUG_ASSERT(ht);
    DBUG_ASSERT(ht->savepoint_set != nullptr);
    /* Engine-private savepoint data is stored right after the SAVEPOINT. */
    if ((err = ht->savepoint_rollback(
             ht, thd,
             (uchar *)(sv + 1) + ht->savepoint_offset))) {  // cannot happen
      char errbuf[MYSQL_ERRMSG_SIZE];
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err,
               my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
      error = 1;
    }
    DBUG_ASSERT(!thd->status_var_aggregated);
    thd->status_var.ha_savepoint_rollback_count++;
    if (ht->prepare == nullptr) trn_ctx->set_no_2pc(trx_scope, true);
  }
  /*
    rolling back the transaction in all storage engines that were not part of
    the transaction when the savepoint was set
  */
  for (ha_info = trn_ctx->ha_trx_info(trx_scope); ha_info != sv->ha_list;
       ha_info = ha_info_next) {
    int err;
    handlerton *ht = ha_info->ht();
    if ((err = ht->rollback(ht, thd, !thd->in_sub_stmt))) {  // cannot happen
      char errbuf[MYSQL_ERRMSG_SIZE];
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err,
               my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
      error = 1;
    }
    DBUG_ASSERT(!thd->status_var_aggregated);
    thd->status_var.ha_rollback_count++;
    ha_info_next = ha_info->next();
    ha_info->reset(); /* keep it conveniently zero-filled */
  }
  /* Only the engines present at savepoint time remain registered. */
  trn_ctx->set_ha_trx_info(trx_scope, sv->ha_list);
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (thd->m_transaction_psi != nullptr)
    MYSQL_INC_TRANSACTION_ROLLBACK_TO_SAVEPOINT(thd->m_transaction_psi, 1);
#endif
  return error;
}
/**
  Run the prepare phase of two-phase commit in every read-write engine
  participating in the statement- or session-level transaction.

  Read-only participants are skipped, which allows a simpler
  implementation in engines that are always read-only. The loop stops at
  the first engine whose prepare fails.

  @param thd  Thread handle.
  @param all  True for the session transaction, false for the statement
              transaction.

  @return 0 on success, 1 if an engine's prepare call failed.
*/
int ha_prepare_low(THD *thd, bool all) {
  int error = 0;
  Transaction_ctx::enum_trx_scope trx_scope =
      all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
  Ha_trx_info *ha_info = thd->get_transaction()->ha_trx_info(trx_scope);
  DBUG_TRACE;
  if (ha_info == nullptr) return error;
  while (ha_info != nullptr && !error) {
    handlerton *const ht = ha_info->ht();
    /*
      Do not call two-phase commit if this particular
      transaction is read-only.
    */
    if (ha_info->is_trx_read_write()) {
      const int err = ht->prepare(ht, thd, all);
      if (err != 0) {
        char errbuf[MYSQL_ERRMSG_SIZE];
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err,
                 my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
        error = 1;
      }
      DBUG_ASSERT(!thd->status_var_aggregated);
      thd->status_var.ha_prepare_count++;
    }
    ha_info = ha_info->next();
  }
  DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
  return error;
}
/** | |
@note | |
according to the sql standard (ISO/IEC 9075-2:2003) | |
section "4.33.4 SQL-statements and transaction states", | |
SAVEPOINT is *not* transaction-initiating SQL-statement | |
*/ | |
int ha_savepoint(THD *thd, SAVEPOINT *sv) { | |
int error = 0; | |
Transaction_ctx::enum_trx_scope trx_scope = | |
!thd->in_sub_stmt ? Transaction_ctx::SESSION : Transaction_ctx::STMT; | |
Ha_trx_info *ha_info = thd->get_transaction()->ha_trx_info(trx_scope); | |
Ha_trx_info *begin_ha_info = ha_info; | |
DBUG_TRACE; | |
for (; ha_info; ha_info = ha_info->next()) { | |
int err; | |
handlerton *ht = ha_info->ht(); | |
DBUG_ASSERT(ht); | |
if (!ht->savepoint_set) { | |
my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT"); | |
error = 1; | |
break; | |
} | |
if ((err = ht->savepoint_set( | |
ht, thd, | |
(uchar *)(sv + 1) + ht->savepoint_offset))) { // cannot happen | |
char errbuf[MYSQL_ERRMSG_SIZE]; | |
my_error(ER_GET_ERRNO, MYF(0), err, | |
my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err)); | |
error = 1; | |
} | |
DBUG_ASSERT(!thd->status_var_aggregated); | |
thd->status_var.ha_savepoint_count++; | |
} | |
/* | |
Remember the list of registered storage engines. All new | |
engines are prepended to the beginning of the list. | |
*/ | |
sv->ha_list = begin_ha_info; | |
#ifdef HAVE_PSI_TRANSACTION_INTERFACE | |
if (!error && thd->m_transaction_psi != nullptr) | |
MYSQL_INC_TRANSACTION_SAVEPOINTS(thd->m_transaction_psi, 1); | |
#endif | |
return error; | |
} | |
/**
  Release a savepoint in every storage engine that was registered in the
  transaction when the savepoint was set.

  Engines without a savepoint_release hook are silently skipped.

  @param thd  Thread handle.
  @param sv   Savepoint to release.

  @return 0 on success, 1 on failure.
*/
int ha_release_savepoint(THD *thd, SAVEPOINT *sv) {
  int error = 0;
  DBUG_TRACE;
  for (Ha_trx_info *info = sv->ha_list; info != nullptr; info = info->next()) {
    handlerton *const ht = info->ht();
    /* Savepoint life time is enclosed into transaction life time. */
    DBUG_ASSERT(ht);
    if (ht->savepoint_release == nullptr) continue;
    /* Engine-private savepoint data lives right after the SAVEPOINT. */
    uchar *const engine_sv =
        reinterpret_cast<uchar *>(sv + 1) + ht->savepoint_offset;
    const int err = ht->savepoint_release(ht, thd, engine_sv);
    if (err != 0) {  // cannot happen
      char errbuf[MYSQL_ERRMSG_SIZE];
      my_error(ER_GET_ERRNO, MYF(0), err,
               my_strerror(errbuf, MYSQL_ERRMSG_SIZE, err));
      error = 1;
    }
  }
  DBUG_EXECUTE_IF("fail_ha_release_savepoint", {
    my_error(ER_UNKNOWN_ERROR, MYF(0));
    error = 1;
  });
#ifdef HAVE_PSI_TRANSACTION_INTERFACE
  if (thd->m_transaction_psi != nullptr)
    MYSQL_INC_TRANSACTION_RELEASE_SAVEPOINT(thd->m_transaction_psi, 1);
#endif
  return error;
}
/**
  plugin_foreach() callback: start a consistent snapshot in one engine.

  @param thd     Thread handle.
  @param plugin  Storage engine plugin to inspect.
  @param arg     Points to a bool "warn" flag; cleared when at least one
                 engine supports consistent snapshots.

  @return false always, so iteration continues over all engines.
*/
static bool snapshot_handlerton(THD *thd, plugin_ref plugin, void *arg) {
  handlerton *const hton = plugin_data<handlerton *>(plugin);
  const bool supports_snapshot =
      hton->state == SHOW_OPTION_YES && hton->start_consistent_snapshot;
  if (supports_snapshot) {
    hton->start_consistent_snapshot(hton, thd);
    *static_cast<bool *>(arg) = false;
  }
  return false;
}
/**
  Start a consistent snapshot in every storage engine that supports it.

  If no installed engine is consistent-read capable, a warning is pushed
  to the client instead of an error.

  @param thd  Thread handle.

  @return 0 always.
*/
int ha_start_consistent_snapshot(THD *thd) {
  bool no_capable_engine = true;
  plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
                 &no_capable_engine);
  /*
    Same idea as when one wants to CREATE TABLE in one engine which does not
    exist:
  */
  if (no_capable_engine)
    push_warning(thd, Sql_condition::SL_WARNING, ER_UNKNOWN_ERROR,
                 "This MySQL server does not support any "
                 "consistent-read capable storage engine");
  return 0;
}
/**
  plugin_foreach() callback: flush the logs of one storage engine.

  @param plugin  Storage engine plugin to flush.
  @param arg     Points to the bool binlog_group_flush flag passed
                 through to the engine's flush_logs hook.

  @return true to stop iteration if the engine's flush failed,
          false otherwise.
*/
static bool flush_handlerton(THD *, plugin_ref plugin, void *arg) {
  handlerton *const hton = plugin_data<handlerton *>(plugin);
  if (hton->state != SHOW_OPTION_YES || hton->flush_logs == nullptr)
    return false;
  return hton->flush_logs(hton, *(static_cast<bool *>(arg)));
}
bool ha_flush_logs(bool binlog_group_flush) { | |
if (plugin_foreach(nullptr, flush_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, | |
static_cast<void *>(&binlog_group_flush))) { | |
return true; | |
} | |
return false; | |
} | |
/** | |
@brief make canonical filename | |
@param[in] file table handler | |
@param[in] path original path | |
@param[out] tmp_path buffer for canonized path | |
@details Lower case db name and table name path parts for | |
non file based tables when lower_case_table_names | |
is 2 (store as is, compare in lower case). | |
Filesystem path prefix (mysql_data_home or tmpdir) | |
is left intact. | |
@note tmp_path may be left intact if no conversion was | |
performed. | |
@retval canonized path | |
@todo This may be done more efficiently when table path | |
gets built. Convert this function to something like | |
ASSERT_CANONICAL_FILENAME. | |
*/ | |
const char *get_canonical_filename(handler *file, const char *path, | |
char *tmp_path) { | |
uint i; | |
if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED)) | |
return path; | |
for (i = 0; i <= mysql_tmpdir_list.max; i++) { | |
if (is_prefix(path, mysql_tmpdir_list.list[i])) return path; | |
} | |
/* Ensure that table handler get path in lower case */ | |
if (tmp_path != path) my_stpcpy(tmp_path, path); | |
/* | |
we only should turn into lowercase database/table part | |
so start the process after homedirectory | |
*/ | |
my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len); | |
return tmp_path; | |
} | |
class Ha_delete_table_error_handler : public Internal_error_handler { | |
public: | |
bool handle_condition(THD *, uint, const char *, | |
Sql_condition::enum_severity_level *level, | |
const char *) override { | |
/* Downgrade errors to warnings. */ | |
if (*level == Sql_condition::SL_ERROR) *level = Sql_condition::SL_WARNING; | |
return false; | |
} | |
}; | |
/**
  Delete table from the storage engine.

  @param thd                Thread context.
  @param table_type         Handlerton for table's SE.
  @param path               Path to table (without extension).
  @param db                 Table database.
  @param alias              Table name.
  @param table_def          dd::Table object describing the table.
  @param generate_warning   Indicates whether errors during deletion
                            should be reported as warnings.

  @return  0 - in case of success, non-0 in case of failure, ENOENT
           if the file doesn't exist.
*/
int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
                    const char *db, const char *alias,
                    const dd::Table *table_def, bool generate_warning) {
  handler *file;
  char tmp_path[FN_REFLEN];
  int error;
  TABLE dummy_table;
  TABLE_SHARE dummy_share;
  DBUG_TRACE;
  dummy_table.s = &dummy_share;
  /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
  if (table_type == nullptr ||
      !(file =
            get_new_handler((TABLE_SHARE *)nullptr,
                            table_def->partition_type() != dd::Table::PT_NONE,
                            thd->mem_root, table_type))) {
    return ENOENT;
  }
  path = get_canonical_filename(file, path, tmp_path);
  if ((error = file->ha_delete_table(path, table_def)) && generate_warning) {
    /*
      Because file->print_error() use my_error() to generate the error message
      we use an internal error handler to intercept it and store the text
      in a temporary buffer. Later the message will be presented to user
      as a warning.
    */
    Ha_delete_table_error_handler ha_delete_table_error_handler;
    /* Fill up structures that print_error may need */
    dummy_share.path.str = const_cast<char *>(path);
    dummy_share.path.length = strlen(path);
    dummy_share.db.str = db;
    dummy_share.db.length = strlen(db);
    dummy_share.table_name.str = alias;
    dummy_share.table_name.length = strlen(alias);
    dummy_table.alias = alias;
    file->change_table_ptr(&dummy_table, &dummy_share);
    /*
      XXX: should we convert *all* errors to warnings here?
      What if the error is fatal?
    */
    thd->push_internal_handler(&ha_delete_table_error_handler);
    file->print_error(error, 0);
    thd->pop_internal_handler();
  }
  destroy(file);
#ifdef HAVE_PSI_TABLE_INTERFACE
  if (likely(error == 0)) {
    /* Table share not available, so check path for temp_table prefix. */
    bool temp_table = (strstr(path, tmp_file_prefix) != nullptr);
    PSI_TABLE_CALL(drop_table_share)
    (temp_table, db, strlen(db), alias, strlen(alias));
  }
#endif
  return error;
}
/**
  Prepare HA_CREATE_INFO to be used by ALTER as well as upgrade code.

  Fill in every create option the statement did not set explicitly
  (as indicated by the used_fields bitmap) from the old table's share.

  @param share        TABLE_SHARE to take defaults from.
  @param used_fields  Bitmap of HA_CREATE_USED_* flags marking the
                      options the user specified explicitly.
*/
void HA_CREATE_INFO::init_create_options_from_share(const TABLE_SHARE *share,
                                                    uint used_fields) {
  if (!(used_fields & HA_CREATE_USED_MIN_ROWS)) min_rows = share->min_rows;
  if (!(used_fields & HA_CREATE_USED_MAX_ROWS)) max_rows = share->max_rows;
  if (!(used_fields & HA_CREATE_USED_AVG_ROW_LENGTH))
    avg_row_length = share->avg_row_length;
  if (!(used_fields & HA_CREATE_USED_DEFAULT_CHARSET))
    default_table_charset = share->table_charset;
  if (!(used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE))
    key_block_size = share->key_block_size;
  if (!(used_fields & HA_CREATE_USED_STATS_SAMPLE_PAGES))
    stats_sample_pages = share->stats_sample_pages;
  if (!(used_fields & HA_CREATE_USED_STATS_AUTO_RECALC))
    stats_auto_recalc = share->stats_auto_recalc;
  if (!(used_fields & HA_CREATE_USED_TABLESPACE))
    tablespace = share->tablespace;
  if (storage_media == HA_SM_DEFAULT)
    storage_media = share->default_storage_media;
  /* Creation of federated table with LIKE clause needs connection string */
  if (!(used_fields & HA_CREATE_USED_CONNECTION))
    connect_string = share->connect_string;
  if (!(used_fields & HA_CREATE_USED_COMMENT)) {
    // Assert to check that used_fields flag and comment are in sync.
    DBUG_ASSERT(!comment.str);
    comment = share->comment;
  }
  if (!(used_fields & HA_CREATE_USED_COMPRESS)) {
    // Assert to check that used_fields flag and compress are in sync
    DBUG_ASSERT(!compress.str);
    compress = share->compress;
  }
  if (!(used_fields & (HA_CREATE_USED_ENCRYPT))) {
    // Assert to check that used_fields flag and encrypt_type are in sync
    DBUG_ASSERT(!encrypt_type.str);
    encrypt_type = share->encrypt_type;
  }
  if (!(used_fields & HA_CREATE_USED_SECONDARY_ENGINE)) {
    DBUG_ASSERT(secondary_engine.str == nullptr);
    secondary_engine = share->secondary_engine;
  }
  // Engine attributes carry no used_fields flag; copy when still unset.
  if (engine_attribute.str == nullptr)
    engine_attribute = share->engine_attribute;
  if (secondary_engine_attribute.str == nullptr)
    secondary_engine_attribute = share->secondary_engine_attribute;
}
/**************************************************************************** | |
** General handler functions | |
****************************************************************************/ | |
/**
  Create a second handler instance opened on the same table, e.g. to allow
  two index scans to run concurrently on one table instance.

  @param name      Full path of the table name.
  @param mem_root  Memory root the clone (and its ref buffer) is placed on.

  @return The opened clone, or nullptr on failure.
*/
handler *handler::clone(const char *name, MEM_ROOT *mem_root) {
  DBUG_TRACE;
  handler *const cloned = get_new_handler(
      table->s, (table->s->m_part_info != nullptr), mem_root, ht);
  if (cloned == nullptr) return nullptr;

  bool failed = cloned->set_ha_share_ref(ha_share);

  if (!failed) {
    /*
      Allocate the clone's ref buffer here; otherwise ha_open() would put it
      on this->table->mem_root, and that memory could not be reclaimed when
      the clone handler object is destroyed.
    */
    cloned->ref = (uchar *)mem_root->Alloc(ALIGN_SIZE(ref_length) * 2);
    failed = (cloned->ref == nullptr);
  }

  /*
    TODO: Implement a more efficient way to have more than one index open
    for the same table instance. The ha_open call is not cacheable for
    clone.
  */
  if (!failed)
    failed = (cloned->ha_open(table, name, table->db_stat,
                              HA_OPEN_IGNORE_IF_LOCKED, nullptr) != 0);

  if (failed) {
    destroy(cloned);
    return nullptr;
  }
  return cloned;
}
// Bump one of the session status counters (given as a pointer-to-member
// into System_status_var) for the thread currently using this table.
// A no-op when the handler is not attached to a table or a THD.
void handler::ha_statistic_increment(
    ulonglong System_status_var::*offset) const {
  if (table != nullptr && table->in_use != nullptr)
    ++(table->in_use->status_var.*offset);
}
// Return the THD this handler works on behalf of: the table user when the
// handler is attached to an in-use table, the current thread otherwise.
THD *handler::ha_thd(void) const {
  DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
  if (table != nullptr && table->in_use != nullptr) return table->in_use;
  return current_thd;
}
// Detach this table's performance-schema instrumentation from the current
// thread. Requires that the table is unlocked and no scan is in progress.
void handler::unbind_psi() {
#ifdef HAVE_PSI_TABLE_INTERFACE
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_ASSERT(inited == NONE);
  /*
    Notify the instrumentation that this table is not owned
    by this thread any more.
  */
  PSI_TABLE_CALL(unbind_table)(m_psi);
#endif
}
// Re-attach this table's performance-schema instrumentation to the current
// thread, refreshing m_psi from the share's instrumentation handle.
// Requires that the table is unlocked and no scan is in progress.
void handler::rebind_psi() {
#ifdef HAVE_PSI_TABLE_INTERFACE
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_ASSERT(inited == NONE);
  /*
    Notify the instrumentation that this table is now owned
    by this thread.
  */
  PSI_table_share *share_psi = ha_table_share_psi(table_share);
  m_psi = PSI_TABLE_CALL(rebind_table)(share_psi, this, m_psi);
#endif
}
// Enter batch mode for performance-schema table I/O accounting: subsequent
// row operations are aggregated (counted in m_psi_numrows) instead of each
// producing its own timed event. Must not already be in batch mode.
void handler::start_psi_batch_mode() {
#ifdef HAVE_PSI_TABLE_INTERFACE
  DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
  DBUG_ASSERT(m_psi_locker == nullptr);
  m_psi_batch_mode = PSI_BATCH_MODE_STARTING;
  m_psi_numrows = 0;
#endif
}
// Leave batch mode. If a batch wait was actually started (locker present),
// close it and report the accumulated row count to the instrumentation.
void handler::end_psi_batch_mode() {
#ifdef HAVE_PSI_TABLE_INTERFACE
  DBUG_ASSERT(m_psi_batch_mode != PSI_BATCH_MODE_NONE);
  if (m_psi_locker != nullptr) {
    DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_STARTED);
    PSI_TABLE_CALL(end_table_io_wait)(m_psi_locker, m_psi_numrows);
    m_psi_locker = nullptr;
  }
  m_psi_batch_mode = PSI_BATCH_MODE_NONE;
#endif
}
// Map a TABLE_SHARE to its performance-schema instrumentation share.
// Virtual, so subclasses may select a different share if needed.
PSI_table_share *handler::ha_table_share_psi(const TABLE_SHARE *share) const {
  return share->m_psi;
}
/* | |
Open database handler object. | |
Used for opening tables. The name will be the name of the file. | |
A table is opened when it needs to be opened. For instance | |
when a request comes in for a select on the table (tables are not | |
open and closed for each request, they are cached). | |
The server opens all tables by calling ha_open() which then calls | |
the handler specific open(). | |
Try O_RDONLY if cannot open as O_RDWR. Don't wait for locks if not | |
HA_OPEN_WAIT_IF_LOCKED is set | |
@param [out] table_arg Table structure. | |
@param name Full path of table name. | |
@param mode Open mode flags. | |
@param test_if_locked ? | |
@param table_def dd::Table object describing table | |
being open. Can be NULL for temporary | |
tables created by optimizer. | |
@retval >0 Error. | |
@retval 0 Success. | |
*/ | |
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
                     int test_if_locked, const dd::Table *table_def) {
  int error;
  DBUG_TRACE;
  DBUG_PRINT("enter",
             ("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
              name, ht->db_type, table_arg->db_stat, mode, test_if_locked));
  table = table_arg;
  DBUG_ASSERT(table->s == table_share);
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
  /*
    For temporary tables allocate from the share's mem_root so that the ref
    buffer lives as long as the share; otherwise use the table's mem_root.
  */
  MEM_ROOT *mem_root = (test_if_locked & HA_OPEN_TMP_TABLE)
                           ? &table->s->mem_root
                           : &table->mem_root;
  DBUG_ASSERT(alloc_root_inited(mem_root));
  if ((error = open(name, mode, test_if_locked, table_def))) {
    // If read-write access was denied, retry read-only when allowed.
    if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
        (table->db_stat & HA_TRY_READ_ONLY)) {
      table->db_stat |= HA_READ_ONLY;
      error = open(name, O_RDONLY, test_if_locked, table_def);
    }
  }
  if (error) {
    set_my_errno(error); /* Safeguard */
    DBUG_PRINT("error", ("error: %d errno: %d", error, errno));
  } else {
    DBUG_ASSERT(m_psi == nullptr);
    DBUG_ASSERT(table_share != nullptr);
#ifdef HAVE_PSI_TABLE_INTERFACE
    PSI_table_share *share_psi = ha_table_share_psi(table_share);
    m_psi = PSI_TABLE_CALL(open_table)(share_psi, this);
#endif
    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
      table->db_stat |= HA_READ_ONLY;
    (void)extra(HA_EXTRA_NO_READCHECK);  // Not needed in SQL
    /* ref is already allocated for us if we're called from handler::clone() */
    if (!ref && !(ref = (uchar *)mem_root->Alloc(ALIGN_SIZE(ref_length) * 2))) {
      ha_close();
      error = HA_ERR_OUT_OF_MEM;
    } else {
      dup_ref = ref + ALIGN_SIZE(ref_length);
      /*
        Give the table a defined starting cursor, even if it never actually
        seeks or writes. This is important for things like weedout on const
        tables (which is a nonsensical combination, but can happen).
        Note: must only be done on the success path; on allocation failure
        above, ref is still nullptr and memset()ing it would be undefined
        behavior.
      */
      memset(ref, 0, ref_length);
    }
    cached_table_flags = table_flags();
  }
  return error;
}
/** | |
Close handler. | |
Called from sql_base.cc, sql_select.cc, and table.cc. | |
In sql_select.cc it is only used to close up temporary tables or during | |
the process where a temporary table is converted over to being a | |
myisam table. | |
For sql_base.cc look at close_data_tables(). | |
@return Operation status | |
@retval 0 Success | |
@retval != 0 Error (error code returned) | |
*/ | |
int handler::ha_close(void) {
  DBUG_TRACE;
#ifdef HAVE_PSI_TABLE_INTERFACE
  // Tear down instrumentation first; batch mode must already have ended.
  PSI_TABLE_CALL(close_table)(table_share, m_psi);
  m_psi = nullptr; /* instrumentation handle, invalid after close_table() */
  DBUG_ASSERT(m_psi_batch_mode == PSI_BATCH_MODE_NONE);
  DBUG_ASSERT(m_psi_locker == nullptr);
#endif
  // TODO: set table= NULL to mark the handler as closed?
  DBUG_ASSERT(m_psi == nullptr);
  DBUG_ASSERT(m_lock_type == F_UNLCK);
  DBUG_ASSERT(inited == NONE);
  if (m_unique) {
    // It's allocated on memroot and will be freed along with it
    m_unique->cleanup();
    m_unique = nullptr;
  }
  // Delegate to the storage engine's close().
  return close();
}
/** | |
Initialize use of index. | |
@param idx Index to use | |
@param sorted Use sorted order | |
@return Operation status | |
@retval 0 Success | |
@retval != 0 Error (error code returned) | |
*/ | |
/**
  Initialize use of an index.

  @param idx     Index to use.
  @param sorted  Whether rows must be returned in index order.

  @retval 0      Success; inited is set to INDEX.
  @retval != 0   Engine error code; inited stays NONE.
*/
int handler::ha_index_init(uint idx, bool sorted) {
  DBUG_EXECUTE_IF("ha_index_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == NONE);
  const int result = index_init(idx, sorted);
  if (result == 0) inited = INDEX;
  end_range = nullptr;
  return result;
}
/** | |
End use of index. | |
@return Operation status | |
@retval 0 Success | |
@retval != 0 Error (error code returned) | |
*/ | |
int handler::ha_index_end() { | |
DBUG_TRACE; | |
/* SQL HANDLER function can call this without having it locked. */ | |
DBUG_ASSERT(table->open_by_handler || | |
table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK); | |
DBUG_ASSERT(inited == INDEX); | |
inited = NONE; | |
end_range = nullptr; | |
m_record_buffer = nullptr; | |
if (m_unique) m_unique->reset(false); | |
return index_end(); | |
} | |
/** | |
Initialize table for random read or scan. | |
@param scan if true: Initialize for random scans through rnd_next() | |
if false: Initialize for random reads through rnd_pos() | |
@return Operation status | |
@retval 0 Success | |
@retval != 0 Error (error code returned) | |
*/ | |
/**
  Initialize the table for a random read or a table scan.

  @param scan  true: prepare for rnd_next() scans;
               false: prepare for rnd_pos() point reads.

  @retval 0      Success; inited is set to RND.
  @retval != 0   Engine error code; inited stays NONE.
*/
int handler::ha_rnd_init(bool scan) {
  DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == NONE || (inited == RND && scan));
  const int result = rnd_init(scan);
  inited = (result == 0) ? RND : NONE;
  end_range = nullptr;
  return result;
}
/** | |
End use of random access. | |
@return Operation status | |
@retval 0 Success | |
@retval != 0 Error (error code returned) | |
*/ | |
int handler::ha_rnd_end() { | |
DBUG_TRACE; | |
/* SQL HANDLER function can call this without having it locked. */ | |
DBUG_ASSERT(table->open_by_handler || | |
table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK); | |
DBUG_ASSERT(inited == RND); | |
inited = NONE; | |
end_range = nullptr; | |
m_record_buffer = nullptr; | |
return rnd_end(); | |
} | |
/** | |
Read next row via random scan. | |
@param buf Buffer to read the row into | |
@return Operation status | |
@retval 0 Success | |
@retval != 0 Error (error code returned) | |
*/ | |
/**
  Read the next row of a table scan into @p buf.

  @retval 0      Success.
  @retval != 0   Error (e.g. HA_ERR_END_OF_FILE).
*/
int handler::ha_rnd_next(uchar *buf) {
  DBUG_EXECUTE_IF("ha_rnd_next_deadlock", return HA_ERR_LOCK_DEADLOCK;);
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == RND);
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err;
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, err,
                      { err = rnd_next(buf); })
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
/** | |
Read row via random scan from position. | |
@param[out] buf Buffer to read the row into | |
@param pos Position from position() call | |
@return Operation status | |
@retval 0 Success | |
@retval != 0 Error (error code returned) | |
*/ | |
/**
  Read the row identified by @p pos (from a previous position() call).

  @param[out] buf  Buffer to read the row into.
  @param      pos  Position of the row to fetch.

  @retval 0      Success.
  @retval != 0   Error code.
*/
int handler::ha_rnd_pos(uchar *buf, uchar *pos) {
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  /* TODO: Find out how to solve ha_rnd_pos when finding duplicate update. */
  /* DBUG_ASSERT(inited == RND); */
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err;
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, err,
                      { err = rnd_pos(buf, pos); })
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
/**
  Fetch the next row of an ongoing full-text search into @p buf.

  @retval 0      Success.
  @retval != 0   Error code.
*/
int handler::ha_ft_read(uchar *buf) {
  DBUG_TRACE;
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err = ft_read(buf);
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
int handler::ha_sample_init(void *&scan_ctx, double sampling_percentage, | |
int sampling_seed, | |
enum_sampling_method sampling_method) { | |
DBUG_TRACE; | |
DBUG_ASSERT(sampling_percentage >= 0.0); | |
DBUG_ASSERT(sampling_percentage <= 100.0); | |
DBUG_ASSERT(inited == NONE); | |
// Initialise the random number generator. | |
m_random_number_engine.seed(sampling_seed); | |
m_sampling_percentage = sampling_percentage; | |
int result = sample_init(scan_ctx, sampling_percentage, sampling_seed, | |
sampling_method); | |
inited = (result != 0) ? NONE : SAMPLING; | |
return result; | |
} | |
int handler::ha_sample_end(void *scan_ctx) { | |
DBUG_TRACE; | |
DBUG_ASSERT(inited == SAMPLING); | |
inited = NONE; | |
int result = sample_end(scan_ctx); | |
return result; | |
} | |
int handler::ha_sample_next(void *scan_ctx, uchar *buf) { | |
DBUG_TRACE; | |
DBUG_ASSERT(inited == SAMPLING); | |
if (m_sampling_percentage == 0.0) return HA_ERR_END_OF_FILE; | |
m_update_generated_read_fields = table->has_gcol(); | |
int result; | |
MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, MAX_KEY, result, | |
{ result = sample_next(scan_ctx, buf); }) | |
if (result == 0 && m_update_generated_read_fields) { | |
result = update_generated_read_fields(buf, table); | |
m_update_generated_read_fields = false; | |
} | |
table->set_row_status_from_handler(result); | |
return result; | |
} | |
// Default sampling implementation: start a full random scan; the
// percentage filtering is done in sample_next(). The sampling parameters
// are unused here (they were stored by ha_sample_init()).
int handler::sample_init(void *&scan_ctx MY_ATTRIBUTE((unused)), double, int,
                         enum_sampling_method) {
  return rnd_init(true);
}
// Default sampling implementation: terminate the underlying random scan.
int handler::sample_end(void *scan_ctx MY_ATTRIBUTE((unused))) {
  return rnd_end();
}
int handler::sample_next(void *scan_ctx MY_ATTRIBUTE((unused)), uchar *buf) { | |
// Temporary set inited to RND, since we are calling rnd_next(). | |
int res = rnd_next(buf); | |
std::uniform_real_distribution<double> rnd(0.0, 1.0); | |
while (!res && rnd(m_random_number_engine) > (m_sampling_percentage / 100.0)) | |
res = rnd_next(buf); | |
return res; | |
} | |
/**
  Default implementation of handler::records(): count rows with a full
  table scan, unless the engine maintains an exact count
  (HA_COUNT_ROWS_INSTANT), in which case stats.records is returned.

  @param[out] num_rows  Row count; HA_POS_ERROR on error.

  @retval 0      Success.
  @retval != 0   Error code.
*/
int handler::records(ha_rows *num_rows) {
  if (ha_table_flags() & HA_COUNT_ROWS_INSTANT) {
    *num_rows = stats.records;
    return 0;
  }
  int error = 0;
  ha_rows rows = 0;
  start_psi_batch_mode();
  if (!(error = ha_rnd_init(true))) {
    // Scan until EOF, an error, or the query being killed.
    while (!table->in_use->killed) {
      DBUG_EXECUTE_IF("bug28079850", table->in_use->killed = THD::KILL_QUERY;);
      if ((error = ha_rnd_next(table->record[0]))) {
        if (error == HA_ERR_RECORD_DELETED)
          continue;  // Deleted rows are skipped, not counted.
        else
          break;
      }
      ++rows;
    }
  }
  *num_rows = rows;
  end_psi_batch_mode();
  int ha_rnd_end_error = 0;
  // A full scan ends with HA_ERR_END_OF_FILE; anything else is a failure.
  if (error != HA_ERR_END_OF_FILE) *num_rows = HA_POS_ERROR;
  // Call ha_rnd_end() only if the handler has been initialized.
  if (inited && (ha_rnd_end_error = ha_rnd_end())) *num_rows = HA_POS_ERROR;
  return (error != HA_ERR_END_OF_FILE) ? error : ha_rnd_end_error;
}
/**
  Count the rows of a table by scanning the given index, unless the engine
  maintains an exact count (HA_COUNT_ROWS_INSTANT), in which case
  stats.records is returned.

  @param[out] num_rows  Row count; HA_POS_ERROR on error.
  @param      index     Index to scan.

  @retval 0      Success.
  @retval != 0   Error code.
*/
int handler::records_from_index(ha_rows *num_rows, uint index) {
  if (ha_table_flags() & HA_COUNT_ROWS_INSTANT) {
    *num_rows = stats.records;
    return 0;
  }
  int error = 0;
  ha_rows rows = 0;
  uchar *buf = table->record[0];
  start_psi_batch_mode();
  if (!(error = ha_index_init(index, false))) {
    if (!(error = ha_index_first(buf))) {
      rows = 1;
      // Scan until EOF, an error, or the query being killed.
      while (!table->in_use->killed) {
        DBUG_EXECUTE_IF("bug28079850",
                        table->in_use->killed = THD::KILL_QUERY;);
        if ((error = ha_index_next(buf))) {
          if (error == HA_ERR_RECORD_DELETED)
            continue;  // Deleted rows are skipped, not counted.
          else
            break;
        }
        ++rows;
      }
    }
  }
  *num_rows = rows;
  end_psi_batch_mode();
  int ha_index_end_error = 0;
  // A full index scan ends with HA_ERR_END_OF_FILE; anything else fails.
  if (error != HA_ERR_END_OF_FILE) *num_rows = HA_POS_ERROR;
  // Call ha_index_end() only if handler has been initialized.
  if (inited && (ha_index_end_error = ha_index_end())) *num_rows = HA_POS_ERROR;
  return (error != HA_ERR_END_OF_FILE) ? error : ha_index_end_error;
}
int handler::handle_records_error(int error, ha_rows *num_rows) { | |
// If query was killed set the error since not all storage engines do it. | |
if (table->in_use->killed) { | |
*num_rows = HA_POS_ERROR; | |
if (error == 0) error = HA_ERR_QUERY_INTERRUPTED; | |
} | |
if (error != 0) DBUG_ASSERT(*num_rows == HA_POS_ERROR); | |
if (*num_rows == HA_POS_ERROR) DBUG_ASSERT(error != 0); | |
if (error != 0) { | |
/* | |
ha_innobase::records may have rolled back internally. | |
In this case, thd_mark_transaction_to_rollback() will have been called. | |
For the errors below, we need to abort right away. | |
*/ | |
switch (error) { | |
case HA_ERR_LOCK_DEADLOCK: | |
case HA_ERR_LOCK_TABLE_FULL: | |
case HA_ERR_LOCK_WAIT_TIMEOUT: | |
case HA_ERR_QUERY_INTERRUPTED: | |
print_error(error, MYF(0)); | |
return error; | |
default: | |
return error; | |
} | |
} | |
return 0; | |
} | |
/** | |
Read [part of] row via [part of] index. | |
@param[out] buf buffer where store the data | |
@param key Key to search for | |
@param keypart_map Which part of key to use | |
@param find_flag Direction/condition on key usage | |
@returns Operation status | |
@retval 0 Success (found a record, and function has | |
set table status to "has row") | |
@retval HA_ERR_END_OF_FILE Row not found (function has set table status | |
to "no row"). End of index passed. | |
@retval HA_ERR_KEY_NOT_FOUND Row not found (function has set table status | |
to "no row"). Index cursor positioned. | |
@retval != 0 Error | |
@note Positions an index cursor to the index specified in the handle. | |
Fetches the row if available. If the key value is null, | |
begin at the first key of the index. | |
ha_index_read_map can be restarted without calling index_end on the previous | |
index scan and without calling ha_index_init. In this case the | |
ha_index_read_map is on the same index as the previous ha_index_scan. | |
This is particularly used in conjunction with multi read ranges. | |
*/ | |
int handler::ha_index_read_map(uchar *buf, const uchar *key,
                               key_part_map keypart_map,
                               enum ha_rkey_function find_flag) {
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);
  DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err;
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, err, {
    err = index_read_map(buf, key, keypart_map, find_flag);
  })
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
/**
  Position on the last row matching the key prefix and read it.

  @param[out] buf          Buffer for the row data.
  @param      key          Key to search for.
  @param      keypart_map  Which key parts of @p key to use.

  @retval 0      Success.
  @retval != 0   Error code.
*/
int handler::ha_index_read_last_map(uchar *buf, const uchar *key,
                                    key_part_map keypart_map) {
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);
  DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err;
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, err,
                      { err = index_read_last_map(buf, key, keypart_map); })
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
/** | |
Initializes an index and read it. | |
@see handler::ha_index_read_map. | |
*/ | |
int handler::ha_index_read_idx_map(uchar *buf, uint index, const uchar *key,
                                   key_part_map keypart_map,
                                   enum ha_rkey_function find_flag) {
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(end_range == nullptr);
  DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err;
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, index, err, {
    err = index_read_idx_map(buf, index, key, keypart_map, find_flag);
  })
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table, index);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
/** | |
Reads the next row via index. | |
@param[out] buf Row data | |
@return Operation status. | |
@retval 0 Success | |
@retval HA_ERR_END_OF_FILE Row not found | |
@retval != 0 Error | |
*/ | |
int handler::ha_index_next(uchar *buf) {
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);
  DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err;
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, err,
                      { err = index_next(buf); })
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
/** | |
Reads the previous row via index. | |
@param[out] buf Row data | |
@return Operation status. | |
@retval 0 Success | |
@retval HA_ERR_END_OF_FILE Row not found | |
@retval != 0 Error | |
*/ | |
int handler::ha_index_prev(uchar *buf) {
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);
  DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err;
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, err,
                      { err = index_prev(buf); })
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
/** | |
Reads the first row via index. | |
@param[out] buf Row data | |
@return Operation status. | |
@retval 0 Success | |
@retval HA_ERR_END_OF_FILE Row not found | |
@retval != 0 Error | |
*/ | |
int handler::ha_index_first(uchar *buf) {
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);
  DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err;
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, err,
                      { err = index_first(buf); })
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
/** | |
Reads the last row via index. | |
@param[out] buf Row data | |
@return Operation status. | |
@retval 0 Success | |
@retval HA_ERR_END_OF_FILE Row not found | |
@retval != 0 Error | |
*/ | |
int handler::ha_index_last(uchar *buf) {
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);
  DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err;
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, err,
                      { err = index_last(buf); })
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
/** | |
Reads the next same row via index. | |
@param[out] buf Row data | |
@param key Key to search for | |
@param keylen Length of key | |
@return Operation status. | |
@retval 0 Success | |
@retval HA_ERR_END_OF_FILE Row not found | |
@retval != 0 Error | |
*/ | |
int handler::ha_index_next_same(uchar *buf, const uchar *key, uint keylen) {
  DBUG_TRACE;
  DBUG_ASSERT(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK);
  DBUG_ASSERT(inited == INDEX);
  DBUG_ASSERT(!pushed_idx_cond || buf == table->record[0]);
  // Generated columns must be refreshed once the engine returns the row.
  m_update_generated_read_fields = table->has_gcol();
  int err;
  MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, active_index, err,
                      { err = index_next_same(buf, key, keylen); })
  if (err == 0 && m_update_generated_read_fields) {
    err = update_generated_read_fields(buf, table, active_index);
    m_update_generated_read_fields = false;
  }
  table->set_row_status_from_handler(err);
  return err;
}
/** | |
Read first row (only) from a table. | |