Permalink
Browse files

Bug#21095969 RPL+LOCK_WAIT_TIMEOUT: BOOL TRANS_CHECK_STATE ASSERTS `T…

…HD->GET_TRANSACTION().

The reported assert in the slave's temporarily failing transaction block happens
*every* time the replicated transaction faces a temporary error and
the slave's recovery tables (aka info repositories) are of the transactional
type. Indeed, such a replicated deadlocked or timed-out transaction is to
be rolled back and retried whenever @@global.slave_transaction_retries > 0.

Before it is rolled back, the applier calls global_init_info which,
thanks to the BUG16533802 fixes, starts a new (short-lived) transaction
when relay_log_info_repository='TABLE'.
That leads to an assertion about improper transaction state
because the temporarily failing transaction is still active.

Fixed by relocating a part of the general cleanup of the slave applier
(cleanup_context) so that it is executed before global_init_info().
Running the former function prior to the latter must be safe and actually
makes much more sense.
  • Loading branch information...
Andrei Elkin
Andrei Elkin committed Jun 23, 2015
1 parent d7b0c4f commit 52db41e94ec7fdd1a01344285b9701c20715a92a
@@ -0,0 +1,45 @@
include/master-slave.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
[connection master]
CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 INT) ENGINE= InnoDB;
include/stop_slave.inc
SET @saved_slave_transaction_retries = @@GLOBAL.slave_transaction_retries;
SET @saved_innodb_lock_wait_timeout = @@GLOBAL.innodb_lock_wait_timeout;
SET @saved_master_info_repository = @@GLOBAL.master_info_repository;
SET @saved_relay_log_info_repository = @@GLOBAL.relay_log_info_repository;
SET @@GLOBAL.innodb_lock_wait_timeout = 1;
SET @@GLOBAL.master_info_repository = TYPE;
SET @@GLOBAL.relay_log_info_repository = TYPE;
[connection slave1]
BEGIN;
INSERT INTO t1 VALUES (7,0);
[connection master]
INSERT INTO t1 VALUES (7,7);
[connection slave]
SET @@GLOBAL.slave_transaction_retries = 0;
include/start_slave.inc
### Specified retry number is expected ###
### Timeout error is expected ###
include/wait_for_slave_sql_error.inc [errno=1205]
[connection slave]
SET @@GLOBAL.slave_transaction_retries = 1;
include/start_slave.inc
### Specified retry number is expected ###
### Timeout error is expected ###
include/wait_for_slave_sql_error.inc [errno=1205]
[connection slave1]
ROLLBACK;
#### Cleanup ####
[connection slave]
include/stop_slave.inc
SET @@GLOBAL.slave_transaction_retries = @saved_slave_transaction_retries;
SET @@GLOBAL.master_info_repository = @saved_master_info_repository;
SET @@GLOBAL.relay_log_info_repository = @saved_relay_log_info_repository;
SET @@GLOBAL.innodb_lock_wait_timeout = @saved_innodb_lock_wait_timeout ;
include/start_slave.inc
[connection master]
DROP TABLE t1;
include/rpl_end.inc
@@ -0,0 +1,91 @@
# ==== Purpose ====
# Testing that temporary error is processed
# when the slave info repository type is TABLE.
#
# ==== Implementation ====
# While applying a transaction, the slave applier thread is blocked
# until it times out. After the configured number of retries (zero included),
# the temporary error is escalated to a regular error and the slave applier stops.
# After the blocking condition is removed the restarted applier
# succeeds.
#
# ==== References ====
# BUG#21095969 RPL+LOCK_WAIT_TIMEOUT: BOOL TRANS_CHECK_STATE ASSERTS
# `THD->GET_TRANSACTION()..`
# This test case is binary log format agnostic though.
# Run under a single binlog format only.
--source include/have_binlog_format_row.inc
# NOTE(review): judging by the include name, this skips the test when
# multi-threaded slave workers are enabled - confirm.
--source include/not_mts_slave_parallel_workers.inc
# TODO: Use this in 5.7:
#--source include/have_slave_repository_type_file.inc
# Repository type assigned below to both master_info_repository and
# relay_log_info_repository.
--let $repo_type_name = 'TABLE'
--source include/master-slave.inc
--source include/rpl_connection_master.inc
CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 INT) ENGINE= InnoDB;
# Wait for the slave to receive t1; the statements that follow run on the
# slave connection.
--sync_slave_with_master
--source include/stop_slave.inc
# Save every global this test modifies so the cleanup section can restore it.
SET @saved_slave_transaction_retries = @@GLOBAL.slave_transaction_retries;
SET @saved_innodb_lock_wait_timeout = @@GLOBAL.innodb_lock_wait_timeout;
SET @saved_master_info_repository = @@GLOBAL.master_info_repository;
SET @saved_relay_log_info_repository = @@GLOBAL.relay_log_info_repository;
# Keep the lock wait short so that the blocked applier times out quickly.
SET @@GLOBAL.innodb_lock_wait_timeout = 1;
# replace_result masks the concrete repository type as TYPE in the recorded
# output of the two statements below.
--replace_result $repo_type_name TYPE
--eval SET @@GLOBAL.master_info_repository = $repo_type_name
--replace_result $repo_type_name TYPE
--eval SET @@GLOBAL.relay_log_info_repository = $repo_type_name
# To block the SQL thread
# The open transaction keeps the row with c1 = 7 locked until the ROLLBACK
# issued after the loop.
--source include/rpl_connection_slave1.inc
BEGIN;
INSERT INTO t1 VALUES (7,0);
# Generate data at the master to be replicated
# The row uses the same primary key value (7) as the uncommitted row
# inserted on slave1 above.
--source include/rpl_connection_master.inc
INSERT INTO t1 VALUES (7,7);
# Retry 0 and 1 time to test both execution branches.
# $retry_counted is the running total expected in Slave_retried_transactions;
# $retry_number is the slave_transaction_retries value of the current iteration.
--let $retry_counted = 0
--let $retry_number = 0
while ($retry_number < 2)
{
--source include/rpl_connection_slave.inc
--eval SET @@GLOBAL.slave_transaction_retries = $retry_number
--source include/start_slave.inc
--echo ### Specified retry number is expected ###
# Notice the status var accumulates retries over the loop
--let $retry_counted = `SELECT $retry_counted + $retry_number`
# Inputs for wait_for_status_var.inc, which presumably waits until the
# status variable reaches the given value.
--let $status_var = Slave_retried_transactions
--let $status_var_value = $retry_counted
--source include/wait_for_status_var.inc
--echo ### Timeout error is expected ###
# Once the allowed retries are used up the SQL thread is expected to stop
# with the lock wait timeout error.
--let $slave_sql_errno= convert_error(ER_LOCK_WAIT_TIMEOUT)
--source include/wait_for_slave_sql_error.inc
--inc $retry_number
}
# Remove the blocking condition: roll back the transaction opened on slave1.
--source include/rpl_connection_slave1.inc
ROLLBACK;
--echo #### Cleanup ####
--source include/rpl_connection_slave.inc
--source include/stop_slave.inc
# Restore the globals saved at the beginning of the test.
SET @@GLOBAL.slave_transaction_retries = @saved_slave_transaction_retries;
SET @@GLOBAL.master_info_repository = @saved_master_info_repository;
SET @@GLOBAL.relay_log_info_repository = @saved_relay_log_info_repository;
SET @@GLOBAL.innodb_lock_wait_timeout = @saved_innodb_lock_wait_timeout ;
# Restart replication with the restored settings; the row lock is gone now.
--source include/start_slave.inc
--source include/rpl_connection_master.inc
DROP TABLE t1;
--source include/rpl_end.inc
View
@@ -4350,6 +4350,12 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
*/
if (rli->trans_retries < slave_trans_retries)
{
/*
The transaction has to be rolled back before global_init_info is
called, because global_init_info will start a new transaction if
master_info_repository is TABLE.
*/
rli->cleanup_context(thd, 1);
/*
We need to figure out if there is a test case that covers
this part. \Alfranio.
@@ -4365,7 +4371,6 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
else
{
exec_res= SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK;
rli->cleanup_context(thd, 1);
/* chance for concurrent connection to get more locks */
slave_sleep(thd, min<ulong>(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
sql_slave_killed, rli);

0 comments on commit 52db41e

Please sign in to comment.