Skip to content

Commit

Permalink
Bug#21095969 RPL+LOCK_WAIT_TIMEOUT: BOOL TRANS_CHECK_STATE ASSERTS `T…
Browse files Browse the repository at this point in the history
…HD->GET_TRANSACTION().

The reported assert in the slave's temporarily failing transaction block happens
*every* time the replicated transaction faces a temporary error and
the slave's recovery tables (aka info repositories) are of the transactional
type. Indeed, such a replicated deadlocked or timed-out transaction is to
be rolled back and retried whenever @@global.slave_transaction_retries > 0.

Before it is rolled back, the applier calls global_init_info which,
thanks to the BUG16533802 fixes, starts a new (short-lived) transaction
when relay_log_info_repository='TABLE'.
That leads to an assertion about improper transaction state
because the temporarily failing transaction is still active.

Fixed by relocating a part of the general cleanup of the slave applier
(cleanup_context) so that it is executed before global_init_info().
Running the former function prior to the latter must be safe and actually
makes much more sense.
  • Loading branch information
Andrei Elkin committed Jul 2, 2015
1 parent d7b0c4f commit 52db41e
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 1 deletion.
45 changes: 45 additions & 0 deletions mysql-test/suite/rpl/r/rpl_temporary_error_table_repository.result
@@ -0,0 +1,45 @@
include/master-slave.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
[connection master]
CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 INT) ENGINE= InnoDB;
include/stop_slave.inc
SET @saved_slave_transaction_retries = @@GLOBAL.slave_transaction_retries;
SET @saved_innodb_lock_wait_timeout = @@GLOBAL.innodb_lock_wait_timeout;
SET @saved_master_info_repository = @@GLOBAL.master_info_repository;
SET @saved_relay_log_info_repository = @@GLOBAL.relay_log_info_repository;
SET @@GLOBAL.innodb_lock_wait_timeout = 1;
SET @@GLOBAL.master_info_repository = TYPE;
SET @@GLOBAL.relay_log_info_repository = TYPE;
[connection slave1]
BEGIN;
INSERT INTO t1 VALUES (7,0);
[connection master]
INSERT INTO t1 VALUES (7,7);
[connection slave]
SET @@GLOBAL.slave_transaction_retries = 0;
include/start_slave.inc
### Specified retry number is expected ###
### Timeout error is expected ###
include/wait_for_slave_sql_error.inc [errno=1205]
[connection slave]
SET @@GLOBAL.slave_transaction_retries = 1;
include/start_slave.inc
### Specified retry number is expected ###
### Timeout error is expected ###
include/wait_for_slave_sql_error.inc [errno=1205]
[connection slave1]
ROLLBACK;
#### Cleanup ####
[connection slave]
include/stop_slave.inc
SET @@GLOBAL.slave_transaction_retries = @saved_slave_transaction_retries;
SET @@GLOBAL.master_info_repository = @saved_master_info_repository;
SET @@GLOBAL.relay_log_info_repository = @saved_relay_log_info_repository;
SET @@GLOBAL.innodb_lock_wait_timeout = @saved_innodb_lock_wait_timeout ;
include/start_slave.inc
[connection master]
DROP TABLE t1;
include/rpl_end.inc
91 changes: 91 additions & 0 deletions mysql-test/suite/rpl/t/rpl_temporary_error_table_repository.test
@@ -0,0 +1,91 @@
# ==== Purpose ====
# Testing that a temporary error is processed
# when the slave info repository type is TABLE.
#
# ==== Implementation ====
# While applying a transaction, the slave applier thread is blocked
# until it times out. After the configured number of retries (0, then 1),
# the temporary error is escalated to a regular one and the slave applier stops.
# After the blocking condition is removed the restarted applier
# succeeds.
#
# ==== References ====
# BUG#21095969 RPL+LOCK_WAIT_TIMEOUT: BOOL TRANS_CHECK_STATE ASSERTS
# `THD->GET_TRANSACTION()..`

# This test case is binary log format agnostic though.
--source include/have_binlog_format_row.inc
--source include/not_mts_slave_parallel_workers.inc

# TODO: Use this in 5.7:
#--source include/have_slave_repository_type_file.inc
--let $repo_type_name = 'TABLE'

--source include/master-slave.inc

--source include/rpl_connection_master.inc
CREATE TABLE t1 (c1 INT PRIMARY KEY, c2 INT) ENGINE= InnoDB;

--sync_slave_with_master
--source include/stop_slave.inc
SET @saved_slave_transaction_retries = @@GLOBAL.slave_transaction_retries;
SET @saved_innodb_lock_wait_timeout = @@GLOBAL.innodb_lock_wait_timeout;
SET @saved_master_info_repository = @@GLOBAL.master_info_repository;
SET @saved_relay_log_info_repository = @@GLOBAL.relay_log_info_repository;
SET @@GLOBAL.innodb_lock_wait_timeout = 1;
--replace_result $repo_type_name TYPE
--eval SET @@GLOBAL.master_info_repository = $repo_type_name
--replace_result $repo_type_name TYPE
--eval SET @@GLOBAL.relay_log_info_repository = $repo_type_name

# To block the SQL thread
--source include/rpl_connection_slave1.inc
BEGIN;
INSERT INTO t1 VALUES (7,0);

# Generate data at the master to be replicated
--source include/rpl_connection_master.inc
INSERT INTO t1 VALUES (7,7);

# Retry 0 and 1 time to test both execution branches.

--let $retry_counted = 0
--let $retry_number = 0
while ($retry_number < 2)
{
--source include/rpl_connection_slave.inc
--eval SET @@GLOBAL.slave_transaction_retries = $retry_number
--source include/start_slave.inc

--echo ### Specified retry number is expected ###

# Notice the status var accumulates retries over the loop
--let $retry_counted = `SELECT $retry_counted + $retry_number`
--let $status_var = Slave_retried_transactions
--let $status_var_value = $retry_counted
--source include/wait_for_status_var.inc

--echo ### Timeout error is expected ###

--let $slave_sql_errno= convert_error(ER_LOCK_WAIT_TIMEOUT)
--source include/wait_for_slave_sql_error.inc

--inc $retry_number
}

--source include/rpl_connection_slave1.inc
ROLLBACK;

--echo #### Cleanup ####

--source include/rpl_connection_slave.inc
--source include/stop_slave.inc
SET @@GLOBAL.slave_transaction_retries = @saved_slave_transaction_retries;
SET @@GLOBAL.master_info_repository = @saved_master_info_repository;
SET @@GLOBAL.relay_log_info_repository = @saved_relay_log_info_repository;
SET @@GLOBAL.innodb_lock_wait_timeout = @saved_innodb_lock_wait_timeout ;
--source include/start_slave.inc

--source include/rpl_connection_master.inc
DROP TABLE t1;
--source include/rpl_end.inc
7 changes: 6 additions & 1 deletion sql/rpl_slave.cc
Expand Up @@ -4350,6 +4350,12 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
*/ */
if (rli->trans_retries < slave_trans_retries) if (rli->trans_retries < slave_trans_retries)
{ {
/*
The transaction has to be rolled back before global_init_info is
called, because global_init_info will start a new transaction if
master_info_repository is TABLE.
*/
rli->cleanup_context(thd, 1);
/* /*
We need to figure out if there is a test case that covers We need to figure out if there is a test case that covers
this part. \Alfranio. this part. \Alfranio.
Expand All @@ -4365,7 +4371,6 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
else else
{ {
exec_res= SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK; exec_res= SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK;
rli->cleanup_context(thd, 1);
/* chance for concurrent connection to get more locks */ /* chance for concurrent connection to get more locks */
slave_sleep(thd, min<ulong>(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE), slave_sleep(thd, min<ulong>(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
sql_slave_killed, rli); sql_slave_killed, rli);
Expand Down

0 comments on commit 52db41e

Please sign in to comment.