Permalink
Browse files

Bug#19316063: MAKE MTS WORK WITH RELAY_LOG_RECOVERY=1 WHEN

GTID IS ENABLED

Analysis:
========
When GTID is enabled, MTS doesn't need to care about gaps.

If the auto position replication protocol is enabled, it
will make dump thread handle all of the gaps on master side.

If the auto position replication protocol is not enabled,
the dump thread will start replicating from the first gap
and the GTID auto skip feature will make sure that the
already applied transactions will not be applied twice.

Fix:
===
During relay log recovery process check if GTID mode is
enabled or not. If enabled reset the MTS recovery process
so that GTID protocol can fill the MTS gaps.
  • Loading branch information...
Sujatha Sivakumar
Sujatha Sivakumar committed May 6, 2015
1 parent bdacb31 commit fce558959bd0e5af1ae6aac3d8573db00c271dfd
@@ -0,0 +1,121 @@
###############################################################################
# Bug#19316063: MAKE MTS WORK WITH RELAY_LOG_RECOVERY=1 WHEN
# GTID IS ENABLED
#
# Problem:
# ========
# When gaps are present in MTS, restarting the server with
# relay-log-recovery=1 results in the following error:
# "relay-log-recovery cannot be executed when the slave was stopped with an
# error or killed in MTS mode"
#
# Test:
# =====
# Enable GTID protocol along with MTS. Generate gaps in MTS, on the slave and
# restart slave server with relay-log-recovery=1. The gaps should
# be filled because of GTID protocol and slave should be in sync with master.
###############################################################################
# Test body: creates MTS gaps on the slave, restarts the slave server with
# --relay-log-recovery=1 and then checks that every dN.t1 table is identical
# on master and slave.
# NOTE(review): this looks like the file sourced by the driver test as
# extra/rpl_tests/rpl_gtid_mts_relay_log_recovery.test; it relies on the
# caller having replication running and the master1/slave1 connections
# available -- confirm.

# Set up four databases with one table each.
--source include/rpl_connection_master.inc
CREATE DATABASE d1;
CREATE DATABASE d2;
CREATE DATABASE d3;
CREATE DATABASE d4;
CREATE TABLE d1.t1 (a int unique) ENGINE=INNODB;
CREATE TABLE d2.t1 (a int unique) ENGINE=INNODB;
CREATE TABLE d3.t1 (a int unique) ENGINE=INNODB;
CREATE TABLE d4.t1 (a int unique) ENGINE=INNODB;
--source include/sync_slave_sql_with_master.inc
# Stop the applier while the master workload below is generated; it is
# restarted with START SLAVE SQL_THREAD once the IO thread has caught up.
STOP SLAVE SQL_THREAD;
# Debug point used to split a transaction across relay logs (see the comment
# on the master1 transaction below).
SET GLOBAL DEBUG='d,flush_after_reading_user_var_event';
# Generate three blocked workers on slave for 3 GAPS.
# This local transaction is left open here and is only committed further down,
# after the SQL thread has been restarted.
BEGIN;
INSERT INTO d1.t1 VALUES (1); # to cause the dup key error
INSERT INTO d2.t1 VALUES (2);
INSERT INTO d3.t1 VALUES (3);
# On the master: one independent insert per database.
--source include/rpl_connection_master.inc
INSERT INTO d1.t1 VALUES (4);
INSERT INTO d2.t1 VALUES (5);
INSERT INTO d3.t1 VALUES (6);
INSERT INTO d4.t1 VALUES (7);
--source include/rpl_connection_master1.inc
# Using debug extension point d,flush_after_reading_user_var_event
# split the following transaction across the relay logs on slave.
# It inserts the same values (1, 2, 3) as the open transaction on the slave.
BEGIN;
INSERT INTO d1.t1 VALUES (1);
SET @v= 100;
INSERT INTO d1.t1 VALUES (@v);
INSERT INTO d2.t1 VALUES (2);
INSERT INTO d3.t1 VALUES (3);
# Open a second master transaction touching only d4.t1; it is committed after
# the master1 transaction.
--source include/rpl_connection_master.inc
BEGIN;
INSERT INTO d4.t1 VALUES (8); # this worker will race over one inserting (1)
--source include/rpl_connection_master1.inc
COMMIT;
# On the slave: wait for the debug point to signal 'Reached', issue FLUSH LOGS
# (presumably rotating the relay log in the middle of the transaction), then
# signal the waiting thread to continue.
--source include/rpl_connection_slave1.inc
SET DEBUG_SYNC='now WAIT_FOR Reached';
FLUSH LOGS;
--echo # Let IO thread continue splitting has been done now.
SET DEBUG_SYNC= 'now SIGNAL signal.flush_complete_continue';
--source include/rpl_connection_master.inc
COMMIT;
INSERT INTO d4.t1 VALUES (9);
--source include/sync_slave_io_with_master.inc
--source include/rpl_connection_slave1.inc
START SLAVE SQL_THREAD;
# Make sure the worker applying the d4.t1 insert (8) raced ahead of the one
# that is occupied with d1.t1.
# NOTE(review): $count is not referenced anywhere else in this script --
# confirm whether it is still needed.
--let $count= 1
--let $table= d4.t1
--let $wait_condition= select count(*) = 1 from $table where a = 8
--source include/wait_condition.inc
--source include/rpl_connection_slave.inc
# Commit the local slave transaction so that the worker executing the
# replicated insert of (1) errors out with a duplicate key error.
COMMIT;
--let $slave_sql_errno= convert_error(ER_DUP_ENTRY)
--source include/wait_for_slave_sql_error.inc
--source include/stop_slave_io.inc
# Remove tuples 1, 2 and 3 from the slave so that, after the restart, the
# missing transaction can be applied again without hitting the duplicate key
# error.
DELETE FROM d1.t1 where a=1;
DELETE FROM d2.t1 where a=2;
DELETE FROM d3.t1 where a=3;
# Restart the slave server with relay-log-recovery enabled and the slave
# threads started automatically (--skip_slave_start=FALSE).
--let $rpl_server_number= 2
--let $rpl_start_with_gtids= 1
--let $rpl_server_parameters= --skip_slave_start=FALSE --relay_log_info_repository=TABLE --master_info_repository=TABLE --sync_master_info=1 --relay-log-recovery=1
--source include/rpl_restart_server.inc
# Wait for slave thread to apply all events
--let $show_statement= SHOW PROCESSLIST
--let $field= State
--let $condition= = 'Slave has read all relay log; waiting for the slave I/O thread to update it';
--source include/wait_show_condition.inc
# Compare the tables on master and slave; they should be in sync.
# The loop counts down from d4.t1 to d1.t1.
let $i=4;
while($i)
{
--let diff_tables=master:d$i.t1, slave:d$i.t1
--source include/diff_tables.inc
--dec $i
}
#
# cleanup
#
--source include/rpl_connection_master.inc
DROP DATABASE d1;
DROP DATABASE d2;
DROP DATABASE d3;
DROP DATABASE d4;
--source include/sync_slave_sql_with_master.inc
@@ -0,0 +1,147 @@
include/master-slave.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
[connection slave]
CALL mtr.add_suppression("Recovery from master pos*");
CALL mtr.add_suppression("Slave SQL: .*Duplicate entry .1.*");
CALL mtr.add_suppression("Slave SQL: ... The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state.");
include/stop_slave.inc
CHANGE MASTER TO MASTER_AUTO_POSITION=0;
SET @@global.slave_parallel_workers=4;
SET @@global.relay_log_info_repository='TABLE';
SET @@global.master_info_repository='TABLE';
include/start_slave.inc
[connection master]
CREATE DATABASE d1;
CREATE DATABASE d2;
CREATE DATABASE d3;
CREATE DATABASE d4;
CREATE TABLE d1.t1 (a int unique) ENGINE=INNODB;
CREATE TABLE d2.t1 (a int unique) ENGINE=INNODB;
CREATE TABLE d3.t1 (a int unique) ENGINE=INNODB;
CREATE TABLE d4.t1 (a int unique) ENGINE=INNODB;
include/sync_slave_sql_with_master.inc
STOP SLAVE SQL_THREAD;
SET GLOBAL DEBUG='d,flush_after_reading_user_var_event';
BEGIN;
INSERT INTO d1.t1 VALUES (1);
INSERT INTO d2.t1 VALUES (2);
INSERT INTO d3.t1 VALUES (3);
[connection master]
INSERT INTO d1.t1 VALUES (4);
INSERT INTO d2.t1 VALUES (5);
INSERT INTO d3.t1 VALUES (6);
INSERT INTO d4.t1 VALUES (7);
[connection master1]
BEGIN;
INSERT INTO d1.t1 VALUES (1);
SET @v= 100;
INSERT INTO d1.t1 VALUES (@v);
INSERT INTO d2.t1 VALUES (2);
INSERT INTO d3.t1 VALUES (3);
[connection master]
BEGIN;
INSERT INTO d4.t1 VALUES (8);
[connection master1]
COMMIT;
[connection slave1]
SET DEBUG_SYNC='now WAIT_FOR Reached';
FLUSH LOGS;
# Let IO thread continue splitting has been done now.
SET DEBUG_SYNC= 'now SIGNAL signal.flush_complete_continue';
[connection master]
COMMIT;
INSERT INTO d4.t1 VALUES (9);
include/sync_slave_io_with_master.inc
[connection slave1]
START SLAVE SQL_THREAD;
[connection slave]
COMMIT;
include/wait_for_slave_sql_error.inc [errno=1062]
include/stop_slave_io.inc
DELETE FROM d1.t1 where a=1;
DELETE FROM d2.t1 where a=2;
DELETE FROM d3.t1 where a=3;
include/rpl_restart_server.inc [server_number=2 gtids=on parameters: --skip_slave_start=FALSE --relay_log_info_repository=TABLE --master_info_repository=TABLE --sync_master_info=1 --relay-log-recovery=1]
include/diff_tables.inc [master:d4.t1, slave:d4.t1]
include/diff_tables.inc [master:d3.t1, slave:d3.t1]
include/diff_tables.inc [master:d2.t1, slave:d2.t1]
include/diff_tables.inc [master:d1.t1, slave:d1.t1]
[connection master]
DROP DATABASE d1;
DROP DATABASE d2;
DROP DATABASE d3;
DROP DATABASE d4;
include/sync_slave_sql_with_master.inc
include/rpl_reset.inc
include/stop_slave.inc
CHANGE MASTER TO MASTER_AUTO_POSITION=1;
SET @@global.slave_parallel_workers=4;
SET @@global.relay_log_info_repository='TABLE';
SET @@global.master_info_repository='TABLE';
include/start_slave.inc
[connection master]
CREATE DATABASE d1;
CREATE DATABASE d2;
CREATE DATABASE d3;
CREATE DATABASE d4;
CREATE TABLE d1.t1 (a int unique) ENGINE=INNODB;
CREATE TABLE d2.t1 (a int unique) ENGINE=INNODB;
CREATE TABLE d3.t1 (a int unique) ENGINE=INNODB;
CREATE TABLE d4.t1 (a int unique) ENGINE=INNODB;
include/sync_slave_sql_with_master.inc
STOP SLAVE SQL_THREAD;
SET GLOBAL DEBUG='d,flush_after_reading_user_var_event';
BEGIN;
INSERT INTO d1.t1 VALUES (1);
INSERT INTO d2.t1 VALUES (2);
INSERT INTO d3.t1 VALUES (3);
[connection master]
INSERT INTO d1.t1 VALUES (4);
INSERT INTO d2.t1 VALUES (5);
INSERT INTO d3.t1 VALUES (6);
INSERT INTO d4.t1 VALUES (7);
[connection master1]
BEGIN;
INSERT INTO d1.t1 VALUES (1);
SET @v= 100;
INSERT INTO d1.t1 VALUES (@v);
INSERT INTO d2.t1 VALUES (2);
INSERT INTO d3.t1 VALUES (3);
[connection master]
BEGIN;
INSERT INTO d4.t1 VALUES (8);
[connection master1]
COMMIT;
[connection slave1]
SET DEBUG_SYNC='now WAIT_FOR Reached';
FLUSH LOGS;
# Let IO thread continue splitting has been done now.
SET DEBUG_SYNC= 'now SIGNAL signal.flush_complete_continue';
[connection master]
COMMIT;
INSERT INTO d4.t1 VALUES (9);
include/sync_slave_io_with_master.inc
[connection slave1]
START SLAVE SQL_THREAD;
[connection slave]
COMMIT;
include/wait_for_slave_sql_error.inc [errno=1062]
include/stop_slave_io.inc
DELETE FROM d1.t1 where a=1;
DELETE FROM d2.t1 where a=2;
DELETE FROM d3.t1 where a=3;
include/rpl_restart_server.inc [server_number=2 gtids=on parameters: --skip_slave_start=FALSE --relay_log_info_repository=TABLE --master_info_repository=TABLE --sync_master_info=1 --relay-log-recovery=1]
include/diff_tables.inc [master:d4.t1, slave:d4.t1]
include/diff_tables.inc [master:d3.t1, slave:d3.t1]
include/diff_tables.inc [master:d2.t1, slave:d2.t1]
include/diff_tables.inc [master:d1.t1, slave:d1.t1]
[connection master]
DROP DATABASE d1;
DROP DATABASE d2;
DROP DATABASE d3;
DROP DATABASE d4;
include/sync_slave_sql_with_master.inc
include/rpl_end.inc
@@ -0,0 +1 @@
--gtid-mode=on --enforce-gtid-consistency --log-slave-updates
@@ -0,0 +1 @@
--gtid-mode=on --enforce-gtid-consistency --log-slave-updates --relay_log_info_repository=TABLE --master_info_repository=TABLE --sync_master_info=1 --relay-log-recovery=1 --slave-transaction-retries=0
@@ -0,0 +1,60 @@
###############################################################################
# Bug#19316063: MAKE MTS WORK WITH RELAY_LOG_RECOVERY=1 WHEN
# GTID IS ENABLED
#
# Problem:
# ========
# When gaps are present in MTS, restarting the server with
# relay-log-recovery=1 results in the following error:
# "relay-log-recovery cannot be executed when the slave was stopped with an
# error or killed in MTS mode"
#
# Test:
# =====
# Enable GTID protocol and execute the test with both master_auto_position
# on and master_auto_position off. Generate gaps in MTS, on the slave and
# restart slave server with relay-log-recovery=1. The gaps should
# be filled because of GTID protocol and slave should be in sync with master.
###############################################################################
# Driver: runs the shared MTS relay-log-recovery scenario twice, first with
# MASTER_AUTO_POSITION=0 and then with MASTER_AUTO_POSITION=1.
--source include/force_restart.inc
# Script is independent of binlog format hence considering mixed mode
--source include/have_binlog_format_mixed.inc
--source include/have_gtid.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--let $rpl_gtid_utils= 1
--source include/master-slave.inc
--source include/rpl_connection_slave.inc
# Suppress the messages the scenario is expected to write to the slave error
# log (recovery notice, the provoked duplicate-entry error, MTS stop warning).
CALL mtr.add_suppression("Recovery from master pos*");
CALL mtr.add_suppression("Slave SQL: .*Duplicate entry .1.*");
CALL mtr.add_suppression("Slave SQL: ... The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state.");
--let $auto_position=0
--let $loop_count=2
# Execute the following set of instructions twice
# once with change master with auto position 0
# second time change master with auto position 1
while ($loop_count)
{
# Reconfigure the stopped slave: auto-position mode for this iteration,
# four parallel workers and TABLE repositories.
--source include/stop_slave.inc
eval CHANGE MASTER TO MASTER_AUTO_POSITION=$auto_position;
SET @@global.slave_parallel_workers=4;
SET @@global.relay_log_info_repository='TABLE';
SET @@global.master_info_repository='TABLE';
--source include/start_slave.inc
--source extra/rpl_tests/rpl_gtid_mts_relay_log_recovery.test
# Reset replication only between the two iterations; after the second one
# ($auto_position == 1) rpl_end.inc below finishes the test.
if ($auto_position == 0)
{
--source include/rpl_reset.inc
}
--dec $loop_count
--inc $auto_position
}
--source include/rpl_end.inc
View
@@ -344,12 +344,7 @@ bool Rpl_info_factory::reset_workers(Relay_log_info *rli)
if (error)
sql_print_error("Could not delete from Slave Workers info repository.");
rli->recovery_parallel_workers= 0;
if (rli->recovery_groups_inited)
{
bitmap_free(&rli->recovery_groups);
rli->mts_recovery_group_cnt= 0;
rli->recovery_groups_inited= false;
}
rli->clear_mts_recovery_groups();
if (rli->flush_info(true))
{
error= true;
View
@@ -1,4 +1,4 @@
/* Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
/* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -599,6 +599,16 @@ class Relay_log_info : public Rpl_info
return mts_recovery_group_cnt != 0;
}
/**
  Discards the MTS recovery bookkeeping.

  When recovery_groups_inited is set, frees the recovery_groups bitmap,
  zeroes mts_recovery_group_cnt and clears recovery_groups_inited.
  Does nothing when the bitmap was never initialised.
*/
inline void clear_mts_recovery_groups()
{
  /* Nothing to release unless the bitmap has been set up. */
  if (!recovery_groups_inited)
    return;

  bitmap_free(&recovery_groups);
  mts_recovery_group_cnt= 0;
  recovery_groups_inited= false;
}
/**
returns true if events are to be executed in parallel
*/
Oops, something went wrong.

0 comments on commit fce5589

Please sign in to comment.