Permalink
Browse files

Bug#21046372: GAPS IN RETRIEVED_GTID_SET WHILE NO GAPS IN

EXECUTED_GTID_SET

Problem:
========
Under certain circumstances it is possible that
Retrieved_Gtid_Set on slave will contain gaps while actually
no gaps will be in Executed_Gtid_Set and slave binary logs.

This happens when the slave rotates the relay log in such a
way that the last event of a relay log is the one that sets
GTID_NEXT, the following relay log contains a few more GTIDs,
and then the slave restarts. After the restart,
Retrieved_Gtid_Set is computed wrongly and contains phantom
gaps.

Analysis:
========
Consider a scenario where GTIDs 1-3 have been written to the
relay log and the slave is about to write the 4th GTID event.
The existing code first writes the event to the relay log and
only then adds the new GTID to the Retrieved_Gtid_Set. If a
rotation happens right after the GTID event has been written,
the newly created relay log will have its Previous_Gtid set
as 1-3. If the slave restarts in this state and a few more
GTIDs are present after this event, the Retrieved_Gtid_Set
will have a gap, e.g. 1-3:5-6, where the 4th GTID is missing.

Fix:
===
Add the GTID to the Retrieved_Gtid_Set before writing the
actual event to the relay log. If writing to the relay log
fails for any reason, remove the GTID from the
Retrieved_Gtid_Set again.
  • Loading branch information...
Sujatha Sivakumar
Sujatha Sivakumar committed Aug 5, 2015
1 parent 19e3db0 commit a2b4259f24b76bc5032d9dbfe6f4452cc9dafc70
@@ -0,0 +1,56 @@
include/master-slave.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
"Test case 1"
CREATE TABLE t1(f1 INT) ENGINE=INNODB;
INSERT INTO t1 VALUES(1);
INSERT INTO t1 VALUES(2);
include/sync_slave_sql_with_master.inc
SET @debug_save=@@GLOBAL.debug;
SET GLOBAL debug="d,flush_after_reading_gtid_event";
[connection master]
INSERT INTO t1 VALUES(1);
include/sync_slave_sql_with_master.inc
SET @@GLOBAL.debug=@debug_save;
[connection master]
INSERT INTO t1 VALUES(1);
include/sync_slave_sql_with_master.inc
include/stop_slave.inc
include/rpl_restart_server.inc [server_number=2]
[connection slave]
include/start_slave.inc
[connection master]
INSERT INTO t1 VALUES(1);
include/sync_slave_sql_with_master.inc
include/wait_for_slave_param.inc [Retrieved_Gtid_Set]
[connection master]
DROP TABLE t1;
include/sync_slave_sql_with_master.inc
include/rpl_reset.inc
"Test case 2"
[connection master]
CREATE TABLE t1(f1 INT) ENGINE=INNODB;
INSERT INTO t1 VALUES(1);
INSERT INTO t1 VALUES(2);
include/sync_slave_sql_with_master.inc
call mtr.add_suppression('Relay log write failure: could not queue event from master');
SET @debug_save=@@GLOBAL.debug;
SET GLOBAL debug="d,set_append_buffer_error";
[connection master]
INSERT INTO t1 VALUES(1);
[connection slave]
include/wait_for_slave_io_error.inc [errno=1595]
Last_IO_Error = 'Relay log write failure: could not queue event from master'
SET @@GLOBAL.debug=@debug_save;
include/stop_slave_sql.inc
include/wait_for_slave_param.inc [Retrieved_Gtid_Set]
include/start_slave.inc
[connection master]
INSERT INTO t1 VALUES(1);
include/sync_slave_sql_with_master.inc
[connection master]
DROP TABLE t1;
include/sync_slave_sql_with_master.inc
include/rpl_end.inc
@@ -0,0 +1,107 @@
###############################################################################
# Bug#21046372: GAPS IN RETRIEVED_GTID_SET WHILE NO GAPS IN
# EXECUTED_GTID_SET
#
# Problem:
# ========
# Under certain circumstances it is possible that Retrieved_Gtid_Set on slave
# will contain gaps while actually no gaps will be in Executed_Gtid_Set and
# slave binary logs.
#
# Test:
# =====
# case1:
# Simulate a test scenario where slave rotates relay log in such a way that
# last event contains record which sets GTID_NEXT, then following log contains
# few GTIDs, then slave restarts. Post restart the retrieved set should not
# contain any gaps.
# case2:
# As part of fixing case1 we add GTID to Retrieved_Gtid_Set before we write
# the actual event to relay log. If writing the actual event to relay log
# fails then we remove the GTID from the Retrieved_Gtid_Set. Simulate relay
# log write failure when writing an event with gtidno:4 to relay log. Check
# that Retrieved_Gtid_Set shows gtid set as:1-3.
###############################################################################
# Prerequisites: GTID mode, a debug build (the test relies on DBUG injection
# points) and a two-server master/slave topology.
--source include/have_gtid.inc
--source include/have_debug.inc
# Script is independent of binlog format hence considering mixed mode
--source include/have_binlog_format_mixed.inc
--source include/master-slave.inc

###############################################################################
# Test case 1: relay log rotation right after a GTID event, then a restart.
###############################################################################
--echo "Test case 1"
# Remember the master's server UUID; it is used to build the expected
# Retrieved_Gtid_Set value below.
--let $master_uuid= query_get_value(SELECT @@SERVER_UUID, @@SERVER_UUID, 1)
# Three transactions on the master; the expected values further down treat
# them as GTIDs 1-3.
CREATE TABLE t1(f1 INT) ENGINE=INNODB;
INSERT INTO t1 VALUES(1);
INSERT INTO t1 VALUES(2);
--source include/sync_slave_sql_with_master.inc
# NOTE(review): the statements below are presumably executed on the slave
# connection (sync_slave_sql_with_master.inc is expected to switch to it) --
# confirm against the include file.
# Save the current debug settings so they can be restored afterwards.
SET @debug_save=@@GLOBAL.debug;
# With this debug point active, queueing the GTID event with gno 4 turns on
# 'set_max_size_zero', which sets the relay log max_size to 0 and thereby
# forces a relay log rotation right after that event has been appended.
SET GLOBAL debug="d,flush_after_reading_gtid_event";
--source include/rpl_connection_master.inc
# 4th transaction: its GTID event is the one that triggers the rotation.
INSERT INTO t1 VALUES(1);
--source include/sync_slave_sql_with_master.inc
# Restore the saved debug settings.
SET @@GLOBAL.debug=@debug_save;
--source include/rpl_connection_master.inc
# 5th transaction: written to the relay log created by the forced rotation.
INSERT INTO t1 VALUES(1);
--source include/sync_slave_sql_with_master.inc
# Restart the slave server (server number 2) and start replication again.
--source include/stop_slave.inc
--let $rpl_server_number= 2
--source include/rpl_restart_server.inc
--source include/rpl_connection_slave.inc
--source include/start_slave.inc
--source include/rpl_connection_master.inc
# 6th transaction, replicated after the restart.
INSERT INTO t1 VALUES(1);
--source include/sync_slave_sql_with_master.inc
# The retrieved set must be the contiguous range 1-6, i.e. GTID 4 must not
# show up as a gap after the restart.
--let $slave_param= Retrieved_Gtid_Set
--let $slave_param_value= $master_uuid:1-6
--source include/wait_for_slave_param.inc
# Clean up and reset replication before the next test case.
--source include/rpl_connection_master.inc
DROP TABLE t1;
--source include/sync_slave_sql_with_master.inc
--source include/rpl_reset.inc

###############################################################################
# Test case 2: relay log write failure while queueing a GTID event.
###############################################################################
--echo "Test case 2"
--source include/rpl_connection_master.inc
--let $master_uuid= query_get_value(SELECT @@SERVER_UUID, @@SERVER_UUID, 1)
# Three transactions again; the expected value below treats them as GTIDs 1-3
# (presumably GTID numbering starts over after rpl_reset.inc -- confirm).
CREATE TABLE t1(f1 INT) ENGINE=INNODB;
INSERT INTO t1 VALUES(1);
INSERT INTO t1 VALUES(2);
--source include/sync_slave_sql_with_master.inc
# The injected failure below reports this error; suppress it so that the
# test run is not flagged because of it.
call mtr.add_suppression('Relay log write failure: could not queue event from master');
SET @debug_save=@@GLOBAL.debug;
# With this debug point active, queueing the GTID event with gno 4 turns on
# 'simulate_append_buffer_error', which makes the relay log append of that
# event be treated as failed.
SET GLOBAL debug="d,set_append_buffer_error";
--source include/rpl_connection_master.inc
# 4th transaction: queueing its GTID event on the slave hits the injected
# write failure.
INSERT INTO t1 VALUES(1);
--source include/rpl_connection_slave.inc
# Wait until the slave I/O thread reports the relay log write failure and
# print the error message into the result file.
--let $slave_io_errno= convert_error(ER_SLAVE_RELAY_LOG_WRITE_FAILURE)
--let $show_slave_io_error= 1
--source include/wait_for_slave_io_error.inc
# Restore the saved debug settings so that the next attempt can succeed.
SET @@GLOBAL.debug=@debug_save;
--source include/stop_slave_sql.inc
# GTID 4 was added to the retrieved set before the write was attempted and
# must have been removed again when the write failed: only 1-3 may remain.
--let $slave_param= Retrieved_Gtid_Set
--let $slave_param_value= $master_uuid:1-3
--source include/wait_for_slave_param.inc
# Resume replication and verify that the slave catches up with the master.
--source include/start_slave.inc
--source include/rpl_connection_master.inc
INSERT INTO t1 VALUES(1);
--source include/sync_slave_sql_with_master.inc
# Clean up.
--source include/rpl_connection_master.inc
DROP TABLE t1;
--source include/sync_slave_sql_with_master.inc
--source include/rpl_end.inc
View
@@ -5056,11 +5056,19 @@ bool MYSQL_BIN_LOG::after_append_to_relay_log(Master_info *mi)
bool error= false;
if (flush_and_sync(0) == 0)
{
DBUG_EXECUTE_IF ("set_max_size_zero",
{max_size=0;});
// If relay log is too big, rotate
if ((uint) my_b_append_tell(&log_file) >
DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size))
{
error= new_file_without_locking(mi->get_mi_description_event());
DBUG_EXECUTE_IF ("set_max_size_zero",
{
max_size=1073741824;
DBUG_SET("-d,set_max_size_zero");
DBUG_SET("-d,flush_after_reading_gtid_event");
});
}
}
View
@@ -6690,6 +6690,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
char *save_buf= NULL; // needed for checksumming the fake Rotate event
char rot_buf[LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN + FN_REFLEN];
Gtid gtid= { 0, 0 };
Gtid old_retrieved_gtid;
Log_event_type event_type= (Log_event_type)buf[EVENT_TYPE_OFFSET];
DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
@@ -7117,29 +7118,58 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
}
else
{
DBUG_EXECUTE_IF("flush_after_reading_gtid_event",
if (event_type == GTID_LOG_EVENT && gtid.gno == 4)
DBUG_SET("+d,set_max_size_zero");
);
DBUG_EXECUTE_IF("set_append_buffer_error",
if (event_type == GTID_LOG_EVENT && gtid.gno == 4)
DBUG_SET("+d,simulate_append_buffer_error");
);
/*
Add the GTID to the retrieved set before actually appending it to relay
log. This will ensure that if a rotation happens at this point of time the
new GTID will be reflected as part of Previous_Gtid set and
Retrieved_Gtid_Set will not have any gaps.
*/
if (event_type == GTID_LOG_EVENT)
{
global_sid_lock->rdlock();
old_retrieved_gtid= *(mi->rli->get_last_retrieved_gtid());
int ret= rli->add_logged_gtid(gtid.sidno, gtid.gno);
if (!ret)
rli->set_last_retrieved_gtid(gtid);
global_sid_lock->unlock();
if (ret != 0)
{
mysql_mutex_unlock(log_lock);
goto err;
}
}
/* write the event to the relay log */
if (likely(rli->relay_log.append_buffer(buf, event_len, mi) == 0))
if (!DBUG_EVALUATE_IF("simulate_append_buffer_error", 1, 0) &&
likely(rli->relay_log.append_buffer(buf, event_len, mi) == 0))
{
mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
}
else
{
if (event_type == GTID_LOG_EVENT)
{
global_sid_lock->rdlock();
int ret= rli->add_logged_gtid(gtid.sidno, gtid.gno);
if (!ret)
rli->set_last_retrieved_gtid(gtid);
global_sid_lock->unlock();
if (ret != 0)
Gtid_set * retrieved_set= (const_cast<Gtid_set *>(mi->rli->get_gtid_set()));
if (retrieved_set->_remove_gtid(gtid) != RETURN_STATUS_OK)
{
global_sid_lock->unlock();
mysql_mutex_unlock(log_lock);
goto err;
}
if (!old_retrieved_gtid.empty())
rli->set_last_retrieved_gtid(old_retrieved_gtid);
global_sid_lock->unlock();
}
}
else
{
error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
}
rli->ign_master_log_name_end[0]= 0; // last event is not ignored

0 comments on commit a2b4259

Please sign in to comment.