Skip to content

Commit

Permalink
BUG#20938915 2PC SUCCEEDS EVEN THOUGH BINLOG FLUSH/SYNC FAILS
Browse files Browse the repository at this point in the history
Problem: When flushing cache to binary log fails, the error is not getting
caught by group commit logic.

Analysis: Leader of a group in group commit logic tries to flush cache to
binary log file. If it fails (which is very rarely happens due to some low
level errors like disk failure or disk full), error is thrown to the higher
level. This error is not checked properly and the transaction is getting
committed in storage engine as the error is not checked and not propagated
to the higher level. No error to client and no information in binary logs
is making users to get confused when they see inconsistencies between Master
and Slave data.

Fix: Using binlog_error_action variable, Server will decide the action to do
if this situation happens.
If binlog_error_action == ABORT_SERVER, then it will abort the server after
informing the client with 'ER_BINLOGGING_IMPOSSIBLE' error.

If binlog_error_action == 'IGNORE_ERROR', then it will ignore the error
and disable the binlogging further until server is restarted again.
The same will be mentioned in the error log file.
  • Loading branch information
Venkatesh Duggirala committed May 25, 2015
1 parent e0e1f02 commit 3b6b4bf
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 3 deletions.
15 changes: 15 additions & 0 deletions mysql-test/suite/binlog/r/binlog_error_action.result
Expand Up @@ -6,6 +6,8 @@ call mtr.add_suppression("Could not open .*");
call mtr.add_suppression("Could not use .*");
call mtr.add_suppression("Can't generate a unique log-filename master-bin");
call mtr.add_suppression("The server was unable to create a new log file *");
call mtr.add_suppression("An error occured during flushing cache to file.");
call mtr.add_suppression("Either disk is full or file system is read only");
RESET MASTER;
Test case1
SET GLOBAL binlog_error_action= ABORT_SERVER;
Expand Down Expand Up @@ -59,4 +61,17 @@ Tables_in_test
t2
DROP TABLE t2;
SET SESSION debug="-d,fault_injection_init_name";
Test case9
CREATE TABLE t1(i INT);
SET SESSION debug="+d,simulate_error_during_flush_cache_to_file";
SET GLOBAL binlog_error_action= ABORT_SERVER;
INSERT INTO t1 VALUES (1);
ERROR HY000: Binary logging not possible. Message: An error occured during flushing cache to file. 'binlog_error_action' is set to 'ABORT_SERVER'. Hence aborting the server
include/assert.inc [Count of elements in t1 should be 0.]
SET SESSION debug="+d,simulate_error_during_flush_cache_to_file";
SET GLOBAL binlog_error_action= IGNORE_ERROR;
INSERT INTO t1 VALUES (2);
include/assert.inc [Count of elements in t1 should be 1.]
DROP table t1;
SET SESSION debug="-d,simulate_error_during_flush_cache_to_file";
SET SESSION debug="";
32 changes: 31 additions & 1 deletion mysql-test/suite/binlog/t/binlog_error_action.test
Expand Up @@ -33,7 +33,8 @@ call mtr.add_suppression("Could not open .*");
call mtr.add_suppression("Could not use .*");
call mtr.add_suppression("Can't generate a unique log-filename master-bin");
call mtr.add_suppression("The server was unable to create a new log file *");

call mtr.add_suppression("An error occured during flushing cache to file.");
call mtr.add_suppression("Either disk is full or file system is read only");
let $old=`select @@debug`;

RESET MASTER;
Expand Down Expand Up @@ -135,5 +136,34 @@ DROP TABLE t2;
SET SESSION debug="-d,fault_injection_init_name";
--source include/restart_mysqld.inc

--echo Test case9
CREATE TABLE t1(i INT);
# Simulate error during flushing cache to file and test the behaviour
# when binlog_error_action is set to ABORT_SERVER/IGNORE_ERROR.
SET SESSION debug="+d,simulate_error_during_flush_cache_to_file";
SET GLOBAL binlog_error_action= ABORT_SERVER;
--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--error ER_BINLOG_LOGGING_IMPOSSIBLE
INSERT INTO t1 VALUES (1);
--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--enable_reconnect
--source include/wait_until_connected_again.inc

--let $assert_cond= COUNT(*) = 0 FROM t1;
--let $assert_text= Count of elements in t1 should be 0.
--source include/assert.inc

SET SESSION debug="+d,simulate_error_during_flush_cache_to_file";
SET GLOBAL binlog_error_action= IGNORE_ERROR;
INSERT INTO t1 VALUES (2);

--let $assert_cond= COUNT(*) = 1 FROM t1;
--let $assert_text= Count of elements in t1 should be 1.
--source include/assert.inc

DROP table t1;
SET SESSION debug="-d,simulate_error_during_flush_cache_to_file";
--source include/restart_mysqld.inc

# Cleanup
eval SET SESSION debug="$old";
39 changes: 37 additions & 2 deletions sql/binlog.cc
Expand Up @@ -3359,6 +3359,8 @@ bool MYSQL_BIN_LOG::open_binlog(const char *log_name,
my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), "Either disk is full or "
"file system is read only while opening the binlog. Aborting the "
"server");
sql_print_error("Either disk is full or file system is read only while "
"opening the binlog. Aborting the server");
thd->protocol->end_statement();
_exit(EXIT_FAILURE);
}
Expand Down Expand Up @@ -5000,6 +5002,8 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock_log, Format_description_log_even
my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), "Either disk is full or "
"file system is read only while rotating the binlog. Aborting "
"the server");
sql_print_error("Either disk is full or file system is read only while "
"rotating the binlog. Aborting the server");
thd->protocol->end_statement();
_exit(EXIT_FAILURE);
}
Expand Down Expand Up @@ -6775,8 +6779,39 @@ MYSQL_BIN_LOG::change_stage(THD *thd,
int
MYSQL_BIN_LOG::flush_cache_to_file(my_off_t *end_pos_var)
{
if (flush_io_cache(&log_file))
return ER_ERROR_ON_WRITE;
if (DBUG_EVALUATE_IF("simulate_error_during_flush_cache_to_file", 1,
flush_io_cache(&log_file)))
{
if (binlog_error_action == ABORT_SERVER)
{
THD *thd= current_thd;
/*
On fatal error when code enters here we should forcefully clear the
previous errors so that a new critical error message can be pushed
to the client side.
*/
thd->clear_error();
my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), "An error occured during "
"flushing cache to file. 'binlog_error_action' is set to "
"'ABORT_SERVER'. Hence aborting the server");
sql_print_error("An error occured during flushing cache to file. "
"'binlog_error_action' is set to 'ABORT_SERVER'. "
"Hence aborting the server");
thd->protocol->end_statement();
_exit(EXIT_FAILURE);
}
else
{
sql_print_error("An error occured during flushing cache to file. "
"'binlog_error_action' is set to 'IGNORE_ERROR'. "
"Hence turning logging off for the whole duration "
"of the MySQL server process. To turn it on "
"again: fix the cause, shutdown the MySQL "
"server and restart it.");
close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
return ER_ERROR_ON_WRITE;
}
}
*end_pos_var= my_b_tell(&log_file);
return 0;
}
Expand Down
2 changes: 2 additions & 0 deletions sql/log.cc
Expand Up @@ -1617,6 +1617,8 @@ bool MYSQL_LOG::open(
my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), "Either disk is full or "
"file system is read only while opening the binlog. Aborting the "
"server");
sql_print_error("Either disk is full or file system is read only while "
"opening the binlog. Aborting the server");
thd->protocol->end_statement();
_exit(EXIT_FAILURE);
}
Expand Down

0 comments on commit 3b6b4bf

Please sign in to comment.