Permalink
Browse files

Bug#20369401: MTS STOP SLAVE TAKES WAY TOO LONG

(WHEN WORKER THREADS ARE SLOW)

Analysis:
========
STOP SLAVE waits for workers to catch up with the queue, which
may take a long time for the command to finish. STOP SLAVE
must be executed quickly, even if workers are slow.

Fix:
===
Upon receiving the STOP instruction, the coordinator notifies
all workers to stop by setting their running_status=STOP.

Upon receiving the STOP command, the workers will identify a
maximum group index already executed (or under execution).

All groups whose index are below or equal to the maximum
group index will be applied by the workers before stopping.

The workers with groups above the maximum group index will
exit without applying these groups by setting their running
status to "STOP_ACCEPTED".

The coordinator will then wait for the workers to exit in the
nearest state where gaps are only possible if some worker
fails to apply a pending group.
  • Loading branch information...
Sujatha Sivakumar
Sujatha Sivakumar committed May 27, 2015
1 parent e7ba4ee commit 37f2e969bd36a7455e81ea2350685707bc859866
@@ -0,0 +1,61 @@
include/master-slave.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
[connection master]
#### I. Initialize ####
[connection slave]
include/stop_slave.inc
SET @save.innodb_lock_wait_timeout= @@global.innodb_lock_wait_timeout;
set @@global.innodb_lock_wait_timeout=5 + 1000;
include/start_slave.inc
[connection master]
CREATE DATABASE d1;
CREATE DATABASE d2;
CREATE TABLE d1.t (a INT PRIMARY KEY, name text) ENGINE=INNODB;
CREATE TABLE d2.t (a INT PRIMARY KEY, name text) ENGINE=INNODB;
#### II. Prepare test scenario ####
include/sync_slave_sql_with_master.inc
BEGIN;
INSERT INTO d2.t VALUES (2, 'Slave local');
INSERT INTO d1.t VALUES (3, 'Slave local');
[connection master]
INSERT INTO d1.t VALUES (1, 'T1');
INSERT INTO d2.t VALUES (1, 'T2');
INSERT INTO d2.t VALUES (2, 'T3');
INSERT INTO d2.t VALUES (3, 'T4');
INSERT INTO d1.t VALUES (2, 'T5');
INSERT INTO d1.t VALUES (3, 'T6');
INSERT INTO d2.t VALUES (4, 'T7');
INSERT INTO d2.t VALUES (5, 'T8');
INSERT INTO d1.t VALUES (4, 'T9');
[connection slave1]
# Now d1.t has two rows and d2.t has one row.
# Now coordinator has read the entire relay log and populated workers' queues.
SELECT * FROM d2.t;
a name
1 T2
SELECT * FROM d1.t;
a name
1 T1
2 T5
#### Verify that STOP SLAVE stops at gap less state ####
STOP SLAVE;
[connection slave]
# Now coordinator is waiting for the worker to consume its queue.
ROLLBACK;
include/wait_for_slave_sql_to_stop.inc
# III. Now all slave threads have stopped. Verify that worker completed its queue:
# d2 should contain Т2, Т3, Т4.
include/assert.inc [Rows until T3 in d2.t must be replicated now]
# d1 should contain Т1, Т5, T6.
include/assert.inc [Rows until 2 in d1.t must be replicated now]
include/start_slave.inc
[connection slave1]
[connection slave]
SET @@global.innodb_lock_wait_timeout= @save.innodb_lock_wait_timeout;
[connection master]
DROP DATABASE d1;
DROP DATABASE d2;
include/sync_slave_sql_with_master.inc
include/rpl_end.inc
@@ -0,0 +1 @@
--slave-transaction-retries=0
@@ -0,0 +1,121 @@
###############################################################################
# Bug#20369401: MTS STOP SLAVE TAKES WAY TOO LONG(WHEN WORKER THREADS ARE SLOW)
#
# Problem:
# ========
# STOP SLAVE waits for workers to catch up with the queue, which may
# take a lot of time for the command to finish. STOP SLAVE
# must be executed quickly, even if workers are slow.
###############################################################################
# The following test demonstrates that the STOP SLAVE command will not leave
# any gaps. It first creates two databases (d1 and d2) and sets up the slave
# to use two parallel workers. The test case then inserts on the slave a
# tuple that will block writes on d2 and generate gaps. Finally, the test
# case executes "STOP SLAVE" and verifies that the SQL thread was properly
# stopped and left no gaps.
--source include/have_binlog_format_statement.inc
# Only meaningful when the slave runs with parallel workers (MTS).
--source include/only_mts_slave_parallel_workers.inc
--source include/master-slave.inc
# Seconds to wait while checking that STOP SLAVE is still pending below.
--let $slave_stop_wait=5
--echo #### I. Initialize ####
--source include/rpl_connection_slave.inc
--source include/stop_slave.inc
# Save and raise innodb_lock_wait_timeout so the blocked workers wait on the
# slave-local transaction's row locks instead of timing out mid-test.
SET @save.innodb_lock_wait_timeout= @@global.innodb_lock_wait_timeout;
--eval set @@global.innodb_lock_wait_timeout=$slave_stop_wait + 1000
--source include/start_slave.inc
--source include/rpl_connection_master.inc
# Two databases so the (database-partitioned) MTS slave uses two workers.
CREATE DATABASE d1;
CREATE DATABASE d2;
CREATE TABLE d1.t (a INT PRIMARY KEY, name text) ENGINE=INNODB;
CREATE TABLE d2.t (a INT PRIMARY KEY, name text) ENGINE=INNODB;
--echo #### II. Prepare test scenario ####
--source include/sync_slave_sql_with_master.inc
# Open a slave-local transaction whose row locks will block the workers when
# they try to apply T3 (d2.t a=2) and T6 (d1.t a=3) from the master.
BEGIN;
INSERT INTO d2.t VALUES (2, 'Slave local'); # Hold T3
INSERT INTO d1.t VALUES (3, 'Slave local'); # Hold T6
--source include/rpl_connection_master.inc
INSERT INTO d1.t VALUES (1, 'T1');
INSERT INTO d2.t VALUES (1, 'T2');
INSERT INTO d2.t VALUES (2, 'T3'); # This will be a gap when executed on slave
INSERT INTO d2.t VALUES (3, 'T4'); # This will be a gap when executed on slave
INSERT INTO d1.t VALUES (2, 'T5');
INSERT INTO d1.t VALUES (3, 'T6');
INSERT INTO d2.t VALUES (4, 'T7'); # This should not be executed after STOP SLAVE
INSERT INTO d2.t VALUES (5, 'T8'); # This should not be executed after STOP SLAVE
INSERT INTO d1.t VALUES (4, 'T9'); # This should not be executed after STOP SLAVE
--source include/rpl_connection_slave1.inc
# Wait until the workers have applied everything up to the blocked rows:
# one row in d2.t (T2) and two rows in d1.t (T1, T5).
--let $table=d2.t
--let $count=1
--source include/wait_until_rows_count.inc
--let $table=d1.t
--let $count=2
--source include/wait_until_rows_count.inc
--echo # Now d1.t has two rows and d2.t has one row.
# Wait for coordinator to populate worker's queues.
--let $show_statement= SHOW PROCESSLIST
--let $field= State
--let $condition= = 'Slave has read all relay log; waiting for the slave I/O thread to update it'
--source include/wait_show_condition.inc
--echo # Now coordinator has read the entire relay log and populated workers' queues.
# There is now a gap at T3,T4
SELECT * FROM d2.t;
SELECT * FROM d1.t;
--echo #### Verify that STOP SLAVE stops at gap less state ####
# Issue STOP SLAVE asynchronously; the result is collected with --reap below.
--send STOP SLAVE
--source include/rpl_connection_slave.inc
# Despite time elapsed, the slave should still be running, waiting for the
# queue to be completed.
--sleep $slave_stop_wait
--let $show_statement= SHOW PROCESSLIST
--let $field= State
--let $condition= = 'Waiting for workers to exit'
--source include/wait_show_condition.inc
--echo # Now coordinator is waiting for the worker to consume its queue.
# Release the slave-local row locks so the workers can apply the pending
# groups up to the max updated index (T3..T6) and then stop.
ROLLBACK;
--source include/wait_for_slave_sql_to_stop.inc
--echo # III. Now all slave threads have stopped. Verify that worker completed its queue:
--echo # d2 should contain Т2, Т3, Т4.
--let $assert_cond= MAX(a)=3 FROM d2.t;
--let $assert_text= Rows until T3 in d2.t must be replicated now
--source include/assert.inc
--echo # d1 should contain Т1, Т5, T6.
--let $assert_cond= MAX(a)=3 FROM d1.t;
--let $assert_text= Rows until 2 in d1.t must be replicated now
--source include/assert.inc
--source include/start_slave.inc
--source include/rpl_connection_slave1.inc
# Collect the result of the asynchronous STOP SLAVE sent above.
--reap
#
# Cleanup
#
--source include/rpl_connection_slave.inc
SET @@global.innodb_lock_wait_timeout= @save.innodb_lock_wait_timeout;
--source include/rpl_connection_master.inc
DROP DATABASE d1;
DROP DATABASE d2;
--source include/sync_slave_sql_with_master.inc
--source include/rpl_end.inc
View
@@ -9390,6 +9390,7 @@ PSI_mutex_key
key_relay_log_info_sleep_lock,
key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
key_mutex_slave_parallel_pend_jobs, key_mutex_mts_temp_tables_lock,
key_mutex_slave_parallel_worker_count,
key_mutex_slave_parallel_worker,
key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count,
@@ -9472,6 +9473,7 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_relay_log_info_log_space_lock, "Relay_log_info::log_space_lock", 0},
{ &key_relay_log_info_run_lock, "Relay_log_info::run_lock", 0},
{ &key_mutex_slave_parallel_pend_jobs, "Relay_log_info::pending_jobs_lock", 0},
{ &key_mutex_slave_parallel_worker_count, "Relay_log_info::exit_count_lock", 0},
{ &key_mutex_mts_temp_tables_lock, "Relay_log_info::temp_tables_lock", 0},
{ &key_mutex_slave_parallel_worker, "Worker_info::jobs_lock", 0},
{ &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0},
View
@@ -344,6 +344,7 @@ extern PSI_mutex_key
key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
key_mutex_slave_parallel_pend_jobs, key_mutex_mts_temp_tables_lock,
key_mutex_slave_parallel_worker,
key_mutex_slave_parallel_worker_count,
key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
key_LOCK_error_messages, key_LOCK_thread_count, key_LOCK_thd_remove,
key_LOCK_log_throttle_qni;
View
@@ -1,4 +1,4 @@
/* Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved.
/* Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -91,8 +91,10 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery
tables_to_lock(0), tables_to_lock_count(0),
rows_query_ev(NULL), last_event_start_time(0), deferred_events(NULL),
slave_parallel_workers(0),
exit_counter(0),
max_updated_index(0),
recovery_parallel_workers(0), checkpoint_seqno(0),
checkpoint_group(opt_mts_checkpoint_group),
checkpoint_group(opt_mts_checkpoint_group),
recovery_groups_inited(false), mts_recovery_group_cnt(0),
mts_recovery_index(0), mts_recovery_group_seen_begin(0),
mts_group_status(MTS_NOT_IN_GROUP), reported_unsafe_warning(false),
@@ -132,6 +134,8 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery
mysql_mutex_init(key_mutex_slave_parallel_pend_jobs, &pending_jobs_lock,
MY_MUTEX_INIT_FAST);
mysql_cond_init(key_cond_slave_parallel_pend_jobs, &pending_jobs_cond, NULL);
mysql_mutex_init(key_mutex_slave_parallel_worker_count, &exit_count_lock,
MY_MUTEX_INIT_FAST);
my_atomic_rwlock_init(&slave_open_temp_tables_lock);
relay_log.init_pthread_objects();
@@ -173,6 +177,7 @@ Relay_log_info::~Relay_log_info()
mysql_cond_destroy(&log_space_cond);
mysql_mutex_destroy(&pending_jobs_lock);
mysql_cond_destroy(&pending_jobs_cond);
mysql_mutex_destroy(&exit_count_lock);
my_atomic_rwlock_destroy(&slave_open_temp_tables_lock);
relay_log.cleanup();
set_rli_description_event(NULL);
View
@@ -490,6 +490,7 @@ class Relay_log_info : public Rpl_info
volatile ulong pending_jobs;
mysql_mutex_t pending_jobs_lock;
mysql_cond_t pending_jobs_cond;
mysql_mutex_t exit_count_lock; // mutex of worker exit count
ulong mts_slave_worker_queue_len_max;
ulonglong mts_pending_jobs_size; // actual mem usage by WQ:s
ulonglong mts_pending_jobs_size_max; // max of WQ:s size forcing C to wait
@@ -525,9 +526,11 @@ class Relay_log_info : public Rpl_info
ulong mts_coordinator_basic_nap; // C sleeps to avoid WQs overrun
ulong opt_slave_parallel_workers; // cache for ::opt_slave_parallel_workers
ulong slave_parallel_workers; // the one slave session time number of workers
ulong exit_counter; // Number of workers contributed to max updated group index
ulonglong max_updated_index;
ulong recovery_parallel_workers; // number of workers while recovering
uint checkpoint_seqno; // counter of groups executed after the most recent CP
uint checkpoint_group; // cache for ::opt_mts_checkpoint_group
uint checkpoint_group; // cache for ::opt_mts_checkpoint_group
MY_BITMAP recovery_groups; // bitmap used during recovery
bool recovery_groups_inited;
ulong mts_recovery_group_cnt; // number of groups to execute at recovery
@@ -562,10 +565,10 @@ class Relay_log_info : public Rpl_info
} mts_group_status;
/*
MTS statistics:
MTS statistics:
*/
ulonglong mts_events_assigned; // number of events (statements) scheduled
ulong mts_groups_assigned; // number of groups (transactions) scheduled
ulonglong mts_groups_assigned; // number of groups (transactions) scheduled
volatile ulong mts_wq_overrun_cnt; // counter of all mts_wq_excess_cnt increments
ulong wq_size_waits_cnt; // number of times C slept due to WQ:s oversize
/*
Oops, something went wrong.

0 comments on commit 37f2e96

Please sign in to comment.