Skip to content

Commit bac3ed5

Browse files
author
Hemant Dangi
committed
Bug#28088177: STOP GROUP_REPLICATION HANG WITH SELECT REPLICATION_CONNECTION_STATUS
Backporting Bug#25423650: QUICK UNINSTALL PLUGIN WHILE START GR CAN RESULT INTO DEADLOCK Issue: ====== When 'select * from performance_schema.replication_connection_status' is executed concurrently with "stop group_replication", both statements get stuck. Resolution: =========== The Group_replication_handler class has been removed along with the lock LOCK_group_replication_handler. With the use of my_plugin_lock_by_name, the server has better visibility over GR operations and can take decisions accordingly.
1 parent 7d6110e commit bac3ed5

12 files changed

+379
-261
lines changed

include/mysql/group_replication_priv.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -37,12 +37,9 @@
3737

3838

3939
/**
40-
Server side initializations and cleanup.
40+
Server side initializations.
4141
*/
42-
int group_replication_init(const char* plugin_name);
43-
int group_replication_cleanup();
44-
int group_replication_start();
45-
int group_replication_stop();
42+
int group_replication_init();
4643

4744

4845
/**

rapid/plugin/group_replication/src/observer_server_state.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -54,8 +54,7 @@ int group_replication_before_server_shutdown(Server_state_param *param)
5454
int group_replication_after_server_shutdown(Server_state_param *param)
5555
{
5656
server_shutdown_status= true;
57-
if (plugin_is_group_replication_running())
58-
group_replication_stop();
57+
plugin_group_replication_stop();
5958

6059
return 0;
6160
}

rapid/plugin/group_replication/src/plugin.cc

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,12 @@ int plugin_group_replication_start()
352352

353353
Mutex_autolock auto_lock_mutex(&plugin_running_mutex);
354354

355+
DBUG_EXECUTE_IF("group_replication_wait_on_start",
356+
{
357+
const char act[]= "now signal signal.start_waiting wait_for signal.start_continue";
358+
DBUG_ASSERT(!debug_sync_set_action(current_thd, STRING_WITH_LEN(act)));
359+
});
360+
355361
if (plugin_is_group_replication_running())
356362
DBUG_RETURN(GROUP_REPLICATION_ALREADY_RUNNING);
357363
if (check_if_server_properly_configured())
@@ -815,6 +821,12 @@ int plugin_group_replication_stop()
815821

816822
Mutex_autolock auto_lock_mutex(&plugin_running_mutex);
817823

824+
DBUG_EXECUTE_IF("group_replication_wait_on_stop",
825+
{
826+
const char act[]= "now signal signal.stop_waiting wait_for signal.stop_continue";
827+
DBUG_ASSERT(!debug_sync_set_action(current_thd, STRING_WITH_LEN(act)));
828+
});
829+
818830
/*
819831
We delete the delayed initialization object here because:
820832
@@ -986,7 +998,7 @@ int plugin_group_replication_init(MYSQL_PLUGIN plugin_info)
986998

987999
plugin_info_ptr= plugin_info;
9881000

989-
if (group_replication_init(group_replication_plugin_name))
1001+
if (group_replication_init())
9901002
{
9911003
/* purecov: begin inspected */
9921004
log_message(MY_ERROR_LEVEL,
@@ -1036,7 +1048,7 @@ int plugin_group_replication_init(MYSQL_PLUGIN plugin_info)
10361048
init_compatibility_manager();
10371049

10381050
plugin_is_auto_starting= start_group_replication_at_boot_var;
1039-
if (start_group_replication_at_boot_var && group_replication_start())
1051+
if (start_group_replication_at_boot_var && plugin_group_replication_start())
10401052
{
10411053
log_message(MY_ERROR_LEVEL,
10421054
"Unable to start Group Replication on boot");
@@ -1054,10 +1066,9 @@ int plugin_group_replication_deinit(void *p)
10541066
plugin_is_being_uninstalled= true;
10551067
int observer_unregister_error= 0;
10561068

1057-
//plugin_group_replication_stop will be called from this method stack
1058-
if (group_replication_cleanup())
1069+
if (plugin_group_replication_stop())
10591070
log_message(MY_ERROR_LEVEL,
1060-
"Failure when cleaning Group Replication server state");
1071+
"Failure when stopping Group Replication on plugin uninstall");
10611072

10621073
if (group_member_mgr != NULL)
10631074
{
@@ -1567,7 +1578,7 @@ static int check_if_server_properly_configured()
15671578
//Struct that holds startup and runtime requirements
15681579
Trans_context_info startup_pre_reqs;
15691580

1570-
get_server_startup_prerequirements(startup_pre_reqs, true);
1581+
get_server_startup_prerequirements(startup_pre_reqs, !plugin_is_auto_starting);
15711582

15721583
if(!startup_pre_reqs.binlog_enabled)
15731584
{
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
include/group_replication.inc
2+
Warnings:
3+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
4+
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
5+
[connection server1]
6+
7+
# 1. Setup GR environment and execute START GR.
8+
9+
[connection server1]
10+
SET GLOBAL group_replication_bootstrap_group=ON;
11+
SET GLOBAL group_replication_group_name= "GROUP_REPLICATION_GROUP_NAME";
12+
13+
# 2. Block start, so we can execute UNINSTALL.
14+
15+
SET @debug_save= @@GLOBAL.DEBUG;
16+
SET @@GLOBAL.DEBUG= '+d,group_replication_wait_on_start';
17+
START GROUP_REPLICATION;
18+
19+
# 3. Execute UNINSTALL PLUGIN GR should fail with error
20+
# ER_PLUGIN_CANNOT_BE_UNINSTALLED as START GR is already running.
21+
22+
[connection server_1]
23+
SET DEBUG_SYNC= "now WAIT_FOR signal.start_waiting";
24+
UNINSTALL PLUGIN group_replication;
25+
ERROR HY000: Plugin 'group_replication' cannot be uninstalled now. Plugin is busy, it cannot be uninstalled. To force a stop run STOP GROUP_REPLICATION and then UNINSTALL PLUGIN group_replication.
26+
27+
# 4. SIGNAL START GR to resume processing.
28+
29+
SET DEBUG_SYNC= 'now SIGNAL signal.start_continue';
30+
[connection server1]
31+
32+
# 5. Confirm GR is started
33+
34+
include/gr_wait_for_member_state.inc
35+
36+
# 6. Cleanup
37+
38+
SET @@GLOBAL.DEBUG= @debug_save;
39+
include/group_replication_end.inc
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
include/group_replication.inc
2+
Warnings:
3+
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
4+
Note #### Storing MySQL user name or password information in the master info repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START SLAVE; see the 'START SLAVE Syntax' in the MySQL Manual for more information.
5+
[connection server1]
6+
7+
# 1. Block Stop Group Replication
8+
9+
SET @debug_save= @@GLOBAL.DEBUG;
10+
SET @@GLOBAL.DEBUG= '+d,group_replication_wait_on_stop';
11+
STOP GROUP_REPLICATION;
12+
13+
# 2. Wait for debug sync to be reached and then execute select query
14+
15+
[connection server_1]
16+
SET DEBUG_SYNC= "now WAIT_FOR signal.stop_waiting";
17+
SELECT COUNT(*) FROM performance_schema.replication_connection_status;
18+
COUNT(*)
19+
2
20+
21+
# 3. SIGNAL STOP GR to resume processing
22+
23+
SET DEBUG_SYNC= 'now SIGNAL signal.stop_continue';
24+
[connection server1]
25+
26+
# 4. Confirm Group Replication is stopped
27+
28+
include/gr_wait_for_member_state.inc
29+
30+
# 5. Cleanup
31+
32+
SET @@GLOBAL.DEBUG= @debug_save;
33+
include/group_replication_end.inc
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
################################################################################
2+
# This test confirms that when START GR and UNINSTALL PLUGIN GR commands are
3+
# executed concurrently, no deadlock happens.
4+
#
5+
# Test:
6+
# 0. The test requires one server.
7+
# 1. Setup GR environment and execute START GR.
8+
# 2. Block start, so we can execute UNINSTALL.
9+
# 3. Execute UNINSTALL PLUGIN GR should fail with error
10+
# ER_PLUGIN_CANNOT_BE_UNINSTALLED as START GR is already running.
11+
# 4. SIGNAL START GR to resume processing.
12+
# 5. Confirm GR is started
13+
# 6. Cleanup
14+
#
15+
################################################################################
16+
--source ../inc/have_group_replication_plugin.inc
17+
--let $rpl_skip_group_replication_start= 1
18+
--source ../inc/group_replication.inc
19+
20+
--echo
21+
--echo # 1. Setup GR environment and execute START GR.
22+
--echo
23+
24+
--let $rpl_connection_name= server1
25+
--source include/rpl_connection.inc
26+
27+
SET GLOBAL group_replication_bootstrap_group=ON;
28+
--replace_result $group_replication_group_name GROUP_REPLICATION_GROUP_NAME
29+
--eval SET GLOBAL group_replication_group_name= "$group_replication_group_name"
30+
31+
--echo
32+
--echo # 2. Block start, so we can execute UNINSTALL.
33+
--echo
34+
35+
## If START GR gets lock and UNINSTALL blocks SQL Query execution,
36+
## GR used to deadlock.
37+
38+
SET @debug_save= @@GLOBAL.DEBUG;
39+
SET @@GLOBAL.DEBUG= '+d,group_replication_wait_on_start';
40+
41+
--send START GROUP_REPLICATION
42+
43+
--echo
44+
--echo # 3. Execute UNINSTALL PLUGIN GR should fail with error
45+
--echo # ER_PLUGIN_CANNOT_BE_UNINSTALLED as START GR is already running.
46+
--echo
47+
48+
--let $rpl_connection_name= server_1
49+
--source include/rpl_connection.inc
50+
# Wait for the debug sync to be reached.
51+
SET DEBUG_SYNC= "now WAIT_FOR signal.start_waiting";
52+
53+
--error ER_PLUGIN_CANNOT_BE_UNINSTALLED
54+
UNINSTALL PLUGIN group_replication;
55+
56+
57+
--echo
58+
--echo # 4. SIGNAL START GR to resume processing.
59+
--echo
60+
SET DEBUG_SYNC= 'now SIGNAL signal.start_continue';
61+
62+
--let $rpl_connection_name= server1
63+
--source include/rpl_connection.inc
64+
--reap
65+
66+
--echo
67+
--echo # 5. Confirm GR is started
68+
--echo
69+
70+
--let $group_replication_member_state= ONLINE
71+
--source ../inc/gr_wait_for_member_state.inc
72+
73+
74+
--echo
75+
--echo # 6. Cleanup
76+
--echo
77+
78+
SET @@GLOBAL.DEBUG= @debug_save;
79+
--source ../inc/group_replication_end.inc
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
################################################################################
2+
# This test confirms that when STOP GR and a SELECT on
3+
# performance_schema.replication_connection_status are executed
4+
# concurrently, no deadlock happens.
5+
#
6+
# Test:
7+
# 0. The test requires one server.
8+
# 1. Block Stop Group Replication
9+
# 2. Wait for debug sync to be reached and then execute select query
10+
# 3. SIGNAL STOP GR to resume processing
11+
# 4. Confirm Group Replication is stopped
12+
# 5. Cleanup
13+
#
14+
################################################################################
15+
--source ../inc/have_group_replication_plugin.inc
16+
--source ../inc/group_replication.inc
17+
18+
--echo
19+
--echo # 1. Block Stop Group Replication
20+
--echo
21+
22+
SET @debug_save= @@GLOBAL.DEBUG;
23+
SET @@GLOBAL.DEBUG= '+d,group_replication_wait_on_stop';
24+
25+
--send STOP GROUP_REPLICATION
26+
27+
28+
--echo
29+
--echo # 2. Wait for debug sync to be reached and then execute select query
30+
--echo
31+
32+
--let $rpl_connection_name= server_1
33+
--source include/rpl_connection.inc
34+
35+
SET DEBUG_SYNC= "now WAIT_FOR signal.stop_waiting";
36+
37+
SELECT COUNT(*) FROM performance_schema.replication_connection_status;
38+
39+
40+
--echo
41+
--echo # 3. SIGNAL STOP GR to resume processing
42+
--echo
43+
44+
SET DEBUG_SYNC= 'now SIGNAL signal.stop_continue';
45+
46+
--let $rpl_connection_name= server1
47+
--source include/rpl_connection.inc
48+
--reap
49+
50+
51+
--echo
52+
--echo # 4. Confirm Group Replication is stopped
53+
--echo
54+
55+
--let $group_replication_member_state= OFFLINE
56+
--source ../inc/gr_wait_for_member_state.inc
57+
58+
59+
--echo
60+
--echo # 5. Cleanup
61+
--echo
62+
63+
SET @@GLOBAL.DEBUG= @debug_save;
64+
--source ../inc/group_replication_end.inc

rapid/plugin/group_replication/tests/mtr/t/gr_concurrent_uninstall_stop.test

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# ==== Purpose ====
22
#
33
# Verify that 'UNINSTALL PLUGIN' and 'STOP GROUP_REPLICATION' commands
4-
# run well in parallel.
4+
# do not run in parallel in MySQL 5.7.
55
#
66
# ==== Implementation ====
77
#
@@ -30,22 +30,30 @@ SET SESSION sql_log_bin= 1;
3030

3131
--source ../inc/start_and_bootstrap_group_replication.inc
3232

33+
## Warnings depend on the order in which the statements
34+
## "STOP GROUP_REPLICATION" and "UNINSTALL PLUGIN group_replication" execute.
35+
--disable_warnings
36+
3337
--send UNINSTALL PLUGIN group_replication
3438

3539
--let $rpl_connection_name= server_1
3640
--source include/rpl_connection.inc
3741
--echo #
3842
--echo # STOP GROUP_REPLICATION in parallel.
3943
--echo #
44+
## UNINSTALL may fail if STOP GR is already running.
4045
--error 0, ER_GROUP_REPLICATION_CONFIGURATION
4146
STOP GROUP_REPLICATION;
4247

4348
--let $rpl_connection_name= server1
4449
--source include/rpl_connection.inc
45-
--error 0, ER_OPTION_PREVENTS_STATEMENT
50+
--error 0, ER_PLUGIN_CANNOT_BE_UNINSTALLED
4651
--reap
4752

53+
--enable_warnings
54+
4855
SET @@GLOBAL.read_only= 0;
56+
4957
# The previous UNINSTALL attempt may have failed due to SUPER_READ_ONLY=1, as
5058
# such we uninstall the plugin again.
5159
--error 0, ER_SP_DOES_NOT_EXIST

0 commit comments

Comments
 (0)