forked from tarantool/tarantool
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
box: disable split-brain detection until schema is upgraded
Our split-brain detection machinery relies, among other things, on all nodes tracking the synchro queue's confirmed lsn. This tracking was only added together with the split-brain detection; before that, only the synchro queue owner tracked the confirmed lsn. This means that after an upgrade all the replicas remember the latest confirmed lsn as 0, and any PROMOTE/DEMOTE request from the queue owner is treated as a split brain. Let's fix this and only enable split-brain detection on the replica set once the schema version is updated. Thanks to the synchro queue freeze on restart, this can only happen after a new PROMOTE or DEMOTE entry is written by one of the nodes, and thus the correct confirmed lsn is propagated with this PROMOTE/DEMOTE to all the cluster members. Closes tarantool#8996 NO_DOC=bugfix (cherry picked from commit a844bd3)
- Loading branch information
1 parent
2757a84
commit 6b8a39f
Showing
6 changed files
with
133 additions
and
2 deletions.
There are no files selected for viewing
4 changes: 4 additions & 0 deletions
4
changelogs/unreleased/gh-8996-spurious-spit-brain-detected.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
## bugfix/replication | ||
|
||
* Fixed a false-positive split-brain in a replica set on the first | ||
promotion after an upgrade from versions before 2.10.1 (gh-8996). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
78 changes: 78 additions & 0 deletions
78
test/replication-luatest/gh_8996_synchro_filter_enable_test.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
local t = require('luatest') | ||
local replica_set = require('luatest.replica_set') | ||
local server = require('luatest.server') | ||
|
||
-- Test group for the schema-version gate on synchro split-brain detection. | ||
local g = t.group('synchro-filter-enable-by-version') | ||
|
||
-- Build (but do not start) a two-node replica set where each node | ||
-- replicates from both 'server1' and 'server2'. | ||
g.before_each(function(cg) | ||
    local rs = replica_set:new{} | ||
    cg.replica_set = rs | ||
    -- Listen URIs of both instances, in alias order. | ||
    local peers = {} | ||
    for idx = 1, 2 do | ||
        peers[idx] = server.build_listen_uri('server' .. idx, rs.id) | ||
    end | ||
    -- The same config table is stored on cg so tests can re-apply it later. | ||
    cg.box_cfg = {replication = peers, replication_timeout = 0.1} | ||
    for idx = 1, 2 do | ||
        local alias = 'server' .. idx | ||
        cg[alias] = rs:build_and_add_server{ | ||
            alias = alias, | ||
            box_cfg = cg.box_cfg, | ||
        } | ||
    end | ||
end) | ||
|
||
-- Drop the replica set that before_each stored on the test context. | ||
g.after_each(function(ctx) | ||
    local rs = ctx.replica_set | ||
    rs:drop() | ||
end) | ||
|
||
-- Check that split-brain detection is disabled while the schema version is | ||
-- <= 2.10.1, and is re-enabled after a schema upgrade. | ||
g.test_filter_enable_disable = function(cg) | ||
cg.replica_set:start() | ||
-- Roll the schema back to 2.10.1 on server1 and verify the stored version. | ||
cg.server1:exec(function() | ||
box.ctl.wait_rw() | ||
box.schema.downgrade('2.10.1') | ||
t.assert_equals(box.space._schema:get{'version'}, | ||
{'version', 2, 10, 1}) | ||
end) | ||
-- Let server2 catch up with server1's vclock before breaking the link. | ||
cg.server2:wait_for_vclock_of(cg.server1) | ||
|
||
-- Disconnect the nodes from each other. | ||
cg.server1:update_box_cfg({replication = ""}) | ||
cg.server2:update_box_cfg({replication = ""}) | ||
|
||
-- Each node issues its own promote while isolated from the other. | ||
cg.server1:exec(function() | ||
box.ctl.promote() | ||
end) | ||
cg.server2:exec(function() | ||
box.ctl.promote() | ||
end) | ||
|
||
-- Reconnect. On the old schema the conflicting promotes must not break | ||
-- replication: both nodes sync and keep following each other. | ||
cg.server1:update_box_cfg(cg.box_cfg) | ||
cg.server2:update_box_cfg(cg.box_cfg) | ||
cg.server1:wait_for_vclock_of(cg.server2) | ||
cg.server2:wait_for_vclock_of(cg.server1) | ||
cg.server1:assert_follows_upstream(cg.server2:get_instance_id()) | ||
cg.server2:assert_follows_upstream(cg.server1:get_instance_id()) | ||
|
||
-- Disconnect again for the second round. | ||
cg.server1:update_box_cfg({replication = ""}) | ||
cg.server2:update_box_cfg({replication = ""}) | ||
|
||
-- This time each isolated node promotes itself and upgrades the schema. | ||
for i = 1,2 do | ||
cg['server' .. i]:exec(function() | ||
box.ctl.promote() | ||
box.schema.upgrade() | ||
end) | ||
end | ||
|
||
-- After the upgrade, reconnecting must stop each upstream with a | ||
-- split-brain error. The config is re-applied on every retry attempt. | ||
t.helpers.retrying({}, function() | ||
for i = 1,2 do | ||
cg['server' .. i]:update_box_cfg(cg.box_cfg) | ||
-- 3 - i selects the other server, whose instance id is passed as id. | ||
cg['server' .. i]:exec(function(id) | ||
t.assert_equals(box.info.replication[id].upstream.status, | ||
'stopped') | ||
t.assert_str_contains(box.info.replication[id].upstream.message, | ||
'Split-Brain discovered') | ||
end, {cg['server' .. 3 - i]:get_instance_id()}) | ||
end | ||
end) | ||
end |