Skip to content

Commit

Permalink
box: disable split-brain detection until schema is upgraded
Browse files Browse the repository at this point in the history
Our split-brain detection machinery relies among other things on all
nodes tracking the synchro queue confirmed lsn. This tracking was only
added together with the split-brain detection. Only the synchro queue
owner tracked the confirmed lsn before.

This means that after an upgrade all the replicas remember the latest
confirmed lsn as 0, and any PROMOTE/DEMOTE request from the queue owner
is treated as a split brain.

Let's fix this and only enable split-brain detection on the replica set
once the schema version is updated. Thanks to the synchro queue freeze
on restart, this can only happen after a new PROMOTE or DEMOTE entry is
written by one of the nodes, and thus the correct confirmed lsn
is propagated with this PROMOTE/DEMOTE to all the cluster members.

Closes #8996

NO_DOC=bugfix
  • Loading branch information
sergepetrenko committed Sep 28, 2023
1 parent a3da753 commit a844bd3
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 4 deletions.
4 changes: 4 additions & 0 deletions changelogs/unreleased/gh-8996-spurious-spit-brain-detected.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## bugfix/replication

* Fixed a false-positive split-brain in a replica set on the first
promotion after an upgrade from versions before 2.10.1 (gh-8996).
64 changes: 62 additions & 2 deletions src/box/alter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "coll_id_cache.h"
#include "coll_id_def.h"
#include "txn.h"
#include "txn_limbo.h"
#include "tuple.h"
#include "tuple_constraint.h"
#include "fiber.h" /* for gc_pool */
Expand Down Expand Up @@ -4170,10 +4171,42 @@ on_commit_replicaset_name(struct trigger *trigger, void * /* event */)
return 0;
}

/**
 * Fiber function which enables filtering of incoming synchro requests on the
 * global txn_limbo. Its argument list is unused. Always returns 0.
 */
static int
start_synchro_filtering(va_list /* ap */)
{
txn_limbo_filter_enable(&txn_limbo);
return 0;
}

/**
 * Fiber function which disables filtering of incoming synchro requests on the
 * global txn_limbo. Its argument list is unused. Always returns 0.
 */
static int
stop_synchro_filtering(va_list /* ap */)
{
txn_limbo_filter_disable(&txn_limbo);
return 0;
}

/**
 * Data passed to on_commit_dd_version trigger. It carries the new schema
 * version together with an optional fiber to be woken up on commit.
 */
struct on_commit_dd_version_data {
/**
 * A fiber to perform async work after commit. May be NULL, in which case
 * the commit trigger only updates the cached version.
 */
struct fiber *fiber;
/** New version. It is stored into dd_version_id on commit. */
uint32_t version_id;
};

/**
 * Update the cached schema version and enable version-dependent features, like
 * split-brain detection. Re-enabling is done asynchronously by a separate fiber
 * prepared by the on_replace trigger.
 *
 * @param trigger Commit trigger. Its data field points to a
 *                struct on_commit_dd_version_data.
 *
 * @retval 0 Always.
 */
static int
on_commit_dd_version(struct trigger *trigger, void * /* event */)
{
	/*
	 * The trigger data is a pointer to a structure, not the version
	 * itself, so it must never be interpreted as an integer.
	 */
	struct on_commit_dd_version_data *data =
		(struct on_commit_dd_version_data *)trigger->data;
	dd_version_id = data->version_id;
	/*
	 * The fiber is optional: the on_replace trigger leaves it NULL when
	 * there is no asynchronous work to do after the commit.
	 */
	struct fiber *fiber = data->fiber;
	if (fiber != NULL)
		fiber_wakeup(fiber);
	return 0;
}

Expand Down Expand Up @@ -4346,11 +4379,38 @@ on_replace_dd_schema(struct trigger * /* trigger */, void *event)
*/
version = tarantool_version_id();
}
struct on_commit_dd_version_data *data = xregion_alloc_object(
&txn->region, typeof(*data));
data->version_id = version;
data->fiber = NULL;
struct trigger *on_commit = txn_alter_trigger_new(
on_commit_dd_version, (void *)(uintptr_t)version);
on_commit_dd_version, data);
if (on_commit == NULL)
return -1;
txn_stmt_on_commit(stmt, on_commit);
if (recovery_state != FINISHED_RECOVERY) {
return 0;
}
/*
* Set data->fiber after on_commit is created, because we can't
* remove a not-yet-run fiber in case of on_commit creation
* failure.
*/
struct fiber *fiber = NULL;
if (version > version_id(2, 10, 1) &&
recovery_state == FINISHED_RECOVERY) {
fiber = fiber_new_system("synchro_filter_enabler",
start_synchro_filtering);
if (fiber == NULL)
return -1;
} else if (version <= version_id(2, 10, 1) &&
recovery_state == FINISHED_RECOVERY) {
fiber = fiber_new_system("synchro_filter_disabler",
stop_synchro_filtering);
if (fiber == NULL)
return -1;
}
data->fiber = fiber;
} else if (strcmp(key, "bootstrap_leader_uuid") == 0) {
struct tt_uuid *uuid = xregion_alloc_object(&txn->region,
typeof(*uuid));
Expand Down
7 changes: 5 additions & 2 deletions src/box/box.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5466,9 +5466,12 @@ box_cfg_xc(void)
/*
* Enable split brain detection once node is fully recovered or
* bootstrapped. No split brain could happen during bootstrap or local
* recovery.
* recovery. Only do so in an upgraded cluster. Unfortunately, schema
* version 2.10.1 was used in 2.10.0 release, while split-brain
* detection appeared in 2.10.1. So use the schema version after 2.10.1.
*/
txn_limbo_filter_enable(&txn_limbo);
if (dd_version_id > version_id(2, 10, 1))
txn_limbo_filter_enable(&txn_limbo);

title("running");
say_info("ready to accept requests");
Expand Down
8 changes: 8 additions & 0 deletions src/box/txn_limbo.c
Original file line number Diff line number Diff line change
Expand Up @@ -1273,6 +1273,14 @@ txn_limbo_filter_enable(struct txn_limbo *limbo)
latch_unlock(&limbo->promote_latch);
}

/**
 * Stop filtering incoming synchro requests: clear the do_validate flag of
 * the limbo. The flag is changed while promote_latch is held.
 */
void
txn_limbo_filter_disable(struct txn_limbo *limbo)
{
latch_lock(&limbo->promote_latch);
limbo->do_validate = false;
latch_unlock(&limbo->promote_latch);
}

void
txn_limbo_init(void)
{
Expand Down
4 changes: 4 additions & 0 deletions src/box/txn_limbo.h
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,10 @@ txn_limbo_on_parameters_change(struct txn_limbo *limbo);
void
txn_limbo_filter_enable(struct txn_limbo *limbo);

/**
 * Stop filtering incoming synchro requests. The counterpart of
 * txn_limbo_filter_enable().
 */
void
txn_limbo_filter_disable(struct txn_limbo *limbo);

/**
* Freeze limbo. Prevent CONFIRMs and ROLLBACKs until limbo is unfrozen.
*/
Expand Down
78 changes: 78 additions & 0 deletions test/replication-luatest/gh_8996_synchro_filter_enable_test.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
-- Test for gh-8996: split-brain detection (synchro request filtering) must
-- depend on the schema version of the replica set.
local t = require('luatest')
local replica_set = require('luatest.replica_set')
local server = require('luatest.server')

-- The test group. Every test case of this file is registered in it.
local g = t.group('synchro-filter-enable-by-version')

-- Build a fresh replica set of two servers, 'server1' and 'server2', which
-- replicate from each other. The servers are only built and added here; each
-- test case starts the replica set on its own.
g.before_each(function(cg)
    local rs = replica_set:new{}
    cg.replica_set = rs

    local aliases = {'server1', 'server2'}

    -- Both servers share the same configuration and list each other's
    -- listen URIs as replication sources.
    local uris = {}
    for _, alias in ipairs(aliases) do
        uris[#uris + 1] = server.build_listen_uri(alias, rs.id)
    end
    cg.box_cfg = {
        replication = uris,
        replication_timeout = 0.1,
    }

    for _, alias in ipairs(aliases) do
        cg[alias] = rs:build_and_add_server{
            alias = alias,
            box_cfg = cg.box_cfg,
        }
    end
end)

-- Drop the replica set created by before_each once the test case is over.
g.after_each(function(cg)
    local rs = cg.replica_set
    rs:drop()
end)

-- Check that split-brain detection is off while the schema version is <=
-- 2.10.1, and that it is turned on again after a schema upgrade.
g.test_filter_enable_disable = function(cg)
    local first, second = cg.server1, cg.server2
    local nodes = {first, second}

    -- Downgrade the schema on the writable node and let the peer catch up.
    cg.replica_set:start()
    first:exec(function()
        box.ctl.wait_rw()
        box.schema.downgrade('2.10.1')
        local version = box.space._schema:get{'version'}
        t.assert_equals(version, {'version', 2, 10, 1})
    end)
    second:wait_for_vclock_of(first)

    -- Break the replication and promote both nodes independently.
    for _, node in ipairs(nodes) do
        node:update_box_cfg({replication = ""})
    end
    for _, node in ipairs(nodes) do
        node:exec(function()
            box.ctl.promote()
        end)
    end

    -- With the old schema the conflicting promotions must not break the
    -- replication once the nodes are connected back.
    for _, node in ipairs(nodes) do
        node:update_box_cfg(cg.box_cfg)
    end
    first:wait_for_vclock_of(second)
    second:wait_for_vclock_of(first)
    first:assert_follows_upstream(second:get_instance_id())
    second:assert_follows_upstream(first:get_instance_id())

    -- Repeat the scenario, but upgrade the schema after each promotion.
    for _, node in ipairs(nodes) do
        node:update_box_cfg({replication = ""})
    end
    for _, node in ipairs(nodes) do
        node:exec(function()
            box.ctl.promote()
            box.schema.upgrade()
        end)
    end

    -- Now each node is expected to stop following its peer and to report
    -- a split-brain.
    t.helpers.retrying({}, function()
        for idx, node in ipairs(nodes) do
            local peer = nodes[3 - idx]
            node:update_box_cfg(cg.box_cfg)
            node:exec(function(id)
                local upstream = box.info.replication[id].upstream
                t.assert_equals(upstream.status, 'stopped')
                t.assert_str_contains(upstream.message,
                                      'Split-Brain discovered')
            end, {peer:get_instance_id()})
        end
    end)
end

0 comments on commit a844bd3

Please sign in to comment.