Skip to content

Commit

Permalink
replication: do not ignore replication_connect_quorum
Browse files Browse the repository at this point in the history
On bootstrap and after initial configuration replication_connect_quorum
was ignored. The instance tried to connect to every replica listed in
replication parameter, and failed if it wasn't possible.

The patch alters this behaviour. An instance still tries to connect to
every node listed in box.cfg.replication, but does not raise an error if
it was able to connect to at least replication_connect_quorum instances.

Closes #3428

@TarantoolBot document
Title: replication_connect_quorum is not ignored
Now on replica set bootstrap and in case of replication reconfiguration
(e.g. calling box.cfg{replication=...} for the second time) tarantool
doesn't fail if it couldn't connect to every replica, but could
connect to replication_connect_quorum replicas. If after
replication_connect_timeout seconds the instance is not connected to at
least replication_connect_quorum other instances, we throw an error.
  • Loading branch information
sergepetrenko authored and locker committed Aug 14, 2018
1 parent 438a4e6 commit c1a16b2
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 18 deletions.
35 changes: 24 additions & 11 deletions src/box/box.cc
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ cfg_get_replication(int *p_count)
* don't start appliers.
*/
static void
box_sync_replication(double timeout, bool connect_all)
box_sync_replication(bool connect_quorum)
{
int count = 0;
struct applier **appliers = cfg_get_replication(&count);
Expand All @@ -607,7 +607,7 @@ box_sync_replication(double timeout, bool connect_all)
applier_delete(appliers[i]); /* doesn't affect diag */
});

replicaset_connect(appliers, count, timeout, connect_all);
replicaset_connect(appliers, count, connect_quorum);

guard.is_active = false;
}
Expand All @@ -625,8 +625,13 @@ box_set_replication(void)
}

box_check_replication();
/* Try to connect to all replicas within the timeout period */
box_sync_replication(replication_connect_timeout, true);
/*
* Try to connect to all replicas within the timeout period.
* The configuration will succeed as long as we've managed
* to connect to at least replication_connect_quorum
* masters.
*/
box_sync_replication(true);
/* Follow replica */
replicaset_follow();
}
Expand Down Expand Up @@ -1865,8 +1870,13 @@ box_cfg_xc(void)

title("orphan");

/* Wait for the cluster to start up */
box_sync_replication(replication_connect_timeout, false);
/*
* In case of recovering from a checkpoint we
* don't need to wait for 'quorum' masters, since
* the recovered _cluster space will have all the
* information about cluster.
*/
box_sync_replication(false);
} else {
if (!tt_uuid_is_nil(&instance_uuid))
INSTANCE_UUID = instance_uuid;
Expand All @@ -1883,12 +1893,15 @@ box_cfg_xc(void)
/*
* Wait for the cluster to start up.
*
* Note, when bootstrapping a new instance, we have to
* connect to all masters to make sure all replicas
* receive the same replica set UUID when a new cluster
* is deployed.
 * Note, when bootstrapping a new instance, we try to
 * connect to all masters during the timeout to make sure
 * all replicas receive the same replica set UUID when
 * a new cluster is deployed.
* If we fail to do so, settle with connecting to
* 'replication_connect_quorum' masters.
* If this also fails, throw an error.
*/
box_sync_replication(TIMEOUT_INFINITY, true);
box_sync_replication(true);
/* Bootstrap a new master */
bootstrap(&replicaset_uuid, &is_bootstrap_leader);
}
Expand Down
11 changes: 7 additions & 4 deletions src/box/replication.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ struct tt_uuid INSTANCE_UUID;
struct tt_uuid REPLICASET_UUID;

double replication_timeout = 1.0; /* seconds */
double replication_connect_timeout = 4.0; /* seconds */
double replication_connect_timeout = 30.0; /* seconds */
int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
double replication_sync_lag = 10.0; /* seconds */

Expand Down Expand Up @@ -540,7 +540,7 @@ applier_on_connect_f(struct trigger *trigger, void *event)

void
replicaset_connect(struct applier **appliers, int count,
double timeout, bool connect_all)
bool connect_quorum)
{
if (count == 0) {
/* Cleanup the replica set. */
Expand Down Expand Up @@ -571,6 +571,9 @@ replicaset_connect(struct applier **appliers, int count,
state.connected = state.failed = 0;
fiber_cond_create(&state.wakeup);

double timeout = replication_connect_timeout;
int quorum = MIN(count, replication_connect_quorum);

/* Add triggers and start simultaneous connections to remote peers */
for (int i = 0; i < count; i++) {
struct applier *applier = appliers[i];
Expand All @@ -587,15 +590,15 @@ replicaset_connect(struct applier **appliers, int count,
double wait_start = ev_monotonic_now(loop());
if (fiber_cond_wait_timeout(&state.wakeup, timeout) != 0)
break;
if (state.failed > 0 && connect_all)
if (count - state.failed < quorum)
break;
timeout -= ev_monotonic_now(loop()) - wait_start;
}
if (state.connected < count) {
say_crit("failed to connect to %d out of %d replicas",
count - state.connected, count);
/* Timeout or connection failure. */
if (connect_all)
if (connect_quorum && state.connected < quorum)
goto error;
} else {
say_verbose("connected to %d replicas", state.connected);
Expand Down
7 changes: 4 additions & 3 deletions src/box/replication.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,12 +357,13 @@ replicaset_add(uint32_t replica_id, const struct tt_uuid *instance_uuid);
 * \param appliers the array of appliers
 * \param count size of appliers array
 * \param connect_quorum if this flag is set, fail unless at
 *                       least replication_connect_quorum
 *                       appliers have successfully connected.
*/
void
replicaset_connect(struct applier **appliers, int count,
double timeout, bool connect_all);
bool connect_quorum);

/**
* Resume all appliers registered with the replica set.
Expand Down
49 changes: 49 additions & 0 deletions test/replication/quorum.result
Original file line number Diff line number Diff line change
Expand Up @@ -401,3 +401,52 @@ test_run:cmd("switch default")
test_run:drop_cluster(SERVERS)
---
...
-- Test that quorum is not ignored neither during bootstrap, nor
-- during reconfiguration.
box.schema.user.grant('guest', 'replication')
---
...
test_run:cmd('create server replica_quorum with script="replication/replica_quorum.lua"')
---
- true
...
-- Arguments are: replication_connect_quorum, replication_timeout
-- replication_connect_timeout.
-- If replication_connect_quorum was ignored here, the instance
-- would exit with an error.
test_run:cmd('start server replica_quorum with wait=True, wait_load=True, args="1 0.05 0.1"')
---
- true
...
test_run:cmd('switch replica_quorum')
---
- true
...
-- If replication_connect_quorum was ignored here, the instance
-- would exit with an error.
box.cfg{replication={INSTANCE_URI, nonexistent_uri(1)}}
---
...
box.info.id
---
- 1
...
test_run:cmd('switch default')
---
- true
...
test_run:cmd('stop server replica_quorum')
---
- true
...
test_run:cmd('cleanup server replica_quorum')
---
- true
...
test_run:cmd('delete server replica_quorum')
---
- true
...
box.schema.user.revoke('guest', 'replication')
---
...
20 changes: 20 additions & 0 deletions test/replication/quorum.test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,23 @@ box.space.test:select()
test_run:cmd("switch default")
-- Cleanup.
test_run:drop_cluster(SERVERS)

-- Test that quorum is not ignored neither during bootstrap, nor
-- during reconfiguration.
box.schema.user.grant('guest', 'replication')
test_run:cmd('create server replica_quorum with script="replication/replica_quorum.lua"')
-- Arguments are: replication_connect_quorum, replication_timeout
-- replication_connect_timeout.
-- If replication_connect_quorum was ignored here, the instance
-- would exit with an error.
test_run:cmd('start server replica_quorum with wait=True, wait_load=True, args="1 0.05 0.1"')
test_run:cmd('switch replica_quorum')
-- If replication_connect_quorum was ignored here, the instance
-- would exit with an error.
box.cfg{replication={INSTANCE_URI, nonexistent_uri(1)}}
box.info.id
test_run:cmd('switch default')
test_run:cmd('stop server replica_quorum')
test_run:cmd('cleanup server replica_quorum')
test_run:cmd('delete server replica_quorum')
box.schema.user.revoke('guest', 'replication')
24 changes: 24 additions & 0 deletions test/replication/replica_quorum.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env tarantool

-- Helper instance for the replication quorum tests.
-- Command-line arguments:
--   arg[1] -- replication_connect_quorum (required)
--   arg[2] -- replication_timeout in seconds (defaults to 0.1)
--   arg[3] -- replication_connect_timeout in seconds (defaults to 30.0)

local socket_dir = require('fio').cwd()

local quorum = tonumber(arg[1])
local timeout = tonumber(arg[2]) or 0.1
local connect_timeout = tonumber(arg[3]) or 30.0

-- Deliberately global: the test harness reads it over the admin console.
INSTANCE_URI = socket_dir .. '/replica_quorum.sock'

-- Deliberately global: the test harness calls it over the admin console.
-- Returns a socket URI that no instance listens on.
function nonexistent_uri(id)
    return socket_dir .. '/replica_quorum' .. (1000 + id) .. '.sock'
end

require('console').listen(os.getenv('ADMIN'))

box.cfg{
    listen = INSTANCE_URI,
    replication_timeout = timeout,
    replication_connect_timeout = connect_timeout,
    replication_connect_quorum = quorum,
    replication = {INSTANCE_URI,
                   nonexistent_uri(1),
                   nonexistent_uri(2)}
}

0 comments on commit c1a16b2

Please sign in to comment.