Skip to content

Commit

Permalink
relay: update lag on any acked txn
Browse files Browse the repository at this point in the history
Not only for own txns, but also on the txns authored by other
instances.

Note that the lag isn't updated when the replica got new txns from
another master. The lag still only reflects the replication
between this relay and its specific applier.

The motivation is that otherwise the lag sometimes shows
irrelevant things, like that the replica is very outdated, while
it keeps replicating just fine. Only not txns of this specific
master, who might even turned into a replica itself already.

Closes #9748

NO_DOC=bugfix

(cherry picked from commit 39af9fb)
  • Loading branch information
Gerold103 authored and sergepetrenko committed May 17, 2024
1 parent 142b326 commit 40c7a08
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 3 deletions.
4 changes: 4 additions & 0 deletions changelogs/unreleased/gh-9748-downstream-lag-bug.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## bugfix/replication

* Fixed a bug that the `box.info.replication[...].downstream.lag` value could be
misleading, not updating in time, frozen (gh-9748).
5 changes: 5 additions & 0 deletions src/box/applier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1719,6 +1719,11 @@ applier_signal_ack(struct applier *applier)
* space, we obtain a nil pointer here.
*/
applier->ack_msg.txn_last_tm = applier->txn_last_tm;
/*
* Send each timestamp only once. New timestamp is treated by
* relay like something new was acked from that specific relay.
*/
applier->txn_last_tm = 0;
applier->ack_msg.vclock_sync = applier->last_vclock_sync;
applier->ack_msg.term = box_raft()->term;
vclock_copy(&applier->ack_msg.vclock, &replicaset.vclock);
Expand Down
3 changes: 1 addition & 2 deletions src/box/relay.cc
Original file line number Diff line number Diff line change
Expand Up @@ -775,8 +775,7 @@ relay_process_ack(struct relay *relay, double tm)
* can't go down.
*/
assert(vclock_compare(prev_vclock, next_vclock) <= 0);
if (vclock_get(prev_vclock, instance_id) <
vclock_get(next_vclock, instance_id))
if (vclock_compare_ignore0(prev_vclock, next_vclock) < 0)
relay->txn_lag = ev_now(loop()) - tm;
}

Expand Down
118 changes: 117 additions & 1 deletion test/replication-luatest/gh_9748_downstream_lag_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@ local server = require('luatest.server')
local replica_set = require('luatest.replica_set')

local g = t.group('gh_9748')
local delay = 0.1

g.before_all(function(cg)
cg.replica_set = replica_set:new({})
cg.replication = {
server.build_listen_uri('server1', cg.replica_set.id),
server.build_listen_uri('server2', cg.replica_set.id),
server.build_listen_uri('server3', cg.replica_set.id),
}
local box_cfg = {
replication = cg.replication,
replication_timeout = 0.1,
}
for i = 1, 2 do
for i = 1, 3 do
cg['server'..i] = cg.replica_set:build_and_add_server{
alias = 'server' .. i,
box_cfg = box_cfg,
Expand All @@ -32,7 +34,11 @@ g.before_each(function(cg)
s:update_box_cfg{replication = cg.replication}
end
cg.server1:wait_for_downstream_to(cg.server2)
cg.server1:wait_for_downstream_to(cg.server3)
cg.server2:wait_for_downstream_to(cg.server1)
cg.server2:wait_for_downstream_to(cg.server3)
cg.server3:wait_for_downstream_to(cg.server1)
cg.server3:wait_for_downstream_to(cg.server2)
end)

g.after_all(function(cg)
Expand Down Expand Up @@ -63,3 +69,113 @@ g.test_lag_on_master_restart = function(cg)
t.assert_equals(box.info.replication[id].downstream.lag, 0)
end, {cg.server2:get_instance_id()})
end

--
-- gh-9748: downstream lag must be updated even when the replica confirmed a txn
-- not created by the master. The lag represents WAL delay, and WAL shouldn't
-- care about replica IDs in the txns.
--
local function test_lag_from_third_node(cg)
-- server1 <-> server2 | server3
cg.server3:update_box_cfg{replication = {}}
-- Server2 creates downstream lag to server3.
cg.server2:exec(function(delay)
box.space.test:replace{1}
require('fiber').sleep(delay)
end, {delay})
cg.server3:update_box_cfg{replication = {cg.replication[2]}}
cg.server3:wait_for_vclock_of(cg.server2)
cg.server2:exec(function(id, delay)
t.assert_ge(box.info.replication[id].downstream.lag, delay)
end, {cg.server3:get_instance_id(), delay})
-- Server1 makes a txn, replicated to server3 via server2. But that still
-- bumps the downstream lag in server2->server3. Downstream lag is updated
-- even when the replica confirms third-node txns, not only master's own
-- txns.
cg.server1:exec(function()
box.space.test:replace{2}
end)
cg.server2:wait_for_downstream_to(cg.server3)
cg.server2:exec(function(id, delay)
local lag = box.info.replication[id].downstream.lag
t.assert_le(lag, delay)
t.assert_not_equals(lag, 0)
end, {cg.server3:get_instance_id(), delay})
end

g.test_lag_from_third_node = function(cg)
-- Retry, because with a non-huge replication timeout the replicas sometimes
-- might timeout when the system is slow, and that would make downstream lag
-- disappear, breaking the test.
t.helpers.retrying({}, test_lag_from_third_node, cg)
end

--
-- The test ensures the downstream lag uses the timestamp of when the txn was
-- written to WAL of the master, not when it was created. Those moments are
-- different for txns received by the considered master from another master, and
-- then replicated further.
--
local function test_lag_is_local_to_sender(cg)
cg.server3:update_box_cfg{replication = {}}
cg.server2:update_box_cfg{replication = {}}
-- Create a lag in server1->server2.
cg.server1:exec(function()
box.space.test:replace{1}
end)
cg.server2:exec(function(delay, replication)
require('fiber').sleep(delay)
box.cfg{replication = replication}
end, {delay, {cg.replication[1], cg.replication[2]}})
-- server1 -> server2 -> server3
cg.server3:update_box_cfg{replication = {cg.replication[2]}}
cg.server2:wait_for_downstream_to(cg.server3)
-- Server2 has a low lag to server3.
cg.server2:exec(function(id, delay)
local lag = box.info.replication[id].downstream.lag
t.assert_le(lag, delay)
t.assert_not_equals(lag, 0)
end, {cg.server3:get_instance_id(), delay})
-- But server1->server2 lag is high. This proves, that the lag is calculated
-- relative to when the master wrote the txn to its local WAL. Not relative
-- to when the txn was created.
cg.server1:exec(function(id, delay)
t.assert_ge(box.info.replication[id].downstream.lag, delay)
end, {cg.server2:get_instance_id(), delay})
end

g.test_lag_is_local_to_sender = function(cg)
-- Retry, because with a non-huge replication timeout the replicas sometimes
-- might timeout when the system is slow, and that would make downstream lag
-- disappear, breaking the test.
t.helpers.retrying({}, test_lag_is_local_to_sender, cg)
end

--
-- Replica can be replicating from multiple masters. Getting data from one of
-- the masters shouldn't affect its downstream lag on the other masters.
--
g.test_lag_no_update_when_replica_follows_third_node = function(cg)
-- server1 -> server2 <- server3
cg.server1:update_box_cfg{replication = {}}
cg.server3:update_box_cfg{replication = {}}
-- Lag server1->server2 won't change if server2 is receiving txns from other
-- nodes, bypassing server1. Because the lag represents delay between
-- server1 and server2 WALs. When they are in sync, and server1 WAL doesn't
-- change, the lag also shouldn't change.
local lag = cg.server1:exec(function(id)
return box.info.replication[id].downstream.lag
end, {cg.server2:get_instance_id()})
local vclock = cg.server3:exec(function()
box.space.test:replace{1}
return box.info.vclock
end)
cg.server1:exec(function(id, vclock, lag)
t.helpers.retrying({}, function()
require('log').info(box.info.replication[id].downstream)
t.assert_equals(box.info.replication[id].downstream.vclock, vclock,
'Server2 did not ack server3 vclock to server1')
end)
t.assert_equals(box.info.replication[id].downstream.lag, lag)
end, {cg.server2:get_instance_id(), vclock, lag})
end

0 comments on commit 40c7a08

Please sign in to comment.