Skip to content

Commit

Permalink
test: dirty fix for some flaky replication-luatest tests
Browse files Browse the repository at this point in the history
The tests fixed by this commit often failed with the following error:

  fail | 2023-03-01 15:54:30.550 [3724975] main/103/server_instance.lua
  F> can't initialize storage: unlink, called on fd 63,
  aka unix/:(socket), peer of unix/:(socket): Address already in use

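We fixed a similar issue in commit 3d3e9de ("test: fix flaky
box-luatest/gh_7917_log_row_on_recovery_error_test") by using unique
instance names. Let's do the same here: the listen socket path is built
from the instance alias (see server.build_listen_uri), so instances from
different tests that share an alias can presumably collide on the same
socket.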

NO_DOC=testing stuff
NO_TEST=testing stuff
NO_CHANGELOG=testing stuff
  • Loading branch information
ylobankov committed Apr 7, 2023
1 parent efa60df commit 7ac2685
Show file tree
Hide file tree
Showing 11 changed files with 126 additions and 117 deletions.
82 changes: 41 additions & 41 deletions test/replication-luatest/bootstrap_strategy_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -39,27 +39,27 @@ g_auto.before_test('test_auto_bootstrap_waits_for_confirmations', function(cg)
cg.replica_set = replica_set:new{}
cg.box_cfg = {
replication = {
server.build_listen_uri('server1'),
server.build_listen_uri('server2'),
server.build_listen_uri('server_bs_1'),
server.build_listen_uri('server_bs_2'),
},
replication_connect_timeout = 1000,
replication_timeout = 0.1,
}
-- Make server1 the bootstrap leader.
cg.box_cfg.instance_uuid = uuid1
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
alias = 'server_bs_1',
box_cfg = cg.box_cfg,
}
cg.box_cfg.replication[3] = server.build_listen_uri('server3')
cg.box_cfg.replication[3] = server.build_listen_uri('server_bs_3')
cg.box_cfg.instance_uuid = uuid2
cg.server2 = cg.replica_set:build_and_add_server{
alias = 'server2',
alias = 'server_bs_2',
box_cfg = cg.box_cfg,
}
cg.box_cfg.instance_uuid = uuid3
cg.server3 = cg.replica_set:build_and_add_server{
alias = 'server3',
alias = 'server_bs_3',
box_cfg = cg.box_cfg,
}
end)
Expand All @@ -82,19 +82,19 @@ g_auto.before_test('test_join_checks_fullmesh', function(cg)
cg.replica_set = replica_set:new{}
cg.box_cfg = {
replication = {
server.build_listen_uri('server1'),
server.build_listen_uri('server2'),
server.build_listen_uri('server_bs_1'),
server.build_listen_uri('server_bs_2'),
},
replication_timeout = 0.1,
}
cg.box_cfg.instance_uuid = uuid1
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
alias = 'server_bs_1',
box_cfg = cg.box_cfg,
}
cg.box_cfg.instance_uuid = uuid2
cg.server2 = cg.replica_set:build_and_add_server{
alias = 'server2',
alias = 'server_bs_2',
box_cfg = cg.box_cfg,
}
cg.replica_set:start()
Expand All @@ -103,11 +103,11 @@ end)
g_auto.test_join_checks_fullmesh = function(cg)
cg.box_cfg.replication[2] = nil
cg.server3 = cg.replica_set:build_server{
alias = 'server3',
alias = 'server_bs_3',
box_cfg = cg.box_cfg,
}
cg.server3:start{wait_until_ready = false}
local logfile = fio.pathjoin(cg.server3.workdir, 'server3.log')
local logfile = fio.pathjoin(cg.server3.workdir, 'server_bs_3.log')
local uuid_pattern = uuid2:gsub('%-', '%%-')
local pattern = 'No connection to ' .. uuid_pattern
t.helpers.retrying({}, function()
Expand Down Expand Up @@ -190,11 +190,11 @@ end)
g_config.before_test('test_no_replication', function(cg)
cg.replica_set = replica_set:new{}
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
alias = 'server_bs_1',
box_cfg = {
replication_timeout = 0.1,
bootstrap_strategy = 'config',
bootstrap_leader = server.build_listen_uri('server1'),
bootstrap_leader = server.build_listen_uri('server_bs_1'),
replication = nil
},
}
Expand All @@ -204,7 +204,7 @@ local no_leader_msg = 'failed to connect to the bootstrap leader'

g_config.test_no_replication = function(cg)
cg.replica_set:start{wait_until_ready = false}
local logfile = fio.pathjoin(cg.server1.workdir, 'server1.log')
local logfile = fio.pathjoin(cg.server1.workdir, 'server_bs_1.log')
t.helpers.retrying({}, function()
t.assert(server:grep_log(no_leader_msg, nil, {filename = logfile}))
end)
Expand All @@ -213,7 +213,7 @@ end
g_config.before_test('test_uuid', function(cg)
cg.replica_set = replica_set:new{}
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
alias = 'server_bs_1',
box_cfg = {
bootstrap_strategy = 'config',
bootstrap_leader = uuid1,
Expand All @@ -238,18 +238,18 @@ end)
g_config.before_test('test_replication_without_bootstrap_leader', function(cg)
cg.replica_set = replica_set:new{}
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
alias = 'server_bs_1',
box_cfg = {
replication_timeout = 0.1,
bootstrap_strategy = 'config',
bootstrap_leader = server.build_listen_uri('server1'),
bootstrap_leader = server.build_listen_uri('server_bs_1'),
replication = {
server.build_listen_uri('server2'),
server.build_listen_uri('server_bs_2'),
},
},
}
cg.server2 = cg.replica_set:build_and_add_server{
alias = 'server2',
alias = 'server_bs_2',
box_cfg = {
replication_timeout = 0.1,
},
Expand All @@ -258,7 +258,7 @@ end)

g_config.test_replication_without_bootstrap_leader = function(cg)
cg.replica_set:start{wait_until_ready = false}
local logfile = fio.pathjoin(cg.server1.workdir, 'server1.log')
local logfile = fio.pathjoin(cg.server1.workdir, 'server_bs_1.log')
t.helpers.retrying({}, function()
t.assert(server:grep_log(no_leader_msg, nil, {filename = logfile}))
end)
Expand All @@ -280,12 +280,12 @@ local set_log_before_cfg = [[
g_config.before_test('test_no_leader', function(cg)
cg.replica_set = replica_set:new{}
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
alias = 'server_bs_1',
box_cfg = {
replication_timeout = 0.1,
bootstrap_strategy = 'config',
bootstrap_leader = nil,
replication = server.build_listen_uri('server1'),
replication = server.build_listen_uri('server_bs_1'),
},
env = {
['TARANTOOL_RUN_BEFORE_BOX_CFG'] = set_log_before_cfg,
Expand All @@ -295,7 +295,7 @@ end)

g_config.test_no_leader = function(cg)
cg.replica_set:start{wait_until_ready = false}
local logfile = fio.pathjoin(cg.server1.workdir, 'server1.log')
local logfile = fio.pathjoin(cg.server1.workdir, 'server_bs_1.log')
local empty_leader_msg = "the option can't be empty when bootstrap " ..
"strategy is 'config'"
t.helpers.retrying({}, function()
Expand All @@ -306,12 +306,12 @@ end
g_config.before_test('test_single_leader', function(cg)
cg.replica_set = replica_set:new{}
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
alias = 'server_bs_1',
box_cfg = {
replication_timeout = 0.1,
bootstrap_strategy = 'config',
bootstrap_leader = server.build_listen_uri('server1'),
replication = server.build_listen_uri('server1'),
bootstrap_leader = server.build_listen_uri('server_bs_1'),
replication = server.build_listen_uri('server_bs_1'),
},
}
end)
Expand All @@ -329,14 +329,14 @@ g_config.after_test('test_single_leader', function(cg)
end)

local g_config_success = t.group('gh-7999-bootstrap-strategy-config-success', {
{leader = 'server3'},
{leader = 'server_bs_3'},
{leader = uuid3},
})

g_config_success.before_each(function(cg)
cg.leader = cg.params.leader
-- cg.params can't have "/" for some reason, so recreate the path here.
if string.match(cg.leader, 'server3') then
if string.match(cg.leader, 'server_bs_3') then
cg.leader = server.build_listen_uri(cg.leader)
end
end)
Expand All @@ -348,35 +348,35 @@ end)
g_config_success.before_test('test_correct_bootstrap_leader', function(cg)
cg.replica_set = replica_set:new{}
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
alias = 'server_bs_1',
box_cfg = {
bootstrap_strategy = 'config',
bootstrap_leader = cg.leader,
instance_uuid = uuid1,
replication = {
server.build_listen_uri('server1'),
server.build_listen_uri('server2'),
server.build_listen_uri('server3'),
server.build_listen_uri('server_bs_1'),
server.build_listen_uri('server_bs_2'),
server.build_listen_uri('server_bs_3'),
},
replication_timeout = 0.1,
},
}
cg.replica_set_a = replica_set:new{}
cg.server2 = cg.replica_set_a:build_and_add_server{
alias = 'server2',
alias = 'server_bs_2',
box_cfg = {
replicaset_uuid = uuida,
instance_uuid = uuid2,
}
}
cg.replica_set_b = replica_set:new{}
cg.server3 = cg.replica_set_b:build_and_add_server{
alias = 'server3',
alias = 'server_bs_3',
box_cfg = {
replicaset_uuid = uuidb,
instance_uuid = uuid3,
listen = {
server.build_listen_uri('server3'),
server.build_listen_uri('server_bs_3'),
},
},
}
Expand All @@ -401,27 +401,27 @@ end
g_config_success.before_test('test_wait_only_for_leader', function(cg)
cg.replica_set = replica_set:new{}
cg.server1 = cg.replica_set:build_and_add_server{
alias = 'server1',
alias = 'server_bs_1',
box_cfg = {
bootstrap_strategy = 'config',
bootstrap_leader = cg.leader,
replication = {
server.build_listen_uri('server1'),
server.build_listen_uri('server_bs_1'),
server.build_listen_uri('unreachable_2'),
server.build_listen_uri('server3'),
server.build_listen_uri('server_bs_3'),
server.build_listen_uri('unreachable_4'),
},
replication_connect_timeout = 1000,
replication_timeout = 0.1,
},
}
cg.server3 = cg.replica_set:build_and_add_server{
alias = 'server3',
alias = 'server_bs_3',
box_cfg = {
replicaset_uuid = uuidb,
instance_uuid = uuid3,
listen = {
server.build_listen_uri('server3'),
server.build_listen_uri('server_bs_3'),
},
},
}
Expand Down
38 changes: 23 additions & 15 deletions test/replication-luatest/election_fencing_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ local cluster = require('luatest.replica_set')
local server = require('luatest.server')

local g_async = luatest.group('fencing_async', {
{election_mode = 'manual'}, {election_mode = 'candidate'}})
{mode = 'manual'}, {mode = 'candidate'}})
local g_sync = luatest.group('fencing_sync')
local g_mode = luatest.group('fencing_mode', {
{election_fencing_mode = 'soft'}, {election_fencing_mode = 'strict'}})
{mode = 'soft'}, {mode = 'strict'}})

local SHORT_TIMEOUT = 0.1
local LONG_TIMEOUT = 1000
Expand Down Expand Up @@ -61,13 +61,20 @@ local function box_cfg_update(servers, cfg)
end

local function start(g)
local suffix
if g.params then
suffix = g.params.mode
else
suffix = g.name
end

g.box_cfg = {
election_mode = 'manual',
election_timeout = SHORT_TIMEOUT,
replication = {
server.build_listen_uri('server_1'),
server.build_listen_uri('server_2'),
server.build_listen_uri('server_3'),
server.build_listen_uri('server_1_' .. suffix),
server.build_listen_uri('server_2_' .. suffix),
server.build_listen_uri('server_3_' .. suffix),
},
replication_synchro_quorum = 2,
replication_synchro_timeout = SHORT_TIMEOUT,
Expand All @@ -76,13 +83,13 @@ local function start(g)

g.cluster = cluster:new({})
g.server_1 = g.cluster:build_and_add_server(
{alias = 'server_1', box_cfg = g.box_cfg})
{alias = 'server_1_' .. suffix, box_cfg = g.box_cfg})

g.box_cfg.read_only = true
g.server_2 = g.cluster:build_and_add_server(
{alias = 'server_2', box_cfg = g.box_cfg})
{alias = 'server_2_' .. suffix, box_cfg = g.box_cfg})
g.server_3 = g.cluster:build_and_add_server(
{alias = 'server_3', box_cfg = g.box_cfg})
{alias = 'server_3_' .. suffix, box_cfg = g.box_cfg})

g.cluster:start()
g.cluster:wait_for_fullmesh()
Expand All @@ -109,7 +116,7 @@ g_async.after_each(function(g)
end)

g_async.test_fencing = function(g)
box_cfg_update({g.server_1}, {election_mode = g.params.election_mode})
box_cfg_update({g.server_1}, {election_mode = g.params.mode})
g.server_1:exec(function()
box.schema.create_space('test'):create_index('pk')
end)
Expand Down Expand Up @@ -296,19 +303,20 @@ g_mode.after_all(stop)
g_mode.test_fencing_mode = function(g)
local timeout = 0.5
box_cfg_update({g.server_1, g.server_2}, {
election_fencing_mode = g.params.election_fencing_mode,
election_fencing_mode = g.params.mode,
replication_timeout = timeout,
})

local proxy = require('luatest.replica_proxy'):new({
client_socket_path = server.build_listen_uri('server_1_proxy'),
server_socket_path = server.build_listen_uri('server_1'),
client_socket_path = server.build_listen_uri(
g.server_1.alias .. '_proxy'),
server_socket_path = server.build_listen_uri(g.server_1.alias),
})
proxy:start({force = true})

local proxied_replication = {
server.build_listen_uri('server_1_proxy'),
server.build_listen_uri('server_2'),
server.build_listen_uri(g.server_1.alias .. '_proxy'),
server.build_listen_uri(g.server_2.alias),
}

box_cfg_update({g.server_2}, {replication = {}})
Expand All @@ -335,7 +343,7 @@ g_mode.test_fencing_mode = function(g)
return box.info.replication[leader_id].upstream.status
end, {leader_id})

if g.params.election_fencing_mode == 'strict' then
if g.params.mode == 'strict' then
luatest.assert_equals(follower_connection_status, 'follow',
'Follower did not notice leader disconnection')
else
Expand Down
Loading

0 comments on commit 7ac2685

Please sign in to comment.