-
Notifications
You must be signed in to change notification settings - Fork 376
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
xlog: add request details to panic message for broken LSN
Aid the debugging of replication issues related to out-of-order requests. Adds the details of request/tuple to the diagnostic message whenever possible. Closes #3105
- Loading branch information
Showing
12 changed files
with
331 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
-- Issue 3105: Test logging of request with broken lsn before panicking | ||
-- Two cases are covered: Recovery and Joining a new replica | ||
env = require('test_run') | ||
--- | ||
... | ||
test_run = env.new() | ||
--- | ||
... | ||
test_run:cleanup_cluster() | ||
--- | ||
... | ||
fio = require('fio') | ||
--- | ||
... | ||
test_run:cmd("setopt delimiter ';'") | ||
--- | ||
- true | ||
... | ||
function grep_file_tail(filepath, bytes, pattern) | ||
local fh = fio.open(filepath, {'O_RDONLY'}) | ||
local size = fh:seek(0, 'SEEK_END') | ||
if size < bytes then | ||
bytes = size | ||
end | ||
fh:seek(-bytes, 'SEEK_END') | ||
local line = fh:read(bytes) | ||
fh:close() | ||
return string.match(line, pattern) | ||
end; | ||
--- | ||
... | ||
test_run:cmd("setopt delimiter ''"); | ||
--- | ||
- true | ||
... | ||
-- Testing case of panic on recovery | ||
test_run:cmd('create server panic with script="xlog/panic.lua"') | ||
--- | ||
- true | ||
... | ||
test_run:cmd('start server panic') | ||
--- | ||
- true | ||
... | ||
test_run:switch('panic') | ||
--- | ||
- true | ||
... | ||
box.space._schema:replace{"t0", "v0"} | ||
--- | ||
- ['t0', 'v0'] | ||
... | ||
lsn = box.info.vclock[1] | ||
--- | ||
... | ||
box.error.injection.set("ERRINJ_WAL_BREAK_LSN", lsn + 1) | ||
--- | ||
- ok | ||
... | ||
box.space._schema:replace{"t0", "v1"} | ||
--- | ||
- ['t0', 'v1'] | ||
... | ||
box.error.injection.set("ERRINJ_WAL_BREAK_LSN", -1) | ||
--- | ||
- ok | ||
... | ||
test_run:switch('default') | ||
--- | ||
- true | ||
... | ||
test_run:cmd('stop server panic') | ||
--- | ||
- true | ||
... | ||
dirname = fio.pathjoin(fio.cwd(), "panic") | ||
--- | ||
... | ||
xlogs = fio.glob(dirname .. "/*.xlog") | ||
--- | ||
... | ||
fio.unlink(xlogs[#xlogs]) | ||
--- | ||
- true | ||
... | ||
test_run:cmd('start server panic with crash_expected=True') | ||
--- | ||
- false | ||
... | ||
logpath = fio.pathjoin(fio.cwd(), 'panic.log') | ||
--- | ||
... | ||
-- Check that log contains the mention of broken LSN and the request printout | ||
grep_file_tail(logpath, 256, "LSN for 1 is used twice or COMMIT order is broken: confirmed: 1, new: 1, req: {.*}") | ||
--- | ||
- 'LSN for 1 is used twice or COMMIT order is broken: confirmed: 1, new: 1, req: {type: | ||
''REPLACE'', lsn: 1, space_id: 272, index_id: 0, tuple: ["t0", "v1"]}' | ||
... | ||
test_run:cmd('cleanup server panic') | ||
--- | ||
- true | ||
... | ||
test_run:cmd('delete server panic') | ||
--- | ||
- true | ||
... | ||
-- Testing case of panic on joining a new replica | ||
box.schema.user.grant('guest', 'replication') | ||
--- | ||
... | ||
_ = box.schema.space.create('test') | ||
--- | ||
... | ||
_ = box.space.test:create_index('pk') | ||
--- | ||
... | ||
box.space.test:auto_increment{'v0'} | ||
--- | ||
- [1, 'v0'] | ||
... | ||
lsn = box.info.vclock[1] | ||
--- | ||
... | ||
box.error.injection.set("ERRINJ_RELAY_BREAK_LSN", lsn + 1) | ||
--- | ||
- ok | ||
... | ||
box.space.test:auto_increment{'v1'} | ||
--- | ||
- [2, 'v1'] | ||
... | ||
test_run:cmd('create server replica with rpl_master=default, script="xlog/replica.lua"') | ||
--- | ||
- true | ||
... | ||
test_run:cmd('start server replica with crash_expected=True') | ||
--- | ||
- false | ||
... | ||
fiber = require('fiber') | ||
--- | ||
... | ||
while box.info.replication == nil do fiber.sleep(0.001) end | ||
--- | ||
... | ||
while box.info.replication[2].downstream.status ~= "stopped" do fiber.sleep(0.001) end | ||
--- | ||
... | ||
box.error.injection.set("ERRINJ_RELAY_BREAK_LSN", -1) | ||
--- | ||
- ok | ||
... | ||
logpath = fio.pathjoin(fio.cwd(), 'replica.log') | ||
--- | ||
... | ||
-- Check that log contains the mention of broken LSN and the request printout | ||
test_run:cmd("push filter 'lsn: "..lsn..", space_id: [0-9]+' to 'lsn: <lsn>, space_id: <space_id>'") | ||
--- | ||
- true | ||
... | ||
test_run:cmd("push filter 'confirmed: "..lsn..", new: "..lsn.."' to '<lsn>'") | ||
--- | ||
- true | ||
... | ||
grep_file_tail(logpath, 256, "(LSN for 1 is used twice or COMMIT order is broken: confirmed: "..lsn..", new: "..lsn.."), req: ({.*})") | ||
--- | ||
- 'LSN for 1 is used twice or COMMIT order is broken: <lsn>' | ||
- '{type: ''INSERT'', lsn: <lsn>, space_id: <space_id>, index_id: 0, tuple: [2, "v1"]}' | ||
... | ||
test_run:cmd("clear filter") | ||
--- | ||
- true | ||
... | ||
test_run:cmd('cleanup server replica') | ||
--- | ||
- true | ||
... | ||
test_run:cmd('delete server replica') | ||
--- | ||
- true | ||
... | ||
box.space.test:drop() | ||
--- | ||
... | ||
box.schema.user.revoke('guest', 'replication') | ||
--- | ||
... |
Oops, something went wrong.