Skip to content

Commit

Permalink
Add test for hanging xlog tarantool instance
Browse files Browse the repository at this point in the history
Was added another way with checking patterns in log. Where
test sequentially finds expected patterns and checks that there are no
unexpected lines. For this approach was changed TarantoolLog.seek_wait()
function. Now it is able to find patterns not from beginning
(for sequence of patterns). If a pattern was found its last symbol
position is saved as position for start point for next searching with
start not from beginning.

This approach helps for comparing hanging result. Pytest script
test_hanging_xlog.py executes hang.test.lua and test-run log is
comparing by expected patterns in log. Also, there is check about all
subprocesses were killed. Otherwise, raise an exception with existed
processes info.

Follows up #276
  • Loading branch information
VitaliyaIoffe authored and avtikhon committed Jun 11, 2021
1 parent d058a6f commit 320f235
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 3 deletions.
1 change: 1 addition & 0 deletions .luacheckrc
Expand Up @@ -27,4 +27,5 @@ include_files = {
exclude_files = {
"lib/tarantool-python",
"test/test-tarantool/*.test.lua",
"test/negative_tests/*.test.lua",
}
13 changes: 10 additions & 3 deletions lib/tarantool_server.py
Expand Up @@ -437,6 +437,7 @@ class TarantoolLog(object):
def __init__(self, path):
self.path = path
self.log_begin = 0
self.last_position = 0

def positioning(self):
if os.path.exists(self.path):
Expand All @@ -459,14 +460,18 @@ def seek_once(self, msg):
if pos != -1:
return pos

def seek_wait(self, msg, proc=None, name=None, deadline=None, timeout=10):
def seek_wait(self, msg, proc=None, name=None, deadline=None, timeout=10,
start_from_beginning=True):
while True:
if os.path.exists(self.path):
break
gevent.sleep(0.001)

with open(self.path, 'r') as f:
f.seek(self.log_begin, os.SEEK_SET)
if start_from_beginning:
f.seek(self.log_begin, os.SEEK_SET)
else:
f.seek(self.last_position, os.SEEK_SET)
cur_pos = self.log_begin
if deadline is None:
deadline = time.time() + timeout
Expand All @@ -479,7 +484,9 @@ def seek_wait(self, msg, proc=None, name=None, deadline=None, timeout=10):
gevent.sleep(0.001)
f.seek(cur_pos, os.SEEK_SET)
continue
if re.findall(msg, log_str):
if re.search(msg, log_str):
self.last_position = re.search(msg, log_str).end() + \
cur_pos
return True
cur_pos = f.tell()
return False
Expand Down
12 changes: 12 additions & 0 deletions test/negative_tests/box.lua
@@ -0,0 +1,12 @@
#!/usr/bin/env tarantool
local os = require('os')

box.cfg{
listen = os.getenv("LISTEN"),
memtx_memory = 107374182,
pid_file = "tarantool.pid",
force_recovery = true,
wal_max_size = 500
}

require('console').listen(os.getenv('ADMIN'))
42 changes: 42 additions & 0 deletions test/negative_tests/conftest.py
@@ -0,0 +1,42 @@
import pytest
import psutil
from pathlib import Path
from gevent import subprocess
from lib import Options
from lib.tarantool_server import TarantoolLog


@pytest.yield_fixture(scope="session", autouse=True)
def clean_all_subprocesses() -> None:
"""Kill remained subprocess. Raise an exception of not killed procs."""
current_process = psutil.Process()
children = current_process.children(recursive=True)
yield
not_terminated_processes = []
for child in children:
if psutil.pid_exists(child.pid):
not_terminated_processes.append(child)
child.terminate()
if not_terminated_processes:
raise Exception(
"Next processes were not terminated: {}\n".format(
not_terminated_processes))


@pytest.fixture
def tarantool_log(log_file: Path) -> TarantoolLog:
tarantool_log = TarantoolLog(log_file)
return tarantool_log


@pytest.yield_fixture
def log_file() -> Path:
dir_path = Path(__file__).resolve().parent
with open(dir_path / 'outfile.txt', 'w') as file:
ret = subprocess.Popen(
['../test-run.py',
'hang.test.lua'], cwd=dir_path, stdout=file)
ret.wait(timeout=Options().args.no_output_timeout + 10)
yield Path(dir_path / 'outfile.txt')
ret.kill()

25 changes: 25 additions & 0 deletions test/negative_tests/hang.result
@@ -0,0 +1,25 @@
-- test-run result file version 2
-- box.schema.user.grant('guest', 'replication')replication
local test_run = require('test_run').new()
| ---
| ...
test_run:cmd('create server replica with rpl_master=default, script="xlog/replica.lua"')
| ---
| - error: '[string "return test_run:cmd(''create server replica wi..."]:1: attempt
| to index global ''test_run'' (a nil value)'
| ...
test_run:cmd('start server replica')
| ---
| - error: '[string "return test_run:cmd(''start server replica'') "]:1: attempt to
| index global ''test_run'' (a nil value)'
| ...
test_run:cmd('cleanup server replica')
| ---
| - error: '[string "return test_run:cmd(''cleanup server replica'') "]:1: attempt to
| index global ''test_run'' (a nil value)'
| ...
test_run:cmd('delete server replica')
| ---
| - error: '[string "return test_run:cmd(''delete server replica'') "]:1: attempt to
| index global ''test_run'' (a nil value)'
| ...
6 changes: 6 additions & 0 deletions test/negative_tests/hang.test.lua
@@ -0,0 +1,6 @@
-- box.schema.user.grant('guest', 'replication')replication
test_run = require('test_run').new()
test_run:cmd('create server replica with rpl_master=default, script="negative_tests/replica.lua"')
test_run:cmd('start server replica')
test_run:cmd('cleanup server replica')
test_run:cmd('delete server replica')
13 changes: 13 additions & 0 deletions test/negative_tests/replica.lua
@@ -0,0 +1,13 @@
#!/usr/bin/env tarantool

-- Start the console first to allow test-run to attach even before
-- box.cfg is finished.
require('console').listen(os.getenv('ADMIN'))

box.cfg({
listen = os.getenv("LISTEN"),
replication = os.getenv("MASTER"),
memtx_memory = 107374182,
-- pid_file = "tarantool.pid",
-- logger = "tarantool.log",
})
10 changes: 10 additions & 0 deletions test/negative_tests/suite.ini
@@ -0,0 +1,10 @@
[default]
core = tarantool
description = tarantool write ahead log tests
disabled = hang.test.lua
script = xlog.lua
use_unix_sockets = True
use_unix_sockets_iproto = True
long_run = snap_io_rate.test.lua
is_parallel = True
pretest_clean = True
40 changes: 40 additions & 0 deletions test/negative_tests/test_hanging_xlog.py
@@ -0,0 +1,40 @@
from pathlib import Path

from lib import Options
from lib.tarantool_server import TarantoolLog


class TestHangingXLog:

def test_hanging_xlog(self, tarantool_log: TarantoolLog) -> None:
"""Check if tarantool is hanging it will be stopped by start_timeout.
Check next patterns in the test-run log:
There are (timeout // 10 - 1) lines with 'No output' warnings, which
are going sequentially.
There are no (timeout // 10)th line with same warning.
Next, find log line with failing on start server timeout.
Finally, check log contains '[ fail ]'.
"""

timeout = Options().args.server_start_timeout
no_output_timeout = Options().args.no_output_timeout
p = Path(tarantool_log.path)
assert p.stat().st_size != 0
for time in range(10, timeout, 10):
assert tarantool_log.seek_wait(
r"No output during {} seconds. "
r"Will abort after {} seconds".format(time, no_output_timeout),
start_from_beginning=False)
assert not tarantool_log.seek_wait(
r"No output during {} seconds. "
r"Will abort after {} seconds".format(
timeout, no_output_timeout),
start_from_beginning=False, timeout=1)
assert tarantool_log.seek_wait(
r"\[Instance 'replica'\] "
r"Start timeout {} was reached.".format(timeout),
start_from_beginning=False, timeout=1)
assert tarantool_log.seek_wait(
r'\[ fail \]', start_from_beginning=False, timeout=1)

13 changes: 13 additions & 0 deletions test/negative_tests/xlog.lua
@@ -0,0 +1,13 @@
#!/usr/bin/env tarantool
local os = require('os')

box.cfg{
listen = os.getenv("LISTEN"),
memtx_memory = 107374182,
pid_file = "tarantool.pid",
force_recovery = true,
wal_max_size = 500,
snap_io_rate_limit = 16
}

require('console').listen(os.getenv('ADMIN'))

0 comments on commit 320f235

Please sign in to comment.