Skip to content

Commit

Permalink
Add timeout for starting tarantool server
Browse files Browse the repository at this point in the history
The wait_until_started function in TarantoolServer calls seek_wait,
which waits for a pattern to appear in the log file. If the pattern
never appears, the server start hangs forever. This commit adds the
--server-start-timeout option (90 seconds by default). The pattern is
now sought only until the time runs out: seek_wait returns True if the
pattern was found and False otherwise, and a timeout is raised as a
TarantoolStartError. A new log message reports that the instance was
not started.

Fixes: #276
  • Loading branch information
VitaliyaIoffe committed May 31, 2021
1 parent 1c27177 commit 5ff2d63
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 19 deletions.
8 changes: 8 additions & 0 deletions lib/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,14 @@ def __init__(self):
help="""Break the test process with kill signal if the test runs
longer than this amount of seconds. Default: 110 [seconds].""")

parser.add_argument(
"--server-start-timeout",
dest="server_start_timeout",
default=env_int('SERVER_START_TIMEOUT', 90),
type=int,
help="""Break the test process with kill signal if the test starts
longer than this amount of seconds. Default: 90 [seconds].""")

parser.add_argument(
"--no-output-timeout",
dest="no_output_timeout",
Expand Down
5 changes: 3 additions & 2 deletions lib/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,9 @@ def server_start(self, ctype, sname, opts):
if crash_expected:
# disable crash detector
self.servers[sname].crash_expected = True
self.servers[sname].start(silent=True, rais=True, wait=wait,
wait_load=wait_load, args=args)
self.servers[sname].start(
silent=True, rais=True, wait=wait,
wait_load=wait_load, args=args)
except Exception as e:
crash_occured = True
if not (crash_expected and
Expand Down
56 changes: 39 additions & 17 deletions lib/tarantool_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,8 +424,15 @@ def execute(self, server):


class TarantoolStartError(OSError):
def __init__(self, name=None):
def __init__(self, name=None, timeout=None):
self.name = name
self.timeout = timeout

def __str__(self):
if self.timeout:
return "\n[Instance '{}'] Start timeout {} was reached.\n".format(
self.name, self.timeout)
return "Failed {}".format(self.name)


class TarantoolLog(object):
Expand Down Expand Up @@ -454,7 +461,7 @@ def seek_once(self, msg):
if pos != -1:
return pos

def seek_wait(self, msg, proc=None, name=None):
def seek_wait(self, msg, proc=None, name=None, deadline=None, timeout=10):
while True:
if os.path.exists(self.path):
break
Expand All @@ -463,7 +470,9 @@ def seek_wait(self, msg, proc=None, name=None):
with open(self.path, 'r') as f:
f.seek(self.log_begin, os.SEEK_SET)
cur_pos = self.log_begin
while True:
if deadline is None:
deadline = time.time() + timeout
while time.time() < deadline:
if not (proc is None):
if not (proc.poll() is None):
raise TarantoolStartError(name)
Expand All @@ -473,8 +482,9 @@ def seek_wait(self, msg, proc=None, name=None):
f.seek(cur_pos, os.SEEK_SET)
continue
if re.findall(msg, log_str):
return
return True
cur_pos = f.tell()
return False


class TarantoolServer(Server):
Expand Down Expand Up @@ -833,12 +843,12 @@ def cleanup(self, *args, **kwargs):
def start(self, silent=True, wait=True, wait_load=True, rais=True, args=[],
**kwargs):
if self._start_against_running:
return
return True
if self.status == 'started':
if not silent:
color_stdout('The server is already started.\n',
schema='lerror')
return
return True

args = self.prepare_args(args)
self.pidfile = '%s.pid' % self.name
Expand Down Expand Up @@ -887,9 +897,10 @@ def start(self, silent=True, wait=True, wait_load=True, rais=True, args=[],
self.crash_detector.start()

if wait:
deadline = time.time() + Options().args.server_start_timeout
try:
self.wait_until_started(wait_load)
except TarantoolStartError:
self.wait_until_started(wait_load, deadline)
except TarantoolStartError as err:
# Python tests expect we raise an exception when non-default
# server fails
if self.crash_expected:
Expand All @@ -898,9 +909,7 @@ def start(self, silent=True, wait=True, wait_load=True, rais=True, args=[],
self.current_test.is_crash_reported):
if self.current_test:
self.current_test.is_crash_reported = True
color_stdout('\n[Instance "{0.name}"] Tarantool server '
'failed to start\n'.format(self),
schema='error')
color_stdout(err, schema='error')
self.print_log(15)
# Raise an exception when the caller asks for it (e.g. in case of
# non-default servers)
Expand Down Expand Up @@ -928,6 +937,7 @@ def start(self, silent=True, wait=True, wait_load=True, rais=True, args=[],
actual_version),
schema='error')
raise TarantoolStartError(self.name)
return True

def crash_detect(self):
if self.crash_expected:
Expand Down Expand Up @@ -1098,7 +1108,19 @@ def kill_old_server(self, silent=True):
self.wait_until_stopped(pid)
return True

def wait_until_started(self, wait_load=True):
def wait_load(self, deadline):
"""Wait until the server log file is matched the entry pattern
If the entry pattern couldn't be found in a log file until a timeout
is up, it will raise a TarantoolStartError exception.
"""
msg = 'entering the event loop|will retry binding|hot standby mode'
p = self.process if not self.gdb and not self.lldb else None
if not self.logfile_pos.seek_wait(msg, p, self.name, deadline):
raise TarantoolStartError(
self.name, Options().args.server_start_timeout)

def wait_until_started(self, wait_load=True, deadline=None):
""" Wait until server is started.
Server consists of two parts:
Expand All @@ -1109,12 +1131,9 @@ def wait_until_started(self, wait_load=True):
color_log('DEBUG: [Instance {}] Waiting until started '
'(wait_load={})\n'.format(self.name, str(wait_load)),
schema='info')

if wait_load:
msg = 'entering the event loop|will retry binding|hot standby mode'
p = self.process if not self.gdb and not self.lldb else None
self.logfile_pos.seek_wait(msg, p, self.name)
while True:
self.wait_load(deadline)
while time.time() < deadline:
try:
temp = AdminConnection('localhost', self.admin.port)
if not wait_load:
Expand All @@ -1140,6 +1159,9 @@ def wait_until_started(self, wait_load=True):
gevent.sleep(0.1)
continue
raise
else:
raise TarantoolStartError(
self.name, Options().args.server_start_timeout)

def wait_until_stopped(self, pid):
while True:
Expand Down
2 changes: 2 additions & 0 deletions test-run.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ def main_loop_parallel():
format(args.test_timeout), schema='tr_text')
color_stdout("NO_OUTPUT_TIMEOUT:" . ljust(26) + "{}\n" .
format(args.no_output_timeout), schema='tr_text')
color_stdout("START_TIMEOUT:" . ljust(26) + "{}\n" .
format(args.server_start_timeout), schema='tr_text')
color_stdout("\n", schema='tr_text')

task_groups = get_task_groups()
Expand Down

0 comments on commit 5ff2d63

Please sign in to comment.