Skip to content

Commit

Permalink
Merge pull request #2478 from locustio/processes-stop-parent-process-…
Browse files Browse the repository at this point in the history
…from-trying-to-kill-children-too-fast

Fix issue with --processes: Stop parent process from trying to kill children too fast
  • Loading branch information
cyberw committed Nov 21, 2023
2 parents c03228f + 387bdbd commit 1bdf579
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 3 deletions.
31 changes: 28 additions & 3 deletions locust/main.py
@@ -1,3 +1,4 @@
import errno
import logging
import os
import signal
Expand Down Expand Up @@ -229,13 +230,32 @@ def sigint_handler(_signal, _frame):

def kill_workers(children):
exit_code = 0
logging.debug("Sending SIGINT to children")
start_time = time.time()
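# give children some time to finish up (in case they had an error parsing arguments etc)
# NOTE(review): os.waitpid(pid, os.WNOHANG) returns (0, 0) while the child is still running, yet the
# child is removed from `children` whenever the call returns -- confirm that still-running children
# are not skipped by the SIGINT/wait loops below.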
for child_pid in children[:]:
while time.time() < start_time + 3:
try:
_, child_status = os.waitpid(child_pid, os.WNOHANG)
children.remove(child_pid)
try:
if sys.version_info > (3, 8):
child_exit_code = os.waitstatus_to_exitcode(child_status)
exit_code = max(exit_code, child_exit_code)
except AttributeError:
pass # dammit python 3.8...
except OSError as e:
if e.errno == errno.EINTR:
time.sleep(0.1)
else:
logging.error(traceback.format_exc())
else:
break
for child_pid in children:
try:
logging.debug(f"Sending SIGINT to child with pid {child_pid}")
os.kill(child_pid, signal.SIGINT)
except ProcessLookupError:
pass # never mind, process was already dead
logging.debug("waiting for children to terminate")
for child_pid in children:
_, child_status = os.waitpid(child_pid, 0)
try:
Expand All @@ -245,7 +265,10 @@ def kill_workers(children):
except AttributeError:
pass # dammit python 3.8...
if exit_code > 1:
logging.error(f"bad response code from worker children: {exit_code}")
logging.error(f"Bad response code from worker children: {exit_code}")
# Ensure the master doesn't finish until output from the workers has arrived,
# otherwise the terminal might look weird.
time.sleep(0.1)

atexit.register(kill_workers, children)

Expand Down Expand Up @@ -398,6 +421,8 @@ def kill_workers(children):
"Starting web interface at %s://0.0.0.0:%s (accepting connections from all network interfaces)"
% (protocol, options.web_port)
)
if options.web_auth:
logging.info("BasicAuth support is deprecated, it will be removed in a future release.")
web_ui = environment.create_web_ui(
host=web_host,
port=options.web_port,
Expand Down
23 changes: 23 additions & 0 deletions locust/test/test_main.py
Expand Up @@ -1925,3 +1925,26 @@ def my_task(self):

self.assertNotIn("Traceback", worker_stderr)
self.assertIn("Didn't get heartbeat from master in over ", worker_stderr)

def test_processes_error_doesnt_blow_up_completely(self):
# Runs the locust CLI with "--processes 4" at DEBUG log level, asking for a user class
# named UserThatDoesntExist (presumably not defined in the mocked locustfile -- verify
# against mock_locustfile), and checks what ends up on stderr.
with mock_locustfile() as mocked:
proc = subprocess.Popen(
[
"locust",
"-f",
mocked.file_path,
"--processes",
"4",
"-L",
"DEBUG",
"UserThatDoesntExist",
],
stdout=PIPE,
stderr=PIPE,
text=True,
)
# wait for the process to exit; only stderr is inspected, stdout is discarded
_, stderr = proc.communicate()
self.assertIn("Unknown User(s): UserThatDoesntExist", stderr)
# the error message should repeat 4 times for the workers and once for the master
self.assertEqual(stderr.count("Unknown User(s): UserThatDoesntExist"), 5)
# the error must be reported as a plain message, without any Python traceback in stderr
self.assertNotIn("Traceback", stderr)

0 comments on commit 1bdf579

Please sign in to comment.