Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix sigint handling when running in helper mode #869

Merged
merged 7 commits into from
Jun 20, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions cli_helper/kart.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ int is_helper_enabled()
/**
* @brief Exit signal handler for SIGALRM
*/
void exit_on_alarm(int sig)
void exit_on_sigalrm(int sig)
{
int semval = semctl(semid, SEMNUM, GETVAL);
if (semval < 0)
Expand All @@ -209,6 +209,17 @@ void exit_on_alarm(int sig)
exit(exit_code);
}

/**
* @brief Exit signal handler for SIGINT.
* Tries to kill the whole process group.
*/
void exit_on_sigint(int sig)
{
putchar('\n');
rcoup marked this conversation as resolved.
Show resolved Hide resolved
killpg(0, sig);
exit(128 + sig);
}

int main(int argc, char **argv, char **environ)
{
char cmd_path[PATH_MAX];
Expand All @@ -221,6 +232,10 @@ int main(int argc, char **argv, char **environ)
{
debug("enabled %s, pid=%d\n", cmd_path, getpid());

// Make this process the leader of a process group:
// The procress-group ID (pgid) will be the same as the pid.
setpgrp();
rcoup marked this conversation as resolved.
Show resolved Hide resolved

// start or use an existing helper process
char **env_ptr;

Expand Down Expand Up @@ -267,8 +282,8 @@ int main(int argc, char **argv, char **environ)
int fp = open(getcwd(NULL, 0), O_RDONLY);
int fds[4] = {fileno(stdin), fileno(stdout), fileno(stderr), fp};

char *socket_filename = malloc(strlen(getenv("HOME")) + strlen(".kart.socket") + 2);
sprintf(socket_filename, "%s/%s", getenv("HOME"), ".kart.socket");
char *socket_filename = malloc(strlen(getenv("HOME")) + strlen(".kart.1234567890.socket") + 2);
sprintf(socket_filename, "%s/.kart.%d.socket", getenv("HOME"), getsid(0));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sprintf is evil 👿

Suggested change
char *socket_filename = malloc(strlen(getenv("HOME")) + strlen(".kart.1234567890.socket") + 2);
sprintf(socket_filename, "%s/.kart.%d.socket", getenv("HOME"), getsid(0));
size_t socket_filename_sz = strlen(getenv("HOME")) + strlen("/.kart..socket") + sizeof(pid_t) * 3 + 1;
char *socket_filename = malloc(socket_filename_sz);
int r = snprintf(socket_filename, socket_filename_sz, "%s/.kart.%d.socket", getenv("HOME"), getsid(0));
if (r < 0 || (size_t) r >= socket_filename_sz)
{
fprintf(stderr, "Error allocating socket filename\n");
exit(1);
}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

int socket_fd = socket(AF_UNIX, SOCK_STREAM, 0);

struct sockaddr_un addr;
Expand All @@ -289,8 +304,6 @@ int main(int argc, char **argv, char **environ)
// process are left open in it
if (fork() == 0)
{
setsid();

// start helper in background and wait
char *helper_argv[] = {&cmd_path[0], "helper", "--socket", socket_filename, NULL};

Expand Down Expand Up @@ -370,7 +383,8 @@ int main(int argc, char **argv, char **environ)
memcpy((int *)CMSG_DATA(cmsg), fds, sizeof(fds));
msg.msg_controllen = cmsg->cmsg_len;

signal(SIGALRM, exit_on_alarm);
signal(SIGALRM, exit_on_sigalrm);
signal(SIGINT, exit_on_sigint);

if (sendmsg(socket_fd, &msg, 0) < 0)
{
Expand Down
9 changes: 6 additions & 3 deletions kart/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,11 @@ def _configure_process_cleanup_nonwindows():
# to attempt to give Kart it's own process group ID (PGID)
if "_KART_PGID_SET" not in os.environ and os.getpid() != os.getpgrp():
try:
os.setsid()
os.setpgrp()
# No need to do this again for any Kart subprocess of this Kart process.
os.environ["_KART_PGID_SET"] = "1"
except OSError as e:
L.warning("Error setting Kart PGID - os.setsid() failed. %s", e)
L.warning("Error setting Kart PGID - os.setpgrp() failed. %s", e)

# If Kart now has its own PGID, which its children share - we want to SIGTERM that when Kart exits.
if os.getpid() == os.getpgrp():
Expand All @@ -199,7 +199,10 @@ def _cleanup_process_group(signum, stack_frame):
if _kart_process_group_killed:
return
_kart_process_group_killed = True
os.killpg(os.getpid(), signum)
try:
os.killpg(0, signum)
except Exception:
rcoup marked this conversation as resolved.
Show resolved Hide resolved
pass
sys.exit(128 + signum)

signal.signal(signal.SIGTERM, _cleanup_process_group)
Expand Down
2 changes: 1 addition & 1 deletion kart/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def print_version(ctx):
# report on whether this was run through helper mode
helper_pid = os.environ.get("KART_HELPER_PID")
if helper_pid:
click.echo(f"Executed via helper, PID: {helper_pid}")
click.echo(f"Executed via helper, SID={os.getsid(0)} PID={helper_pid}")

ctx.exit()

Expand Down
19 changes: 18 additions & 1 deletion kart/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,18 @@ def _helper_log(msg):
log_file.write(f"{datetime.now()} [{os.getpid()}]: {msg}\n")


def getsid():
if hasattr(os, "getsid"):
return os.getsid(0)
return 0


@click.command(context_settings=dict(ignore_unknown_options=True))
@click.pass_context
@click.option(
"--socket",
"socket_filename",
default=Path.home() / ".kart.socket",
default=Path.home() / f".kart.{getsid()}.socket",
show_default=True,
help="What socket to use",
)
Expand Down Expand Up @@ -133,6 +139,7 @@ def helper(ctx, socket_filename, timeout, args):
else:
# child
_helper_log("post-fork")

payload, fds = recv_json_and_fds(client, maxfds=4)
if not payload or len(fds) != 4:
click.echo(
Expand Down Expand Up @@ -169,6 +176,16 @@ def helper(ctx, socket_filename, timeout, args):
f"Payload:\n{repr(payload)}",
)
else:
try:
# Join the process group of the calling process - so that if they get killed, we get killed to.
os.setpgid(0, calling_environment["pid"])
os.environ["_KART_PGID_SET"] = "1"
except Exception as e:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect this could catch a more specific exception class (OSError? or one of the subclasses: https://docs.python.org/3/library/exceptions.html#os-exceptions)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the possible error codes from setpgid() are all mapped to subclasses of OSError.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
except Exception as e:
except OSError as e:

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

# Kart will still work even if this fails: it just means SIGINT Ctrl+C might not work properly.
# We'll just log it and hope for the best.
_helper_log(f"error joining caller's process group: {e}")
pass

sys.argv[1:] = calling_environment["argv"][1:]
_helper_log(f"cmd={' '.join(calling_environment['argv'])}")
os.environ.clear()
Expand Down
90 changes: 88 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import contextlib
import json
import os
import re
import sys
from pathlib import Path
import signal
import sys
from time import sleep

import pytest

from kart import cli
from kart import cli, is_windows


H = pytest.helpers.helpers()
Expand Down Expand Up @@ -109,3 +112,86 @@ def test_ext_run(tmp_path, cli_runner, sys_path_reset):
assert (val1, val2) == (False, 3)
assert Path(sfile) == (tmp_path / "three.py")
assert sname == "kart.ext_run.three"


@pytest.mark.parametrize("use_helper", [False, True])
def test_sigint_handling_unix(use_helper, tmp_path):
if is_windows:
return
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
@pytest.mark.parametrize("use_helper", [False, True])
def test_sigint_handling_unix(use_helper, tmp_path):
if is_windows:
return
@pytest.mark.skipif(os.name == "nt")
@pytest.mark.parametrize("use_helper", [False, True])
def test_sigint_handling_unix(use_helper, tmp_path):

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


import subprocess

kart_bin_dir = Path(sys.executable).parent
kart_exe = kart_bin_dir / "kart"
kart_cli_exe = kart_bin_dir / "kart_cli"

kart_with_helper_mode = kart_exe if kart_cli_exe.is_file() else kart_cli_exe
kart_without_helper = kart_cli_exe if kart_cli_exe.is_file() else kart_exe

if use_helper and not kart_with_helper_mode.is_file():
raise pytest.skip(f"Couldn't find kart helper mode in {kart_bin_dir}")

kart_to_use = kart_with_helper_mode if use_helper else kart_without_helper
assert kart_to_use.is_file(), "Couldn't find kart"

# working example
with open(tmp_path / "test.py", "wt") as fs:
fs.write(
"\n".join(
[
"import os",
"import sys",
"from time import sleep",
"",
"def main(ctx, args):",
" print(os.getpid())",
" fork_id = os.fork()",
" if fork_id == 0:",
" sleep(100)",
" else:",
" print(fork_id)",
" sys.stdout.flush()",
" os.wait()",
"",
]
)
)

env = os.environ.copy()
env.pop("_KART_PGID_SET", None)
env.pop("NO_CONFIGURE_PROCESS_CLEANUP", None)

p = subprocess.Popen(
[str(kart_to_use), "ext-run", str(tmp_path / "test.py")],
encoding="utf8",
env=env,
stdout=subprocess.PIPE,
)
child_pid = p.pid

sleep(1)
child_pid = int(p.stdout.readline())
grandchild_pid = int(p.stdout.readline())

# The new kart process should be in a new process group.
assert os.getpgid(0) != os.getpgid(child_pid)
# And its subprocess should be in the same process group.
assert os.getpgid(child_pid) == os.getpgid(grandchild_pid)
orig_pgid = os.getpgid(child_pid)

assert p.poll() == None
os.kill(child_pid, signal.SIGINT)

sleep(1)
assert p.poll() != None

def safe_get_pgid(pid):
try:
return os.getpgid(pid)
except Exception:
return -1

# The child and grandchildren should now both be dead.
# Their PIDs may now belong to other processes, but at least, they won't be in the same process group as before.
assert safe_get_pgid(child_pid) != orig_pgid
assert safe_get_pgid(grandchild_pid) != orig_pgid
Loading