Skip to content

Commit d0744e5

Browse files
committed
Merge pull request #1745 from poettering/journal-deadlock
Make sure journald never blocks on sd_notify() to PID 1
2 parents 534e8f8 + 3958325 commit d0744e5

File tree

18 files changed

+498
-265
lines changed

18 files changed

+498
-265
lines changed

src/basic/env-util.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "alloc-util.h"
2626
#include "def.h"
2727
#include "env-util.h"
28+
#include "parse-util.h"
2829
#include "string-util.h"
2930
#include "strv.h"
3031
#include "utf8.h"
@@ -594,3 +595,13 @@ char **replace_env_argv(char **argv, char **env) {
594595
ret[k] = NULL;
595596
return ret;
596597
}
598+
599+
int getenv_bool(const char *p) {
600+
const char *e;
601+
602+
e = getenv(p);
603+
if (!e)
604+
return -ENXIO;
605+
606+
return parse_boolean(e);
607+
}

src/basic/env-util.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,5 @@ char **strv_env_unset_many(char **l, ...) _sentinel_;
4747

4848
char *strv_env_get_n(char **l, const char *name, size_t k) _pure_;
4949
char *strv_env_get(char **x, const char *n) _pure_;
50+
51+
int getenv_bool(const char *p);

src/basic/fdset.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ FDSet *fdset_new(void) {
4444
return MAKE_FDSET(set_new(NULL));
4545
}
4646

47-
int fdset_new_array(FDSet **ret, int *fds, unsigned n_fds) {
47+
int fdset_new_array(FDSet **ret, const int *fds, unsigned n_fds) {
4848
unsigned i;
4949
FDSet *s;
5050
int r;

src/basic/fdset.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ int fdset_consume(FDSet *s, int fd);
3535
bool fdset_contains(FDSet *s, int fd);
3636
int fdset_remove(FDSet *s, int fd);
3737

38-
int fdset_new_array(FDSet **ret, int *fds, unsigned n_fds);
38+
int fdset_new_array(FDSet **ret, const int *fds, unsigned n_fds);
3939
int fdset_new_fill(FDSet **ret);
4040
int fdset_new_listen_fds(FDSet **ret, bool unset);
4141

src/core/manager.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@
8686
#include "virt.h"
8787
#include "watchdog.h"
8888

89+
#define NOTIFY_RCVBUF_SIZE (8*1024*1024)
90+
8991
/* Initial delay and the interval for printing status messages about running jobs */
9092
#define JOBS_IN_PROGRESS_WAIT_USEC (5*USEC_PER_SEC)
9193
#define JOBS_IN_PROGRESS_PERIOD_USEC (USEC_PER_SEC / 3)
@@ -689,6 +691,8 @@ static int manager_setup_notify(Manager *m) {
689691
if (fd < 0)
690692
return log_error_errno(errno, "Failed to allocate notification socket: %m");
691693

694+
fd_inc_rcvbuf(fd, NOTIFY_RCVBUF_SIZE);
695+
692696
if (m->running_as == MANAGER_SYSTEM)
693697
m->notify_socket = strdup("/run/systemd/notify");
694698
else {
@@ -1488,7 +1492,7 @@ static unsigned manager_dispatch_dbus_queue(Manager *m) {
14881492
return n;
14891493
}
14901494

1491-
static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, char *buf, size_t n, FDSet *fds) {
1495+
static void manager_invoke_notify_message(Manager *m, Unit *u, pid_t pid, const char *buf, size_t n, FDSet *fds) {
14921496
_cleanup_strv_free_ char **tags = NULL;
14931497

14941498
assert(m);
@@ -1618,7 +1622,7 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
16181622
return 0;
16191623
}
16201624

1621-
static void invoke_sigchld_event(Manager *m, Unit *u, siginfo_t *si) {
1625+
static void invoke_sigchld_event(Manager *m, Unit *u, const siginfo_t *si) {
16221626
assert(m);
16231627
assert(u);
16241628
assert(si);
@@ -2000,8 +2004,7 @@ int manager_loop(Manager *m) {
20002004
m->exit_code = MANAGER_OK;
20012005

20022006
/* Release the path cache */
2003-
set_free_free(m->unit_path_cache);
2004-
m->unit_path_cache = NULL;
2007+
m->unit_path_cache = set_free_free(m->unit_path_cache);
20052008

20062009
manager_check_finished(m);
20072010

@@ -2111,6 +2114,9 @@ void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
21112114
const char *msg;
21122115
int audit_fd, r;
21132116

2117+
if (m->running_as != MANAGER_SYSTEM)
2118+
return;
2119+
21142120
audit_fd = get_audit_fd();
21152121
if (audit_fd < 0)
21162122
return;
@@ -2120,9 +2126,6 @@ void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
21202126
if (m->n_reloading > 0)
21212127
return;
21222128

2123-
if (m->running_as != MANAGER_SYSTEM)
2124-
return;
2125-
21262129
if (u->type != UNIT_SERVICE)
21272130
return;
21282131

@@ -2771,8 +2774,7 @@ static int create_generator_dir(Manager *m, char **generator, const char *name)
27712774
return log_oom();
27722775

27732776
if (!mkdtemp(p)) {
2774-
log_error_errno(errno, "Failed to create generator directory %s: %m",
2775-
p);
2777+
log_error_errno(errno, "Failed to create generator directory %s: %m", p);
27762778
free(p);
27772779
return -errno;
27782780
}

src/journal-remote/journal-remote.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1256,7 +1256,6 @@ static int parse_argv(int argc, char *argv[]) {
12561256
};
12571257

12581258
int c, r;
1259-
const char *p;
12601259
bool type_a, type_b;
12611260

12621261
assert(argc >= 0);
@@ -1417,7 +1416,7 @@ static int parse_argv(int argc, char *argv[]) {
14171416

14181417
case ARG_GNUTLS_LOG: {
14191418
#ifdef HAVE_GNUTLS
1420-
p = optarg;
1419+
const char* p = optarg;
14211420
for (;;) {
14221421
_cleanup_free_ char *word = NULL;
14231422

src/journal/journald-server.c

Lines changed: 127 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@
7878

7979
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
8080

81+
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)
82+
8183
static int determine_space_for(
8284
Server *s,
8385
JournalMetrics *metrics,
@@ -1457,6 +1459,126 @@ static int server_open_hostname(Server *s) {
14571459
return 0;
14581460
}
14591461

1462+
static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1463+
Server *s = userdata;
1464+
int r;
1465+
1466+
assert(s);
1467+
assert(s->notify_event_source == es);
1468+
assert(s->notify_fd == fd);
1469+
1470+
if (revents != EPOLLOUT) {
1471+
log_error("Invalid events on notify file descriptor.");
1472+
return -EINVAL;
1473+
}
1474+
1475+
/* The $NOTIFY_SOCKET is writable again, now send exactly one
1476+
* message on it. Either it's the initial READY=1 event or an
1477+
* stdout stream event. If there's nothing to write anymore,
1478+
* turn our event source off. The next time there's something
1479+
* to send it will be turned on again. */
1480+
1481+
if (!s->sent_notify_ready) {
1482+
static const char p[] =
1483+
"READY=1\n"
1484+
"STATUS=Processing requests...";
1485+
ssize_t l;
1486+
1487+
l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1488+
if (l < 0) {
1489+
if (errno == EAGAIN)
1490+
return 0;
1491+
1492+
return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1493+
}
1494+
1495+
s->sent_notify_ready = true;
1496+
log_debug("Sent READY=1 notification.");
1497+
1498+
} else if (s->stdout_streams_notify_queue)
1499+
/* Dispatch one stream notification event */
1500+
stdout_stream_send_notify(s->stdout_streams_notify_queue);
1501+
1502+
/* Leave us enabled if there's still more to to do. */
1503+
if (s->stdout_streams_notify_queue)
1504+
return 0;
1505+
1506+
/* There was nothing to do anymore, let's turn ourselves off. */
1507+
r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1508+
if (r < 0)
1509+
return log_error_errno(r, "Failed to turn off notify event source: %m");
1510+
1511+
return 0;
1512+
}
1513+
1514+
static int server_connect_notify(Server *s) {
1515+
union sockaddr_union sa = {
1516+
.un.sun_family = AF_UNIX,
1517+
};
1518+
const char *e;
1519+
int r;
1520+
1521+
assert(s);
1522+
assert(s->notify_fd < 0);
1523+
assert(!s->notify_event_source);
1524+
1525+
/*
1526+
So here's the problem: we'd like to send notification
1527+
messages to PID 1, but we cannot do that via sd_notify(),
1528+
since that's synchronous, and we might end up blocking on
1529+
it. Specifically: given that PID 1 might block on
1530+
dbus-daemon during IPC, and dbus-daemon is logging to us,
1531+
and might hence block on us, we might end up in a deadlock
1532+
if we block on sending PID 1 notification messages -- by
1533+
generating a full blocking circle. To avoid this, let's
1534+
create a non-blocking socket, and connect it to the
1535+
notification socket, and then wait for POLLOUT before we
1536+
send anything. This should efficiently avoid any deadlocks,
1537+
as we'll never block on PID 1, hence PID 1 can safely block
1538+
on dbus-daemon which can safely block on us again.
1539+
1540+
Don't think that this issue is real? It is, see:
1541+
https://github.com/systemd/systemd/issues/1505
1542+
*/
1543+
1544+
e = getenv("NOTIFY_SOCKET");
1545+
if (!e)
1546+
return 0;
1547+
1548+
if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1549+
log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1550+
return -EINVAL;
1551+
}
1552+
1553+
if (strlen(e) > sizeof(sa.un.sun_path)) {
1554+
log_error("NOTIFY_SOCKET path too long: %s", e);
1555+
return -EINVAL;
1556+
}
1557+
1558+
s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1559+
if (s->notify_fd < 0)
1560+
return log_error_errno(errno, "Failed to create notify socket: %m");
1561+
1562+
(void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1563+
1564+
strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1565+
if (sa.un.sun_path[0] == '@')
1566+
sa.un.sun_path[0] = 0;
1567+
1568+
r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1569+
if (r < 0)
1570+
return log_error_errno(errno, "Failed to connect to notify socket: %m");
1571+
1572+
r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1573+
if (r < 0)
1574+
return log_error_errno(r, "Failed to watch notification socket: %m");
1575+
1576+
/* This should fire pretty soon, which we'll use to send the
1577+
* READY=1 event. */
1578+
1579+
return 0;
1580+
}
1581+
14601582
int server_init(Server *s) {
14611583
_cleanup_fdset_free_ FDSet *fds = NULL;
14621584
int n, r, fd;
@@ -1465,7 +1587,7 @@ int server_init(Server *s) {
14651587
assert(s);
14661588

14671589
zero(*s);
1468-
s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = -1;
1590+
s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
14691591
s->compress = true;
14701592
s->seal = true;
14711593

@@ -1511,8 +1633,6 @@ int server_init(Server *s) {
15111633
if (r < 0)
15121634
return log_error_errno(r, "Failed to create event loop: %m");
15131635

1514-
sd_event_set_watchdog(s->event, true);
1515-
15161636
n = sd_listen_fds(true);
15171637
if (n < 0)
15181638
return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
@@ -1637,6 +1757,8 @@ int server_init(Server *s) {
16371757
server_cache_boot_id(s);
16381758
server_cache_machine_id(s);
16391759

1760+
(void) server_connect_notify(s);
1761+
16401762
return system_journal_open(s, false);
16411763
}
16421764

@@ -1685,6 +1807,7 @@ void server_done(Server *s) {
16851807
sd_event_source_unref(s->sigterm_event_source);
16861808
sd_event_source_unref(s->sigint_event_source);
16871809
sd_event_source_unref(s->hostname_event_source);
1810+
sd_event_source_unref(s->notify_event_source);
16881811
sd_event_unref(s->event);
16891812

16901813
safe_close(s->syslog_fd);
@@ -1693,6 +1816,7 @@ void server_done(Server *s) {
16931816
safe_close(s->dev_kmsg_fd);
16941817
safe_close(s->audit_fd);
16951818
safe_close(s->hostname_fd);
1819+
safe_close(s->notify_fd);
16961820

16971821
if (s->rate_limit)
16981822
journal_rate_limit_free(s->rate_limit);

src/journal/journald-server.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,12 @@
2626

2727
#include "sd-event.h"
2828

29+
typedef struct Server Server;
30+
2931
#include "hashmap.h"
3032
#include "journal-file.h"
3133
#include "journald-rate-limit.h"
34+
#include "journald-stream.h"
3235
#include "list.h"
3336

3437
typedef enum Storage {
@@ -48,15 +51,14 @@ typedef enum SplitMode {
4851
_SPLIT_INVALID = -1
4952
} SplitMode;
5053

51-
typedef struct StdoutStream StdoutStream;
52-
53-
typedef struct Server {
54+
struct Server {
5455
int syslog_fd;
5556
int native_fd;
5657
int stdout_fd;
5758
int dev_kmsg_fd;
5859
int audit_fd;
5960
int hostname_fd;
61+
int notify_fd;
6062

6163
sd_event *event;
6264

@@ -71,6 +73,7 @@ typedef struct Server {
7173
sd_event_source *sigterm_event_source;
7274
sd_event_source *sigint_event_source;
7375
sd_event_source *hostname_event_source;
76+
sd_event_source *notify_event_source;
7477

7578
JournalFile *runtime_journal;
7679
JournalFile *system_journal;
@@ -111,6 +114,7 @@ typedef struct Server {
111114
usec_t oldest_file_usec;
112115

113116
LIST_HEAD(StdoutStream, stdout_streams);
117+
LIST_HEAD(StdoutStream, stdout_streams_notify_queue);
114118
unsigned n_stdout_streams;
115119

116120
char *tty_path;
@@ -132,6 +136,7 @@ typedef struct Server {
132136

133137
struct udev *udev;
134138

139+
bool sent_notify_ready;
135140
bool sync_scheduled;
136141

137142
char machine_id_field[sizeof("_MACHINE_ID=") + 32];
@@ -140,7 +145,7 @@ typedef struct Server {
140145

141146
/* Cached cgroup root, so that we don't have to query that all the time */
142147
char *cgroup_root;
143-
} Server;
148+
};
144149

145150
#define SERVER_MACHINE_ID(s) ((s)->machine_id_field + strlen("_MACHINE_ID="))
146151

0 commit comments

Comments
 (0)