Skip to content

Commit

Permalink
Show failures which occur during systemctl stop
Browse files Browse the repository at this point in the history
Fix systemd#6478.

Also failures during stop could be part of a stop+start job aka restart.
We show the same message for this case.  It is marked "Warning: ", to try
and show that it is not a fatal error, and we continued with the start.

If there are failures during both stop and start, the messages get a little
messy :(.  We add an extra message between the two failures.  Without it
the second failure message would appear to be extra details about the stop
failure.

# Conflicts:
#	test/TEST-03-JOBS/test-jobs.sh
  • Loading branch information
sourcejedi committed Sep 1, 2017
1 parent 5c7f9dc commit 2679ee1
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 17 deletions.
30 changes: 28 additions & 2 deletions src/core/dbus-job.c
Expand Up @@ -203,7 +203,7 @@ void bus_job_send_change_signal(Job *j) {
}

static int send_removed_signal(sd_bus *bus, void *userdata) {
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m2 = NULL, *m = NULL;
_cleanup_free_ char *p = NULL;
Job *j = userdata;
int r;
Expand All @@ -215,6 +215,28 @@ static int send_removed_signal(sd_bus *bus, void *userdata) {
if (!p)
return -ENOMEM;

/* Send the most detailed version of the signal first.
* Clients may rely on this order, to support versions
* without the most detailed signal. */

r = sd_bus_message_new_signal(
bus,
&m2,
"/org/freedesktop/systemd1",
"org.freedesktop.systemd1.Manager",
"JobRemoved2");
if (r < 0)
return r;

r = sd_bus_message_append(m2, "uossb", j->id, p, j->unit->id,
job_result_to_string(j->result), j->unclean_stop);
if (r < 0)
return r;

r = sd_bus_send(bus, m2, NULL);
if (r < 0)
return r;

r = sd_bus_message_new_signal(
bus,
&m,
Expand All @@ -228,7 +250,11 @@ static int send_removed_signal(sd_bus *bus, void *userdata) {
if (r < 0)
return r;

return sd_bus_send(bus, m, NULL);
r = sd_bus_send(bus, m, NULL);
if (r < 0)
return r;

return 0;
}

void bus_job_send_removed_signal(Job *j) {
Expand Down
1 change: 1 addition & 0 deletions src/core/job.h
Expand Up @@ -174,6 +174,7 @@ struct Job {
bool irreversible:1;
bool in_gc_queue:1;
bool ref_by_private_bus:1;
bool unclean_stop:1;
};

Job* job_new(Unit *unit, JobType type);
Expand Down
7 changes: 4 additions & 3 deletions src/core/unit.c
Expand Up @@ -2076,10 +2076,11 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
case JOB_STOP:
case JOB_RESTART:
case JOB_TRY_RESTART:

if (UNIT_IS_INACTIVE_OR_FAILED(ns))
if (UNIT_IS_INACTIVE_OR_FAILED(ns)) {
if (ns == UNIT_FAILED)
u->job->unclean_stop = true;
job_finish_and_invalidate(u->job, JOB_DONE, true, false);
else if (u->job->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) {
} else if (u->job->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) {
unexpected = true;
job_finish_and_invalidate(u->job, JOB_FAILED, true, false);
}
Expand Down
83 changes: 72 additions & 11 deletions src/shared/bus-unit-util.c
Expand Up @@ -992,6 +992,7 @@ typedef struct BusWaitForJobs {

char *name;
char *result;
bool unclean_stop;

sd_bus_slot *slot_job_removed;
sd_bus_slot *slot_disconnected;
Expand All @@ -1006,33 +1007,65 @@ static int match_disconnected(sd_bus_message *m, void *userdata, sd_bus_error *e
return 0;
}

static int match_job_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
static void job_removed(BusWaitForJobs *d,
const char *path,
const char *unit,
const char *result,
bool unclean_stop) {
char *found;

found = set_remove(d->jobs, (char*) path);
if (!found)
return;

free(found);

if (!isempty(result))
d->result = strdup(result);

if (!isempty(unit))
d->name = strdup(unit);

d->unclean_stop = unclean_stop;
}

static int match_job_removed_v2(sd_bus_message *m, void *userdata, sd_bus_error *error) {
const char *path, *unit, *result;
BusWaitForJobs *d = userdata;
bool unclean_stop;
uint32_t id;
char *found;
int r;

assert(m);
assert(d);

r = sd_bus_message_read(m, "uoss", &id, &path, &unit, &result);
r = sd_bus_message_read(m, "uossb", &id, &path, &unit, &result, &unclean_stop);
if (r < 0) {
bus_log_parse_error(r);
return 0;
}

found = set_remove(d->jobs, (char*) path);
if (!found)
return 0;
job_removed(d, path, unit, result, unclean_stop);

free(found);
return 0;
}

if (!isempty(result))
d->result = strdup(result);
static int match_job_removed(sd_bus_message *m, void *userdata, sd_bus_error *error) {
const char *path, *unit, *result;
BusWaitForJobs *d = userdata;
uint32_t id;
int r;

if (!isempty(unit))
d->name = strdup(unit);
assert(m);
assert(d);

r = sd_bus_message_read(m, "uoss", &id, &path, &unit, &result);
if (r < 0) {
bus_log_parse_error(r);
return 0;
}

job_removed(d, path, unit, result, false);

return 0;
}
Expand Down Expand Up @@ -1070,6 +1103,23 @@ int bus_wait_for_jobs_new(sd_bus *bus, BusWaitForJobs **ret) {
/* When we are a bus client we match by sender. Direct
* connections OTOH have no initialized sender field, and
* hence we ignore the sender then */
r = sd_bus_add_match(
bus,
&d->slot_job_removed,
bus->bus_client ?
"type='signal',"
"sender='org.freedesktop.systemd1',"
"interface='org.freedesktop.systemd1.Manager',"
"member='JobRemoved2',"
"path='/org/freedesktop/systemd1'" :
"type='signal',"
"interface='org.freedesktop.systemd1.Manager',"
"member='JobRemoved2',"
"path='/org/freedesktop/systemd1'",
match_job_removed_v2, d);
if (r < 0)
return r;

r = sd_bus_add_match(
bus,
&d->slot_job_removed,
Expand Down Expand Up @@ -1213,6 +1263,17 @@ static int check_wait_response(BusWaitForJobs *d, bool quiet, const char* const*
assert(d->result);

if (!quiet) {
if (d->unclean_stop) {
log_warning("Warning: %s stopped uncleanly. See \"journalctl -xe\" for details.",
strna(d->name));

if (!STR_IN_SET(d->result, "done", "skipped")) {
/* Without some sort of separator, the error message below
* would appear to be referring to the unclean stop. */
log_error("Failed to start %s:", strna(d->name));
}
}

if (streq(d->result, "canceled"))
log_error("Job for %s canceled.", strna(d->name));
else if (streq(d->result, "timeout"))
Expand Down
67 changes: 66 additions & 1 deletion test/TEST-03-JOBS/test-jobs.sh
@@ -1,4 +1,4 @@
#!/bin/bash -e
#!/bin/bash -xe

# Test merging of a --job-mode=ignore-dependencies job into a previously
# installed job.
Expand Down Expand Up @@ -91,4 +91,69 @@ sleep 1
LC_ALL=C systemctl list-jobs > /root/list-jobs.txt
grep "oneshot-wait2.service.* [ ]*restart [ ]*running$" /root/list-jobs.txt || exit 1

# Test different types of service failure
cat <<EOF > /run/systemd/system/fail-start.service
[Unit]
Description=Fail to start
[Service]
Type=oneshot
ExecStart=/bin/sh -c false
EOF

! WARN="$(systemctl start fail-start.service 2>&1)" || exit 1
[ "$WARN" != "" ] || exit 1
systemctl is-failed fail-start.service || exit 1

systemctl reset-failed fail-start.service || exit 1
! systemctl is-failed fail-start.service || exit 1

cat <<EOF > /run/systemd/system/fail-reload.service
[Unit]
Description=Fail to reload
[Service]
Type=oneshot
RemainAfterExit=true
ExecStart=/bin/sh -c true
ExecReload=/bin/sh -c false
EOF

systemctl start fail-reload.service || exit 1

! WARN="$(systemctl reload fail-reload.service 2>&1)" || exit 1
[ "$WARN" != "" ] || exit 1

# reload failure doesn't fail the service
! systemctl is-failed fail-reload.service || exit 1

cat <<EOF > /run/systemd/system/fail-stop.service
[Unit]
Description=Fail to stop
[Service]
Type=oneshot
RemainAfterExit=true
ExecStart=/bin/sh -c true
ExecStop=/bin/sh -c false
EOF

systemctl start fail-stop.service || exit 1

# When stop jobs have to resort to SIGKILL, they are not considered to fail.
# However `systemctl` will warn...
WARN="$(systemctl stop fail-stop.service 2>&1)" || exit 1
[ "$WARN" != "" ] || exit 1

# ... and the service is marked as failed.
systemctl is-failed fail-stop.service || exit 1

# A sucessful start clears the failed state
systemctl start fail-stop.service || exit 1
! systemctl is-failed fail-stop.service || exit 1

# restart = stop+start. We should get the same warning from the stop...
WARN="$(systemctl restart fail-stop.service 2>&1)" || exit 1
[ "$WARN" != "" ] || exit 1

# ... but the failed state is cleared by the successful start
! systemctl is-failed fail-stop.service || exit 1

touch /testok

0 comments on commit 2679ee1

Please sign in to comment.