From ebebfc67e68b844a6cc2a87af1841949c523c9d0 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Sun, 13 Aug 2017 15:13:10 +0100 Subject: [PATCH] core: fail stop job when ExecStop fails This fixes the first part of #6478. It does not display failures which occur during a restart. This was less work than it perhaps looks at first. I took the existing `->reload_result`, mechanically created `->stop_result`, and then it passed the test case from the issue. Test cases are also included for ExecReload and ExecStart failure. --- src/core/automount.c | 2 +- src/core/device.c | 2 +- src/core/mount.c | 18 +++++++++++- src/core/mount.h | 1 + src/core/path.c | 2 +- src/core/scope.c | 2 +- src/core/service.c | 54 ++++++++++++++++++++++++++++++---- src/core/service.h | 3 +- src/core/slice.c | 2 +- src/core/socket.c | 2 +- src/core/swap.c | 2 +- src/core/target.c | 2 +- src/core/timer.c | 2 +- src/core/unit.c | 11 ++++++- src/core/unit.h | 2 +- test/TEST-03-JOBS/test-jobs.sh | 44 +++++++++++++++++++++++++++ 16 files changed, 133 insertions(+), 18 deletions(-) diff --git a/src/core/automount.c b/src/core/automount.c index 96de933e2327e..ecad9df93348f 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -258,7 +258,7 @@ static void automount_set_state(Automount *a, AutomountState state) { if (state != old_state) log_unit_debug(UNIT(a), "Changed %s -> %s", automount_state_to_string(old_state), automount_state_to_string(state)); - unit_notify(UNIT(a), state_translation_table[old_state], state_translation_table[state], true); + unit_notify(UNIT(a), state_translation_table[old_state], state_translation_table[state], true, true); } static int automount_coldplug(Unit *u) { diff --git a/src/core/device.c b/src/core/device.c index 77601c5520bc6..24c5f40c3bd84 100644 --- a/src/core/device.c +++ b/src/core/device.c @@ -135,7 +135,7 @@ static void device_set_state(Device *d, DeviceState state) { if (state != old_state) log_unit_debug(UNIT(d), "Changed %s -> %s", device_state_to_string(old_state), device_state_to_string(state)); - unit_notify(UNIT(d), state_translation_table[old_state], state_translation_table[state], true); + unit_notify(UNIT(d), state_translation_table[old_state], state_translation_table[state], true, true); } static int device_coldplug(Unit *u) { diff --git a/src/core/mount.c b/src/core/mount.c index ca8ee2e6a4ee3..9cef409c0c326 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -652,8 +652,10 @@ static void mount_set_state(Mount *m, MountState state) { if (state != old_state) log_unit_debug(UNIT(m), "Changed %s -> %s", mount_state_to_string(old_state), mount_state_to_string(state)); - unit_notify(UNIT(m), state_translation_table[old_state], state_translation_table[state], m->reload_result == MOUNT_SUCCESS); + unit_notify(UNIT(m), state_translation_table[old_state], state_translation_table[state], + m->reload_result == MOUNT_SUCCESS, m->stop_result == MOUNT_SUCCESS); m->reload_result = MOUNT_SUCCESS; + m->stop_result = MOUNT_SUCCESS; } static int mount_coldplug(Unit *u) { @@ -906,6 +908,7 @@ static void mount_enter_unmounting(Mount *m) { fail: log_unit_warning_errno(UNIT(m), r, "Failed to run 'umount' task: %m"); + m->stop_result = MOUNT_FAILURE_RESOURCES; mount_enter_mounted(m, MOUNT_FAILURE_RESOURCES); } @@ -1047,6 +1050,7 @@ static int mount_start(Unit *u) { m->result = MOUNT_SUCCESS; m->reload_result = MOUNT_SUCCESS; + m->stop_result = MOUNT_SUCCESS; m->reset_cpu_usage = true; mount_enter_mounting(m); @@ -1103,6 +1107,7 @@ static int mount_serialize(Unit *u, FILE *f, FDSet *fds) { unit_serialize_item(u, f, "state", mount_state_to_string(m->state)); unit_serialize_item(u, f, "result", mount_result_to_string(m->result)); unit_serialize_item(u, f, "reload-result", mount_result_to_string(m->reload_result)); + unit_serialize_item(u, f, "stop-result", mount_result_to_string(m->stop_result)); if (m->control_pid > 0) unit_serialize_item_format(u, f, "control-pid", PID_FMT, m->control_pid); @@ -1146,6 +1151,15 @@ static int mount_deserialize_item(Unit *u, const char *key, const char *value, F else if (f != MOUNT_SUCCESS) m->reload_result = f; + } else if (streq(key, "stop-result")) { + MountResult f; + + f = mount_result_from_string(value); + if (f < 0) + log_unit_debug(u, "Failed to parse stop result value: %s", value); + else if (f != MOUNT_SUCCESS) + m->stop_result = f; + } else if (streq(key, "control-pid")) { pid_t pid; @@ -1324,6 +1338,7 @@ static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *user case MOUNT_UNMOUNTING: log_unit_warning(UNIT(m), "Unmounting timed out. Terminating."); + m->stop_result = MOUNT_FAILURE_TIMEOUT; mount_enter_signal(m, MOUNT_UNMOUNTING_SIGTERM, MOUNT_FAILURE_TIMEOUT); break; @@ -1903,6 +1918,7 @@ static void mount_reset_failed(Unit *u) { m->result = MOUNT_SUCCESS; m->reload_result = MOUNT_SUCCESS; + m->stop_result = MOUNT_SUCCESS; } static int mount_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) { diff --git a/src/core/mount.h b/src/core/mount.h index 9f7326ba6ada3..ecc44bf76bb61 100644 --- a/src/core/mount.h +++ b/src/core/mount.h @@ -76,6 +76,7 @@ struct Mount { MountResult result; MountResult reload_result; + MountResult stop_result; mode_t directory_mode; diff --git a/src/core/path.c b/src/core/path.c index 83f794be89e5f..db750fd1d9d06 100644 --- a/src/core/path.c +++ b/src/core/path.c @@ -428,7 +428,7 @@ static void path_set_state(Path *p, PathState state) { if (state != old_state) log_unit_debug(UNIT(p), "Changed %s -> %s", path_state_to_string(old_state), path_state_to_string(state)); - unit_notify(UNIT(p), state_translation_table[old_state], state_translation_table[state], true); + unit_notify(UNIT(p), state_translation_table[old_state], state_translation_table[state], true, true); } static void path_enter_waiting(Path *p, bool initial, bool recheck); diff --git a/src/core/scope.c b/src/core/scope.c index a1d5c1cfd5411..059b083069134 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -109,7 +109,7 @@ static void scope_set_state(Scope *s, ScopeState state) { if (state != old_state) log_debug("%s changed %s -> %s", UNIT(s)->id, scope_state_to_string(old_state), scope_state_to_string(state)); - unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], true); + unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], true, true); } static int scope_add_default_dependencies(Scope *s) { diff --git a/src/core/service.c b/src/core/service.c index ebd8bd5a3f77d..093e5b9f65777 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -728,6 +728,7 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { "%sService State: %s\n" "%sResult: %s\n" "%sReload Result: %s\n" + "%sStop Result: %s\n" "%sPermissionsStartOnly: %s\n" "%sRootDirectoryStartOnly: %s\n" "%sRemainAfterExit: %s\n" @@ -739,6 +740,7 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { prefix, service_state_to_string(s->state), prefix, service_result_to_string(s->result), prefix, service_result_to_string(s->reload_result), + prefix, service_result_to_string(s->stop_result), prefix, yes_no(s->permissions_start_only), prefix, yes_no(s->root_directory_start_only), prefix, yes_no(s->remain_after_exit), @@ -969,7 +971,8 @@ static void service_set_state(Service *s, ServiceState state) { if (old_state != state) log_unit_debug(UNIT(s), "Changed %s -> %s", service_state_to_string(old_state), service_state_to_string(state)); - unit_notify(UNIT(s), table[old_state], table[state], s->reload_result == SERVICE_SUCCESS); + unit_notify(UNIT(s), table[old_state], table[state], + s->reload_result == SERVICE_SUCCESS, s->stop_result == SERVICE_SUCCESS); } static usec_t service_coldplug_timeout(Service *s) { @@ -1644,6 +1647,8 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f fail: log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m"); + if (s->stop_result == SERVICE_SUCCESS) + s->stop_result = SERVICE_FAILURE_RESOURCES; if (IN_SET(state, SERVICE_STOP_SIGABRT, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL)) service_enter_stop_post(s, SERVICE_FAILURE_RESOURCES); @@ -1667,6 +1672,7 @@ static void service_enter_stop(Service *s, ServiceResult f) { assert(s); + s->stop_result = SERVICE_SUCCESS; if (s->result == SERVICE_SUCCESS) s->result = f; @@ -1693,6 +1699,7 @@ static void service_enter_stop(Service *s, ServiceResult f) { fail: log_unit_warning_errno(UNIT(s), r, "Failed to run 'stop' task: %m"); + s->stop_result = SERVICE_FAILURE_RESOURCES; service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES); } @@ -2044,15 +2051,28 @@ static void service_run_next_control(Service *s) { fail: log_unit_warning_errno(UNIT(s), r, "Failed to run next control task: %m"); - if (IN_SET(s->state, SERVICE_START_PRE, SERVICE_START_POST, SERVICE_STOP)) + switch (s->state) { + case SERVICE_START_PRE: + case SERVICE_START_POST: service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES); - else if (s->state == SERVICE_STOP_POST) + break; + case SERVICE_STOP: + s->stop_result = SERVICE_FAILURE_RESOURCES; + service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES); + break; + case SERVICE_STOP_POST: + if (s->stop_result != SERVICE_SUCCESS) + s->stop_result = SERVICE_FAILURE_RESOURCES; service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true); - else if (s->state == SERVICE_RELOAD) { + break; + case SERVICE_RELOAD: s->reload_result = SERVICE_FAILURE_RESOURCES; service_enter_running(s, SERVICE_SUCCESS); - } else + break; + default: service_enter_stop(s, SERVICE_FAILURE_RESOURCES); + break; + } } static void service_run_next_main(Service *s) { @@ -2126,6 +2146,7 @@ static int service_start(Unit *u) { s->result = SERVICE_SUCCESS; s->reload_result = SERVICE_SUCCESS; + s->stop_result = SERVICE_SUCCESS; s->main_pid_known = false; s->main_pid_alien = false; s->forbid_restart = false; @@ -2286,6 +2307,7 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { unit_serialize_item(u, f, "state", service_state_to_string(s->state)); unit_serialize_item(u, f, "result", service_result_to_string(s->result)); unit_serialize_item(u, f, "reload-result", service_result_to_string(s->reload_result)); + unit_serialize_item(u, f, "stop-result", service_result_to_string(s->stop_result)); if (s->control_pid > 0) unit_serialize_item_format(u, f, "control-pid", PID_FMT, s->control_pid); @@ -2496,6 +2518,15 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, else if (f != SERVICE_SUCCESS) s->reload_result = f; + } else if (streq(key, "stop-result")) { + ServiceResult f; + + f = service_result_from_string(value); + if (f < 0) + log_unit_debug(u, "Failed to parse stop result value: %s", value); + else if (f != SERVICE_SUCCESS) + s->stop_result = f; + } else if (streq(key, "control-pid")) { pid_t pid; @@ -3134,6 +3165,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { break; case SERVICE_STOP: + s->stop_result = f; service_enter_signal(s, SERVICE_STOP_SIGTERM, f); break; @@ -3149,6 +3181,10 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { break; case SERVICE_STOP_POST: + if (s->stop_result == SERVICE_SUCCESS) + s->stop_result = f; + /* Fall through */ + case SERVICE_FINAL_SIGTERM: case SERVICE_FINAL_SIGKILL: if (main_pid_good(s) <= 0) @@ -3211,6 +3247,7 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us case SERVICE_STOP: log_unit_warning(UNIT(s), "Stopping timed out. Terminating."); + s->stop_result = SERVICE_FAILURE_TIMEOUT; service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT); break; @@ -3220,6 +3257,9 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us break; case SERVICE_STOP_SIGTERM: + if (s->stop_result == SERVICE_SUCCESS) + s->stop_result = SERVICE_FAILURE_TIMEOUT; + if (s->kill_context.send_sigkill) { log_unit_warning(UNIT(s), "State 'stop-sigterm' timed out. Killing."); service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_TIMEOUT); @@ -3240,6 +3280,9 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us break; case SERVICE_STOP_POST: + if (s->stop_result == SERVICE_SUCCESS) + s->stop_result = SERVICE_FAILURE_TIMEOUT; + log_unit_warning(UNIT(s), "State 'stop-post' timed out. Terminating."); service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_TIMEOUT); break; @@ -3589,6 +3632,7 @@ static void service_reset_failed(Unit *u) { s->result = SERVICE_SUCCESS; s->reload_result = SERVICE_SUCCESS; + s->stop_result = SERVICE_SUCCESS; s->n_restarts = 0; s->flush_n_restarts = false; } diff --git a/src/core/service.h b/src/core/service.h index 0ac8bc9a675ad..fb5781ca6289e 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -155,9 +155,10 @@ struct Service { bool remain_after_exit; bool guess_main_pid; - /* If we shut down, remember why */ + /* If we fail, remember why */ ServiceResult result; ServiceResult reload_result; + ServiceResult stop_result; bool main_pid_known:1; bool main_pid_alien:1; diff --git a/src/core/slice.c b/src/core/slice.c index ed5d3fd701a02..88890fca64c72 100644 --- a/src/core/slice.c +++ b/src/core/slice.c @@ -54,7 +54,7 @@ static void slice_set_state(Slice *t, SliceState state) { slice_state_to_string(old_state), slice_state_to_string(state)); - unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], true); + unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], true, true); } static int slice_add_parent_slice(Slice *s) { diff --git a/src/core/socket.c b/src/core/socket.c index ca59a13b66315..1655625ed9a94 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1686,7 +1686,7 @@ static void socket_set_state(Socket *s, SocketState state) { if (state != old_state) log_unit_debug(UNIT(s), "Changed %s -> %s", socket_state_to_string(old_state), socket_state_to_string(state)); - unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], true); + unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], true, true); } static int socket_coldplug(Unit *u) { diff --git a/src/core/swap.c b/src/core/swap.c index e839c26141158..dec5aa4184fac 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -504,7 +504,7 @@ static void swap_set_state(Swap *s, SwapState state) { if (state != old_state) log_unit_debug(UNIT(s), "Changed %s -> %s", swap_state_to_string(old_state), swap_state_to_string(state)); - unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], true); + unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], true, true); /* If there other units for the same device node have a job queued it might be worth checking again if it is runnable diff --git a/src/core/target.c b/src/core/target.c index 2a58dd394d098..def0b5b029249 100644 --- a/src/core/target.c +++ b/src/core/target.c @@ -43,7 +43,7 @@ static void target_set_state(Target *t, TargetState state) { target_state_to_string(old_state), target_state_to_string(state)); - unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], true); + unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], true, true); } static int target_add_default_dependencies(Target *t) { diff --git a/src/core/timer.c b/src/core/timer.c index 701949fd60df5..75aed689a1a67 100644 --- a/src/core/timer.c +++ b/src/core/timer.c @@ -268,7 +268,7 @@ static void timer_set_state(Timer *t, TimerState state) { if (state != old_state) log_unit_debug(UNIT(t), "Changed %s -> %s", timer_state_to_string(old_state), timer_state_to_string(state)); - unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], true); + unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], true, true); } static void timer_enter_waiting(Timer *t, bool initial); diff --git a/src/core/unit.c b/src/core/unit.c index a42c667754af8..1c94ed83acae0 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -1950,7 +1950,7 @@ void unit_trigger_notify(Unit *u) { UNIT_VTABLE(other)->trigger_notify(other, u); } -void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success) { +void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success, bool stop_success) { Manager *m; bool unexpected; @@ -2055,6 +2055,15 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su break; case JOB_STOP: + if (UNIT_IS_INACTIVE_OR_FAILED(ns)) + job_finish_and_invalidate(u->job, stop_success ? JOB_DONE : JOB_FAILED, true, false); + else if (u->job->state == JOB_RUNNING && ns != UNIT_DEACTIVATING) { + unexpected = true; + job_finish_and_invalidate(u->job, JOB_FAILED, true, false); + } + + break; + case JOB_RESTART: case JOB_TRY_RESTART: diff --git a/src/core/unit.h b/src/core/unit.h index a384f264ee412..673203d8bdff0 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -538,7 +538,7 @@ int unit_reload(Unit *u); int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error); int unit_kill_common(Unit *u, KillWho who, int signo, pid_t main_pid, pid_t control_pid, sd_bus_error *error); -void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success); +void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool stop_success, bool reload_success); int unit_watch_pid(Unit *u, pid_t pid); void unit_unwatch_pid(Unit *u, pid_t pid); diff --git a/test/TEST-03-JOBS/test-jobs.sh b/test/TEST-03-JOBS/test-jobs.sh index 4c954d039739b..a4fa89e98f3fd 100755 --- a/test/TEST-03-JOBS/test-jobs.sh +++ b/test/TEST-03-JOBS/test-jobs.sh @@ -77,4 +77,48 @@ END_SEC=$(date -u '+%s') ELAPSED=$(($END_SEC-$START_SEC)) [[ "$ELAPSED" -ge 5 ]] && [[ "$ELAPSED" -le 7 ]] || exit 1 +# Test job failure +cat < /run/systemd/system/fail-start.service +[Unit] +Description=Fail to start +[Service] +Type=oneshot +ExecStart=/bin/sh -c false +EOF + +systemctl start fail-start.service && exit 1 +systemctl is-failed fail-start.service || exit 1 + +systemctl reset-failed fail-start.service || exit 1 +systemctl is-failed fail-start.service && exit 1 + +cat < /run/systemd/system/fail-reload.service +[Unit] +Description=Fail to reload +[Service] +Type=oneshot +RemainAfterExit=true +ExecStart=/bin/sh -c true +ExecReload=/bin/sh -c false +EOF + +systemctl start fail-reload.service || exit 1 +systemctl reload fail-reload.service && exit 1 +# reload failure doesn't fail the service +systemctl is-failed fail-reload.service && exit 1 + +cat < /run/systemd/system/fail-stop.service +[Unit] +Description=Fail to stop +[Service] +Type=oneshot +RemainAfterExit=true +ExecStart=/bin/sh -c true +ExecStop=/bin/sh -c false +EOF + +systemctl start fail-stop.service || exit 1 +systemctl stop fail-stop.service && exit 1 +systemctl is-failed fail-stop.service || exit 1 + touch /testok