From 0879adb6c1a7b9a915983151f195c4e40e7ae77b Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 28 Jun 2017 23:29:29 +0200 Subject: [PATCH 1/2] [3.6] bpo-30703: Improve signal delivery (GH-2415) * Improve signal delivery Avoid using Py_AddPendingCall from signal handler, to avoid calling signal-unsafe functions. * Remove unused function * Improve comments * Add stress test * Adapt for --without-threads * Add second stress test * Add NEWS blurb * Address comments @haypo. (cherry picked from commit c08177a1ccad2ed0d50898c2731b518c631aed14) --- Include/ceval.h | 1 + Lib/test/test_signal.py | 97 +++++++++++++++++++ .../2017-06-28-21-07-32.bpo-30703.ULCdFp.rst | 6 ++ Modules/signalmodule.c | 30 +++--- Python/ceval.c | 75 ++++++++++---- 5 files changed, 172 insertions(+), 37 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst diff --git a/Include/ceval.h b/Include/ceval.h index 1e482729a1cc98..38d470999f7350 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -46,6 +46,7 @@ PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf); #endif PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg); +PyAPI_FUNC(void) _PyEval_SignalReceived(void); PyAPI_FUNC(int) Py_MakePendingCalls(void); /* Protection against deeply nested recursive calls diff --git a/Lib/test/test_signal.py b/Lib/test/test_signal.py index aa2b8bb48cd328..6e6dbf5245588e 100644 --- a/Lib/test/test_signal.py +++ b/Lib/test/test_signal.py @@ -3,7 +3,9 @@ from contextlib import closing import enum import gc +import os import pickle +import random import select import signal import socket @@ -955,6 +957,101 @@ def handler(signum, frame): (exitcode, stdout)) +class StressTest(unittest.TestCase): + """ + Stress signal delivery, especially when a signal arrives in + the middle of recomputing the signal state or executing + previously tripped signal handlers. + """ + + @unittest.skipUnless(hasattr(signal, "setitimer"), + "test needs setitimer()") + def test_stress_delivery_dependent(self): + """ + This test uses dependent signal handlers. + """ + N = 10000 + sigs = [] + + def first_handler(signum, frame): + # 1e-6 is the minimum non-zero value for `setitimer()`. + # Choose a random delay so as to improve chances of + # triggering a race condition. Ideally the signal is received + # when inside critical signal-handling routines such as + # Py_MakePendingCalls(). + signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5) + + def second_handler(signum=None, frame=None): + sigs.append(signum) + + def setsig(signum, handler): + old_handler = signal.signal(signum, handler) + self.addCleanup(signal.signal, signum, old_handler) + + # Here on Linux, SIGPROF > SIGALRM > SIGUSR1. By using both + # ascending and descending sequences (SIGUSR1 then SIGALRM, + # SIGPROF then SIGALRM), we maximize chances of hitting a bug. + setsig(signal.SIGPROF, first_handler) + setsig(signal.SIGUSR1, first_handler) + setsig(signal.SIGALRM, second_handler) # for ITIMER_REAL + + expected_sigs = 0 + deadline = time.time() + 15.0 + + while expected_sigs < N: + os.kill(os.getpid(), signal.SIGPROF) + expected_sigs += 1 + # Wait for handlers to run to avoid signal coalescing + while len(sigs) < expected_sigs and time.time() < deadline: + time.sleep(1e-5) + + os.kill(os.getpid(), signal.SIGUSR1) + expected_sigs += 1 + while len(sigs) < expected_sigs and time.time() < deadline: + time.sleep(1e-5) + + # All ITIMER_REAL signals should have been delivered to the + # Python handler + self.assertEqual(len(sigs), N, "Some signals were lost") + + @unittest.skipUnless(hasattr(signal, "setitimer"), + "test needs setitimer()") + def test_stress_delivery_simultaneous(self): + """ + This test uses simultaneous signal handlers. + """ + N = 10000 + sigs = [] + + def handler(signum, frame): + sigs.append(signum) + + def setsig(signum, handler): + old_handler = signal.signal(signum, handler) + self.addCleanup(signal.signal, signum, old_handler) + + setsig(signal.SIGUSR1, handler) + setsig(signal.SIGALRM, handler) # for ITIMER_REAL + + expected_sigs = 0 + deadline = time.time() + 15.0 + + while expected_sigs < N: + # Hopefully the SIGALRM will be received somewhere during + # initial processing of SIGUSR1. + signal.setitimer(signal.ITIMER_REAL, 1e-6 + random.random() * 1e-5) + os.kill(os.getpid(), signal.SIGUSR1) + + expected_sigs += 2 + # Wait for handlers to run to avoid signal coalescing + while len(sigs) < expected_sigs and time.time() < deadline: + time.sleep(1e-5) + + # All ITIMER_REAL signals should have been delivered to the + # Python handler + self.assertEqual(len(sigs), N, "Some signals were lost") + + def tearDownModule(): support.reap_children() diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst b/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst new file mode 100644 index 00000000000000..9adeb450aeeb72 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2017-06-28-21-07-32.bpo-30703.ULCdFp.rst @@ -0,0 +1,6 @@ +Improve signal delivery. + +Avoid using Py_AddPendingCall from signal handler, to avoid calling signal- +unsafe functions. The tests I'm adding here fail without the rest of the +patch, on Linux and OS X. This means our signal delivery logic had defects +(some signals could be lost). diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c index 55e4c2a00e9ef3..30990f89c3eeaf 100644 --- a/Modules/signalmodule.c +++ b/Modules/signalmodule.c @@ -192,12 +192,6 @@ The default handler for SIGINT installed by Python.\n\ It raises KeyboardInterrupt."); -static int -checksignals_witharg(void * unused) -{ - return PyErr_CheckSignals(); -} - static int report_wakeup_write_error(void *data) { @@ -248,17 +242,15 @@ trip_signal(int sig_num) Handlers[sig_num].tripped = 1; - if (!is_tripped) { - /* Set is_tripped after setting .tripped, as it gets - cleared in PyErr_CheckSignals() before .tripped. */ - is_tripped = 1; - Py_AddPendingCall(checksignals_witharg, NULL); - } + /* Set is_tripped after setting .tripped, as it gets + cleared in PyErr_CheckSignals() before .tripped. */ + is_tripped = 1; + _PyEval_SignalReceived(); /* And then write to the wakeup fd *after* setting all the globals and - doing the Py_AddPendingCall. We used to write to the wakeup fd and then - set the flag, but this allowed the following sequence of events - (especially on windows, where trip_signal runs in a new thread): + doing the _PyEval_SignalReceived. We used to write to the wakeup fd + and then set the flag, but this allowed the following sequence of events + (especially on windows, where trip_signal may run in a new thread): - main thread blocks on select([wakeup_fd], ...) - signal arrives @@ -293,6 +285,8 @@ trip_signal(int sig_num) wakeup.send_err_set = 1; wakeup.send_errno = errno; wakeup.send_win_error = GetLastError(); + /* Py_AddPendingCall() isn't signal-safe, but we + still use it for this exceptional case. */ Py_AddPendingCall(report_wakeup_send_error, NULL); } } @@ -306,6 +300,8 @@ trip_signal(int sig_num) rc = _Py_write_noraise(fd, &byte, 1); if (rc < 0) { + /* Py_AddPendingCall() isn't signal-safe, but we + still use it for this exceptional case. */ Py_AddPendingCall(report_wakeup_write_error, (void *)(intptr_t)errno); } @@ -1555,8 +1551,10 @@ PyErr_CheckSignals(void) arglist); Py_DECREF(arglist); } - if (!result) + if (!result) { + is_tripped = 1; return -1; + } Py_DECREF(result); } diff --git a/Python/ceval.c b/Python/ceval.c index ea79f5f2b57f8f..bf9103dd9aa4e3 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -195,6 +195,15 @@ PyEval_GetCallStats(PyObject *self) do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0) +/* This single variable consolidates all requests to break out of the fast path + in the eval loop. */ +static _Py_atomic_int eval_breaker = {0}; +/* Request for running pending calls. */ +static _Py_atomic_int pendingcalls_to_do = {0}; +/* Request for looking at the `async_exc` field of the current thread state. + Guarded by the GIL. */ +static int pending_async_exc = 0; + #ifdef WITH_THREAD #ifdef HAVE_ERRNO_H @@ -204,16 +213,8 @@ PyEval_GetCallStats(PyObject *self) static PyThread_type_lock pending_lock = 0; /* for pending calls */ static long main_thread = 0; -/* This single variable consolidates all requests to break out of the fast path - in the eval loop. */ -static _Py_atomic_int eval_breaker = {0}; /* Request for dropping the GIL */ static _Py_atomic_int gil_drop_request = {0}; -/* Request for running pending calls. */ -static _Py_atomic_int pendingcalls_to_do = {0}; -/* Request for looking at the `async_exc` field of the current thread state. - Guarded by the GIL. */ -static int pending_async_exc = 0; #include "ceval_gil.h" @@ -326,9 +327,6 @@ PyEval_ReInitThreads(void) _PyThreadState_DeleteExcept(current_tstate); } -#else -static _Py_atomic_int eval_breaker = {0}; -static int pending_async_exc = 0; #endif /* WITH_THREAD */ /* This function is used to signal that async exceptions are waiting to be @@ -403,6 +401,15 @@ PyEval_RestoreThread(PyThreadState *tstate) #endif */ +void +_PyEval_SignalReceived(void) +{ + /* bpo-30703: Function called when the C signal handler of Python gets a + signal. We cannot queue a callback using Py_AddPendingCall() since + that function is not async-signal-safe. */ + SIGNAL_PENDING_CALLS(); +} + #ifdef WITH_THREAD /* The WITH_THREAD implementation is thread-safe. It allows @@ -467,6 +474,8 @@ Py_MakePendingCalls(void) int i; int r = 0; + assert(PyGILState_Check()); + if (!pending_lock) { /* initial allocation of the lock */ pending_lock = PyThread_allocate_lock(); @@ -481,6 +490,16 @@ Py_MakePendingCalls(void) if (busy) return 0; busy = 1; + /* unsignal before starting to call callbacks, so that any callback + added in-between re-signals */ + UNSIGNAL_PENDING_CALLS(); + + /* Python signal handler doesn't really queue a callback: it only signals + that a signal was received, see _PyEval_SignalReceived(). */ + if (PyErr_CheckSignals() < 0) { + goto error; + } + /* perform a bounded number of calls, in case of recursion */ for (i=0; i Date: Thu, 29 Jun 2017 16:40:14 +0200 Subject: [PATCH 2/2] bpo-30796: Fix failures in signal delivery stress test (#2488) * bpo-30796: Fix failures in signal delivery stress test setitimer() can have a poor minimum resolution on some machines, this would make the test reach its deadline (and a stray signal could then kill a subsequent test). * Make sure to clear the itimer after the test --- Lib/test/test_signal.py | 66 +++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_signal.py b/Lib/test/test_signal.py index 6e6dbf5245588e..8d6386793d4a57 100644 --- a/Lib/test/test_signal.py +++ b/Lib/test/test_signal.py @@ -9,7 +9,7 @@ import select import signal import socket -import struct +import statistics import subprocess import traceback import sys, os, time, errno @@ -964,13 +964,55 @@ class StressTest(unittest.TestCase): previously tripped signal handlers. """ + def setsig(self, signum, handler): + old_handler = signal.signal(signum, handler) + self.addCleanup(signal.signal, signum, old_handler) + + def measure_itimer_resolution(self): + N = 20 + times = [] + + def handler(signum=None, frame=None): + if len(times) < N: + times.append(time.perf_counter()) + # 1 µs is the smallest possible timer interval, + # we want to measure what the concrete duration + # will be on this platform + signal.setitimer(signal.ITIMER_REAL, 1e-6) + + self.addCleanup(signal.setitimer, signal.ITIMER_REAL, 0) + self.setsig(signal.SIGALRM, handler) + handler() + while len(times) < N: + time.sleep(1e-3) + + durations = [times[i+1] - times[i] for i in range(len(times) - 1)] + med = statistics.median(durations) + if support.verbose: + print("detected median itimer() resolution: %.6f s." % (med,)) + return med + + def decide_itimer_count(self): + # Some systems have poor setitimer() resolution (for example + # measured around 20 ms. on FreeBSD 9), so decide on a reasonable + # number of sequential timers based on that. + reso = self.measure_itimer_resolution() + if reso <= 1e-4: + return 10000 + elif reso <= 1e-2: + return 100 + else: + self.skipTest("detected itimer resolution (%.3f s.) too high " + "(> 10 ms.) on this platform (or system too busy)" + % (reso,)) + @unittest.skipUnless(hasattr(signal, "setitimer"), "test needs setitimer()") def test_stress_delivery_dependent(self): """ This test uses dependent signal handlers. """ - N = 10000 + N = self.decide_itimer_count() sigs = [] def first_handler(signum, frame): @@ -984,16 +1026,12 @@ def first_handler(signum, frame): def second_handler(signum=None, frame=None): sigs.append(signum) - def setsig(signum, handler): - old_handler = signal.signal(signum, handler) - self.addCleanup(signal.signal, signum, old_handler) - # Here on Linux, SIGPROF > SIGALRM > SIGUSR1. By using both # ascending and descending sequences (SIGUSR1 then SIGALRM, # SIGPROF then SIGALRM), we maximize chances of hitting a bug. - setsig(signal.SIGPROF, first_handler) - setsig(signal.SIGUSR1, first_handler) - setsig(signal.SIGALRM, second_handler) # for ITIMER_REAL + self.setsig(signal.SIGPROF, first_handler) + self.setsig(signal.SIGUSR1, first_handler) + self.setsig(signal.SIGALRM, second_handler) # for ITIMER_REAL expected_sigs = 0 deadline = time.time() + 15.0 @@ -1020,18 +1058,14 @@ def test_stress_delivery_simultaneous(self): """ This test uses simultaneous signal handlers. """ - N = 10000 + N = self.decide_itimer_count() sigs = [] def handler(signum, frame): sigs.append(signum) - def setsig(signum, handler): - old_handler = signal.signal(signum, handler) - self.addCleanup(signal.signal, signum, old_handler) - - setsig(signal.SIGUSR1, handler) - setsig(signal.SIGALRM, handler) # for ITIMER_REAL + self.setsig(signal.SIGUSR1, handler) + self.setsig(signal.SIGALRM, handler) # for ITIMER_REAL expected_sigs = 0 deadline = time.time() + 15.0