-
-
Notifications
You must be signed in to change notification settings - Fork 608
/
exception_sink.py
454 lines (378 loc) · 18.6 KB
/
exception_sink.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
# Copyright 2018 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
import datetime
import faulthandler
import logging
import os
import signal
import sys
import threading
import traceback
from contextlib import contextmanager
from typing import Callable, Dict, Iterator, Optional
import setproctitle
from pants.util.dirutil import safe_mkdir, safe_open
from pants.util.meta import classproperty
from pants.util.osutil import Pid
logger = logging.getLogger(__name__)
class SignalHandler:
"""A specification for how to handle a fixed set of nonfatal signals.
This is subclassed and registered with ExceptionSink.reset_signal_handler() whenever the signal
handling behavior is modified for different pants processes, for example in the remote client when
pantsd is enabled. The default behavior is to exit "gracefully" by leaving a detailed log of which
signal was received, then exiting with failure.
Note that the terminal will convert a ctrl-c from the user into a SIGINT.
"""
@property
def signal_handler_mapping(self) -> Dict[signal.Signals, Callable]:
"""A dict mapping (signal number) -> (a method handling the signal)."""
# Could use an enum here, but we never end up doing any matching on the specific signal value,
# instead just iterating over the registered signals to set handlers, so a dict is probably
# better.
return {
signal.SIGINT: self._handle_sigint_if_enabled,
signal.SIGQUIT: self.handle_sigquit,
signal.SIGTERM: self.handle_sigterm,
}
def __init__(self, *, pantsd_instance: bool):
self._ignore_sigint_lock = threading.Lock()
self._ignoring_sigint = False
self._pantsd_instance = pantsd_instance
def _handle_sigint_if_enabled(self, signum: int, _frame):
with self._ignore_sigint_lock:
if not self._ignoring_sigint:
self.handle_sigint(signum, _frame)
def _toggle_ignoring_sigint(self, toggle: bool) -> None:
if not self._pantsd_instance:
with self._ignore_sigint_lock:
self._ignoring_sigint = toggle
def handle_sigint(self, signum: int, _frame):
ExceptionSink._signal_sent = signum
raise KeyboardInterrupt("User interrupted execution with control-c!")
# TODO(#7406): figure out how to let sys.exit work in a signal handler instead of having to raise
# this exception!
class SignalHandledNonLocalExit(Exception):
"""Raised in handlers for non-fatal signals to overcome Python limitations.
When waiting on a subprocess and in a signal handler, sys.exit appears to be ignored, and
causes the signal handler to return. We want to (eventually) exit after these signals, not
ignore them, so we raise this exception instead and check it in our sys.excepthook override.
"""
def __init__(self, signum, signame):
self.signum = signum
self.signame = signame
self.traceback_lines = traceback.format_stack()
super(SignalHandler.SignalHandledNonLocalExit, self).__init__()
if "I/O operation on closed file" in self.traceback_lines:
logger.debug(
"SignalHandledNonLocalExit: unexpected appearance of "
"'I/O operation on closed file' in traceback"
)
def handle_sigquit(self, signum, _frame):
ExceptionSink._signal_sent = signum
raise self.SignalHandledNonLocalExit(signum, "SIGQUIT")
def handle_sigterm(self, signum, _frame):
ExceptionSink._signal_sent = signum
raise self.SignalHandledNonLocalExit(signum, "SIGTERM")
class ExceptionSink:
"""A mutable singleton object representing where exceptions should be logged to."""
# NB: see the bottom of this file where we call reset_log_location() and other mutators in order
# to properly setup global state.
_log_dir = None
# Where to log stacktraces to in a SIGUSR2 handler.
_interactive_output_stream = None
# Whether to print a stacktrace in any fatal error message printed to the terminal.
_should_print_backtrace_to_terminal = True
_signal_handler: SignalHandler = SignalHandler(pantsd_instance=False)
# These persistent open file descriptors are kept so the signal handler can do almost no work
# (and lets faulthandler figure out signal safety).
_pid_specific_error_fileobj = None
_shared_error_fileobj = None
_signal_sent: Optional[int] = None
def __new__(cls, *args, **kwargs):
raise TypeError("Instances of {} are not allowed to be constructed!".format(cls.__name__))
class ExceptionSinkError(Exception):
pass
@classmethod
def signal_sent(cls) -> Optional[int]:
return cls._signal_sent
@classmethod
def reset_should_print_backtrace_to_terminal(cls, should_print_backtrace):
"""Set whether a backtrace gets printed to the terminal error stream on a fatal error.
Class state:
- Overwrites `cls._should_print_backtrace_to_terminal`.
"""
cls._should_print_backtrace_to_terminal = should_print_backtrace
# All reset_* methods are ~idempotent!
@classmethod
def reset_log_location(cls, new_log_location: str) -> None:
"""Re-acquire file handles to error logs based in the new location.
Class state:
- Overwrites `cls._log_dir`, `cls._pid_specific_error_fileobj`, and
`cls._shared_error_fileobj`.
OS state:
- May create a new directory.
- Overwrites signal handlers for many fatal and non-fatal signals (but not SIGUSR2).
:raises: :class:`ExceptionSink.ExceptionSinkError` if the directory does not exist or is not
writable.
"""
# We could no-op here if the log locations are the same, but there's no reason not to have the
# additional safety of re-acquiring file descriptors each time (and erroring out early if the
# location is no longer writable).
try:
safe_mkdir(new_log_location)
except Exception as e:
raise cls.ExceptionSinkError(
"The provided log location path at '{}' is not writable or could not be created: {}.".format(
new_log_location, str(e)
),
e,
)
pid = os.getpid()
pid_specific_log_path = cls.exceptions_log_path(for_pid=pid, in_dir=new_log_location)
shared_log_path = cls.exceptions_log_path(in_dir=new_log_location)
assert pid_specific_log_path != shared_log_path
try:
pid_specific_error_stream = safe_open(pid_specific_log_path, mode="w")
shared_error_stream = safe_open(shared_log_path, mode="a")
except Exception as e:
raise cls.ExceptionSinkError(
"Error opening fatal error log streams for log location '{}': {}".format(
new_log_location, str(e)
)
)
# NB: mutate process-global state!
if faulthandler.is_enabled():
logger.debug("re-enabling faulthandler")
# Call Py_CLEAR() on the previous error stream:
# https://github.com/vstinner/faulthandler/blob/master/faulthandler.c
faulthandler.disable()
# Send a stacktrace to this file if interrupted by a fatal error.
faulthandler.enable(file=pid_specific_error_stream, all_threads=True)
# NB: mutate the class variables!
cls._log_dir = new_log_location
cls._pid_specific_error_fileobj = pid_specific_error_stream
cls._shared_error_fileobj = shared_error_stream
@classmethod
def reset_interactive_output_stream(
cls, interactive_output_stream, override_faulthandler_destination=True
):
"""Class state:
- Overwrites `cls._interactive_output_stream`.
OS state:
- Overwrites the SIGUSR2 handler.
This method registers a SIGUSR2 handler, which permits a non-fatal `kill -31 <pants pid>` for
stacktrace retrieval. This is also where the the error message on fatal exit will be printed to.
"""
try:
# NB: mutate process-global state!
# This permits a non-fatal `kill -31 <pants pid>` for stacktrace retrieval.
if override_faulthandler_destination:
faulthandler.register(
signal.SIGUSR2, interactive_output_stream, all_threads=True, chain=False
)
# NB: mutate the class variables!
cls._interactive_output_stream = interactive_output_stream
except ValueError:
# Warn about "ValueError: IO on closed file" when the stream is closed.
cls._log_exception(
"Cannot reset interactive_output_stream -- stream (probably stderr) is closed"
)
@classproperty
def should_print_exception_stacktrace(cls):
return cls._should_print_backtrace_to_terminal
@classmethod
def exceptions_log_path(cls, for_pid=None, in_dir=None):
"""Get the path to either the shared or pid-specific fatal errors log file."""
if for_pid is None:
intermediate_filename_component = ""
else:
assert isinstance(for_pid, Pid)
intermediate_filename_component = ".{}".format(for_pid)
in_dir = in_dir or cls._log_dir
return os.path.join(
in_dir, ".pids", "exceptions{}.log".format(intermediate_filename_component)
)
@classmethod
def _log_exception(cls, msg):
"""Try to log an error message to this process's error log and the shared error log.
NB: Doesn't raise (logs an error instead).
"""
pid = os.getpid()
fatal_error_log_entry = cls._format_exception_message(msg, pid)
# We care more about this log than the shared log, so write to it first.
try:
cls._try_write_with_flush(cls._pid_specific_error_fileobj, fatal_error_log_entry)
except Exception as e:
logger.error(
"Error logging the message '{}' to the pid-specific file handle for {} at pid {}:\n{}".format(
msg, cls._log_dir, pid, e
)
)
# Write to the shared log.
try:
# TODO: we should probably guard this against concurrent modification by other pants
# subprocesses somehow.
cls._try_write_with_flush(cls._shared_error_fileobj, fatal_error_log_entry)
except Exception as e:
logger.error(
"Error logging the message '{}' to the shared file handle for {} at pid {}:\n{}".format(
msg, cls._log_dir, pid, e
)
)
@classmethod
def _try_write_with_flush(cls, fileobj, payload):
"""This method is here so that it can be patched to simulate write errors.
This is because mock can't patch primitive objects like file objects.
"""
fileobj.write(payload)
fileobj.flush()
@classmethod
def reset_signal_handler(cls, signal_handler: SignalHandler) -> SignalHandler:
"""Given a SignalHandler, uses the `signal` std library functionality to set the pants
process's signal handlers to those specified in the object.
Note that since this calls `signal.signal()`, it will crash if not the main thread. Returns
the previously-registered signal handler.
"""
for signum, handler in signal_handler.signal_handler_mapping.items():
signal.signal(signum, handler)
# Retry any system calls interrupted by any of the signals we just installed handlers for
# (instead of having them raise EINTR). siginterrupt(3) says this is the default behavior on
# Linux and OSX.
signal.siginterrupt(signum, False)
previous_signal_handler = cls._signal_handler
cls._signal_handler = signal_handler
return previous_signal_handler
@classmethod
@contextmanager
def trapped_signals(cls, new_signal_handler: SignalHandler) -> Iterator[None]:
"""A contextmanager which temporarily overrides signal handling.
NB: This method calls signal.signal(), which will crash if not called from the main thread!
"""
previous_signal_handler = cls.reset_signal_handler(new_signal_handler)
try:
yield
finally:
cls.reset_signal_handler(previous_signal_handler)
@classmethod
def toggle_ignoring_sigint(cls, toggle: bool) -> None:
"""This method is used to temporarily disable responding to the SIGINT signal sent by a
Ctrl-C in the terminal.
We currently only use this to implement disabling catching SIGINT while an
InteractiveProcess is running (where we want that process to catch it), and only when pantsd
is not enabled (if pantsd is enabled, the client will actually catch SIGINT and forward it
to the server, so we don't want the server process to ignore it.
"""
cls._signal_handler._toggle_ignoring_sigint(toggle)
@classmethod
def _iso_timestamp_for_now(cls):
return datetime.datetime.now().isoformat()
# NB: This includes a trailing newline, but no leading newline.
_EXCEPTION_LOG_FORMAT = """\
timestamp: {timestamp}
process title: {process_title}
sys.argv: {args}
pid: {pid}
{message}
"""
@classmethod
def _format_exception_message(cls, msg, pid):
return cls._EXCEPTION_LOG_FORMAT.format(
timestamp=cls._iso_timestamp_for_now(),
process_title=setproctitle.getproctitle(),
args=sys.argv,
pid=pid,
message=msg,
)
_traceback_omitted_default_text = "(backtrace omitted)"
@classmethod
def _format_traceback(cls, traceback_lines, should_print_backtrace):
if should_print_backtrace:
traceback_string = "\n{}".format("".join(traceback_lines))
else:
traceback_string = " {}".format(cls._traceback_omitted_default_text)
return traceback_string
_UNHANDLED_EXCEPTION_LOG_FORMAT = """\
Exception caught: ({exception_type}){backtrace}
Exception message: {exception_message}{maybe_newline}
"""
@classmethod
def _format_unhandled_exception_log(cls, exc, tb, add_newline, should_print_backtrace):
exc_type = type(exc)
exception_full_name = "{}.{}".format(exc_type.__module__, exc_type.__name__)
exception_message = str(exc) if exc else "(no message)"
maybe_newline = "\n" if add_newline else ""
return cls._UNHANDLED_EXCEPTION_LOG_FORMAT.format(
exception_type=exception_full_name,
backtrace=cls._format_traceback(
traceback_lines=traceback.format_tb(tb),
should_print_backtrace=should_print_backtrace,
),
exception_message=exception_message,
maybe_newline=maybe_newline,
)
@classmethod
def log_exception(cls, exc_class=None, exc=None, tb=None, add_newline=False):
"""Logs an unhandled exception to a variety of locations."""
exc_class = exc_class or sys.exc_info()[0]
exc = exc or sys.exc_info()[1]
tb = tb or sys.exc_info()[2]
# This exception was raised by a signal handler with the intent to exit the program.
if exc_class == SignalHandler.SignalHandledNonLocalExit:
return cls._handle_signal_gracefully(exc.signum, exc.signame, exc.traceback_lines)
extra_err_msg = None
try:
# Always output the unhandled exception details into a log file, including the traceback.
exception_log_entry = cls._format_unhandled_exception_log(
exc, tb, add_newline, should_print_backtrace=True
)
cls._log_exception(exception_log_entry)
except Exception as e:
extra_err_msg = "Additional error logging unhandled exception {}: {}".format(exc, e)
logger.error(extra_err_msg)
# Generate an unhandled exception report fit to be printed to the terminal.
logger.exception(exc)
_CATCHABLE_SIGNAL_ERROR_LOG_FORMAT = """\
Signal {signum} ({signame}) was raised. Exiting with failure.{formatted_traceback}
"""
@classmethod
def _handle_signal_gracefully(cls, signum, signame, traceback_lines):
"""Signal handler for non-fatal signals which raises or logs an error."""
# Extract the stack, and format an entry to be written to the exception log.
formatted_traceback = cls._format_traceback(
traceback_lines=traceback_lines, should_print_backtrace=True
)
signal_error_log_entry = cls._CATCHABLE_SIGNAL_ERROR_LOG_FORMAT.format(
signum=signum, signame=signame, formatted_traceback=formatted_traceback
)
# TODO: determine the appropriate signal-safe behavior here (to avoid writing to our file
# descriptors reentrantly, which raises an IOError).
# This method catches any exceptions raised within it.
cls._log_exception(signal_error_log_entry)
# Create a potentially-abbreviated traceback for the terminal or other interactive stream.
formatted_traceback_for_terminal = cls._format_traceback(
traceback_lines=traceback_lines,
should_print_backtrace=cls._should_print_backtrace_to_terminal,
)
terminal_log_entry = cls._CATCHABLE_SIGNAL_ERROR_LOG_FORMAT.format(
signum=signum, signame=signame, formatted_traceback=formatted_traceback_for_terminal
)
# Print the output via standard logging.
logger.error(terminal_log_entry)
# Setup global state such as signal handlers and sys.excepthook with probably-safe values at module
# import time.
# Set the log location for writing logs before bootstrap options are parsed.
ExceptionSink.reset_log_location(os.getcwd())
# NB: Mutate process-global state!
sys.excepthook = ExceptionSink.log_exception
# Sets a SIGUSR2 handler.
ExceptionSink.reset_interactive_output_stream(sys.stderr.buffer)
# Setup a default signal handler.
ExceptionSink.reset_signal_handler(SignalHandler(pantsd_instance=False))
# Set whether to print stacktraces on exceptions or signals during import time.
# NB: This will be overridden by bootstrap options in PantsRunner, so we avoid printing out a full
# stacktrace when a user presses control-c during import time unless the environment variable is set
# to explicitly request it. The exception log will have any stacktraces regardless so this should
# not hamper debugging.
ExceptionSink.reset_should_print_backtrace_to_terminal(
should_print_backtrace=os.environ.get("PANTS_PRINT_EXCEPTION_STACKTRACE", "True") == "True"
)