Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fatal error logging followup fixes #6610

Merged
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/python/pants/base/BUILD
Expand Up @@ -74,7 +74,9 @@ python_library(
sources = ['exception_sink.py'],
dependencies = [
'3rdparty/python:faulthandler',
'3rdparty/python:setproctitle',
'src/python/pants/util:dirutil',
':build_environment',
':exiter',
],
)
Expand Down
57 changes: 49 additions & 8 deletions src/python/pants/base/exception_sink.py
Expand Up @@ -13,6 +13,9 @@
import traceback
from builtins import object, str

import setproctitle

from pants.base.build_environment import get_buildroot
from pants.base.exiter import Exiter
from pants.util.dirutil import safe_mkdir, safe_open
from pants.util.meta import classproperty
Expand All @@ -33,6 +36,10 @@ class ExceptionSink(object):
_exiter = None
# Where to log stacktraces to in a SIGUSR2 handler.
_interactive_output_stream = None
# Copy of the applicable bootstrap option values for the current process. Used in error logs for
# debuggability, and to determine whether to print the exception stacktrace to the terminal on
# exit.
_bootstrap_option_values = None

# These persistent open file descriptors are kept so the signal handler can do almost no work
# (and lets faulthandler figure out signal safety).
Expand Down Expand Up @@ -70,6 +77,18 @@ def __new__(cls, *args, **kwargs):

class ExceptionSinkError(Exception): pass

@classmethod
def reset_bootstrap_options(cls, new_bootstrap_option_values):
"""Set the bootstrap option values held by this singleton.

The option values are used to provide more context in a fatal error log entry, and to determine
whether to print the exception stacktrace to the terminal on exit.

Class state:
- Overwrites `cls._bootstrap_option_values`.
"""
cls._bootstrap_option_values = new_bootstrap_option_values

# All reset_* methods are ~idempotent!
@classmethod
def reset_log_location(cls, new_log_location):
Expand Down Expand Up @@ -237,16 +256,24 @@ def _iso_timestamp_for_now(cls):
# NB: This includes a trailing newline, but no leading newline.
_EXCEPTION_LOG_FORMAT = """\
timestamp: {timestamp}
args: {args}
process title: {process_title}
sys.argv: {args}
bootstrap options: {bootstrap_options}
pid: {pid}
{message}
"""

@classmethod
def _format_exception_message(cls, msg, pid):
if cls._bootstrap_option_values:
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than individually poking at fields of the bootstrap options, it would be good to capture the fields you need in set_bootstrap_options, and to have defaults for those fields.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Truly fantastic idea. Will do.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

bootstrap_fmt = cls._bootstrap_option_values._debug_dump()
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to be a bit too noisy probably?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It totally is, and this is why the testing has the nonce thing at all. I think the first suggestion you described (to capture the relevant fields) is the one to go for here, that obviates the _debug_dump() method.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed!

else:
bootstrap_fmt = '<none>'
return cls._EXCEPTION_LOG_FORMAT.format(
timestamp=cls._iso_timestamp_for_now(),
process_title=setproctitle.getproctitle(),
args=sys.argv,
bootstrap_options=bootstrap_fmt,
pid=pid,
message=msg)

Expand Down Expand Up @@ -276,13 +303,28 @@ def _format_unhandled_exception_log(cls, exc, tb, add_newline, should_print_back
exception_message=exception_message,
maybe_newline=maybe_newline)

_EXIT_FAILURE_TERMINAL_MESSAGE_FORMAT = """\
timestamp: {timestamp}
{terminal_msg}
"""

@classmethod
def _exit_with_failure(cls, terminal_msg):
formatted_terminal_msg = cls._EXIT_FAILURE_TERMINAL_MESSAGE_FORMAT.format(
timestamp=cls._iso_timestamp_for_now(),
terminal_msg=terminal_msg or '')
# Exit with failure, printing a message to the terminal (or whatever the interactive stream is).
cls._exiter.exit(result=cls.UNHANDLED_EXCEPTION_EXIT_CODE,
msg=terminal_msg,
msg=formatted_terminal_msg,
out=cls._interactive_output_stream)

@classproperty
def _should_print_backtrace(cls):
if cls._bootstrap_option_values:
return cls._bootstrap_option_values.print_exception_stacktrace
else:
return True

@classmethod
def _log_unhandled_exception_and_exit(cls, exc_class=None, exc=None, tb=None, add_newline=False):
"""A sys.excepthook implementation which logs the error and exits with failure."""
Expand All @@ -294,7 +336,7 @@ def _log_unhandled_exception_and_exit(cls, exc_class=None, exc=None, tb=None, ad
try:
# Always output the unhandled exception details into a log file, including the traceback.
exception_log_entry = cls._format_unhandled_exception_log(exc, tb, add_newline,
should_print_backtrace=True)
should_print_backtrace=True)
cls.log_exception(exception_log_entry)
except Exception as e:
extra_err_msg = 'Additional error logging unhandled exception {}: {}'.format(exc, e)
Expand All @@ -304,7 +346,7 @@ def _log_unhandled_exception_and_exit(cls, exc_class=None, exc=None, tb=None, ad
# Exiter's should_print_backtrace field).
stderr_printed_error = cls._format_unhandled_exception_log(
exc, tb, add_newline,
should_print_backtrace=cls._exiter.should_print_backtrace)
should_print_backtrace=cls._should_print_backtrace)
if extra_err_msg:
stderr_printed_error = '{}\n{}'.format(stderr_printed_error, extra_err_msg)
cls._exit_with_failure(stderr_printed_error)
Expand Down Expand Up @@ -339,19 +381,18 @@ def handle_signal_gracefully(cls, signum, frame):

# Format a message to be printed to the terminal or other interactive stream, if applicable.
formatted_traceback_for_terminal = cls._format_traceback(
tb, should_print_backtrace=cls._exiter.should_print_backtrace and bool(tb))
tb, should_print_backtrace=cls._should_print_backtrace and bool(tb))
terminal_log_entry = cls._CATCHABLE_SIGNAL_ERROR_LOG_FORMAT.format(
signum=signum,
formatted_traceback=formatted_traceback_for_terminal)
cls._exit_with_failure(terminal_log_entry)



# Setup global state such as signal handlers and sys.excepthook with probably-safe values at module
# import time.
# Sets fatal signal handlers with reasonable defaults to catch errors early in startup.
ExceptionSink.reset_log_location(os.getcwd())
ExceptionSink.reset_log_location(os.path.join(get_buildroot(), '.pants.d'))
# Sets except hook.
ExceptionSink.reset_exiter(Exiter(print_backtraces=True))
ExceptionSink.reset_exiter(Exiter(exiter=sys.exit))
# Sets a SIGUSR2 handler.
ExceptionSink.reset_interactive_output_stream(sys.stderr)
5 changes: 1 addition & 4 deletions src/python/pants/base/exiter.py
Expand Up @@ -26,19 +26,16 @@ class Exiter(object):
4) Call Exiter.exit(), Exiter.exit_and_fail() or exiter_inst() when you wish to exit the runtime.
"""

def __init__(self, exiter=sys.exit, print_backtraces=True):
def __init__(self, exiter=sys.exit):
"""
:param func exiter: A function to be called to conduct the final exit of the runtime. (Optional)
:param bool print_backtraces: Whether or not to print backtraces by default. Can be
overridden by Exiter.apply_options(). (Optional)
"""
# Since we have some exit paths that run via the sys.excepthook,
# symbols we use can become garbage collected before we use them; ie:
# we can find `sys` and `traceback` are `None`. As a result we capture
# all symbols we need here to ensure we function in excepthook context.
# See: http://stackoverflow.com/questions/2572172/referencing-other-modules-in-atexit
self._exit = exiter
self.should_print_backtrace = print_backtraces

def __call__(self, *args, **kwargs):
"""Map class calls to self.exit() to support sys.exit() fungibility."""
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/bin/pants_runner.py
Expand Up @@ -32,12 +32,12 @@ def __init__(self, exiter, args=None, env=None, start_time=None):
self._start_time = start_time

def run(self):
ExceptionSink.reset_exiter(self._exiter)
options_bootstrapper = OptionsBootstrapper(env=self._env, args=self._args)
bootstrap_options = options_bootstrapper.get_bootstrap_options()
global_bootstrap_options = bootstrap_options.for_global_scope()
ExceptionSink.reset_bootstrap_options(global_bootstrap_options)
ExceptionSink.reset_log_location(global_bootstrap_options.pants_workdir)
self._exiter.should_print_backtrace = global_bootstrap_options.print_exception_stacktrace
ExceptionSink.reset_exiter(self._exiter)

if bootstrap_options.for_global_scope().enable_pantsd:
try:
Expand Down
3 changes: 3 additions & 0 deletions src/python/pants/option/option_value_container.py
Expand Up @@ -27,6 +27,9 @@ class OptionValueContainer(object):
def __init__(self):
self._value_map = {} # key -> either raw value or RankedValue wrapping the raw value.

def _debug_dump(self):
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding this for a test is fine, but it shouldn't be called in other consuming code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be removed if we avoid keeping a copy of all the bootstrap values.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed!

return '{}(<with value map = {!r}>)'.format(type(self).__name__, self._value_map)

def get_explicit_keys(self):
"""Returns the keys for any values that were set explicitly (via flag, config, or env var)."""
ret = []
Expand Down
6 changes: 2 additions & 4 deletions src/python/pants/pantsd/pants_daemon.py
Expand Up @@ -365,13 +365,11 @@ def run_sync(self):
with self._pantsd_logging() as log_stream:
# We don't have any stdio streams to log to anymore, but we can get tracebacks of the pantsd
# process by tailing the pantsd log and sending it SIGUSR2.
ExceptionSink.reset_exiter(Exiter(exiter=os._exit))
ExceptionSink.reset_interactive_output_stream(log_stream)
global_bootstrap_options = self._bootstrap_options.for_global_scope()
ExceptionSink.reset_bootstrap_options(global_bootstrap_options)
ExceptionSink.reset_log_location(global_bootstrap_options.pants_workdir)
pantsd_exiter = Exiter(
exiter=os._exit,
print_backtraces=global_bootstrap_options.print_exception_stacktrace)
ExceptionSink.reset_exiter(pantsd_exiter)
self._logger.info('pantsd starting, log level is {}'.format(self._log_level))

self._native.set_panic_handler()
Expand Down
1 change: 1 addition & 0 deletions tests/python/pants_test/base/BUILD
Expand Up @@ -216,6 +216,7 @@ python_tests(
'src/python/pants/util:dirutil',
'src/python/pants/util:osutil',
'tests/python/pants_test:test_base',
'tests/python/pants_test/option/util',
],
tags={'platform_specific_behavior'},
)
Expand Down
44 changes: 37 additions & 7 deletions tests/python/pants_test/base/test_exception_sink.py
Expand Up @@ -12,8 +12,10 @@
import mock

from pants.base.exception_sink import ExceptionSink
from pants.option.scope import GLOBAL_SCOPE
from pants.util.contextutil import temporary_dir
from pants.util.osutil import get_normalized_os_name
from pants_test.option.util.fakes import create_options
from pants_test.test_base import TestBase


Expand All @@ -25,8 +27,10 @@ class AnonymousSink(ExceptionSink): pass
return AnonymousSink

def test_default_log_location(self):
self.assertEqual(self._gen_sink_subclass()._log_dir,
os.getcwd())
# NB: get_buildroot() returns the temporary buildroot used for running this test, so we cheat
# and assume we are being run from the buildroot.
self.assertEqual(ExceptionSink._log_dir,
os.path.join(os.getcwd(), '.pants.d'))

def test_reset_log_location(self):
sink = self._gen_sink_subclass()
Expand Down Expand Up @@ -57,13 +61,23 @@ def test_set_invalid_log_location(self):
with self.assertRaisesRegexp(ExceptionSink.ExceptionSinkError, err_rx):
sink.reset_log_location('/')

def test_log_exception(self):
def _assert_log_exception(self, msg, fake_process_title, bootstrap_options=None):
sink = self._gen_sink_subclass()
pid = os.getpid()

with temporary_dir() as tmpdir:
# Check that tmpdir exists, and log an exception into that directory.
if bootstrap_options:
bs_option_values = create_options(bootstrap_options).for_global_scope()
sink.reset_bootstrap_options(bs_option_values)
sink.reset_log_location(tmpdir)
sink.log_exception('XXX')

with mock.patch('setproctitle.getproctitle', autospec=True, spec_set=True) \
as getproctitle_mock:
getproctitle_mock.return_value = fake_process_title
sink.log_exception(msg)
getproctitle_mock.assert_called_once()

# This should have created two log files, one specific to the current pid.
self.assertEqual(os.listdir(tmpdir), ['logs'])

Expand All @@ -76,17 +90,33 @@ def test_log_exception(self):
self.assertNotEqual(cur_process_error_log_path, shared_error_log_path)

# We only logged a single error, so the files should both contain only that single log entry.
if bootstrap_options is None:
msg_bootstrap_options = '<none>'
else:
msg_bootstrap_options = ('<fake options(<with value map = {!r}>)>'
.format(bootstrap_options[GLOBAL_SCOPE]))
err_rx = """\
timestamp: ([^\n]+)
args: ([^\n]+)
process title: {fake_process_title}
sys.argv: ([^\n]+)
bootstrap options: {bootstrap_options}
pid: {pid}
XXX
""".format(pid=re.escape(str(pid)))
{msg}
""".format(fake_process_title=re.escape(fake_process_title),
bootstrap_options=re.escape(msg_bootstrap_options),
pid=pid,
msg=msg)
with open(cur_process_error_log_path, 'r') as cur_pid_file:
self.assertRegexpMatches(cur_pid_file.read(), err_rx)
with open(shared_error_log_path, 'r') as shared_log_file:
self.assertRegexpMatches(shared_log_file.read(), err_rx)

def test_log_exception(self):
self._assert_log_exception(msg='XXX', fake_process_title='fake1', bootstrap_options=None)
self._assert_log_exception(msg='YYY', fake_process_title='fake2', bootstrap_options={
GLOBAL_SCOPE: { 'some_option_name': 'option_value' }
})

def test_backup_logging_on_fatal_error(self):
sink = self._gen_sink_subclass()
with self.captured_logging(level=logging.ERROR) as captured:
Expand Down