Skip to content

Commit

Permalink
fix bug 1306643: document signature generation pipeline
Browse files Browse the repository at this point in the history
This adds a command that generates a restructured text file, the generated
file, and tweaks the docs to include that in the signature generation
chapter.

This also documents the signature generation rules.
  • Loading branch information
willkg committed Aug 28, 2018
1 parent 0100d05 commit 700a7a9
Show file tree
Hide file tree
Showing 5 changed files with 251 additions and 15 deletions.
3 changes: 3 additions & 0 deletions docs/signaturegeneration.rst
Expand Up @@ -89,3 +89,6 @@ to `reprocess the affected signatures <https://github.com/adngdb/reprocess>`_.


.. include:: ../socorro/signature/siglists/README.rst


.. include:: ../socorro/signature/pipeline.rst
3 changes: 2 additions & 1 deletion socorro-cmd
Expand Up @@ -93,7 +93,8 @@ COMMANDS = [
Group(
'Miscellaneous', {
'showcommands': showcommands_cmd,
'signature': 'socorro.signature.cmd_signature.main'
'signature': 'socorro.signature.cmd_signature.main',
'signature-doc': 'socorro.signature.cmd_doc.main',
}
)
]
Expand Down
109 changes: 109 additions & 0 deletions socorro/signature/cmd_doc.py
@@ -0,0 +1,109 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import print_function

import argparse
import importlib
import re
import sys


# Description text handed to argparse.ArgumentParser in main().
DESCRIPTION = """
Generates documentation for the specified signature generation pipeline.
Outputs the documentation in restructured text format.
"""


def import_rules(rules):
    """Resolve a Python dotted path to the object it names.

    :arg rules: dotted path such as ``package.module.ATTRIBUTE``; the part
        before the last dot is imported as a module and the part after it is
        looked up as an attribute of that module

    :returns: the attribute found on the imported module

    """
    module_name, attribute = rules.rsplit('.', 1)
    return getattr(importlib.import_module(module_name), attribute)


def indent(text, prefix):
    """Put ``prefix`` in front of every line of ``text`` except the first.

    The first line is left unprefixed so the caller can place its own lead-in
    before it. Leading and trailing whitespace is stripped from the result.

    :arg text: the text to indent
    :arg prefix: the string inserted after every newline

    :returns: the indented, stripped text

    """
    continuation = '\n' + prefix
    return continuation.join(text.split('\n')).strip()


# Matches the (possibly empty) run of spaces and tabs at the start of a line.
LEAD_WHITESPACE = re.compile(r'^[ \t]*')


def dedent_docstring(text):
    """Strip the docstring's indentation from the lines of ``text``.

    The leading whitespace of the first non-blank line is used as the dedent
    prefix. If that first non-blank line has no leading whitespace, it is
    skipped and the next non-blank line is used instead. Every line that
    starts with the prefix has it removed; other lines are left untouched.

    :arg text: the docstring to dedent; ``None`` (an object without a
        docstring) is treated as an empty docstring

    :returns: the dedented text; ``''`` when ``text`` is ``None``; ``text``
        unchanged when there is no line to take the indentation from

    """
    # An object without a docstring has __doc__ set to None.
    if text is None:
        return ''

    text_lines = text.splitlines()

    # Figure out the indentation of all the non-blank lines to figure out how
    # much to dedent by
    leads = [
        LEAD_WHITESPACE.match(line).group(0)
        for line in text_lines
        if line.strip()
    ]

    # Skip a first non-blank line that has no indentation of its own.
    if leads and not leads[0]:
        leads = leads[1:]

    if not leads:
        return text

    # Let's use the first remaining non-empty line to dedent the text with.
    # It's possible this isn't a great idea. If that's the case, we can figure
    # out a different way to do it.
    dedent_str = leads[0]
    dedent_amount = len(dedent_str)

    return '\n'.join(
        line[dedent_amount:] if line.startswith(dedent_str) else line
        for line in text_lines
    )


def get_doc(cls):
    """Build the documentation text for a single rule.

    :arg cls: the rule to document; the name of its class and its docstring
        are used

    :returns: ``Rule: <class name>``, a blank line, and then the docstring
        run through ``dedent_docstring``

    """
    rule_name = cls.__class__.__name__
    rule_doc = dedent_docstring(cls.__doc__)
    return 'Rule: %s\n\n%s' % (rule_name, rule_doc)


def main(argv=None):
    """Generates documentation for signature generation pipeline

    Parses the ``pipeline`` and ``output`` arguments, imports the pipeline
    named by the dotted path, and writes a restructured text document to the
    output file: a header comment recording the command line, a title, and a
    numbered list with one entry per rule in the pipeline.

    :arg argv: list of command line arguments to parse; when ``None``,
        argparse reads them from ``sys.argv[1:]``

    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument(
        'pipeline',
        help='Python dotted path to rules pipeline to document'
    )
    parser.add_argument('output', help='output file')

    # parse_args(None) falls back to sys.argv[1:], so no branching is needed.
    args = parser.parse_args(argv)

    print('Generating documentation for %s in %s...' % (args.pipeline, args.output))

    rules = import_rules(args.pipeline)

    with open(args.output, 'w') as fp:
        # Header comment recording how the file was generated.
        fp.write('.. THIS IS AUTOGENERATED USING:\n')
        fp.write(' \n')
        fp.write(' %s\n' % (' '.join(sys.argv)))
        fp.write(' \n')
        fp.write('Signature generation rules pipeline\n')
        fp.write('===================================\n')
        fp.write('\n')
        fp.write('\n')
        fp.write(
            'This is the signature generation pipeline defined at ``%s``:\n' %
            args.pipeline
        )
        fp.write('\n')

        for i, rule in enumerate(rules):
            # Continuation lines are indented to line up with the text that
            # follows the list item number.
            li = '%s. ' % (i + 1)
            fp.write('%s%s\n' % (
                li,
                indent(get_doc(rule), ' ' * len(li))
            ))
            fp.write('\n')
96 changes: 96 additions & 0 deletions socorro/signature/pipeline.rst
@@ -0,0 +1,96 @@
.. THIS IS AUTOGENERATED USING:
./socorro-cmd signature-doc socorro.signature.generator.DEFAULT_PIPELINE socorro/signature/pipeline.rst
Signature generation rules pipeline
===================================


This is the signature generation pipeline defined at ``socorro.signature.generator.DEFAULT_PIPELINE``:

1. Rule: SignatureGenerationRule

Generates a signature based on stack frames.

For Java crashes, this generates a basic signature using stack frames.

For C/C++/Rust crashes, this generates a more robust signature using
normalized versions of stack frames augmented by the contents of the
signature lists.

Rough signature list rules (there are more details in the siglists README):

1. Walk the frames looking for a "signature sentinel" which becomes the
first item in the signature.
2. Continue walking frames.

1. If the frame is in the "irrelevant" list, ignore it and
continue.
2. If the frame is in the "prefix" list, add it to the signature
and continue.
3. If the frame isn't in either list, stop walking frames.

3. Signature is generated by joining those frames with " | " between
them.

This rule also generates the proto_signature which is the complete list
of normalized frames.

2. Rule: StackwalkerErrorSignatureRule

Appends minidump-stackwalker error to signature.

3. Rule: OOMSignature

Prepends ``OOM | <size>`` to signatures for OOM crashes.

See bug #1007530.

4. Rule: AbortSignature

Prepends abort message to signature.

See bug #803779.

5. Rule: SignatureShutdownTimeout

Replaces signature with async_shutdown_timeout message.

6. Rule: SignatureRunWatchDog

Prepends "shutdownhang" to signature for shutdown hang crashes.

7. Rule: SignatureIPCChannelError

Replaces the signature with IPC channel error.

8. Rule: SignatureIPCMessageName

Appends ipc_message_name to signature.

9. Rule: SignatureParentIDNotEqualsChildID

Stomp on the signature if MozCrashReason is ``parentBuildID != childBuildID``.

In the case where the assertion fails, then the parent buildid and the child buildid are
different. This causes a lot of strangeness particularly in symbolification, so the signatures
end up as junk. Instead, we want to bucket all these together so we replace the signature.

10. Rule: SignatureJitCategory

Replaces signature with JIT classification.

11. Rule: SigFixWhitespace

Fix whitespace in signatures.

This does the following:

* trims leading and trailing whitespace
* converts all non-space whitespace characters to space
* reduces consecutive spaces to a single space

12. Rule: SigTruncate

Truncates signatures down to SIGNATURE_MAX_LENGTH characters.

55 changes: 41 additions & 14 deletions socorro/signature/rules.py
Expand Up @@ -78,7 +78,7 @@ def _do_generate(self, source_list, hang_type, crashed_thread, delimiter):


class CSignatureTool(SignatureTool):
"""Generates signature from C/C++/Rust stacks
"""Generates signature from C/C++/Rust stacks.
This is the class for signature generation tools that work on breakpad
C/C++ stacks. It normalizes frames and then runs them through the siglists
Expand Down Expand Up @@ -419,7 +419,33 @@ def _do_generate(self, source, hang_type_unused=0, crashed_thread_unused=None, d


class SignatureGenerationRule(Rule):
"""Generates a signature based on stack frames.
For Java crashes, this generates a basic signature using stack frames.
For C/C++/Rust crashes, this generates a more robust signature using
normalized versions of stack frames augmented by the contents of the
signature lists.
Rough signature list rules (there are more details in the siglists README):
1. Walk the frames looking for a "signature sentinel" which becomes the
first item in the signature.
2. Continue walking frames.
1. If the frame is in the "irrelevant" list, ignore it and
continue.
2. If the frame is in the "prefix" list, add it to the signature
and continue.
3. If the frame isn't in either list, stop walking frames.
3. Signature is generated by joining those frames with " | " between
them.
This rule also generates the proto_signature which is the complete list
of normalized frames.
"""
def __init__(self):
super(SignatureGenerationRule, self).__init__()
self.java_signature_tool = JavaSignatureTool()
Expand Down Expand Up @@ -492,9 +518,11 @@ def action(self, crash_data, result):


class OOMSignature(Rule):
"""To satisfy Bug 1007530, this rule will modify the signature to
tag OOM (out of memory) crashes"""
"""Prepends ``OOM | <size>`` to signatures for OOM crashes.
See bug #1007530.
"""
signature_fragments = (
'NS_ABORT_OOM',
'mozalloc_handle_oom',
Expand Down Expand Up @@ -532,7 +560,7 @@ def action(self, crash_data, result):


class AbortSignature(Rule):
"""Adds abort message data to the beginning of the signature
"""Prepends abort message to signature.
See bug #803779.
Expand Down Expand Up @@ -582,7 +610,7 @@ def action(self, crash_data, result):


class SigFixWhitespace(Rule):
"""Fix whitespace in signatures
"""Fix whitespace in signatures.
This does the following:
Expand Down Expand Up @@ -615,7 +643,7 @@ def action(self, crash_data, result):


class SigTruncate(Rule):
"""Truncates signatures down to SIGNATURE_MAX_LENGTH characters"""
"""Truncates signatures down to SIGNATURE_MAX_LENGTH characters."""

def predicate(self, crash_data, result):
return len(result['signature']) > SIGNATURE_MAX_LENGTH
Expand All @@ -628,7 +656,7 @@ def action(self, crash_data, result):


class StackwalkerErrorSignatureRule(Rule):
"""ensure that the signature contains the stackwalker error message"""
"""Appends minidump-stackwalker error to signature."""

def predicate(self, crash_data, result):
return bool(
Expand All @@ -645,7 +673,7 @@ def action(self, crash_data, result):


class SignatureRunWatchDog(SignatureGenerationRule):
"""ensure that the signature contains the stackwalker error message"""
"""Prepends "shutdownhang" to signature for shutdown hang crashes."""

def predicate(self, crash_data, result):
return 'RunWatchdog' in result['signature']
Expand All @@ -669,8 +697,7 @@ def action(self, crash_data, result):


class SignatureShutdownTimeout(Rule):
"""replaces the signature if there is a shutdown timeout message in the
crash"""
"""Replaces signature with async_shutdown_timeout message."""

def predicate(self, crash_data, result):
return bool(crash_data.get('async_shutdown_timeout'))
Expand Down Expand Up @@ -708,7 +735,7 @@ def action(self, crash_data, result):


class SignatureJitCategory(Rule):
"""replaces the signature if there is a JIT classification in the crash"""
"""Replaces signature with JIT classification."""

def predicate(self, crash_data, result):
return bool(crash_data.get('jit_category'))
Expand All @@ -723,7 +750,7 @@ def action(self, crash_data, result):


class SignatureIPCChannelError(Rule):
"""replaces the signature if there is a IPC channel error in the crash"""
"""Replaces the signature with IPC channel error."""

def predicate(self, crash_data, result):
return bool(crash_data.get('ipc_channel_error'))
Expand All @@ -745,7 +772,7 @@ def action(self, crash_data, result):


class SignatureIPCMessageName(Rule):
"""augments the signature if there is a IPC message name in the crash"""
"""Appends ipc_message_name to signature."""

def predicate(self, crash_data, result):
return bool(crash_data.get('ipc_message_name'))
Expand All @@ -759,7 +786,7 @@ def action(self, crash_data, result):


class SignatureParentIDNotEqualsChildID(Rule):
"""Stomp on the signature if MozCrashReason is parentBuildID != childBuildID
"""Stomp on the signature if MozCrashReason is ``parentBuildID != childBuildID``.
In the case where the assertion fails, then the parent buildid and the child buildid are
different. This causes a lot of strangeness particularly in symbolification, so the signatures
Expand Down

0 comments on commit 700a7a9

Please sign in to comment.