From 700a7a927c84d19104ec82b2048f9e7b99ddbf4c Mon Sep 17 00:00:00 2001 From: Will Kahn-Greene Date: Tue, 28 Aug 2018 19:33:09 -0400 Subject: [PATCH] fix bug 1306643: document signature generation pipeline This adds a command that generates a restructured text file, the generated file, and tweaks the docs to include that in the signature generation chapter. This also documents the signature generation rules. --- docs/signaturegeneration.rst | 3 + socorro-cmd | 3 +- socorro/signature/cmd_doc.py | 109 +++++++++++++++++++++++++++++++++ socorro/signature/pipeline.rst | 96 +++++++++++++++++++++++++++++ socorro/signature/rules.py | 55 ++++++++++++----- 5 files changed, 251 insertions(+), 15 deletions(-) create mode 100644 socorro/signature/cmd_doc.py create mode 100644 socorro/signature/pipeline.rst diff --git a/docs/signaturegeneration.rst b/docs/signaturegeneration.rst index 3970501656..e8ce4b9e5e 100644 --- a/docs/signaturegeneration.rst +++ b/docs/signaturegeneration.rst @@ -89,3 +89,6 @@ to `reprocess the affected signatures `_. .. include:: ../socorro/signature/siglists/README.rst + + +.. include:: ../socorro/signature/pipeline.rst diff --git a/socorro-cmd b/socorro-cmd index 6f1336e773..45213bc53c 100755 --- a/socorro-cmd +++ b/socorro-cmd @@ -93,7 +93,8 @@ COMMANDS = [ Group( 'Miscellaneous', { 'showcommands': showcommands_cmd, - 'signature': 'socorro.signature.cmd_signature.main' + 'signature': 'socorro.signature.cmd_signature.main', + 'signature-doc': 'socorro.signature.cmd_doc.main', } ) ] diff --git a/socorro/signature/cmd_doc.py b/socorro/signature/cmd_doc.py new file mode 100644 index 0000000000..248dfae626 --- /dev/null +++ b/socorro/signature/cmd_doc.py @@ -0,0 +1,109 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import print_function + +import argparse +import importlib +import re +import sys + + +DESCRIPTION = """ +Generates documentation for the specified signature generation pipeline. +Outputs the documentation in restructured text format. +""" + + +def import_rules(rules): + module_path, attr = rules.rsplit('.', 1) + module = importlib.import_module(module_path) + return getattr(module, attr) + + +def indent(text, prefix): + text = text.replace('\n', '\n' + prefix) + return text.strip() + + +LEAD_WHITESPACE = re.compile('^[ \t]*') + + +def dedent_docstring(text): + text_lines = text.splitlines() + + # Figure out the indentation of all the lines to figure out how much to + # dedent by + leads = [] + for line in text_lines: + if len(line.strip()) == 0: + continue + leads.append(LEAD_WHITESPACE.match(line).group(0)) + + if leads and len(leads[0]) == 0: + leads.pop(0) + + if not leads: + return text + + # Let's use the first non-empty line to dedent the text with. It's + # possible this isn't a great idea. If that's the case, we can figure + # out a different way to do it. + dedent_str = leads[0] + dedent_amount = len(dedent_str) + + for i, line in enumerate(text_lines): + if line.startswith(dedent_str): + text_lines[i] = text_lines[i][dedent_amount:] + + return '\n'.join(text_lines) + + +def get_doc(cls): + return 'Rule: %s\n\n%s' % ( + cls.__class__.__name__, + dedent_docstring(cls.__doc__) + ) + + +def main(argv=None): + """Generates documentation for signature generation pipeline""" + parser = argparse.ArgumentParser(description=DESCRIPTION) + parser.add_argument( + 'pipeline', + help='Python dotted path to rules pipeline to document' + ) + parser.add_argument('output', help='output file') + + if argv is None: + args = parser.parse_args() + else: + args = parser.parse_args(argv) + + print('Generating documentation for %s in %s...' 
% (args.pipeline, args.output)) + + rules = import_rules(args.pipeline) + + with open(args.output, 'w') as fp: + fp.write('.. THIS IS AUTOGENERATED USING:\n') + fp.write(' \n') + fp.write(' %s\n' % (' '.join(sys.argv))) + fp.write(' \n') + fp.write('Signature generation rules pipeline\n') + fp.write('===================================\n') + fp.write('\n') + fp.write('\n') + fp.write( + 'This is the signature generation pipeline defined at ``%s``:\n' % + args.pipeline + ) + fp.write('\n') + + for i, rule in enumerate(rules): + li = '%s. ' % (i + 1) + fp.write('%s%s\n' % ( + li, + indent(get_doc(rule), ' ' * len(li)) + )) + fp.write('\n') diff --git a/socorro/signature/pipeline.rst b/socorro/signature/pipeline.rst new file mode 100644 index 0000000000..f3eae08216 --- /dev/null +++ b/socorro/signature/pipeline.rst @@ -0,0 +1,96 @@ +.. THIS IS AUTOGENERATED USING: + + ./socorro-cmd signature-doc socorro.signature.generator.DEFAULT_PIPELINE socorro/signature/pipeline.rst + +Signature generation rules pipeline +=================================== + + +This is the signature generation pipeline defined at ``socorro.signature.generator.DEFAULT_PIPELINE``: + +1. Rule: SignatureGenerationRule + + Generates a signature based on stack frames. + + For Java crashes, this generates a basic signature using stack frames. + + For C/C++/Rust crashes, this generates a more robust signature using + normalized versions of stack frames augmented by the contents of the + signature lists. + + Rough signature list rules (there are more details in the siglists README): + + 1. Walk the frames looking for a "signature sentinel" which becomes the + first item in the signature. + 2. Continue walking frames. + + 1. If the frame is in the "irrelevant" list, ignore it and + continue. + 2. If the frame is in the "prefix" list, add it to the signature + and continue. + 3. If the frame isn't in either list, stop walking frames. + + 3. 
Signature is generated by joining those frames with " | " between + them. + + This rule also generates the proto_signature which is the complete list + of normalized frames. + +2. Rule: StackwalkerErrorSignatureRule + + Appends minidump-stackwalker error to signature. + +3. Rule: OOMSignature + + Prepends ``OOM | `` to signatures for OOM crashes. + + See bug #1007530. + +4. Rule: AbortSignature + + Prepends abort message to signature. + + See bug #803779. + +5. Rule: SignatureShutdownTimeout + + Replaces signature with async_shutdown_timeout message. + +6. Rule: SignatureRunWatchDog + + Prepends "shutdownhang" to signature for shutdown hang crashes. + +7. Rule: SignatureIPCChannelError + + Replaces the signature with IPC channel error. + +8. Rule: SignatureIPCMessageName + + Appends ipc_message_name to signature. + +9. Rule: SignatureParentIDNotEqualsChildID + + Stomp on the signature if MozCrashReason is ``parentBuildID != childBuildID``. + + In the case where the assertion fails, then the parent buildid and the child buildid are + different. This causes a lot of strangeness particularly in symbolification, so the signatures + end up as junk. Instead, we want to bucket all these together so we replace the signature. + +10. Rule: SignatureJitCategory + + Replaces signature with JIT classification. + +11. Rule: SigFixWhitespace + + Fix whitespace in signatures. + + This does the following: + + * trims leading and trailing whitespace + * converts all non-space whitespace characters to space + * reduce consecutive spaces to a single space + +12. Rule: SigTruncate + + Truncates signatures down to SIGNATURE_MAX_LENGTH characters. 
+ diff --git a/socorro/signature/rules.py b/socorro/signature/rules.py index 8e26482c26..6c1701a516 100644 --- a/socorro/signature/rules.py +++ b/socorro/signature/rules.py @@ -78,7 +78,7 @@ def _do_generate(self, source_list, hang_type, crashed_thread, delimiter): class CSignatureTool(SignatureTool): - """Generates signature from C/C++/Rust stacks + """Generates signature from C/C++/Rust stacks. This is the class for signature generation tools that work on breakpad C/C++ stacks. It normalizes frames and then runs them through the siglists @@ -419,7 +419,33 @@ def _do_generate(self, source, hang_type_unused=0, crashed_thread_unused=None, d class SignatureGenerationRule(Rule): + """Generates a signature based on stack frames. + For Java crashes, this generates a basic signature using stack frames. + + For C/C++/Rust crashes, this generates a more robust signature using + normalized versions of stack frames augmented by the contents of the + signature lists. + + Rough signature list rules (there are more details in the siglists README): + + 1. Walk the frames looking for a "signature sentinel" which becomes the + first item in the signature. + 2. Continue walking frames. + + 1. If the frame is in the "irrelevant" list, ignore it and + continue. + 2. If the frame is in the "prefix" list, add it to the signature + and continue. + 3. If the frame isn't in either list, stop walking frames. + + 3. Signature is generated by joining those frames with " | " between + them. + + This rule also generates the proto_signature which is the complete list + of normalized frames. + + """ def __init__(self): super(SignatureGenerationRule, self).__init__() self.java_signature_tool = JavaSignatureTool() @@ -492,9 +518,11 @@ def action(self, crash_data, result): class OOMSignature(Rule): - """To satisfy Bug 1007530, this rule will modify the signature to - tag OOM (out of memory) crashes""" + """Prepends ``OOM | `` to signatures for OOM crashes. + See bug #1007530. 
+ + """ signature_fragments = ( 'NS_ABORT_OOM', 'mozalloc_handle_oom', @@ -532,7 +560,7 @@ def action(self, crash_data, result): class AbortSignature(Rule): - """Adds abort message data to the beginning of the signature + """Prepends abort message to signature. See bug #803779. @@ -582,7 +610,7 @@ def action(self, crash_data, result): class SigFixWhitespace(Rule): - """Fix whitespace in signatures + """Fix whitespace in signatures. This does the following: @@ -615,7 +643,7 @@ def action(self, crash_data, result): class SigTruncate(Rule): - """Truncates signatures down to SIGNATURE_MAX_LENGTH characters""" + """Truncates signatures down to SIGNATURE_MAX_LENGTH characters.""" def predicate(self, crash_data, result): return len(result['signature']) > SIGNATURE_MAX_LENGTH @@ -628,7 +656,7 @@ def action(self, crash_data, result): class StackwalkerErrorSignatureRule(Rule): - """ensure that the signature contains the stackwalker error message""" + """Appends minidump-stackwalker error to signature.""" def predicate(self, crash_data, result): return bool( @@ -645,7 +673,7 @@ def action(self, crash_data, result): class SignatureRunWatchDog(SignatureGenerationRule): - """ensure that the signature contains the stackwalker error message""" + """Prepends "shutdownhang" to signature for shutdown hang crashes.""" def predicate(self, crash_data, result): return 'RunWatchdog' in result['signature'] @@ -669,8 +697,7 @@ def action(self, crash_data, result): class SignatureShutdownTimeout(Rule): - """replaces the signature if there is a shutdown timeout message in the - crash""" + """Replaces signature with async_shutdown_timeout message.""" def predicate(self, crash_data, result): return bool(crash_data.get('async_shutdown_timeout')) @@ -708,7 +735,7 @@ def action(self, crash_data, result): class SignatureJitCategory(Rule): - """replaces the signature if there is a JIT classification in the crash""" + """Replaces signature with JIT classification.""" def predicate(self, crash_data, 
result): return bool(crash_data.get('jit_category')) @@ -723,7 +750,7 @@ def action(self, crash_data, result): class SignatureIPCChannelError(Rule): - """replaces the signature if there is a IPC channel error in the crash""" + """Replaces the signature with IPC channel error.""" def predicate(self, crash_data, result): return bool(crash_data.get('ipc_channel_error')) @@ -745,7 +772,7 @@ def action(self, crash_data, result): class SignatureIPCMessageName(Rule): - """augments the signature if there is a IPC message name in the crash""" + """Appends ipc_message_name to signature.""" def predicate(self, crash_data, result): return bool(crash_data.get('ipc_message_name')) @@ -759,7 +786,7 @@ def action(self, crash_data, result): class SignatureParentIDNotEqualsChildID(Rule): - """Stomp on the signature if MozCrashReason is parentBuildID != childBuildID + """Stomp on the signature if MozCrashReason is ``parentBuildID != childBuildID``. In the case where the assertion fails, then the parent buildid and the child buildid are different. This causes a lot of strangeness particularly in symbolification, so the signatures