Skip to content

Commit

Permalink
Bug 794540 - added support for exploitability tool to original processor
Browse files Browse the repository at this point in the history
  • Loading branch information
twobraids committed Nov 20, 2012
1 parent 7a5b784 commit 5ad3dd6
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 24 deletions.
8 changes: 8 additions & 0 deletions scripts/config/processorconfig.py.dist
Expand Up @@ -66,6 +66,14 @@ minidump_stackwalkPathname = cm.Option()
minidump_stackwalkPathname.doc = 'the full pathname of the extern program minidump_stackwalk (quote path with embedded spaces)'
minidump_stackwalkPathname.default = '/data/socorro/stackwalk/bin/minidump_stackwalk'

# Shell template used to launch the external exploitability-rating tool.
# '$exploitability_tool_pathname' and '$dumpfilePathname' are placeholders
# substituted by the processor at startup / per-dump; stderr is discarded
# by the template itself (2>/dev/null).
exploitability_tool_command_line = cm.Option()
exploitability_tool_command_line.doc = 'the template for the command to invoke the exploitability tool'
exploitability_tool_command_line.default = '$exploitability_tool_pathname $dumpfilePathname 2>/dev/null'

# Full pathname of the exploitability tool binary; interpolated into the
# command-line template via the '$exploitability_tool_pathname' placeholder.
exploitability_tool_pathname = cm.Option()
exploitability_tool_pathname.doc = 'the full pathname of the extern program exploitability tool (quote path with embedded spaces)'
exploitability_tool_pathname.default = '/data/socorro/stackwalk/bin/exploitable'

symbolCachePath = cm.Option()
symbolCachePath.doc = 'the path where the symbol cache is found (quote path with embedded spaces)'
symbolCachePath.default = '/mnt/socorro/symbols'
Expand Down
3 changes: 3 additions & 0 deletions socorro/lib/util.py
Expand Up @@ -168,6 +168,9 @@ def __init__(self, anIterator):
self.secondaryCacheSize = 0
self.useSecondary = False
#-----------------------------------------------------------------------------------------------------------------
def close(self):
    """Close the wrapped iterator.

    The wrapped object is really a handle to a file-like object (e.g. a
    subprocess stdout pipe), so it must be closed explicitly when the
    consumer is done with it.
    """
    self.theIterator.close()
#-----------------------------------------------------------------------------------------------------------------
def __iter__(self):
#try: #to be used in Python 2.5 or greater
for x in self.theIterator:
Expand Down
115 changes: 103 additions & 12 deletions socorro/processor/externalProcessor.py
Expand Up @@ -9,6 +9,7 @@
import threading
import time
import re
from contextlib import closing

import logging

Expand Down Expand Up @@ -44,8 +45,15 @@ def __init__(self, config):
tmp = tmp.replace('$processorSymbolsPathnameList','SYMBOL_PATHS')
# finally, convert any remaining $param to pythonic %(param)s
tmp = toPythonRE.sub(r'%(\1)s',tmp)
self.commandLine = tmp % config
self.mdsw_command_line = tmp % config

# Canonical form of $(param) is $param. Convert any that are needed
tmp = stripParensRE.sub(r'$\2',config.exploitability_tool_command_line)
# Convert canonical $dumpfilePathname to DUMPFILEPATHNAME
tmp = tmp.replace('$dumpfilePathname','DUMPFILEPATHNAME')
# finally, convert any remaining $param to pythonic %(param)s
tmp = toPythonRE.sub(r'%(\1)s', tmp)
self.exploitability_command_line = tmp % config

#-----------------------------------------------------------------------------------------------------------------
def invokeBreakpadStackdump(self, dumpfilePathname):
Expand All @@ -61,23 +69,53 @@ def invokeBreakpadStackdump(self, dumpfilePathname):
else:
symbol_path = ' '.join(['"%s"' % x for x in self.config.processorSymbolsPathnameList.split()])
#commandline = '"%s" %s "%s" %s 2>/dev/null' % (self.config.minidump_stackwalkPathname, "-m", dumpfilePathname, symbol_path)
newCommandLine = self.commandLine.replace("DUMPFILEPATHNAME", dumpfilePathname)
newCommandLine = self.mdsw_command_line.replace("DUMPFILEPATHNAME", dumpfilePathname)
newCommandLine = newCommandLine.replace("SYMBOL_PATHS", symbol_path)
#logger.info("invoking: %s", newCommandLine)
subprocessHandle = subprocess.Popen(newCommandLine, shell=True, stdout=subprocess.PIPE)
return (socorro.lib.util.StrCachingIterator(subprocessHandle.stdout), subprocessHandle)

#-----------------------------------------------------------------------------------------------------------------
def doBreakpadStackDumpAnalysis (self, reportId, uuid, dumpfilePathname, isHang, java_stack_trace, databaseCursor, date_processed, processorErrorMessages):
""" This function overrides the base class version of this function. This function coordinates the six
steps of running the breakpad_stackdump process and analyzing the textual output for insertion
into the database.
def invoke_exploitability(self, dump_pathname):
    """Run the external exploitability tool against a single dump file.

    The pre-expanded command template (built in __init__) has its
    DUMPFILEPATHNAME token filled in with the actual dump path; the
    template itself already suppresses stderr, so only stdout is captured.

    input parameters:
        dump_pathname - the complete pathname of the dumpfile to be analyzed
    returns:
        a (stdout file object, Popen handle) pair for the child process
    """
    resolved_command = self.exploitability_command_line.replace(
        "DUMPFILEPATHNAME",
        dump_pathname
    )
    a_process = subprocess.Popen(resolved_command,
                                 shell=True,
                                 stdout=subprocess.PIPE)
    return a_process.stdout, a_process

#-----------------------------------------------------------------------------------------------------------------
def doBreakpadStackDumpAnalysis (self,
reportId,
uuid,
dumpfilePathname,
isHang,
java_stack_trace,
databaseCursor,
date_processed,
processorErrorMessages):
""" This function overrides the base class version of this function. This
function coordinates the six steps of running the breakpad_stackdump
process and analyzing the textual output for insertion into the database.
returns:
truncated - boolean: True - due to excessive length the frames of the crashing thread may have been truncated.
truncated - boolean: True - due to excessive length the frames of
the crashing thread may have been
truncated.
input parameters:
reportId - the primary key from the 'reports' table for this crash report
reportId - the primary key from the 'reports' table for this report
uuid - the unique string identifier for the crash report
dumpfilePathname - the complete pathname for the =crash dump file
isHang - boolean, is this a hang crash?
Expand All @@ -87,8 +125,46 @@ def doBreakpadStackDumpAnalysis (self, reportId, uuid, dumpfilePathname, isHang,
processorErrorMessages
"""
#logger.debug('doBreakpadStackDumpAnalysis')
dumpAnalysisLineIterator, subprocessHandle = self.invokeBreakpadStackdump(dumpfilePathname)
dumpAnalysisLineIterator.secondaryCacheMaximumSize = self.config.crashingThreadTailFrameThreshold + 1
dumpAnalysisLineIterator, \
mdsw_subprocess_handle = self.invokeBreakpadStackdump(dumpfilePathname)
dumpAnalysisLineIterator.secondaryCacheMaximumSize = \
self.config.crashingThreadTailFrameThreshold + 1
exploitability_line_iterator, \
exploitability_subprocess_handle = self.invoke_exploitability(
dumpfilePathname
)
additionalReportValuesAsDict = self._stackwalk_analysis(
dumpAnalysisLineIterator,
mdsw_subprocess_handle,
reportId,
uuid,
dumpfilePathname,
isHang,
java_stack_trace,
databaseCursor,
date_processed,
processorErrorMessages
)
additionalReportValuesAsDict['exploitability'] = \
self._exploitability_analysis(
exploitability_line_iterator,
exploitability_subprocess_handle,
processorErrorMessages
)
return additionalReportValuesAsDict

#-----------------------------------------------------------------------------------------------------------------
def _stackwalk_analysis(self,
dumpAnalysisLineIterator,
mdsw_subprocess_handle,
reportId,
uuid,
dumpfilePathname,
isHang,
java_stack_trace,
databaseCursor,
date_processed,
processorErrorMessages):
try:
additionalReportValuesAsDict = self.analyzeHeader(reportId, dumpAnalysisLineIterator, databaseCursor, date_processed, processorErrorMessages)
crashedThread = additionalReportValuesAsDict["crashedThread"]
Expand All @@ -105,14 +181,29 @@ def doBreakpadStackDumpAnalysis (self, reportId, uuid, dumpfilePathname, isHang,
finally:
dumpAnalysisLineIterator.theIterator.close() #this is really a handle to a file-like object - got to close it
# is the return code from the invocation important? Uncomment, if it is...
returncode = subprocessHandle.wait()
returncode = mdsw_subprocess_handle.wait()
if returncode is not None and returncode != 0:
processorErrorMessages.append("%s failed with return code %s when processing dump %s" %(self.config.minidump_stackwalkPathname, subprocessHandle.returncode, uuid))
processorErrorMessages.append("%s failed with return code %s" %(self.config.minidump_stackwalkPathname, mdsw_subprocess_handle.returncode))
additionalReportValuesAsDict['success'] = False
if additionalReportValuesAsDict["signature"].startswith("EMPTY"):
additionalReportValuesAsDict["signature"] += "; corrupt dump"
return additionalReportValuesAsDict

#-----------------------------------------------------------------------------------------------------------------
def _exploitability_analysis(self,
exploitability_line_iterator,
exploitability_subprocess_handle,
error_messages):
exploitability = None
with closing(exploitability_line_iterator) as the_iter:
for a_line in the_iter:
exploitability = a_line.strip()
returncode = exploitability_subprocess_handle.wait()
if returncode is not None and returncode != 0:
error_messages.append("%s failed with return code %s" %
(self.config.exploitability_tool_pathname,
returncode))
return exploitability

#-----------------------------------------------------------------------------------------------------------------
def analyzeHeader(self, reportId, dumpAnalysisLineIterator, databaseCursor, date_processed, processorErrorMessages):
Expand Down
7 changes: 4 additions & 3 deletions socorro/processor/processor.py
Expand Up @@ -443,7 +443,7 @@ def convertDatesInDictToString (aDict):

#-----------------------------------------------------------------------------------------------------------------
@staticmethod
def sanitizeDict (aDict, listOfForbiddenKeys=('url', 'email', 'user_id', 'exploitability')):
    """Strip privacy/security sensitive keys from a report dict, in place.

    input parameters:
        aDict - the dictionary to be scrubbed (mutated in place)
        listOfForbiddenKeys - iterable of key names to remove if present;
                              defaults to the standard sensitive fields
    returns:
        None (the dict is modified in place)
    """
    # NOTE: the default was a mutable list; an immutable tuple avoids the
    # shared-mutable-default pitfall while remaining backward-compatible
    # (the default is only iterated, never mutated).
    for aForbiddenKey in listOfForbiddenKeys:
        if aForbiddenKey in aDict:
            del aDict[aForbiddenKey]
Expand Down Expand Up @@ -598,7 +598,8 @@ def processJob (self, jobTuple):
truncated = %%s,
topmost_filenames = %%s,
addons_checked = %%s,
flash_version = %%s
flash_version = %%s,
exploitability = %%s
where id = %s and date_processed = timestamp with time zone '%s'
""" % (reportId,date_processed)
#logger.debug("newReportRecordAsDict %s, %s", newReportRecordAsDict['topmost_filenames'], newReportRecordAsDict['flash_version'])
Expand All @@ -620,7 +621,7 @@ def processJob (self, jobTuple):
flash_version = newReportRecordAsDict.get('flash_version')
processor_notes = '; '.join(processorErrorMessages)
newReportRecordAsDict['processor_notes'] = processor_notes
infoTuple = (newReportRecordAsDict['signature'], processor_notes, startedDateTime, completedDateTime, newReportRecordAsDict["success"], newReportRecordAsDict["truncated"], topmost_filenames, addons_checked, flash_version)
infoTuple = (newReportRecordAsDict['signature'], processor_notes, startedDateTime, completedDateTime, newReportRecordAsDict["success"], newReportRecordAsDict["truncated"], topmost_filenames, addons_checked, flash_version, newReportRecordAsDict["exploitability"],)
#logger.debug("Updated report %s (%s): %s", reportId, jobUuid, str(infoTuple))
threadLocalCursor.execute(reportsSql, infoTuple)
threadLocalDatabaseConnection.commit()
Expand Down
27 changes: 18 additions & 9 deletions socorro/unittest/processor/testProcessor.py
Expand Up @@ -811,7 +811,7 @@ def testProcessJob06():
assert r == e, 'expected\n%s\nbut got\n%s' % (e, r)

def testProcessJob07():
"""testProcessJobProductIdOverride: success"""
"""testProcessJob07: success"""
threadName = thr.currentThread().getName()
p, c = getMockedProcessorAndContext()
p.submitOoidToElasticSearch = lambda x: None # eliminate this call
Expand Down Expand Up @@ -864,6 +864,7 @@ def testProcessJob07():
'flash_version': "all.bad",
'truncated': False,
'topmost_filenames': [ 'myfile.cpp' ],
'exploitability': 'HIGH'
#'expected_topmost': 'myfile.cpp',
#'expected_addons_checked': True,
}
Expand Down Expand Up @@ -901,7 +902,8 @@ def testProcessJob07():
truncated = %%s,
topmost_filenames = %%s,
addons_checked = %%s,
flash_version = %%s
flash_version = %%s,
exploitability = %%s
where id = %s and date_processed = timestamp with time zone '%s'
""" % (reportId, date_processed)
c.fakeCursor.expect('execute',
Expand All @@ -917,6 +919,7 @@ def testProcessJob07():
#additional_report_values['expected_addons_checked'],
True,
additional_report_values['flash_version'],
'HIGH',
)),
{})
c.fakeConnection.expect('commit', (), {}, None)
Expand All @@ -939,6 +942,7 @@ def testProcessJob07():
'id': 345,
'completeddatetime': dt.datetime(2011, 2, 15, 1, 1, tzinfo=UTC),
'ReleaseChannel': 'release',
'exploitability': 'HIGH',
}
fakeSaveProcessedDumpJson.expect('__call__',
(nrr, c.fakeCrashStorage),
Expand All @@ -951,7 +955,7 @@ def testProcessJob07():
assert r == e, 'expected\n%s\nbut got\n%s' % (e, r)

def testProcessJobProductIdOverride():
"""testProcessJob07: success"""
"""testProcessJobProductIdOverride: success"""
threadName = thr.currentThread().getName()
p, c = getMockedProcessorAndContext()
p.productIdMap = {'abcdefg':{'product_name':'WaterWolf',
Expand Down Expand Up @@ -1014,6 +1018,7 @@ def testProcessJobProductIdOverride():
'flash_version': "all.bad",
'truncated': False,
'topmost_filenames': [ 'myfile.cpp' ],
'exploitability': None
#'expected_topmost': 'myfile.cpp',
#'expected_addons_checked': True,
}
Expand Down Expand Up @@ -1051,7 +1056,8 @@ def testProcessJobProductIdOverride():
truncated = %%s,
topmost_filenames = %%s,
addons_checked = %%s,
flash_version = %%s
flash_version = %%s,
exploitability = %%s
where id = %s and date_processed = timestamp with time zone '%s'
""" % (reportId, date_processed)
c.fakeCursor.expect('execute',
Expand All @@ -1067,6 +1073,7 @@ def testProcessJobProductIdOverride():
#additional_report_values['expected_addons_checked'],
True,
additional_report_values['flash_version'],
None,
)),
{})
c.fakeConnection.expect('commit', (), {}, None)
Expand All @@ -1089,6 +1096,7 @@ def testProcessJobProductIdOverride():
'id': 345,
'completeddatetime': dt.datetime(2011, 2, 15, 1, 1, tzinfo=UTC),
'ReleaseChannel': 'release',
'exploitability': None,
}
fakeSaveProcessedDumpJson.expect('__call__',
(nrr, c.fakeCrashStorage),
Expand Down Expand Up @@ -1164,6 +1172,7 @@ def testProcessdJobDefaultIsNotAHang():
'flash_version': "all.bad",
'truncated': False,
'topmost_filenames': [ 'myfile.cpp' ],
'exploitability': None
#'expected_topmost': 'myfile.cpp',
#'expected_addons_checked': True,
}
Expand Down Expand Up @@ -1201,7 +1210,8 @@ def testProcessdJobDefaultIsNotAHang():
truncated = %%s,
topmost_filenames = %%s,
addons_checked = %%s,
flash_version = %%s
flash_version = %%s,
exploitability = %%s
where id = %s and date_processed = timestamp with time zone '%s'
""" % (reportId, date_processed)
c.fakeCursor.expect('execute',
Expand All @@ -1217,6 +1227,7 @@ def testProcessdJobDefaultIsNotAHang():
#additional_report_values['expected_addons_checked'],
True,
additional_report_values['flash_version'],
None
)),
{})
c.fakeConnection.expect('commit', (), {}, None)
Expand All @@ -1239,6 +1250,7 @@ def testProcessdJobDefaultIsNotAHang():
'id': 345,
'completeddatetime': dt.datetime(2011, 2, 15, 1, 1, tzinfo=UTC),
'ReleaseChannel': 'release',
'exploitability': None,
}
fakeSaveProcessedDumpJson.expect('__call__',
(nrr, c.fakeCrashStorage),
Expand Down Expand Up @@ -1273,11 +1285,8 @@ def testGetJsonOrWarn():
r = proc.Processor.getJsonOrWarn(d, 'key', message_list)
assert r == None
assert len(message_list) == 1
print message_list
assert "'int'" in message_list[0]
# the following test line fails under Python 2.5 because the Python
# error message has changed.
assert "subscriptable" in message_list[0]
assert "ERROR" in message_list[0]

expected_report_tuple = ('ooid1',
dt.datetime(2011, 2, 16, 4, 44, 52, tzinfo=UTC),
Expand Down

0 comments on commit 5ad3dd6

Please sign in to comment.