In [1]:
!git clone https://github.com/marco-c/missing_symbols.git
!pip install tabulate

Cloning into 'missing_symbols'...
remote: Counting objects: 35, done.[K
remote: Total 35 (delta 0), reused 0 (delta 0), pack-reused 35[K
Unpacking objects: 100% (35/35), done.
Checking connectivity... done.
Collecting tabulate
  Using cached tabulate-0.7.7-py2.py3-none-any.whl
Installing collected packages: tabulate
Successfully installed tabulate-0.7.7
[33mYou are using pip version 8.1.2, however version 9.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import os
from datetime import datetime, timedelta
from pyspark.sql import functions
import boto3
from tabulate import tabulate

In [3]:
# Lower-cased names taken from the entries of missing_symbols/known_modules.
# The last four characters of every entry are dropped (presumably a '.txt'
# extension -- TODO confirm against the repository layout). Lower-casing lets
# the later membership test ignore case.
known_modules = {
    entry[:-4].lower()
    for entry in os.listdir('missing_symbols/known_modules')
}

In [4]:
# Number of days covered by the report, counting back from yesterday (UTC).
num_days = 3
# Read the clock exactly once so that every entry is derived from the same
# "today", even if the cell happens to run across midnight UTC.
last_full_day = datetime.utcnow().date() - timedelta(days=1)
# Newest first: yesterday, the day before yesterday, ...
days = [last_full_day - timedelta(days=offset) for offset in range(num_days)]

In [5]:
# One parquet location per requested day; the partition suffix is the day
# formatted as YYYYMMDD.
crash_day_paths = [
    's3://telemetry-parquet/socorro_crash/v2/crash_date=' + day.strftime('%Y%m%d')
    for day in days
]
# Load all selected partitions into a single DataFrame.
dataset = SQLContext(sc).read.load(crash_day_paths, 'parquet')

In [6]:
# Result: a list of (filename, [(version, count), ...]) tuples for modules that
# are flagged as missing symbols and are not listed in known_modules, ordered
# by the total count per filename (largest first).

# Restrict to Firefox rows.
firefox_crashes = dataset.filter(dataset['product'] == 'Firefox')

# One row per (uuid, module); rows with the same uuid and an identical module
# entry are collapsed so they are only counted once.
crash_modules = (
    firefox_crashes
    .select('uuid', functions.explode(dataset['json_dump']['modules']).alias('module'))
    .dropDuplicates(['uuid', 'module'])
    .select('module')
    .rdd
    .map(lambda row: row['module'])
)

# Keep modules flagged as missing symbols whose lower-cased filename is not in
# the known_modules set.
unknown_missing = crash_modules.filter(
    lambda mod: mod['missing_symbols'] and mod['filename'].lower() not in known_modules
)

# Count occurrences per (filename, version) pair.
per_version_counts = (
    unknown_missing
    .map(lambda mod: ((mod['filename'], mod['version']), 1))
    .reduceByKey(lambda left, right: left + right)
)

# Regroup by filename: each value becomes a list of (version, count) pairs,
# built by concatenating one-element lists.
per_module_versions = (
    per_version_counts
    .map(lambda pair: (pair[0][0], [(pair[0][1], pair[1])]))
    .reduceByKey(lambda left, right: left + right)
)

# Order by the summed count over all versions and bring the result to the driver.
modules = (
    per_module_versions
    .sortBy(lambda pair: sum(count for _version, count in pair[1]), ascending=False)
    .collect()
)

In [7]:
# Number of distinct module filenames collected above (one list entry each).
len(modules)

27964

In [8]:
# Minimum count a (module, version) pair needs in order to be listed. The
# report text sent with this table promises modules with "at least 2000 crash
# reports", so the bound is inclusive.
min_crash_count = 2000

# Flatten to (name, version, count) rows and order them by count, largest first.
top_missing = sorted(
    (
        (name, version, count)
        for name, versions in modules
        for version, count in versions
        if count >= min_crash_count
    ),
    key=lambda row: row[2],
    reverse=True
)

In [9]:
# Total count (summed over all versions) for the first 50 entries of `modules`.
[
    (filename, sum(n for _ver, n in per_version))
    for filename, per_version in modules[:50]
]

[(u'ntdll.dll', 79782),
 (u'kernel32.dll', 79607),
 (u'KERNELBASE.dll', 55351),
 (u'ole32.dll', 50559),
 (u'msmpeg2vdec.dll', 43341),
 (u'libsystem_kernel.dylib', 12788),
 (u'user32.dll', 11580),
 (u'npCodyyMultiHD.dll', 9285),
 (u'CodyyConversionCom.dll', 9274),
 (u'IPCSocket_x86_V5.5_2013.dll', 9274),
 (u'EMET.dll', 9215),
 (u'SYSV00000000 (deleted)', 8782),
 (u'xul.dll', 8053),
 (u'icudt56.dll', 7146),
 (u'DropboxExt.16.0.dll', 6906),
 (u'rpcrt4.dll', 6856),
 (u'FileSyncShell.dll', 6092),
 (u'odbcint.dll', 5777),
 (u'CoreFoundation', 5585),
 (u'linux-gate.so', 5314),
 (u'tmmon.dll', 5215),
 (u'TmUmEvt.dll', 5193),
 (u'combase.dll', 5041),
 (u'aetpkss1.dll', 4582),
 (u'FwcWsp.dll', 4548),
 (u'xpsp2res.dll', 4321),
 (u'icudt52.dll', 4282),
 (u'libdispatch.dylib', 4170),
 (u'HIToolbox', 4139),
 (u'l3codecx.ax', 3897),
 (u'mbae.dll', 3426),
 (u'nss3.dll', 3358),
 (u'VideoToolbox', 3344),
 (u'AppKit', 2957),
 (u'prochook.dll', 2873),
 (u'CoreMedia', 2722),
 (u'eplgFirefox.dll', 2716),
 (

In [10]:
# boto3 and tabulate are already imported in the imports cell near the top of
# the notebook, so no import is repeated here.

subject = 'Weekly report of modules with missing symbols in crash reports'

# Plain-text table with one row per (name, version, count) entry of top_missing.
body = tabulate(top_missing, headers=['Name', 'Version', '# of crash reports (*)'])

# The footnote states the reporting window. It is filled in from num_days so
# the text cannot drift from the number of days that were actually loaded.
body += """
\n\n(*) The number of crash reports refers to the past %d days.
Only modules with at least 2000 crash reports are shown in this list.

If you see modules that shouldn't be in this list as it's expected not
to have their symbols, either contact mcastelluccio@mozilla.com or open
a PR to add them to https://github.com/marco-c/missing_symbols/tree/master/known_modules.
""" % num_days

# Send the report as a plain-text email through Amazon SES. The cell's last
# expression is the MessageId from the response, which the notebook displays.
ses = boto3.client('ses')
ses.send_email(
    Source='telemetry-alerts@mozilla.com',
    Destination={
        'ToAddresses': ['mcastelluccio@mozilla.com', 'release-mgmt@mozilla.com', 'stability@mozilla.org'],
        'CcAddresses': [],
    },
    Message={
        'Subject': {'Data': subject, 'Charset': 'UTF-8'},
        'Body': {'Text': {'Data': body, 'Charset': 'UTF-8'}}
    }
)['MessageId']

'0101015c3f721f2c-5cd00c8d-13f9-45b8-beb6-216b1e075cbd-000000'