In [1]:
# Fetch a fresh copy of the crashcorrelations repository, deleting any
# previous checkout in the working directory first.
!rm -rf crashcorrelations
!git clone https://github.com/marco-c/crashcorrelations
# Install a pinned google-cloud-bigquery release through the notebook's
# library utility, then restart the Python process.
# NOTE(review): the restart is presumably required for the freshly installed
# library to become importable — confirm against the platform documentation.
dbutils.library.installPyPI("google-cloud-bigquery", "1.20.0")
dbutils.library.restartPython()

In [2]:
# Download the pinned stemming 1.0.1 distribution and unpack its tarball
# into the working directory.
!pip download stemming==1.0.1
!tar xf stemming-1.0.1.tar.gz
# Register the porter2 module file with the Spark context.
# NOTE(review): presumably needed so code running on the executors can import
# it — confirm which crashcorrelations module depends on it.
sc.addPyFile('stemming-1.0.1/stemming/porter2.py')

In [3]:
import hashlib
from collections import defaultdict
from datetime import datetime

import os
import sys
# Make the working directory and the cloned crashcorrelations checkout
# importable before the crashcorrelations import below.
sys.path += [os.path.abspath("."), os.path.abspath("crashcorrelations")]

from crashcorrelations import download_data, utils, crash_deviations, comments

In [4]:
# Log the current UTC time before the data download and analysis cells run
# (a second timestamp is printed after the analysis, presumably to gauge duration).
print(datetime.utcnow())

In [5]:
# Channels to analyse, in processing order.
channels = ['release', 'beta', 'nightly']

# Map each channel name to the value returned by download_data.get_versions
# for the FennecAndroid product.
channel_to_versions = {
    name: download_data.get_versions(name, product='FennecAndroid')
    for name in channels
}

In [6]:
# For every channel, request the top 100 entries from download_data.get_top
# over the last 5 days, restricted to that channel's versions.
signatures = {
    name: download_data.get_top(
        100,
        versions=channel_to_versions[name],
        days=5,
        product='FennecAndroid'
    )
    for name in channels
}

In [7]:
# Root directory that receives one sub-directory per channel plus a summary file.
output_root = 'top-fennec-signatures-correlations_output'

# Remove and recreate the output directory before writing anything into it.
utils.rmdir(output_root)
utils.mkdir(output_root)

# Summary written at the end: the run date plus one total per channel.
totals = {'date': str(utils.utc_today())}

for channel in channels:
    print(channel)

    channel_dir = output_root + '/' + channel
    utils.mkdir(channel_dir)

    dataset = crash_deviations.get_telemetry_crashes(
        spark,
        versions=channel_to_versions[channel],
        days=5,
        product='FennecAndroid'
    )
    results, total_reference, total_groups = crash_deviations.find_deviations(
        sc,
        dataset,
        signatures=signatures[channel]
    )

    totals[channel] = total_reference

    for signature in signatures[channel]:
        # Signatures absent from the results get no output file.
        if signature in results:
            # The file name is the SHA-1 hex digest of the UTF-8 encoded signature.
            digest = hashlib.sha1(signature.encode('utf-8')).hexdigest()
            payload = {
                'total': total_groups[signature],
                'results': results[signature],
            }
            utils.write_json(channel_dir + '/' + digest + '.json.gz', payload)

utils.write_json(output_root + '/all.json.gz', totals)

In [8]:
# Log the current UTC time once the correlation files have been written
# (pairs with the timestamp printed before the analysis).
print(datetime.utcnow())

In [9]:
# Will be uploaded under https://analysis-output.telemetry.mozilla.org/top-fennec-signatures-correlations/data/
# The local output directory is the job name with an '_output' suffix.
job_name = 'top-fennec-signatures-correlations'
local_output_dir = job_name + '_output'

# Remove the previously published results for this job, then upload the new ones.
utils.remove_results(job_name)
utils.upload_results(job_name, local_output_dir)