In [None]:
# Fetch the crashcorrelations sources into the working directory.
# NOTE(review): presumably this checkout is what the `crashcorrelations`
# imports in the notebook resolve to - confirm.
!git clone https://github.com/marco-c/crashcorrelations

In [None]:
!pip download stemming==1.0.1
!unzip stemming-1.0.1
sc.addPyFile('stemming-1.0.1/stemming/porter2.py')

In [None]:
import hashlib
from collections import defaultdict
from datetime import datetime

from crashcorrelations import download_data, utils, crash_deviations, comments

In [None]:
# Print the current UTC time before the heavy cells run (start marker).
print(datetime.utcnow())

In [None]:
# Channels to analyse, and for each one whatever
# download_data.get_versions() reports for it.
channels = ['release', 'beta', 'aurora', 'nightly', 'esr']
channel_to_versions = {
    channel: download_data.get_versions(channel)
    for channel in channels
}

In [None]:
# Per-channel result of download_data.get_top(200, ..., days=5).
# NOTE(review): presumably the 200 top crash signatures over the last
# 5 days for the channel's versions - confirm against download_data.
signatures = {
    channel: download_data.get_top(200, versions=channel_to_versions[channel], days=5)
    for channel in channels
}

In [None]:
# Everything is written to the scratch directory 'output_tmp' first and only
# copied to 'output' once every channel has been processed.
utils.mkdir('output_tmp')

# channel -> total_reference returned by find_deviations for that channel.
totals = {}
# channel -> list of signatures that have at least one add-on related result.
addon_related_signatures = defaultdict(list)

for channel in channels:
    print(channel)

    utils.mkdir('output_tmp/' + channel)

    # Deviations are computed on a 5-day window of crashes.
    dataset = crash_deviations.get_telemetry_crashes(sc, versions=channel_to_versions[channel], days=5)
    results, total_reference, total_groups = crash_deviations.find_deviations(sc, dataset, signatures=signatures[channel])

    totals[channel] = total_reference

    # The top words are extracted from a separate, 30-day window.
    dataset = crash_deviations.get_telemetry_crashes(sc, versions=channel_to_versions[channel], days=30)
    top_words = comments.get_top_words(dataset, signatures[channel])

    for signature in signatures[channel]:
        # Signatures without any deviation result get no output file.
        if signature not in results:
            continue

        signature_total = total_groups[signature]

        # Keep the results that involve exactly one attribute, whose name
        # contains 'Addon', and whose share within this signature's crashes
        # exceeds its share in the reference set. The conditions are ordered
        # so the divisions only run for single-attribute add-on results.
        addons = [
            result for result in results[signature]
            if len(result['item']) == 1
            and any('Addon' in elem for elem in result['item'].keys())
            and float(result['count_group']) / signature_total > float(result['count_reference']) / total_reference
        ]

        if len(addons) > 0:
            addon_related_signatures[channel].append({
                'signature': signature,
                'addons': addons,
                'total': signature_total,
            })

        res = {
            'total': signature_total,
            'results': results[signature],
        }

        if signature in top_words:
            res['top_words'] = top_words[signature]

        # hashlib needs bytes: hashing a text signature directly raises
        # TypeError on Python 3 and UnicodeEncodeError on Python 2 when the
        # signature contains non-ASCII characters. Encoding as UTF-8 keeps
        # the digest (and so the file name) unchanged for ASCII signatures.
        signature_bytes = signature if isinstance(signature, bytes) else signature.encode('utf-8')

        # The file is named after the SHA-1 hex digest of the signature, so
        # the file name never contains characters from the signature itself.
        utils.write_json('output_tmp/' + channel + '/' + hashlib.sha1(signature_bytes).hexdigest() + '.json.gz', res)

utils.write_json('output_tmp/all.json.gz', totals)
utils.write_json('output_tmp/addon_related_signatures.json.gz', addon_related_signatures)

# NOTE(review): presumably clears the previously published
# 'top-signatures-correlations' results before the new ones are put in
# place - confirm in utils.remove_results.
utils.remove_results('top-signatures-correlations')

utils.copytree('output_tmp', 'output')

In [None]:
# Print the current UTC time after all the work above has run (end marker).
print(datetime.utcnow())