In [None]:
# Clone the crashcorrelations repository into the working directory.
# NOTE(review): presumably this checkout is what the `crashcorrelations`
# import further down resolves to - confirm it is not installed separately.
!git clone https://github.com/marco-c/crashcorrelations

In [None]:
# Install the `stemming` package. Nothing in this notebook imports it directly;
# presumably it is a dependency of the crashcorrelations code - TODO confirm.
!pip install stemming

In [None]:
import os
import errno
import json
import gzip
import shutil
import hashlib
from collections import defaultdict

from crashcorrelations import download_data, utils, crash_deviations, comments

In [None]:
# Print the current UTC time when this cell runs; the same call is made again
# later in the notebook, so the two outputs bracket the run.
from datetime import datetime
print(datetime.utcnow())

In [None]:
def mkdir(path):
    """Create the directory `path`, including any missing parent directories.

    Calling this for a directory that already exists is a no-op.

    Raises:
        OSError: if the directory cannot be created, or if `path` already
            exists but is not a directory.
    """
    try:
        # makedirs rather than mkdir: the notebook passes nested paths such as
        # 'output/<channel>' whose parent directory may not exist yet.
        os.makedirs(path)
    except OSError as e:
        # Only an already-existing *directory* is acceptable; anything else
        # (permissions, a regular file in the way, ...) is a real failure.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise

def rmdir(path):
    """Recursively delete the directory tree rooted at `path`.

    A path that does not exist is silently ignored; every other OSError
    raised while removing the tree is propagated to the caller.
    """
    try:
        shutil.rmtree(path)
    except OSError as err:
        if err.errno == errno.ENOENT:
            # Nothing to remove: treat as success.
            return
        raise

def write_json(path, obj):
    """Serialize `obj` as JSON and write it, gzip-compressed, to `path`.

    Args:
        path: destination file name; an existing file is overwritten.
        obj: any value accepted by `json.dumps`.

    Raises:
        TypeError / ValueError: if `obj` is not JSON-serializable.
        IOError / OSError: if `path` cannot be written.
    """
    # Serialize before opening the file so that a serialization error does not
    # leave a truncated archive behind. Encode explicitly because a gzip
    # stream opened in binary mode only accepts bytes, whereas json.dump
    # emits text on Python 3.
    payload = json.dumps(obj).encode('utf-8')

    with gzip.open(path, 'wb') as f:
        f.write(payload)

In [None]:
# Channel names processed by every per-channel loop in this notebook.
channels = ['release', 'beta', 'aurora', 'nightly']

# For each channel, whatever download_data.get_versions returns for it;
# the result is passed as `versions=` to the later download/analysis calls.
channel_to_versions = {name: download_data.get_versions(name) for name in channels}

In [None]:
# Left disabled with a placeholder value.
# NOTE(review): presumably needed when the data source requires an API token - confirm.
# download_data.set_token('INSERT_YOUR_TOKEN_HERE')

# channel -> result of download_data.get_top for that channel.
signatures = {}

for channel in channels:
    versions = channel_to_versions[channel]

    # Download 30 days of crashes first, then ask for the top 200 over 5 days.
    download_data.download_crashes(versions=versions, days=30)
    signatures[channel] = download_data.get_top(200, versions=versions, days=5)

In [None]:
def _is_overrepresented_addon(result, group_total, reference_total):
    """Tell whether `result` describes a single addon that is over-represented.

    Args:
        result: mapping with an 'item' mapping plus 'count_group' and
            'count_reference' values.
        group_total: total the group count is normalized by.
        reference_total: total the reference count is normalized by.

    Returns:
        True when `result['item']` has exactly one key, that key contains
        'Addon', and count_group / group_total is strictly greater than
        count_reference / reference_total. False otherwise.
    """
    item = result['item']
    if len(item) != 1:
        return False

    if 'Addon' not in next(iter(item)):
        return False

    # The ratios are only computed once the cheap checks pass, so results that
    # are not single-addon items never trigger a division.
    group_share = float(result['count_group']) / group_total
    reference_share = float(result['count_reference']) / reference_total
    return group_share > reference_share


def _signature_filename(signature):
    """Return '<sha1 hex digest of signature>.json.gz' for `signature`."""
    # hashlib needs bytes: encode text signatures as UTF-8 so that non-ASCII
    # signatures hash instead of raising. Byte strings are hashed unchanged,
    # so ASCII signatures keep the file name they had before.
    if not isinstance(signature, bytes):
        signature = signature.encode('utf-8')
    return hashlib.sha1(signature).hexdigest() + '.json.gz'


# Start from an empty output tree. The root has to be recreated explicitly:
# the per-channel directories below are created inside it.
rmdir('output')
mkdir('output')

# channel -> reference total returned by find_deviations.
totals = {}
# channel -> entries for signatures with at least one over-represented addon.
addon_related_signatures = defaultdict(list)

for channel in channels:
    print(channel)

    channel_dir = os.path.join('output', channel)
    mkdir(channel_dir)

    versions = channel_to_versions[channel]

    # NOTE(review): `sc` is not defined anywhere in this notebook; presumably
    # it is the SparkContext injected by the kernel - confirm.
    recent_crashes = crash_deviations.get_crashes(sc, versions=versions, days=5)
    telemetry_crashes = crash_deviations.get_telemetry_crashes(sc, versions=versions, days=5)
    results, total_reference, total_groups = crash_deviations.find_deviations(
        sc,
        recent_crashes,
        signatures=signatures[channel],
        telemetry_dataset=telemetry_crashes,
    )

    totals[channel] = total_reference

    # The top words are computed over a 30-day window, unlike the 5-day
    # window used for the deviations above.
    monthly_crashes = crash_deviations.get_crashes(sc, versions=versions, days=30)
    top_words = comments.get_top_words(monthly_crashes, signatures[channel])

    for signature in signatures[channel]:
        # Signatures without deviation results produce no output file.
        if signature not in results:
            continue

        group_total = total_groups[signature]

        addons = [
            result for result in results[signature]
            if _is_overrepresented_addon(result, group_total, total_reference)
        ]

        if len(addons) > 0:
            addon_related_signatures[channel].append({
                'signature': signature,
                'addons': addons,
                'total': group_total,
            })

        res = {
            'total': group_total,
            'results': results[signature],
        }

        if signature in top_words:
            res['top_words'] = top_words[signature]

        write_json(os.path.join(channel_dir, _signature_filename(signature)), res)

In [None]:
# Write the per-channel reference totals collected in `totals`.
write_json('output/all.json.gz', totals)
# Write, per channel, the signatures for which at least one addon result was
# kept by the addon filter (a channel with none has no key in this mapping).
write_json('output/addon_related_signatures.json.gz', addon_related_signatures)

In [None]:
# Print the current UTC time again; together with the timestamp printed near
# the top of the notebook this shows how long the run took.
from datetime import datetime
print(datetime.utcnow())