In [None]:
# Fetch the crashcorrelations sources into the current working directory.
# NOTE(review): the `crashcorrelations` imports later in this notebook presumably
# resolve to this checkout; re-running the cell when the directory already exists
# will make `git clone` report an error — confirm that is acceptable.
!git clone https://github.com/marco-c/crashcorrelations

In [None]:
import os
import errno
import json
import gzip
import shutil
import hashlib

from crashcorrelations import download_data, crash_deviations

In [None]:
def mkdir(path):
    """Create the single directory `path`, tolerating one that already exists.

    Any other OSError (for example a missing parent directory) is re-raised.
    """
    try:
        os.mkdir(path)
    except OSError as err:
        # An existing entry at `path` is the only failure that is ignored.
        if err.errno == errno.EEXIST:
            return
        raise err

def rmdir(path):
    """Recursively delete the directory tree at `path`.

    A path that does not exist is silently accepted; every other OSError
    is re-raised to the caller.
    """
    try:
        shutil.rmtree(path)
    except OSError as err:
        # Nothing to remove: treat as success.
        if err.errno == errno.ENOENT:
            return
        raise err

def write_json(path, obj):
    """Serialize `obj` to JSON and store it gzip-compressed at `path`.

    The gzip file is opened in binary mode, so the JSON text is encoded to
    UTF-8 bytes before being written; handing the binary handle directly to
    ``json.dump`` raises TypeError on Python 3. Serialization happens before
    the file is opened so a non-serializable `obj` does not leave a truncated
    file behind.

    Raises whatever ``json.dumps`` raises for non-serializable input and
    IOError/OSError if `path` cannot be written.
    """
    payload = json.dumps(obj).encode('utf-8')
    with gzip.open(path, 'wb') as f:
        f.write(payload)

In [None]:
# Release channels processed by every later cell, in this order.
channels = ['release', 'beta', 'aurora', 'nightly']

# Shared accumulators filled in by later cells.
results = {}
channel_to_versions = {}

# Resolve the version list of each channel once so later cells can reuse it.
for channel in channels:
    channel_to_versions.update({channel: download_data.get_versions(channel)})

In [None]:
# Uncomment and fill in to supply a token before downloading:
# download_data.set_token('INSERT_YOUR_TOKEN_HERE')

# Fetch the last 5 days of crashes for the versions of every channel.
for channel in channels:
    download_data.download_crashes(days=5, versions=channel_to_versions[channel])

In [None]:
# Rebuild the staging directory from scratch so files from an earlier run are
# never mixed with the ones produced below.
rmdir('output_tmp')
mkdir('output_tmp')

# Both accumulators are reset here so the cell is idempotent. If only `totals`
# were reset, a re-run would find every channel already present in `results`,
# skip the initialisation branch below and leave `totals` empty.
results = {}
totals = {}

for channel in channels:
    channel_dir = os.path.join('output_tmp', channel)
    mkdir(channel_dir)

    df_a = crash_deviations.get_crashes(sc, versions=channel_to_versions[channel], days=5)

    # NOTE(review): top signatures are taken over 11 days while the crash data
    # covers 5 days — confirm the different windows are intentional.
    signatures = download_data.get_top(200, versions=channel_to_versions[channel], days=11)
    for i, signature in enumerate(signatures, 1):
        print(channel + '-' + str(i) + ' ' + signature)

        res, total_a, total_b = crash_deviations.find_deviations(sc, df_a, signature=signature)

        # The channel-wide total is only known after the first signature has
        # been analysed, so the per-channel entries are created lazily.
        if channel not in results:
            results[channel] = {
                'total': total_a,
                'signatures': {},
            }
            totals[channel] = total_a

        signature_result = {
            'total': total_b,
            'results': res,
        }
        results[channel]['signatures'][signature] = signature_result

        # hashlib needs bytes: encode text signatures explicitly so non-ASCII
        # signatures work and the code runs on both Python 2 and Python 3.
        if isinstance(signature, bytes):
            signature_bytes = signature
        else:
            signature_bytes = signature.encode('utf-8')
        file_name = hashlib.sha1(signature_bytes).hexdigest() + '.json.gz'

        write_json(os.path.join(channel_dir, file_name), signature_result)

# Nothing earlier in the notebook creates the publish directory, so make sure
# it exists before writing into it (a no-op when it is already there).
mkdir('output')

write_json('output/all.json.gz', totals)

# TODO: Remove once we have updated stab-crashes to use the new results format.
write_json('output/top_results.json.gz', results)

# Replace each channel's published directory with the freshly staged one.
for channel in channels:
    rmdir('output/' + channel)
    shutil.copytree('output_tmp/' + channel, 'output/' + channel)