Skip to content

Commit

Permalink
Merge pull request #4887 from willkg/1542394-updatesig
Browse files Browse the repository at this point in the history
bug 1542394: rewrite updatesignatures as Django command
  • Loading branch information
willkg committed Apr 10, 2019
2 parents e93b504 + 57bf0f5 commit 3b46bc8
Show file tree
Hide file tree
Showing 8 changed files with 214 additions and 260 deletions.
3 changes: 0 additions & 3 deletions socorro/cron/crontabber_app.py
Expand Up @@ -1088,9 +1088,6 @@ def sentrytest(self):
# Product/version maintenance
'socorro.cron.jobs.archivescraper.ArchiveScraperCronApp|1h',

# Crash data analysis
'socorro.cron.jobs.update_signatures.UpdateSignaturesCronApp|1h',

# Dependency checking
'socorro.cron.jobs.monitoring.DependencySecurityCheckCronApp|1d|05:00',

Expand Down
179 changes: 0 additions & 179 deletions socorro/cron/jobs/update_signatures.py

This file was deleted.

14 changes: 1 addition & 13 deletions socorro/unittest/cron/jobs/test_verify_processed.py
Expand Up @@ -11,7 +11,7 @@

from socorro.cron.crontabber_app import CronTabberApp
from socorro.lib.ooid import create_new_ooid
from socorro.unittest.cron.crontabber_tests_base import get_config_manager, load_structure
from socorro.unittest.cron.crontabber_tests_base import get_config_manager


TODAY = datetime.datetime.now().strftime('%Y%m%d')
Expand Down Expand Up @@ -69,18 +69,6 @@ def fetch_crashids(self, conn):
for result in results
]

def run_job_and_assert_success(self, db_conn):
# Run crontabber
config_manager = self._setup_config_manager()
with config_manager.context() as config:
crontabberapp = CronTabberApp(config)
crontabberapp.run_one('update-signatures')

# Assert the job ran correctly
crontabber_info = load_structure(db_conn)
assert crontabber_info['update-signatures']['last_error'] == {}
assert crontabber_info['update-signatures']['last_success']

@contextlib.contextmanager
def get_app(self):
config_manager = self._setup_config_manager()
Expand Down
1 change: 0 additions & 1 deletion socorro/unittest/cron/test_crontabber_app.py
Expand Up @@ -10,7 +10,6 @@ class TestJobsConverter(object):
'ArchiveScraperCronApp',
'DependencySecurityCheckCronApp',
'ElasticsearchCleanupCronApp',
'UpdateSignaturesCronApp',
'VerifyProcessedCronApp',
]

Expand Down
@@ -0,0 +1,165 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Maintain Signature data using crash data in Elasticsearch.
"""

import datetime

from django.core.management.base import BaseCommand, CommandError
from django.utils import timezone
from django.utils.dateparse import parse_datetime

from crashstats.crashstats.models import Signature
from crashstats.supersearch.models import SuperSearch
from socorro.external.es.super_search_fields import SuperSearchFieldsData
from socorro.lib.datetimeutil import string_to_datetime


# Maximum number of results returned for a super search query
MAX_PAGE = 1000


class Command(BaseCommand):
help = 'Updates the signatures table using crash data from Elasticsearch'

def add_arguments(self, parser):
parser.add_argument(
'--last-success', default='',
help=(
'The start of the window to look at in YYYY-mm-ddTHH:MM format in UTC. '
'Defaults to run-time value minus 90 minutes.'
)
)
parser.add_argument(
'--run-time', default='',
help=(
'The end of the window to look at in YYYY-mm-ddTHH:MM format in UTC. '
'Defaults to now.'
)
)
parser.add_argument(
'--dry-run', action='store_true',
help='Whether or not to do a dry run.'
)

def update_crashstats_signature(self, signature, report_date, report_build):
report_build = int(report_build)
report_date = string_to_datetime(report_date)
try:
sig = Signature.objects.get(signature=signature)
sig.first_build = min(report_build, sig.first_build)
sig.first_date = min(report_date, sig.first_date)
except Signature.DoesNotExist:
sig = Signature.objects.create(
signature=signature,
first_build=report_build,
first_date=report_date
)
sig.save()

def handle(self, **options):
start_datetime = options.get('last_success')
end_datetime = options.get('run_time')

if end_datetime:
end_datetime = parse_datetime(end_datetime)
else:
end_datetime = timezone.now()

if start_datetime:
start_datetime = parse_datetime(start_datetime)
# When run via cronrun, start_datetime is based on the last success
# and we want to increase the window by 10 minutes to get some
# overlap with the previous run
start_datetime = start_datetime - datetime.timedelta(minutes=10)
else:
# Default to end_datetime - 90 minutes
start_datetime = end_datetime - datetime.timedelta(minutes=90)

# Truncate seconds and microseconds
start_datetime = start_datetime.replace(second=0, microsecond=0)
end_datetime = end_datetime.replace(second=0, microsecond=0)

if not end_datetime > start_datetime:
raise CommandError('start time must be before end time.')

# Do a super search and get the signature, buildid, and date processed for
# every crash in the range
all_fields = SuperSearchFieldsData().get()
api = SuperSearch()
self.stdout.write('Looking at %s to %s' % (start_datetime, end_datetime))

params = {
'date': [
'>={}'.format(start_datetime.isoformat()),
'<{}'.format(end_datetime.isoformat()),
],
'_columns': ['signature', 'build_id', 'date'],
'_facets_size': 0,
'_fields': all_fields,

# Set up first page
'_results_offset': 0,
'_results_number': MAX_PAGE,
}

results = {}
crashids_count = 0

while True:
resp = api.get(**params)
hits = resp['hits']
for hit in hits:
crashids_count += 1

if not hit['build_id']:
# Not all crashes have a build id, so skip the ones that don't.
continue

if hit['signature'] in results:
data = results[hit['signature']]
data['build_id'] = min(data['build_id'], hit['build_id'])
data['date'] = min(data['date'], hit['date'])
else:
data = {
'signature': hit['signature'],
'build_id': hit['build_id'],
'date': hit['date']
}
results[hit['signature']] = data

# If there are no more crash ids to get, we return
total = resp['total']
if not hits or crashids_count >= total:
break

# Get the next page, but only as many results as we need
params['_results_offset'] += MAX_PAGE
params['_results_number'] = min(
# MAX_PAGE is the maximum we can request
MAX_PAGE,

# The number of results Super Search can return to us that is hasn't returned so far
total - crashids_count
)

signature_data = results.values()

# Save signature data to the db
for item in signature_data:
if options['dry_run']:
self.stdout.write(
'Inserting/updating signature (%s, %s, %s)' %
(item['signature'], item['date'], item['build_id'])
)
else:
self.update_crashstats_signature(
signature=item['signature'],
report_date=item['date'],
report_build=item['build_id'],
)

self.stdout.write('Inserted/updated %d signatures.' % len(signature_data))

0 comments on commit 3b46bc8

Please sign in to comment.