Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4887 from willkg/1542394-updatesig
bug 1542394: rewrite updatesignatures as Django command
- Loading branch information
Showing
8 changed files
with
214 additions
and
260 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
165 changes: 165 additions & 0 deletions
165
webapp-django/crashstats/crashstats/management/commands/updatesignatures.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
||
""" | ||
Maintain Signature data using crash data in Elasticsearch. | ||
""" | ||
|
||
import datetime | ||
|
||
from django.core.management.base import BaseCommand, CommandError | ||
from django.utils import timezone | ||
from django.utils.dateparse import parse_datetime | ||
|
||
from crashstats.crashstats.models import Signature | ||
from crashstats.supersearch.models import SuperSearch | ||
from socorro.external.es.super_search_fields import SuperSearchFieldsData | ||
from socorro.lib.datetimeutil import string_to_datetime | ||
|
||
|
||
# Maximum number of results requested from Super Search in a single query. | ||
# Command.handle() uses it as the page size ('_results_number') when paging | ||
# through all crashes in the time window. | ||
MAX_PAGE = 1000 | ||
|
||
|
||
class Command(BaseCommand):
    """Maintain ``Signature`` rows from crash data returned by Super Search.

    For every crash processed inside a time window, the command collects the
    smallest build id and smallest date seen per signature and then inserts
    or updates the matching ``Signature`` row.
    """

    help = 'Updates the signatures table using crash data from Elasticsearch'

    def add_arguments(self, parser):
        """Register the ``--last-success``, ``--run-time`` and ``--dry-run`` options."""
        parser.add_argument(
            '--last-success', default='',
            help=(
                'The start of the window to look at in YYYY-mm-ddTHH:MM format in UTC. '
                'Defaults to run-time value minus 90 minutes.'
            )
        )
        parser.add_argument(
            '--run-time', default='',
            help=(
                'The end of the window to look at in YYYY-mm-ddTHH:MM format in UTC. '
                'Defaults to now.'
            )
        )
        parser.add_argument(
            '--dry-run', action='store_true',
            help='Whether or not to do a dry run.'
        )

    def update_crashstats_signature(self, signature, report_date, report_build):
        """Insert the signature, or lower its recorded first build/date.

        :arg signature: the crash signature to insert or update
        :arg report_date: date of the crash; converted with
            ``string_to_datetime`` before use
        :arg report_build: build id of the crash; converted with ``int``

        An existing row only ever moves towards smaller values: ``first_build``
        and ``first_date`` become the minimum of the stored and reported value.
        """
        report_build = int(report_build)
        report_date = string_to_datetime(report_date)

        try:
            sig = Signature.objects.get(signature=signature)
        except Signature.DoesNotExist:
            # create() already persists the new row, so there is nothing left
            # to save afterwards.
            Signature.objects.create(
                signature=signature,
                first_build=report_build,
                first_date=report_date
            )
            return

        sig.first_build = min(report_build, sig.first_build)
        sig.first_date = min(report_date, sig.first_date)
        sig.save()

    def _parse_window_option(self, value, option_name, now):
        """Parse a window boundary given on the command line.

        :arg value: the raw option string
        :arg option_name: option name used in the error message
        :arg now: the result of ``timezone.now()``; when it is aware, a naive
            parsed value is given the same tzinfo so both ends of the window
            can be compared (the options are documented as being in UTC)

        :returns: the parsed ``datetime``

        :raises CommandError: if the value cannot be parsed as a datetime
        """
        try:
            parsed = parse_datetime(value)
        except ValueError:
            # Well formatted, but not a real date/time (e.g. month 13)
            parsed = None

        if parsed is None:
            raise CommandError(
                'Invalid value %r for %s; expected YYYY-mm-ddTHH:MM format.' %
                (value, option_name)
            )

        if timezone.is_naive(parsed) and timezone.is_aware(now):
            parsed = parsed.replace(tzinfo=now.tzinfo)
        return parsed

    def handle(self, **options):
        """Collect per-signature minimums for the window and store them.

        The window ends at ``--run-time`` (default: now) and starts 10 minutes
        before ``--last-success`` (default: 90 minutes before the end). With
        ``--dry-run`` the rows are only printed, not written.

        :raises CommandError: if an option cannot be parsed or the start of
            the window is not before its end
        """
        now = timezone.now()

        run_time = options.get('run_time')
        if run_time:
            end_datetime = self._parse_window_option(run_time, '--run-time', now)
        else:
            end_datetime = now

        last_success = options.get('last_success')
        if last_success:
            start_datetime = self._parse_window_option(last_success, '--last-success', now)
            # When run via cronrun, start_datetime is based on the last success
            # and we want to increase the window by 10 minutes to get some
            # overlap with the previous run
            start_datetime = start_datetime - datetime.timedelta(minutes=10)
        else:
            # Default to end_datetime - 90 minutes
            start_datetime = end_datetime - datetime.timedelta(minutes=90)

        # Truncate seconds and microseconds
        start_datetime = start_datetime.replace(second=0, microsecond=0)
        end_datetime = end_datetime.replace(second=0, microsecond=0)

        if not end_datetime > start_datetime:
            raise CommandError('start time must be before end time.')

        # Do a super search and get the signature, buildid, and date processed for
        # every crash in the range
        all_fields = SuperSearchFieldsData().get()
        api = SuperSearch()
        self.stdout.write('Looking at %s to %s' % (start_datetime, end_datetime))

        params = {
            'date': [
                '>={}'.format(start_datetime.isoformat()),
                '<{}'.format(end_datetime.isoformat()),
            ],
            '_columns': ['signature', 'build_id', 'date'],
            '_facets_size': 0,
            '_fields': all_fields,

            # Set up first page
            '_results_offset': 0,
            '_results_number': MAX_PAGE,
        }

        # Maps signature -> {'signature', 'build_id', 'date'} holding the
        # smallest build id and date seen for that signature
        results = {}
        # Number of hits consumed so far, including the ones skipped below
        crashids_count = 0

        while True:
            resp = api.get(**params)
            hits = resp['hits']
            total = resp['total']

            for hit in hits:
                crashids_count += 1

                if not hit['build_id']:
                    # Not all crashes have a build id, so skip the ones that don't.
                    continue

                data = results.get(hit['signature'])
                if data is None:
                    results[hit['signature']] = {
                        'signature': hit['signature'],
                        'build_id': hit['build_id'],
                        'date': hit['date'],
                    }
                else:
                    data['build_id'] = min(data['build_id'], hit['build_id'])
                    data['date'] = min(data['date'], hit['date'])

            # If there are no more crash ids to get, we are done
            if not hits or crashids_count >= total:
                break

            # Get the next page. The offset is the number of hits consumed so
            # far, so a page that came back shorter than requested does not
            # cause results to be skipped.
            params['_results_offset'] = crashids_count
            params['_results_number'] = min(
                # MAX_PAGE is the maximum we can request
                MAX_PAGE,

                # The number of results Super Search has not returned to us so far
                total - crashids_count
            )

        # Save signature data to the db
        for item in results.values():
            if options['dry_run']:
                self.stdout.write(
                    'Inserting/updating signature (%s, %s, %s)' %
                    (item['signature'], item['date'], item['build_id'])
                )
            else:
                self.update_crashstats_signature(
                    signature=item['signature'],
                    report_date=item['date'],
                    report_build=item['build_id'],
                )

        self.stdout.write('Inserted/updated %d signatures.' % len(results))
Oops, something went wrong.