From f5a09b8fcef3d0f7c5516050de22e922c1d41e71 Mon Sep 17 00:00:00 2001 From: lonnen Date: Wed, 8 Jul 2015 18:49:49 -0400 Subject: [PATCH] Revert "Fixes bug 1177605 - Added sub aggregations for signature facets. r=peterbe" This reverts commit 1c06eab09d48a53b5ecc38a539c87dfd04deeeac. --- socorro/external/es/supersearch.py | 135 +++++----- socorro/lib/search_common.py | 2 - .../unittest/external/es/test_supersearch.py | 231 +----------------- .../crashstats/supersearch/models.py | 2 - 4 files changed, 58 insertions(+), 312 deletions(-) diff --git a/socorro/external/es/supersearch.py b/socorro/external/es/supersearch.py index 4c3bf7edc9..a5fdac3c17 100644 --- a/socorro/external/es/supersearch.py +++ b/socorro/external/es/supersearch.py @@ -4,7 +4,7 @@ import datetime import re -from elasticsearch_dsl import Search, A, F, Q +from elasticsearch_dsl import Search, F, Q from elasticsearch.exceptions import NotFoundError from socorro.external import ( @@ -120,34 +120,6 @@ def format_fields(self, hit): return hit - def get_field_name(self, value, full=True): - try: - field_ = self.all_fields[value] - except KeyError: - raise BadArgumentError( - value, - msg='Unknown field "%s"' % value - ) - - if not field_['is_returned']: - # Returning this field is not allowed. - raise BadArgumentError( - value, - msg='Field "%s" is not allowed to be returned' % value - ) - - field_name = '%s.%s' % ( - field_['namespace'], - field_['in_database_name'] - ) - - if full and field_['has_full_version']: - # If the param has a full version, that means what matters - # is the full string, and not its individual terms. - field_name += '.full' - - return field_name - def format_aggregations(self, aggregations): """Return aggregations in a form that looks like facets. @@ -156,27 +128,11 @@ def format_aggregations(self, aggregations): """ aggs = aggregations.to_dict() for agg in aggs: - for i, bucket in enumerate(aggs[agg]['buckets']): - sub_aggs = {} - for key in bucket: - # Go through all sub aggregations. Those are contained in - # all the keys that are not 'key' or 'count'. - if key in ('key', 'doc_count'): - continue - - sub_aggs[key] = [ - {'term': x['key'], 'count': x['doc_count']} - for x in bucket[key]['buckets'] - ] - + for i, row in enumerate(aggs[agg]['buckets']): aggs[agg]['buckets'][i] = { - 'term': bucket['key'], - 'count': bucket['doc_count'], + 'term': row['key'], + 'count': row['doc_count'], } - - if sub_aggs: - aggs[agg]['buckets'][i]['facets'] = sub_aggs - aggs[agg] = aggs[agg]['buckets'] return aggs @@ -212,8 +168,6 @@ def get(self, **kwargs): results_from = param.value[0] elif param.name == '_results_number': results_number = param.value[0] - elif param.name == '_facets_size': - facets_size = param.value[0] # Don't use meta parameters in the query. continue @@ -349,7 +303,27 @@ def get(self, **kwargs): if not value: continue - field_name = self.get_field_name(value, full=False) + try: + field_ = self.all_fields[value] + except KeyError: + # That is not a known field, we can't restrict on it. + raise BadArgumentError( + value, + msg='Unknown field "%s", cannot return it' % value + ) + + if not field_['is_returned']: + # Returning this field is not allowed. + raise BadArgumentError( + value, + msg='Field "%s" is not allowed to be returned' % value + ) + + field_name = '%s.%s' % ( + field_['namespace'], + field_['in_database_name'] + ) + fields.append(field_name) search = search.fields(fields) @@ -371,7 +345,19 @@ def get(self, **kwargs): desc = True value = value[1:] - field_name = self.get_field_name(value, full=False) + try: + field_ = self.all_fields[value] + except KeyError: + # That is not a known field, we can't sort on it. + raise BadArgumentError( + value, + msg='Unknown field "%s", cannot sort on it' % value + ) + + field_name = '%s.%s' % ( + field_['namespace'], + field_['in_database_name'] + ) if desc: # The underlying library understands that '-' means @@ -389,39 +375,32 @@ def get(self, **kwargs): # Create facets. for param in params['_facets']: for value in param.value: - if not value: - continue + try: + field_ = self.all_fields[value] + except KeyError: + # That is not a known field, we can't facet on it. + raise BadArgumentError( + value, + msg='Unknown field "%s", cannot facet on it' % value + ) + + field_name = '%s.%s' % ( + field_['namespace'], + field_['in_database_name'] + ) + + if field_['has_full_version']: + # If the param has a full version, that means what matters + # is the full string, and not its individual terms. + field_name += '.full' - field_name = self.get_field_name(value) search.aggs.bucket( value, 'terms', field=field_name, - size=facets_size, + size=self.config.facets_max_number ) - # Create signature aggregations. - if params['_aggs.signature']: - sig_bucket = A( - 'terms', - field=self.get_field_name('signature'), - size=facets_size, - ) - for param in params['_aggs.signature']: - for value in param.value: - if not value: - continue - - field_name = self.get_field_name(value) - sig_bucket.bucket( - value, - 'terms', - field=field_name, - size=facets_size, - ) - - search.aggs.bucket('signature', sig_bucket) - # Query and compute results. hits = [] diff --git a/socorro/lib/search_common.py b/socorro/lib/search_common.py index 85bb63d523..d8491ce7f8 100644 --- a/socorro/lib/search_common.py +++ b/socorro/lib/search_common.py @@ -71,12 +71,10 @@ def __init__(self, name, default=None, data_type='enum', mandatory=False): class SearchBase(object): meta_filters = [ - SearchFilter('_aggs.signature', default=''), SearchFilter('_columns', default=[ 'uuid', 'date', 'signature', 'product', 'version' ]), SearchFilter('_facets', default='signature'), - SearchFilter('_facets_size', data_type='int', default=50), SearchFilter('_results_number', data_type='int', default=100), SearchFilter('_results_offset', data_type='int', default=0), SearchFilter('_return_query', data_type='bool', default=False), diff --git a/socorro/unittest/external/es/test_supersearch.py b/socorro/unittest/external/es/test_supersearch.py index 521643e1bd..7163ee38e9 100644 --- a/socorro/unittest/external/es/test_supersearch.py +++ b/socorro/unittest/external/es/test_supersearch.py @@ -626,26 +626,7 @@ def test_get_with_facets(self): res = self.api.get(**kwargs) ok_('version' in res['facets']) - eq_(len(res['facets']['version']), 50) # 50 is the default value - - # Test with a different number of facets results. - kwargs = { - '_facets': ['version'], - '_facets_size': 20 - } - res = self.api.get(**kwargs) - - ok_('version' in res['facets']) - eq_(len(res['facets']['version']), 20) - - kwargs = { - '_facets': ['version'], - '_facets_size': 100 - } - res = self.api.get(**kwargs) - - ok_('version' in res['facets']) - eq_(len(res['facets']['version']), number_of_crashes) + eq_(len(res['facets']['version']), self.api.config.facets_max_number) # Test errors assert_raises( @@ -654,216 +635,6 @@ def test_get_with_facets(self): _facets=['unkownfield'] ) - @minimum_es_version('1.0') - def test_get_with_signature_aggregations(self): - self.index_crash({ - 'signature': 'js::break_your_browser', - 'product': 'WaterWolf', - 'os_name': 'Windows NT', - 'date_processed': self.now, - }) - self.index_crash({ - 'signature': 'js::break_your_browser', - 'product': 'WaterWolf', - 'os_name': 'Linux', - 'date_processed': self.now, - }) - self.index_crash({ - 'signature': 'js::break_your_browser', - 'product': 'NightTrain', - 'os_name': 'Linux', - 'date_processed': self.now, - }) - self.index_crash({ - 'signature': 'foo(bar)', - 'product': 'EarthRacoon', - 'os_name': 'Linux', - 'date_processed': self.now, - }) - - # Index a lot of distinct values to test the results limit. - number_of_crashes = 51 - processed_crash = { - 'version': '10.%s', - 'signature': 'crash_me_I_m_famous', - 'date_processed': self.now, - } - self.index_many_crashes( - number_of_crashes, - processed_crash, - loop_field='version', - ) - # Note: index_many_crashes does the index refreshing. - - # Test several facets - kwargs = { - '_aggs.signature': ['product', 'platform'], - 'signature': '!=crash_me_I_m_famous', - } - res = self.api.get(**kwargs) - - ok_('facets' in res) - ok_('signature' in res['facets']) - - expected_terms = [ - { - 'term': 'js::break_your_browser', - 'count': 3, - 'facets': { - 'product': [ - { - 'term': 'WaterWolf', - 'count': 2 - }, - { - 'term': 'NightTrain', - 'count': 1 - }, - ], - 'platform': [ - { - 'term': 'Linux', - 'count': 2 - }, - { - 'term': 'Windows NT', - 'count': 1 - }, - ] - } - }, - { - 'term': 'foo(bar)', - 'count': 1, - 'facets': { - 'product': [ - { - 'term': 'EarthRacoon', - 'count': 1 - } - ], - 'platform': [ - { - 'term': 'Linux', - 'count': 1 - } - ], - } - }, - ] - eq_(res['facets']['signature'], expected_terms) - - # Test one facet with filters - kwargs = { - '_aggs.signature': ['product'], - 'product': 'WaterWolf', - } - res = self.api.get(**kwargs) - - ok_('signature' in res['facets']) - expected_terms = [ - { - 'term': 'js::break_your_browser', - 'count': 2, - 'facets': { - 'product': [ - { - 'term': 'WaterWolf', - 'count': 2 - }, - ] - } - }, - ] - eq_(res['facets']['signature'], expected_terms) - - # Test one facet with a different filter - kwargs = { - '_aggs.signature': ['product'], - 'platform': 'linux', - } - res = self.api.get(**kwargs) - - ok_('signature' in res['facets']) - - expected_terms = [ - { - 'term': 'js::break_your_browser', - 'count': 2, - 'facets': { - 'product': [ - { - 'term': 'NightTrain', - 'count': 1 - }, - { - 'term': 'WaterWolf', - 'count': 1 - }, - ], - } - }, - { - 'term': 'foo(bar)', - 'count': 1, - 'facets': { - 'product': [ - { - 'term': 'EarthRacoon', - 'count': 1 - } - ], - } - }, - ] - eq_(res['facets']['signature'], expected_terms) - - # Test the number of results. - kwargs = { - '_aggs.signature': ['version'], - 'signature': '=crash_me_I_m_famous', - } - res = self.api.get(**kwargs) - - ok_('signature' in res['facets']) - ok_('version' in res['facets']['signature'][0]['facets']) - - version_sub_facet = res['facets']['signature'][0]['facets']['version'] - eq_(len(version_sub_facet), 50) # 50 is the default - - # Test with a different number of facets results. - kwargs = { - '_aggs.signature': ['version'], - '_facets_size': 20, - 'signature': '=crash_me_I_m_famous', - } - res = self.api.get(**kwargs) - - ok_('signature' in res['facets']) - ok_('version' in res['facets']['signature'][0]['facets']) - - version_sub_facet = res['facets']['signature'][0]['facets']['version'] - eq_(len(version_sub_facet), 20) - - kwargs = { - '_aggs.signature': ['version'], - '_facets_size': 100, - 'signature': '=crash_me_I_m_famous', - } - res = self.api.get(**kwargs) - - version_sub_facet = res['facets']['signature'][0]['facets']['version'] - eq_(len(version_sub_facet), number_of_crashes) - - # Test errors - args = {} - args['_aggs.signature'] = ['unkownfield'] - assert_raises( - BadArgumentError, - self.api.get, - **args - ) - @minimum_es_version('1.0') def test_get_with_columns(self): self.index_crash({ diff --git a/webapp-django/crashstats/supersearch/models.py b/webapp-django/crashstats/supersearch/models.py index 78e61c13b4..71f8bb8129 100644 --- a/webapp-django/crashstats/supersearch/models.py +++ b/webapp-django/crashstats/supersearch/models.py @@ -8,10 +8,8 @@ SUPERSEARCH_META_PARAMS = ( - ('_aggs.signature', list), ('_columns', list), ('_facets', list), - ('_facets_size', int), ('_results_offset', int), ('_results_number', int), '_return_query',