Skip to content

Commit

Permalink
Fixes bug 1143722 - Added sorting capabilities to Super Search middleware service.
Browse files Browse the repository at this point in the history
  • Loading branch information
adngdb committed Mar 19, 2015
1 parent eadcca5 commit a87382c
Show file tree
Hide file tree
Showing 6 changed files with 195 additions and 12 deletions.
8 changes: 4 additions & 4 deletions requirements.txt
Expand Up @@ -52,9 +52,9 @@ psycopg2==2.4.5
# sha256: U6gisBsElN-WWMBOhyFhgOXsvoWxeGc92Eix-AnVwQA
# sha256: u5PYXKflDWjkGXhSu226YJi7dyUA-C9dsv9KgHEVBck
elasticsearch==1.2
# sha256: atb_lGMjAnslXevLZuPgNeVF1fhCe7Vji2LjskbU2Io
# sha256: jEZclknV3j4XtGOFcipJBRERC2A8o8E62cdxMqRnxCw
elasticsearch-dsl==0.0.2
# sha256: vjtAX2ezbybvBNNKkSJSvps-Q0bxtfXQK_0jzH3ZQeg
# sha256: LUb7Hnm9I9XqRjMDxvW0U1bj3LyHzeau5XioOynYK8A
elasticsearch-dsl==0.0.3
# sha256: 2upIE6eJSfnxlBvixW1aviyZqFgjUMrwWYBPtleIrKU
pyelasticsearch==0.6.1
# sha256: QQvqlt8kefozmGxAw-76nB57qVp_cHEcMVhgu-LWbH8
Expand Down Expand Up @@ -124,7 +124,7 @@ pyflakes==0.8.1
pyhs2==0.6.0
# sha256: TzJCJuHuhaxBUgV5up3nAylAEmAuU2IX_Q50VuFH2IY
sasl==0.1.3
# this is a temporary fork until https://github.com/seb-m/pyinotify/pull/92
# this is a temporary fork until https://github.com/seb-m/pyinotify/pull/92
# is merged and a new pyinotify release is available
# sha256: 6nj05rvk7z_4OHM6mUIsl7GjE2plb4N3PqnJWaSRRlw
https://github.com/rhelmer/pyinotify/archive/9ff352f.zip#egg=pyinotify
Expand Down
40 changes: 40 additions & 0 deletions socorro/external/elasticsearch/supersearch.py
Expand Up @@ -195,6 +195,46 @@ def get(self, **kwargs):

search = search.filter(filters)

# Sorting.
sort_fields = []
for param in params['_sort']:
for value in param.value:
if not value:
continue

# Values starting with a '-' are sorted in descending order.
# In order to retrieve the database name of the field, we
# must first remove the '-' part and add it back later.
# Example: given ['product', '-version'], the results will be
# sorted by ascending product and descending version.
desc = False
if value.startswith('-'):
desc = True
value = value[1:]

try:
field_ = self.all_fields[value]
except KeyError:
# That is not a known field, we can't sort on it.
raise BadArgumentError(
value,
msg='Unknown field "%s", cannot sort on it' % value
)

field_name = '%s.%s' % (
field_['namespace'],
field_['in_database_name']
)

if desc:
# The underlying library understands that '-' means
# sorting in descending order.
field_name = '-' + field_name

sort_fields.append(field_name)

search = search.order_by(*sort_fields)

# Pagination.
results_to = results_from + results_number
search = search[results_from:results_to]
Expand Down
42 changes: 41 additions & 1 deletion socorro/external/es/supersearch.py
Expand Up @@ -296,6 +296,46 @@ def get(self, **kwargs):

search = search.filter(filters)

# Sorting.
sort_fields = []
for param in params['_sort']:
for value in param.value:
if not value:
continue

# Values starting with a '-' are sorted in descending order.
# In order to retrieve the database name of the field, we
# must first remove the '-' part and add it back later.
# Example: given ['product', '-version'], the results will be
# sorted by ascending product and descending version.
desc = False
if value.startswith('-'):
desc = True
value = value[1:]

try:
field_ = self.all_fields[value]
except KeyError:
# That is not a known field, we can't sort on it.
raise BadArgumentError(
value,
msg='Unknown field "%s", cannot sort on it' % value
)

field_name = '%s.%s' % (
field_['namespace'],
field_['in_database_name']
)

if desc:
# The underlying library understands that '-' means
# sorting in descending order.
field_name = '-' + field_name

sort_fields.append(field_name)

search = search.sort(*sort_fields)

# Pagination.
results_to = results_from + results_number
search = search[results_from:results_to]
Expand Down Expand Up @@ -336,7 +376,7 @@ def get(self, **kwargs):
for x in self.all_fields.values()
if x['is_returned']
]
search = search.fields(*fields)
search = search.fields(fields)

if params['_return_query'][0].value[0]:
# Return only the JSON query that would be sent to elasticsearch.
Expand Down
5 changes: 3 additions & 2 deletions socorro/lib/search_common.py
Expand Up @@ -75,6 +75,7 @@ class SearchBase(object):
SearchFilter('_results_number', data_type='int', default=100),
SearchFilter('_results_offset', data_type='int', default=0),
SearchFilter('_return_query', data_type='bool', default=False),
SearchFilter('_sort', default=''),
]

def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -157,7 +158,7 @@ def get_parameters(self, **kwargs):
(param.name, value, param.data_type)
)

if not param.name in parameters:
if param.name not in parameters:
parameters[param.name] = []

if not operator:
Expand Down Expand Up @@ -199,7 +200,7 @@ def fix_date_parameter(self, parameters):
days=self.config.search_maximum_date_range
)

if not 'date' in parameters:
if 'date' not in parameters:
now = datetimeutil.utc_now()
lastweek = now - default_date_range

Expand Down
52 changes: 47 additions & 5 deletions socorro/unittest/external/elasticsearch/test_supersearch.py
Expand Up @@ -24,11 +24,11 @@
)

# Remove debugging noise during development
import logging
logging.getLogger('pyelasticsearch').setLevel(logging.ERROR)
logging.getLogger('elasticutils').setLevel(logging.ERROR)
logging.getLogger('requests.packages.urllib3.connectionpool')\
.setLevel(logging.ERROR)
# import logging
# logging.getLogger('pyelasticsearch').setLevel(logging.ERROR)
# logging.getLogger('elasticutils').setLevel(logging.ERROR)
# logging.getLogger('requests.packages.urllib3.connectionpool')\
# .setLevel(logging.ERROR)


SUPERSEARCH_FIELDS = {
Expand Down Expand Up @@ -1533,6 +1533,48 @@ def test_get_with_pagination(self):
eq_(res['total'], 21)
eq_(len(res['hits']), 0)

def test_get_with_sorting(self):
    """Test a search with sort returns expected results.

    Covers four cases of the `_sort` parameter: ascending order on one
    field, descending order (field name prefixed with '-'), ordering on
    several fields at once, and rejection of an unknown field name with
    a BadArgumentError.

    Ordering is verified by comparing each hit with the one before it,
    so the checks do not depend on any sentinel "smallest" or "largest"
    value being outside the range of the indexed data.
    """
    # Ascending order.
    res = self.api.get(_sort='product')
    ok_(res['total'] > 0)

    products = [hit['product'] for hit in res['hits']]
    for previous, current in zip(products, products[1:]):
        ok_(previous <= current, (previous, current))

    # Descending order.
    res = self.api.get(_sort='-product')
    ok_(res['total'] > 0)

    products = [hit['product'] for hit in res['hits']]
    for previous, current in zip(products, products[1:]):
        ok_(previous >= current, (previous, current))

    # Several fields.
    res = self.api.get(_sort=['product', 'email'])
    ok_(res['total'] > 0)

    # Tuples compare element by element: products must be ascending,
    # and emails must be ascending among hits sharing the same product.
    keys = [(hit['product'], hit['email']) for hit in res['hits']]
    for previous, current in zip(keys, keys[1:]):
        ok_(previous <= current, (previous, current))

    # Invalid field.
    assert_raises(
        BadArgumentError,
        self.api.get,
        _sort='something',
    )  # `something` is invalid

@maximum_es_version('0.90')
def test_get_with_not_operator(self):
"""Test a search with a few NOT operators. """
Expand Down
60 changes: 60 additions & 0 deletions socorro/unittest/external/es/test_supersearch.py
Expand Up @@ -454,6 +454,66 @@ def test_get_with_pagination(self):
eq_(res['total'], number_of_crashes)
eq_(len(res['hits']), 0)

@minimum_es_version('1.0')
def test_get_with_sorting(self):
    """Test a search with sort returns expected results.

    Indexes three crashes spanning two products and two OS names, then
    covers four cases of the `_sort` parameter: ascending order on one
    field, descending order (field name prefixed with '-'), ordering on
    several fields at once, and rejection of an unknown field name with
    a BadArgumentError.

    Ordering is verified by comparing each hit with the one before it,
    so the checks do not depend on any sentinel "smallest" or "largest"
    value being outside the range of the indexed data.
    """
    self.index_crash({
        'product': 'WaterWolf',
        'os_name': 'Windows NT',
        'date_processed': self.now,
    })
    self.index_crash({
        'product': 'WaterWolf',
        'os_name': 'Linux',
        'date_processed': self.now,
    })
    self.index_crash({
        'product': 'NightTrain',
        'os_name': 'Linux',
        'date_processed': self.now,
    })
    self.refresh_index()

    # Ascending order.
    res = self.api.get(_sort='product')
    ok_(res['total'] > 0)

    products = [hit['product'] for hit in res['hits']]
    for previous, current in zip(products, products[1:]):
        ok_(previous <= current, (previous, current))

    # Descending order.
    res = self.api.get(_sort='-product')
    ok_(res['total'] > 0)

    products = [hit['product'] for hit in res['hits']]
    for previous, current in zip(products, products[1:]):
        ok_(previous >= current, (previous, current))

    # Several fields.
    res = self.api.get(_sort=['product', 'platform'])
    ok_(res['total'] > 0)

    # Tuples compare element by element: products must be ascending,
    # and platforms must be ascending among hits sharing the same
    # product.
    keys = [(hit['product'], hit['platform']) for hit in res['hits']]
    for previous, current in zip(keys, keys[1:]):
        ok_(previous <= current, (previous, current))

    # Invalid field.
    assert_raises(
        BadArgumentError,
        self.api.get,
        _sort='something',
    )  # `something` is invalid

@minimum_es_version('1.0')
def test_get_with_facets(self):
self.index_crash({
Expand Down

0 comments on commit a87382c

Please sign in to comment.