Skip to content

Commit

Permalink
move response manipulation responsibility to Results class and allow …
Browse files Browse the repository at this point in the history
…custom results classes
  • Loading branch information
swistakm committed Sep 25, 2015
1 parent cf0fa49 commit 16f14a4
Show file tree
Hide file tree
Showing 2 changed files with 143 additions and 71 deletions.
150 changes: 94 additions & 56 deletions pysolr.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,19 +219,70 @@ class SolrError(Exception):


class Results(object):
def __init__(self, docs, hits, highlighting=None, facets=None,
spellcheck=None, stats=None, qtime=None, debug=None,
grouped=None, nextCursorMark=None):
self.docs = docs
self.hits = hits
self.highlighting = highlighting or {}
self.facets = facets or {}
self.spellcheck = spellcheck or {}
self.stats = stats or {}
self.qtime = qtime
self.debug = debug or {}
self.grouped = grouped or {}
self.nextCursorMark = nextCursorMark or None
"""
Default results class for wrapping decoded (from JSON) solr responses.
Required ``decoded`` argument must be a Solr response dictionary.
Individual documents can be retrieved either through ``docs`` attribute
or by iterating over results instance.
Example::
results = Results({
'response': {
'docs': [{'id': 1}, {'id': 2}, {'id': 3}],
'numFound': 3,
}
})
# this:
for doc in results:
print(doc)
# ... is equivalent to:
for doc in results.docs:
print(doc)
# also:
list(results) == results.docs
Note that ``Results`` object does not support indexing and slicing. If you
need to retrieve documents by index just use ``docs`` attribute.
Other response metadata (debug, highlighting, qtime, etc.) are available
as attributes. Note that not all response keys may be covered for current
version of pysolr. If you're sure that your queries return
something that is missing you can easily extend ``Results``
and provide it as a custom results class to ``pysolr.Solr``.
Example::
import pysolr
class CustomResults(pysolr.Results):
def __init__(self, decoded):
self.some_new_attribute = decoded.get('not_covered_key', None)
super(CustomResults, self).__init__(decoded)
solr = pysolr.Solr('<solr url>', results_cls=CustomResults)
"""

def __init__(self, decoded):
# main response part of decoded Solr response
response_part = decoded.get('response') or {}
self.docs = response_part.get('docs', ())
self.hits = response_part.get('numFound', 0)

# other response metadata
self.debug = decoded.get('debug', {})
self.highlighting = decoded.get('highlighting', {})
self.facets = decoded.get('facet_counts', {})
self.spellcheck = decoded.get('spellcheck', {})
self.stats = decoded.get('stats', {})
self.qtime = decoded.get('responseHeader', {}).get('QTime', None)
self.grouped = decoded.get('grouped', {})
self.nextCursorMark = decoded.get('nextCursorMark', None)

def __len__(self):
return len(self.docs)
Expand All @@ -250,20 +301,29 @@ class Solr(object):
Optionally accepts ``timeout`` for wait seconds until giving up on a
request. Default is ``60`` seconds.
Optionally accepts ``results_cls`` that specifies class of results object
returned by ``.search()`` and ``.more_like_this()`` methods.
Default is ``pysolr.Results``.
Usage::
solr = pysolr.Solr('http://localhost:8983/solr')
# With a 10 second timeout.
solr = pysolr.Solr('http://localhost:8983/solr', timeout=10)
# with a dict as a default results class instead of pysolr.Results
solr = pysolr.Solr('http://localhost:8983/solr', results_cls=dict)
"""
def __init__(self, url, decoder=None, timeout=60):
def __init__(self, url, decoder=None, timeout=60, results_cls=Results):
    """
    Store the connection settings and prepare an HTTP session.

    ``decoder`` falls back to a stock ``json.JSONDecoder`` when omitted;
    ``results_cls`` is kept so query methods can wrap decoded responses
    with it.
    """
    # Connection settings.
    self.url = url
    self.timeout = timeout

    # Response handling.
    self.decoder = decoder or json.JSONDecoder()
    self.results_cls = results_cls

    self.log = self._get_log()

    # One session per client; streaming is switched off on it.
    http_session = requests.Session()
    http_session.stream = False
    self.session = http_session

def _get_log(self):
    """Return the logger this client writes to (the global ``LOG``)."""
    return LOG
Expand Down Expand Up @@ -617,6 +677,9 @@ def search(self, q, **kwargs):
Optionally accepts ``**kwargs`` for additional options to be passed
through the Solr URL.
Returns ``self.results_cls`` class object (defaults to
``pysolr.Results``)
Usage::
# All docs.
Expand All @@ -632,44 +695,22 @@ def search(self, q, **kwargs):
params = {'q': q}
params.update(kwargs)
response = self._select(params)
decoded = self.decoder.decode(response)

# TODO: make result retrieval lazy and allow custom result objects
result = self.decoder.decode(response)
result_kwargs = {}

if result.get('debug'):
result_kwargs['debug'] = result['debug']

if result.get('highlighting'):
result_kwargs['highlighting'] = result['highlighting']

if result.get('facet_counts'):
result_kwargs['facets'] = result['facet_counts']

if result.get('spellcheck'):
result_kwargs['spellcheck'] = result['spellcheck']

if result.get('stats'):
result_kwargs['stats'] = result['stats']

if 'QTime' in result.get('responseHeader', {}):
result_kwargs['qtime'] = result['responseHeader']['QTime']

if result.get('grouped'):
result_kwargs['grouped'] = result['grouped']

if result.get('nextCursorMark'):
result_kwargs['nextCursorMark'] = result['nextCursorMark']

response = result.get('response') or {}
numFound = response.get('numFound', 0)
self.log.debug("Found '%s' search results.", numFound)
return Results(response.get('docs', ()), numFound, **result_kwargs)
self.log.debug(
"Found '%s' search results.",
# cover both cases: there is no response key or value is None
(decoded.get('response', {}) or {}).get('numFound', 0)
)
return self.results_cls(decoded)

def more_like_this(self, q, mltfl, **kwargs):
"""
Finds and returns results similar to the provided query.
Returns ``self.results_cls`` class object (defaults to
``pysolr.Results``)
Requires Solr 1.3+.
Usage::
Expand All @@ -683,17 +724,14 @@ def more_like_this(self, q, mltfl, **kwargs):
}
params.update(kwargs)
response = self._mlt(params)
decoded = self.decoder.decode(response)

result = self.decoder.decode(response)

if result['response'] is None:
result['response'] = {
'docs': [],
'numFound': 0,
}

self.log.debug("Found '%s' MLT results.", result['response']['numFound'])
return Results(result['response']['docs'], result['response']['numFound'])
self.log.debug(
"Found '%s' MLT results.",
# cover both cases: there is no response key or value is None
(decoded.get('response', {}) or {}).get('numFound', 0)
)
return self.results_cls(decoded)

def suggest_terms(self, fields, prefix, **kwargs):
"""
Expand Down
64 changes: 49 additions & 15 deletions tests/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,13 @@ def test_clean_xml_string(self):

class ResultsTestCase(unittest.TestCase):
def test_init(self):
default_results = Results([{'id': 1}, {'id': 2}], 2)
default_results = Results({
'response': {
'docs': [{'id': 1}, {'id': 2}],
'numFound': 2,
},
})

self.assertEqual(default_results.docs, [{'id': 1}, {'id': 2}])
self.assertEqual(default_results.hits, 2)
self.assertEqual(default_results.highlighting, {})
Expand All @@ -70,18 +76,23 @@ def test_init(self):
self.assertEqual(default_results.debug, {})
self.assertEqual(default_results.grouped, {})

full_results = Results(
docs=[{'id': 1}, {'id': 2}, {'id': 3}],
hits=3,
full_results = Results({
'response': {
'docs': [{'id': 1}, {'id': 2}, {'id': 3}],
'numFound': 3,
},
# Fake data just to check assignments.
highlighting='hi',
facets='fa',
spellcheck='sp',
stats='st',
qtime='0.001',
debug=True,
grouped=['a']
)
'highlighting': 'hi',
'facet_counts': 'fa',
'spellcheck': 'sp',
'stats': 'st',
'responseHeader': {
'QTime': '0.001',
},
'debug': True,
'grouped': ['a'],
})

self.assertEqual(full_results.docs, [{'id': 1}, {'id': 2}, {'id': 3}])
self.assertEqual(full_results.hits, 3)
self.assertEqual(full_results.highlighting, 'hi')
Expand All @@ -93,14 +104,29 @@ def test_init(self):
self.assertEqual(full_results.grouped, ['a'])

def test_len(self):
small_results = Results([{'id': 1}, {'id': 2}], 2)
small_results = Results({
'response': {
'docs': [{'id': 1}, {'id': 2}],
'numFound': 2,
},
})
self.assertEqual(len(small_results), 2)

wrong_hits_results = Results([{'id': 1}, {'id': 2}, {'id': 3}], 7)
wrong_hits_results = Results({
'response': {
'docs': [{'id': 1}, {'id': 2}, {'id': 3}],
'numFound': 7,
},
})
self.assertEqual(len(wrong_hits_results), 3)

def test_iter(self):
long_results = Results([{'id': 1}, {'id': 2}, {'id': 3}], 3)
long_results = Results({
'response': {
'docs': [{'id': 1}, {'id': 2}, {'id': 3}],
'numFound': 7,
},
})

to_iter = list(long_results)
self.assertEqual(to_iter[0], {'id': 1})
Expand Down Expand Up @@ -168,6 +194,14 @@ def test_init(self):
self.assertTrue(isinstance(self.solr.decoder, json.JSONDecoder))
self.assertEqual(self.solr.timeout, 2)

def test_custom_results_class(self):
    """``results_cls`` determines the type returned by ``search()``."""
    solr = Solr('http://localhost:8983/solr/core0', results_cls=dict)

    results = solr.search(q='*:*')

    # Use unittest assertions like the rest of this module: unlike bare
    # ``assert`` statements they are not stripped when running under -O.
    self.assertTrue(isinstance(results, dict))
    # With ``dict`` as the results class the decoded response is passed
    # through as a plain dict, so the top-level keys must be present.
    self.assertTrue('responseHeader' in results)
    self.assertTrue('response' in results)

def test__create_full_url(self):
# Nada.
self.assertEqual(self.solr._create_full_url(path=''), 'http://localhost:8983/solr/core0')
Expand Down

0 comments on commit 16f14a4

Please sign in to comment.