move response manipulation responsibility to Results class and allow custom results classes
django-haystack · Sep 25, 2015 · 16f14a4 · 16f14a4
1 parent cf0fa49
commit 16f14a4
Show file tree

Hide file tree

Showing 2 changed files with 143 additions and 71 deletions.
diff --git a/pysolr.py b/pysolr.py
@@ -219,19 +219,70 @@ class SolrError(Exception):
 
 
 class Results(object):
-    def __init__(self, docs, hits, highlighting=None, facets=None,
-                 spellcheck=None, stats=None, qtime=None, debug=None,
-                 grouped=None, nextCursorMark=None):
-        self.docs = docs
-        self.hits = hits
-        self.highlighting = highlighting or {}
-        self.facets = facets or {}
-        self.spellcheck = spellcheck or {}
-        self.stats = stats or {}
-        self.qtime = qtime
-        self.debug = debug or {}
-        self.grouped = grouped or {}
-        self.nextCursorMark = nextCursorMark or None
+    """
+    Default results class for wrapping decoded (from JSON) solr responses.
+
+    The required ``decoded`` argument must be a Solr response dictionary.
+    Individual documents can be retrieved either through the ``docs``
+    attribute or by iterating over the results instance.
+
+    Example::
+
+        results = Results({
+            'response': {
+                'docs': [{'id': 1}, {'id': 2}, {'id': 3}],
+                'numFound': 3,
+            }
+        })
+
+        # this:
+        for doc in results:
+            print(doc)
+
+        # ... is equivalent to:
+        for doc in results.docs:
+            print(doc)
+
+        # also:
+        list(results) == results.docs
+
+    Note that ``Results`` object does not support indexing and slicing. If you
+    need to retrieve documents by index just use ``docs`` attribute.
+
+    Other response metadata (debug, highlighting, qtime, etc.) is available
+    as attributes. Note that the current version of pysolr may not cover every
+    response key. If your queries return a key that is not exposed here, you
+    can subclass ``Results`` and pass the subclass to ``pysolr.Solr`` via the
+    ``results_cls`` argument.
+
+    Example::
+
+        import pysolr
+
+        class CustomResults(pysolr.Results):
+            def __init__(self, decoded):
+                self.some_new_attribute = decoded.get('not_covered_key', None)
+                super(CustomResults, self).__init__(decoded)
+
+        solr = pysolr.Solr('<solr url>', results_cls=CustomResults)
+
+    """
+
+    def __init__(self, decoded):
+        # main response part of decoded Solr response
+        response_part = decoded.get('response') or {}
+        self.docs = response_part.get('docs', ())
+        self.hits = response_part.get('numFound', 0)
+
+        # other response metadata
+        self.debug = decoded.get('debug', {})
+        self.highlighting = decoded.get('highlighting', {})
+        self.facets = decoded.get('facet_counts', {})
+        self.spellcheck = decoded.get('spellcheck', {})
+        self.stats = decoded.get('stats', {})
+        self.qtime = decoded.get('responseHeader', {}).get('QTime', None)
+        self.grouped = decoded.get('grouped', {})
+        self.nextCursorMark = decoded.get('nextCursorMark', None)
 
     def __len__(self):
         return len(self.docs)
@@ -250,20 +301,29 @@ class Solr(object):
     Optionally accepts ``timeout`` for wait seconds until giving up on a
     request. Default is ``60`` seconds.
 
+    Optionally accepts ``results_cls`` that specifies class of results object
+    returned by ``.search()`` and ``.more_like_this()`` methods.
+    Default is ``pysolr.Results``.
+
     Usage::
 
         solr = pysolr.Solr('http://localhost:8983/solr')
+
         # With a 10 second timeout.
         solr = pysolr.Solr('http://localhost:8983/solr', timeout=10)
 
+        # with a dict as a default results class instead of pysolr.Results
+        solr = pysolr.Solr('http://localhost:8983/solr', results_cls=dict)
+
     """
-    def __init__(self, url, decoder=None, timeout=60):
+    def __init__(self, url, decoder=None, timeout=60, results_cls=Results):
         self.decoder = decoder or json.JSONDecoder()
         self.url = url
         self.timeout = timeout
         self.log = self._get_log()
         self.session = requests.Session()
         self.session.stream = False
+        self.results_cls = results_cls
 
     def _get_log(self):
         return LOG
@@ -617,6 +677,9 @@ def search(self, q, **kwargs):
         Optionally accepts ``**kwargs`` for additional options to be passed
         through the Solr URL.
 
+        Returns an instance of ``self.results_cls`` (``pysolr.Results`` by
+        default).
+
         Usage::
 
             # All docs.
@@ -632,44 +695,22 @@ def search(self, q, **kwargs):
         params = {'q': q}
         params.update(kwargs)
         response = self._select(params)
+        decoded = self.decoder.decode(response)
 
-        # TODO: make result retrieval lazy and allow custom result objects
-        result = self.decoder.decode(response)
-        result_kwargs = {}
-
-        if result.get('debug'):
-            result_kwargs['debug'] = result['debug']
-
-        if result.get('highlighting'):
-            result_kwargs['highlighting'] = result['highlighting']
-
-        if result.get('facet_counts'):
-            result_kwargs['facets'] = result['facet_counts']
-
-        if result.get('spellcheck'):
-            result_kwargs['spellcheck'] = result['spellcheck']
-
-        if result.get('stats'):
-            result_kwargs['stats'] = result['stats']
-
-        if 'QTime' in result.get('responseHeader', {}):
-            result_kwargs['qtime'] = result['responseHeader']['QTime']
-
-        if result.get('grouped'):
-            result_kwargs['grouped'] = result['grouped']
-
-        if result.get('nextCursorMark'):
-            result_kwargs['nextCursorMark'] = result['nextCursorMark']
-
-        response = result.get('response') or {}
-        numFound = response.get('numFound', 0)
-        self.log.debug("Found '%s' search results.", numFound)
-        return Results(response.get('docs', ()), numFound, **result_kwargs)
+        self.log.debug(
+            "Found '%s' search results.",
+            # cover both cases: there is no response key or value is None
+            (decoded.get('response', {}) or {}).get('numFound', 0)
+        )
+        return self.results_cls(decoded)
 
     def more_like_this(self, q, mltfl, **kwargs):
         """
         Finds and returns results similar to the provided query.
 
+        Returns an instance of ``self.results_cls`` (``pysolr.Results`` by
+        default).
+
         Requires Solr 1.3+.
 
         Usage::
@@ -683,17 +724,14 @@ def more_like_this(self, q, mltfl, **kwargs):
         }
         params.update(kwargs)
         response = self._mlt(params)
+        decoded = self.decoder.decode(response)
 
-        result = self.decoder.decode(response)
-
-        if result['response'] is None:
-            result['response'] = {
-                'docs': [],
-                'numFound': 0,
-            }
-
-        self.log.debug("Found '%s' MLT results.", result['response']['numFound'])
-        return Results(result['response']['docs'], result['response']['numFound'])
+        self.log.debug(
+            "Found '%s' MLT results.",
+            # cover both cases: there is no response key or value is None
+            (decoded.get('response', {}) or {}).get('numFound', 0)
+        )
+        return self.results_cls(decoded)
 
     def suggest_terms(self, fields, prefix, **kwargs):
         """

diff --git a/tests/client.py b/tests/client.py
@@ -59,7 +59,13 @@ def test_clean_xml_string(self):
 
 class ResultsTestCase(unittest.TestCase):
     def test_init(self):
-        default_results = Results([{'id': 1}, {'id': 2}], 2)
+        default_results = Results({
+            'response': {
+                'docs': [{'id': 1}, {'id': 2}],
+                'numFound': 2,
+            },
+        })
+
         self.assertEqual(default_results.docs, [{'id': 1}, {'id': 2}])
         self.assertEqual(default_results.hits, 2)
         self.assertEqual(default_results.highlighting, {})
@@ -70,18 +76,23 @@ def test_init(self):
         self.assertEqual(default_results.debug, {})
         self.assertEqual(default_results.grouped, {})
 
-        full_results = Results(
-            docs=[{'id': 1}, {'id': 2}, {'id': 3}],
-            hits=3,
+        full_results = Results({
+            'response': {
+                'docs': [{'id': 1}, {'id': 2}, {'id': 3}],
+                'numFound': 3,
+            },
             # Fake data just to check assignments.
-            highlighting='hi',
-            facets='fa',
-            spellcheck='sp',
-            stats='st',
-            qtime='0.001',
-            debug=True,
-            grouped=['a']
-        )
+            'highlighting': 'hi',
+            'facet_counts': 'fa',
+            'spellcheck': 'sp',
+            'stats': 'st',
+            'responseHeader': {
+                'QTime': '0.001',
+            },
+            'debug': True,
+            'grouped': ['a'],
+        })
+
         self.assertEqual(full_results.docs, [{'id': 1}, {'id': 2}, {'id': 3}])
         self.assertEqual(full_results.hits, 3)
         self.assertEqual(full_results.highlighting, 'hi')
@@ -93,14 +104,29 @@ def test_init(self):
         self.assertEqual(full_results.grouped, ['a'])
 
     def test_len(self):
-        small_results = Results([{'id': 1}, {'id': 2}], 2)
+        small_results = Results({
+            'response': {
+                'docs': [{'id': 1}, {'id': 2}],
+                'numFound': 2,
+            },
+        })
         self.assertEqual(len(small_results), 2)
 
-        wrong_hits_results = Results([{'id': 1}, {'id': 2}, {'id': 3}], 7)
+        wrong_hits_results = Results({
+            'response': {
+                'docs': [{'id': 1}, {'id': 2}, {'id': 3}],
+                'numFound': 7,
+            },
+        })
         self.assertEqual(len(wrong_hits_results), 3)
 
     def test_iter(self):
-        long_results = Results([{'id': 1}, {'id': 2}, {'id': 3}], 3)
+        long_results = Results({
+            'response': {
+                'docs': [{'id': 1}, {'id': 2}, {'id': 3}],
+                'numFound': 7,
+            },
+        })
 
         to_iter = list(long_results)
         self.assertEqual(to_iter[0], {'id': 1})
@@ -168,6 +194,14 @@ def test_init(self):
         self.assertTrue(isinstance(self.solr.decoder, json.JSONDecoder))
         self.assertEqual(self.solr.timeout, 2)
 
+    def test_custom_results_class(self):
+        solr = Solr('http://localhost:8983/solr/core0', results_cls=dict)
+
+        results = solr.search(q='*:*')
+        assert isinstance(results, dict)
+        assert 'responseHeader' in results
+        assert 'response' in results
+
     def test__create_full_url(self):
         # Nada.
         self.assertEqual(self.solr._create_full_url(path=''), 'http://localhost:8983/solr/core0')