Move the hook for reading response data into the cache up one level.

Old code used to hook the HTTPResponse._fp object, on the code path that deals just with non-chunked responses. Since _fp isn't responsible for detecting the end of chunked responses, it cannot know when to create the cache entry. New code hooks the HTTPResponse itself, on the stream() function, that's the common code path for chunked and non-chunked responses. Fixes psf#95.
toolforger · Sep 6, 2015 · 8628678 · 8628678
1 parent bc89ecc
commit 8628678
Show file tree

Hide file tree

Showing 4 changed files with 20 additions and 92 deletions.
diff --git a/cachecontrol/adapter.py b/cachecontrol/adapter.py
@@ -1,10 +1,12 @@
 import functools
 
+import io
+import types
+
 from requests.adapters import HTTPAdapter
 
 from .controller import CacheController
 from .cache import DictCache
-from .filewrapper import CallbackFileWrapper
 
 
 class CacheControlAdapter(HTTPAdapter):
@@ -87,16 +89,20 @@ def build_response(self, request, response, from_cache=False):
                 if self.heuristic:
                     response = self.heuristic.apply(response)
 
-                # Wrap the response file with a wrapper that will cache the
-                #   response when the stream has been consumed.
-                response._fp = CallbackFileWrapper(
-                    response._fp,
-                    functools.partial(
-                        self.controller.cache_response,
-                        request,
-                        response,
-                    )
-                )
+                # Monkey-patch the response object with a new.stream() generator
+                # function which wraps the original stream(), collects the
+                # stream data, and enters that data into the cache when the
+                # generator finishes.
+                old_stream = response.stream
+                buf = io.BytesIO()
+                def new_stream(response_self, *args, **kwargs):
+                    for data in old_stream(*args, **kwargs):
+                        buf.write(data) # collect stream data
+                        yield data
+                    # Enter collected stream data into cache
+                    self.controller.cache_response(
+                        request, response, buf.getvalue())
+                response.stream = types.MethodType(new_stream, response)
 
         resp = super(CacheControlAdapter, self).build_response(
             request, response

diff --git a/cachecontrol/filewrapper.py b/cachecontrol/filewrapper.py
diff --git a/cachecontrol/serialize.py b/cachecontrol/serialize.py
@@ -40,11 +40,9 @@ def dumps(self, request, response, body=None):
 
             # NOTE: 99% sure this is dead code. I'm only leaving it
             #       here b/c I don't have a test yet to prove
-            #       it. Basically, before using
-            #       `cachecontrol.filewrapper.CallbackFileWrapper`,
-            #       this made an effort to reset the file handle. The
-            #       `CallbackFileWrapper` short circuits this code by
-            #       setting the body as the content is consumed, the
+            #       it. Basically, this made an effort to reset the file handle
+            #       CacheControlAdfapter.build_response was rewritten to delay
+            #       cache entry until the stream was fully read, the
             #       result being a `body` argument is *always* passed
             #       into cache_response, and in turn,
             #       `Serializer.dump`.

diff --git a/tests/test_regressions.py b/tests/test_regressions.py
@@ -4,7 +4,6 @@
 
 from cachecontrol import CacheControl
 from cachecontrol.caches.file_cache import FileCache
-from cachecontrol.filewrapper import CallbackFileWrapper
 from requests import Session
 
 
@@ -17,15 +16,3 @@ def test_file_cache_recognizes_consumed_file_handle(self):
         s.get('http://httpbin.org/cache/60')
         r = s.get('http://httpbin.org/cache/60')
         assert r.from_cache
-
-
-def test_getattr_during_gc():
-    s = CallbackFileWrapper(None, None)
-    # normal behavior:
-    with pytest.raises(AttributeError):
-        s.x
-
-    # this previously had caused an infinite recursion
-    vars(s).clear()  # gc does this.
-    with pytest.raises(AttributeError):
-        s.x