Refactor CachedHTTPResponse.from_response()

Notes: * consolidated content reading/resetting into utility functions * set_content() is now redundant with reset(). It wasn't used anywhere else, so it has been removed.
requests-cache · Feb 17, 2024 · 4a887b7 · 4a887b7
1 parent 3b3363f
commit 4a887b7
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 47 deletions.
diff --git a/requests_cache/models/raw_response.py b/requests_cache/models/raw_response.py
@@ -37,45 +37,24 @@ class CachedHTTPResponse(RichMixin, HTTPResponse):
     version: int = field(default=0)
 
     def __init__(self, body: Optional[bytes] = None, **kwargs):
-        """First initialize via HTTPResponse, then via attrs"""
+        """First initialize via HTTPResponse.__init__, then via __attrs_init__"""
         kwargs = {k: v for k, v in kwargs.items() if v is not None}
         super().__init__(body=BytesIO(body or b''), preload_content=False, **kwargs)
         self._body = body
+        self._fp_bytes_read = 0
+        self.length_remaining = len(body or b'')
         self.__attrs_init__(**kwargs)  # type: ignore # False positive in mypy 0.920+?
 
     @classmethod
     def from_response(cls, response: Response) -> 'CachedHTTPResponse':
-        """Create a CachedHTTPResponse based on an original response"""
-        # Copy basic attributes
-        raw = response.raw
-        kwargs = {k: getattr(raw, k, None) for k in fields_dict(cls).keys()}
-        # Note: in HTTPResponse, init kwarg is named 'request_url', but attribute is '_request_url'
-        kwargs['request_url'] = raw._request_url
-
-        # Read and copy raw response data, and then restore response object to its previous state
-        # This is necessary so streaming responses behave consistently with or without the cache
-        if getattr(raw, '_fp', None) and not is_fp_closed(raw._fp):
-            # Body has already been read & decoded by requests
-            if getattr(raw, '_has_decoded_content', False):
-                body = response.content
-                kwargs['body'] = body
-                raw._fp = BytesIO(body)
-                raw._fp_bytes_read = 0
-                raw.length_remaining = len(body)
-            # Body has not yet been read
-            else:
-                body = raw.read(decode_content=False)
-                kwargs['body'] = body
-                raw._fp = BytesIO(body)
-                raw._fp_bytes_read = 0
-                raw.length_remaining = len(body)
-                _ = response.content  # This property reads, decodes, and stores response content
-
-                # After reading, reset file pointer on original raw response
-                raw._fp = BytesIO(body)
-                raw._fp_bytes_read = 0
-                raw.length_remaining = len(body)
-
+        """Create a CachedHTTPResponse based on an original response, and restore the response to
+        its previous state.
+        """
+        # Get basic attributes
+        kwargs = {k: getattr(response.raw, k, None) for k in fields_dict(cls).keys()}
+        # Init kwarg for HTTPResponse._request_url has no leading '_'
+        kwargs['request_url'] = response.raw._request_url
+        kwargs['body'] = _copy_body(response)
         return cls(**kwargs)  # type: ignore  # False positive in mypy 0.920+?
 
     @classmethod
@@ -104,30 +83,59 @@ def release_conn(self):
 
     def read(self, amt=None, decode_content=None, **kwargs):
         """Simplified reader for cached content that emulates
-        :py:meth:`urllib3.response.HTTPResponse.read()`
+        :py:meth:`urllib3.response.HTTPResponse.read()`, but does not need to read from a socket
+        or decode content.
         """
         if 'Content-Encoding' in self.headers and decode_content is False:
             logger.warning('read(decode_content=False) is not supported for cached responses')
 
         data = self._fp.read(amt)
+        if data:
+            self._fp_bytes_read += len(data)
+            if self.length_remaining is not None:
+                self.length_remaining -= len(data)
         # "close" the file to inform consumers to stop reading from it
-        if not data:
+        else:
             self._fp.close()
         return data
 
     def reset(self, body: Optional[bytes] = None):
         """Reset raw response file pointer, and optionally update content"""
-        if body is not None:
-            self._body = body
-        self._fp = BytesIO(self._body or b'')
-
-    def set_content(self, body: bytes):
-        self._body = body
-        self.reset()
+        _reset_fp(self, body or self._body)
 
     def stream(self, amt=None, **kwargs):
         """Simplified generator over cached content that emulates
         :py:meth:`urllib3.response.HTTPResponse.stream()`
         """
         while not self._fp.closed:
             yield self.read(amt=amt, **kwargs)
+
+
+def _copy_body(response: Response) -> Optional[bytes]:
+    """Read and copy raw response data, and then restore response object to its previous state.
+    This is necessary so streaming responses behave consistently with or without the cache.
+    """
+    # File pointer is missing or closed; nothing to do
+    if not getattr(response.raw, '_fp', None) or is_fp_closed(response.raw._fp):
+        return None
+    # Body has already been read & decoded by requests
+    elif getattr(response.raw, '_has_decoded_content', False):
+        body = response.content
+    # Body has not yet been read
+    else:
+        body = response.raw.read(decode_content=False)
+        _reset_fp(response.raw, body)
+        _ = response.content  # This property reads, decodes, and stores response content
+
+    # After reading, reset file pointer once more so client can still read it as a stream
+    _reset_fp(response.raw, body)
+    return body
+
+
+def _reset_fp(raw: HTTPResponse, body: Optional[bytes] = None):
+    """Set content and reset raw response file pointer"""
+    body = body or b''
+    raw._body = body  # type: ignore[attr-defined]
+    raw._fp_bytes_read = 0  # type: ignore[attr-defined]
+    raw._fp = BytesIO(body)  # type: ignore[attr-defined]
+    raw.length_remaining = len(body)
diff --git a/tests/unit/models/test_raw_response.py b/tests/unit/models/test_raw_response.py
@@ -50,12 +50,6 @@ def test_reset():
     assert raw.read(None) == b'mock response'
 
 
-def test_set_content():
-    raw = CachedHTTPResponse(body=None)
-    raw.set_content(b'mock response')
-    assert raw.read() == b'mock response'
-
-
 def test_stream():
     raw = CachedHTTPResponse(body=b'mock response')
     data = b''