Skip to content

Commit

Permalink
Allow user-defined secure cookies (#6357)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed May 15, 2024
1 parent d2f1e00 commit 812fd23
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 11 deletions.
3 changes: 2 additions & 1 deletion docs/topics/request-response.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,14 @@ Request objects
.. code-block:: python
request_with_cookies = Request(
url="http://www.example.com",
url="https://www.example.com",
cookies=[
{
"name": "currency",
"value": "USD",
"domain": "example.com",
"path": "/currency",
"secure": True,
},
],
)
Expand Down
14 changes: 12 additions & 2 deletions scrapy/downloadermiddlewares/cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@


_split_domain = TLDExtract(include_psl_private_domains=True)
_UNSET = object()


def _is_public_domain(domain: str) -> bool:
Expand Down Expand Up @@ -133,6 +134,7 @@ def _format_cookie(self, cookie: Dict[str, Any], request: Request) -> Optional[s
Decode from bytes if necessary.
"""
decoded = {}
flags = set()
for key in ("name", "value", "path", "domain"):
if cookie.get(key) is None:
if key in ("name", "value"):
Expand All @@ -152,10 +154,16 @@ def _format_cookie(self, cookie: Dict[str, Any], request: Request) -> Optional[s
cookie,
)
decoded[key] = cookie[key].decode("latin1", errors="replace")

for flag in ("secure",):
value = cookie.get(flag, _UNSET)
if value is _UNSET or not value:
continue
flags.add(flag)
cookie_str = f"{decoded.pop('name')}={decoded.pop('value')}"
for key, value in decoded.items(): # path, domain
cookie_str += f"; {key.capitalize()}={value}"
for flag in flags: # secure
cookie_str += f"; {flag.capitalize()}"
return cookie_str

def _get_request_cookies(
Expand All @@ -168,9 +176,11 @@ def _get_request_cookies(
return []
cookies: Iterable[Dict[str, Any]]
if isinstance(request.cookies, dict):
cookies = ({"name": k, "value": v} for k, v in request.cookies.items())
cookies = tuple({"name": k, "value": v} for k, v in request.cookies.items())
else:
cookies = request.cookies
for cookie in cookies:
cookie.setdefault("secure", urlparse_cached(request).scheme == "https")
formatted = filter(None, (self._format_cookie(c, request) for c in cookies))
response = Response(request.url, headers={"Set-Cookie": formatted})
return jar.make_cookies(response, request)
9 changes: 4 additions & 5 deletions scrapy/utils/_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@
"You have brotlipy installed, and Scrapy will use it, but "
"Scrapy support for brotlipy is deprecated and will stop "
"working in a future version of Scrapy. brotlipy itself is "
"deprecated, it has been superseded by brotlicffi. "
"Please, uninstall brotlipy "
"and install brotli or brotlicffi instead. brotlipy has the same import "
"name as brotli, so keeping both installed is strongly "
"discouraged."
"deprecated, it has been superseded by brotlicffi. Please, "
"uninstall brotlipy and install brotli or brotlicffi instead. "
"brotlipy has the same import name as brotli, so keeping both "
"installed is strongly discouraged."
),
ScrapyDeprecationWarning,
)
Expand Down
111 changes: 108 additions & 3 deletions tests/test_downloadermiddleware_cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from scrapy.utils.python import to_bytes
from scrapy.utils.test import get_crawler

UNSET = object()


def _cookie_to_set_cookie_value(cookie):
"""Given a cookie defined as a dictionary with name and value keys, and
Expand Down Expand Up @@ -414,19 +416,19 @@ def test_invalid_cookies(self):
"scrapy.downloadermiddlewares.cookies",
"WARNING",
"Invalid cookie found in request <GET http://example.org/1>:"
" {'value': 'bar'} ('name' is missing)",
" {'value': 'bar', 'secure': False} ('name' is missing)",
),
(
"scrapy.downloadermiddlewares.cookies",
"WARNING",
"Invalid cookie found in request <GET http://example.org/2>:"
" {'name': 'foo'} ('value' is missing)",
" {'name': 'foo', 'secure': False} ('value' is missing)",
),
(
"scrapy.downloadermiddlewares.cookies",
"WARNING",
"Invalid cookie found in request <GET http://example.org/3>:"
" {'name': 'foo', 'value': None} ('value' is missing)",
" {'name': 'foo', 'value': None, 'secure': False} ('value' is missing)",
),
)
self.assertCookieValEqual(req1.headers["Cookie"], "key=value1")
Expand Down Expand Up @@ -732,3 +734,106 @@ def test_server_set_cookie_domain_public_period(self):
"co.uk",
cookies=True,
)

def _test_cookie_redirect_scheme_change(
self, secure, from_scheme, to_scheme, cookies1, cookies2, cookies3
):
"""When a redirect causes the URL scheme to change from *from_scheme*
to *to_scheme*, while domain and port remain the same, and given a
cookie on the initial request with its secure attribute set to
*secure*, check if the cookie should be set on the Cookie header of the
initial request (*cookies1*), if it should be kept by the redirect
middleware (*cookies2*), and if it should be present on the Cookie
header in the redirected request (*cookie3*)."""
cookie_kwargs = {}
if secure is not UNSET:
cookie_kwargs["secure"] = secure
input_cookies = [{"name": "a", "value": "b", **cookie_kwargs}]

request1 = Request(f"{from_scheme}://a.example", cookies=input_cookies)
self.mw.process_request(request1, self.spider)
cookies = request1.headers.get("Cookie")
self.assertEqual(cookies, b"a=b" if cookies1 else None)

response = Response(
f"{from_scheme}://a.example",
headers={"Location": f"{to_scheme}://a.example"},
status=301,
)
self.assertEqual(
self.mw.process_response(request1, response, self.spider),
response,
)

request2 = self.redirect_middleware.process_response(
request1,
response,
self.spider,
)
self.assertIsInstance(request2, Request)
cookies = request2.headers.get("Cookie")
self.assertEqual(cookies, b"a=b" if cookies2 else None)

self.mw.process_request(request2, self.spider)
cookies = request2.headers.get("Cookie")
self.assertEqual(cookies, b"a=b" if cookies3 else None)

def test_cookie_redirect_secure_undefined_downgrade(self):
self._test_cookie_redirect_scheme_change(
secure=UNSET,
from_scheme="https",
to_scheme="http",
cookies1=True,
cookies2=True, # xfail, due to a bug in the redirect middleware fixed elsewhere
cookies3=False,
)

def test_cookie_redirect_secure_undefined_upgrade(self):
self._test_cookie_redirect_scheme_change(
secure=UNSET,
from_scheme="http",
to_scheme="https",
cookies1=True,
cookies2=True,
cookies3=True,
)

def test_cookie_redirect_secure_false_downgrade(self):
self._test_cookie_redirect_scheme_change(
secure=False,
from_scheme="https",
to_scheme="http",
cookies1=True,
cookies2=True, # xfail, due to a bug in the redirect middleware fixed elsewhere
cookies3=True,
)

def test_cookie_redirect_secure_false_upgrade(self):
self._test_cookie_redirect_scheme_change(
secure=False,
from_scheme="http",
to_scheme="https",
cookies1=True,
cookies2=True,
cookies3=True,
)

def test_cookie_redirect_secure_true_downgrade(self):
self._test_cookie_redirect_scheme_change(
secure=True,
from_scheme="https",
to_scheme="http",
cookies1=True,
cookies2=True, # xfail, due to a bug in the redirect middleware fixed elsewhere
cookies3=False,
)

def test_cookie_redirect_secure_true_upgrade(self):
self._test_cookie_redirect_scheme_change(
secure=True,
from_scheme="http",
to_scheme="https",
cookies1=False,
cookies2=False,
cookies3=True,
)

0 comments on commit 812fd23

Please sign in to comment.