Skip to content

Commit

Permalink
Merge remote-tracking branch 'cw9j-q3vf-hrrv/1.8.4-authorization' int…
Browse files Browse the repository at this point in the history
…o 1.8
  • Loading branch information
Gallaecio committed Feb 14, 2024
2 parents 71b8741 + ca832a4 commit ee7bd9d
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 2 deletions.
6 changes: 6 additions & 0 deletions docs/news.rst
Expand Up @@ -37,6 +37,12 @@ Scrapy 1.8.4 (unreleased)
``scrapy.downloadermiddlewares.decompression`` module is discouraged and
will trigger a warning.

- The ``Authorization`` header is now dropped on redirects to a different
domain. Please see the `cw9j-q3vf-hrrv security advisory`_ for more
information.

.. _cw9j-q3vf-hrrv security advisory: https://github.com/scrapy/scrapy/security/advisories/GHSA-cw9j-q3vf-hrrv

.. _release-1.8.3:

Scrapy 1.8.3 (2022-07-25)
Expand Down
10 changes: 8 additions & 2 deletions scrapy/downloadermiddlewares/redirect.py
Expand Up @@ -15,11 +15,17 @@ def _build_redirect_request(source_request, url, **kwargs):
kwargs['url'] = url
kwargs['cookies'] = None
redirect_request = source_request.replace(**kwargs)
if 'Cookie' in redirect_request.headers:
has_cookie_header = "Cookie" in redirect_request.headers
has_authorization_header = "Authorization" in redirect_request.headers
if has_cookie_header or has_authorization_header:
source_request_netloc = urlparse_cached(source_request).netloc
redirect_request_netloc = urlparse_cached(redirect_request).netloc
if source_request_netloc != redirect_request_netloc:
del redirect_request.headers['Cookie']
if has_cookie_header:
del redirect_request.headers["Cookie"]
# https://fetch.spec.whatwg.org/#ref-for-cors-non-wildcard-request-header-name
if has_authorization_header:
del redirect_request.headers["Authorization"]
return redirect_request


Expand Down
33 changes: 33 additions & 0 deletions tests/test_downloadermiddleware_redirect.py
Expand Up @@ -186,6 +186,39 @@ def test_utf8_location(self):
perc_encoded_utf8_url = 'http://scrapytest.org/a%C3%A7%C3%A3o'
self.assertEqual(perc_encoded_utf8_url, req_result.url)

def test_cross_domain_header_dropping(self):
    """Check which request headers survive a 301 redirect.

    A redirect that stays on ``example.com`` must carry over every header
    of the original request, whereas a redirect to ``example.org`` must
    keep only the headers other than ``Cookie`` and ``Authorization``.
    """
    harmless_headers = {"a": "b"}
    sent_headers = {"Cookie": "a=b", "Authorization": "a"}
    sent_headers.update(harmless_headers)
    original_request = Request("https://example.com", headers=sent_headers)

    def follow_redirect(location):
        # Feed the middleware a 301 answer to the original request that
        # points at ``location`` and hand back whatever it produces.
        redirect_response = Response(
            "https://example.com",
            headers={"Location": location},
            status=301,
        )
        return self.mw.process_response(
            original_request, redirect_response, self.spider
        )

    # Same domain: nothing is stripped.
    same_domain_request = follow_redirect("https://example.com/a")
    self.assertIsInstance(same_domain_request, Request)
    self.assertEqual(original_request.headers, same_domain_request.headers)

    # Different domain: only the harmless header is left.
    other_domain_request = follow_redirect("https://example.org/a")
    self.assertIsInstance(other_domain_request, Request)
    self.assertEqual(
        harmless_headers, other_domain_request.headers.to_unicode_dict()
    )


class MetaRefreshMiddlewareTest(unittest.TestCase):

Expand Down

0 comments on commit ee7bd9d

Please sign in to comment.