diff --git a/lib-python/3/http/server.py b/lib-python/3/http/server.py index 38f7accad7a..39de35458c3 100644 --- a/lib-python/3/http/server.py +++ b/lib-python/3/http/server.py @@ -332,6 +332,13 @@ def parse_request(self): return False self.command, self.path = command, path + # gh-87389: The purpose of replacing '//' with '/' is to protect + # against open redirect attacks possibly triggered if the path starts + # with '//' because http clients treat //path as an absolute URI + # without scheme (similar to http://path) rather than a path. + if self.path.startswith('//'): + self.path = '/' + self.path.lstrip('/') # Reduce to a single / + # Examine the headers and look for a Connection directive. try: self.headers = http.client.parse_headers(self.rfile, diff --git a/lib-python/3/test/test_httpservers.py b/lib-python/3/test/test_httpservers.py index c5b833723eb..97dae7a7cee 100644 --- a/lib-python/3/test/test_httpservers.py +++ b/lib-python/3/test/test_httpservers.py @@ -416,6 +416,55 @@ def test_undecodable_filename(self): self.check_status_and_reason(response, HTTPStatus.OK, data=support.TESTFN_UNDECODABLE) + def test_get_dir_redirect_location_domain_injection_bug(self): + """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location. + + //netloc/ in a Location header is a redirect to a new host. + https://github.com/python/cpython/issues/87389 + + This checks that a path resolving to a directory on our server cannot + resolve into a redirect to another server. + """ + os.mkdir(os.path.join(self.tempdir, 'existing_directory')) + url = f'/python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory' + expected_location = f'{url}/' # /python.org.../ single slash single prefix, trailing slash + # Canonicalizes to /tmp/tempdir_name/existing_directory which does + # exist and is a dir, triggering the 301 redirect logic. + response = self.request(url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + self.assertEqual(location, expected_location, msg='non-attack failed!') + + # //python.org... multi-slash prefix, no trailing slash + attack_url = f'/{url}' + response = self.request(attack_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + self.assertFalse(location.startswith('//'), msg=location) + self.assertEqual(location, expected_location, + msg='Expected Location header to start with a single / and ' + 'end with a / as this is a directory redirect.') + + # ///python.org... triple-slash prefix, no trailing slash + attack3_url = f'//{url}' + response = self.request(attack3_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + self.assertEqual(response.getheader('Location'), expected_location) + + # If the second word in the http request (Request-URI for the http + # method) is a full URI, we don't worry about it, as that'll be parsed + # and reassembled as a full URI within BaseHTTPRequestHandler.send_head + # so no errant scheme-less //netloc//evil.co/ domain mixup can happen. + attack_scheme_netloc_2slash_url = f'https://pypi.org/{url}' + expected_scheme_netloc_location = f'{attack_scheme_netloc_2slash_url}/' + response = self.request(attack_scheme_netloc_2slash_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + # We're just ensuring that the scheme and domain make it through, if + # there are or aren't multiple slashes at the start of the path that + # follows that isn't important in this Location: header. + self.assertTrue(location.startswith('https://pypi.org/'), msg=location) + def test_get(self): #constructs the path relative to the root directory of the HTTPServer response = self.request(self.base_url + '/test')