From fa2ae729e9be18553aa8da09fd83ae57a8f9bff6 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Thu, 23 Oct 2025 14:18:02 +0100 Subject: [PATCH 1/2] fix: only percent-encode characters in the userinfo encode set --- src/url.rs | 32 ++++++++++++++++++++++++++++++-- tests/validators/test_url.py | 33 ++++++++++++++++++++------------- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/src/url.rs b/src/url.rs index c2d9c3579..4f8804b63 100644 --- a/src/url.rs +++ b/src/url.rs @@ -7,7 +7,7 @@ use std::sync::OnceLock; use idna::punycode::decode_to_string; use jiter::{PartialMode, StringCacheMode}; -use percent_encoding::{percent_encode, NON_ALPHANUMERIC}; +use percent_encoding::{percent_encode, AsciiSet, CONTROLS}; use pyo3::exceptions::PyValueError; use pyo3::pyclass::CompareOp; use pyo3::sync::OnceLockExt; @@ -602,8 +602,36 @@ fn is_punnycode_domain(lib_url: &Url, domain: &str) -> bool { scheme_is_special(lib_url.scheme()) && domain.split('.').any(|part| part.starts_with(PUNYCODE_PREFIX)) } +/// See <https://url.spec.whatwg.org/#userinfo-percent-encode-set> +/// +/// Note that this doesn't actually include % itself - see the note in +/// https://url.spec.whatwg.org/#string-percent-encode-after-encoding +const USERINFO_ENCODE_SET: &AsciiSet = &CONTROLS + // the query percent-encode set is the C0 control percent-encode set plus the below + .add(b' ') + .add(b'"') + .add(b'#') + .add(b'<') + .add(b'>') + // the path percent-encode set is the query percent-encode set plus the below + .add(b'?') + .add(b'^') + .add(b'`') + .add(b'{') + .add(b'}') + // the userinfo percent-encode set is the path percent-encode set plus the below + .add(b'/') + .add(b':') + .add(b';') + .add(b'=') + .add(b'@') + .add(b'[') + .add(b'\\') + .add(b']') + .add(b'|'); + fn encode_userinfo_component(value: &str) -> Cow<'_, str> { - let encoded = percent_encode(value.as_bytes(), NON_ALPHANUMERIC).to_string(); + let encoded = percent_encode(value.as_bytes(), USERINFO_ENCODE_SET).to_string(); if encoded == value { Cow::Borrowed(value) } else { diff --git a/tests/validators/test_url.py 
b/tests/validators/test_url.py index 799668e27..6c832b9ce 100644 --- a/tests/validators/test_url.py +++ b/tests/validators/test_url.py @@ -1318,31 +1318,38 @@ def test_multi_url_build() -> None: assert str(url) == 'postgresql://testuser:testpassword@127.0.0.1:5432/database?sslmode=require#test' -def test_multi_url_build_encodes_credentials() -> None: - url = MultiHostUrl.build( +@pytest.mark.parametrize('url_type', [Url, MultiHostUrl]) +def test_url_build_encodes_credentials(url_type: type[Url | MultiHostUrl]) -> None: + url = url_type.build( scheme='postgresql', username='user name', - password='p@ss/word?#', + password='p@ss/word?#__', host='example.com', port=5432, ) - assert url == MultiHostUrl('postgresql://user%20name:p%40ss%2Fword%3F%23@example.com:5432') - assert str(url) == 'postgresql://user%20name:p%40ss%2Fword%3F%23@example.com:5432' - assert url.hosts() == [ - {'username': 'user%20name', 'password': 'p%40ss%2Fword%3F%23', 'host': 'example.com', 'port': 5432} - ] + assert url == url_type('postgresql://user%20name:p%40ss%2Fword%3F%23__@example.com:5432') + assert str(url) == 'postgresql://user%20name:p%40ss%2Fword%3F%23__@example.com:5432' + if url_type is Url: + assert url.username == 'user%20name' + assert url.password == 'p%40ss%2Fword%3F%23__' + else: + assert url.hosts() == [ + {'username': 'user%20name', 'password': 'p%40ss%2Fword%3F%23__', 'host': 'example.com', 'port': 5432} + ] def test_multi_url_build_hosts_encodes_credentials() -> None: hosts = [ - {'host': 'example.com', 'password': 'p@ss/word?#', 'username': 'user name', 'port': 5431}, - {'host': 'example.org', 'password': 'pa%ss', 'username': 'other', 'port': 5432}, + {'host': 'example.com', 'password': 'p@ss/word?#__', 'username': 'user name', 'port': 5431}, + {'host': 'example.org', 'password': 'p@%ss__', 'username': 'other', 'port': 5432}, ] url = MultiHostUrl.build(scheme='postgresql', hosts=hosts) - assert str(url) == 
'postgresql://user%20name:p%40ss%2Fword%3F%23@example.com:5431,other:pa%25ss@example.org:5432' + assert ( + str(url) == 'postgresql://user%20name:p%40ss%2Fword%3F%23__@example.com:5431,other:p%40%ss__@example.org:5432' + ) assert url.hosts() == [ - {'username': 'user%20name', 'password': 'p%40ss%2Fword%3F%23', 'host': 'example.com', 'port': 5431}, - {'username': 'other', 'password': 'pa%25ss', 'host': 'example.org', 'port': 5432}, + {'username': 'user%20name', 'password': 'p%40ss%2Fword%3F%23__', 'host': 'example.com', 'port': 5431}, + {'username': 'other', 'password': 'p%40%ss__', 'host': 'example.org', 'port': 5432}, ] From 26aa0724b5cbf5c059347dc8a4e3a740dbc7f398 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Thu, 23 Oct 2025 14:29:31 +0100 Subject: [PATCH 2/2] fix type hint for 3.9 --- tests/validators/test_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/validators/test_url.py b/tests/validators/test_url.py index 6c832b9ce..65153171c 100644 --- a/tests/validators/test_url.py +++ b/tests/validators/test_url.py @@ -1319,7 +1319,7 @@ def test_multi_url_build() -> None: @pytest.mark.parametrize('url_type', [Url, MultiHostUrl]) -def test_url_build_encodes_credentials(url_type: type[Url | MultiHostUrl]) -> None: +def test_url_build_encodes_credentials(url_type: type[Union[Url, MultiHostUrl]]) -> None: url = url_type.build( scheme='postgresql', username='user name',