From 04d7e2713466a06df6445fb0b01c3b9c79879ec7 Mon Sep 17 00:00:00 2001 From: scheel Date: Thu, 9 May 2024 08:51:54 -0600 Subject: [PATCH 1/4] Sanitize URLs for logging/display purposes. --- twine/commands/upload.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/twine/commands/upload.py b/twine/commands/upload.py index 5d099b3f..85833111 100644 --- a/twine/commands/upload.py +++ b/twine/commands/upload.py @@ -17,6 +17,7 @@ import fnmatch import logging import os.path +import re from typing import Dict, List, NamedTuple, cast import requests @@ -148,6 +149,26 @@ def _split_inputs( return Inputs(dists, signatures, attestations_by_dist) +def _sanitize_url(url) -> str: + """ + Sanitize URLs, removing any user:password combinations and replacing them with + asterisks. Returns the original URL if the string is a non-matching pattern. + + :param url: + str containing a URL to sanitize. + + return: + str either sanitized or as entered depending on pattern match. + """ + pattern = "(.*https?://)(\w+:\w+)@(\w+\..*)" + m = re.match(pattern, url) + if m: + newurl = f"{m.group(1)}*****:*****@{m.group(3)}" + return newurl + else: + return url + + def upload(upload_settings: settings.Settings, dists: List[str]) -> None: """Upload one or more distributions to a repository, and display the progress. @@ -189,7 +210,7 @@ def upload(upload_settings: settings.Settings, dists: List[str]) -> None: # Determine if the user has passed in pre-signed distributions or any attestations. uploads, signatures, attestations_by_dist = _split_inputs(dists) - print(f"Uploading distributions to {repository_url}") + print(f"Uploading distributions to {_sanitize_url(repository_url)}") packages_to_upload = [ _make_package( From 72ee030a0783959419962b9c4ff5c9fe16e5c507 Mon Sep 17 00:00:00 2001 From: scheel Date: Thu, 9 May 2024 10:04:27 -0600 Subject: [PATCH 2/4] Change regex string to a raw string. --- twine/commands/upload.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/twine/commands/upload.py b/twine/commands/upload.py index 85833111..842ac859 100644 --- a/twine/commands/upload.py +++ b/twine/commands/upload.py @@ -149,8 +149,9 @@ def _split_inputs( return Inputs(dists, signatures, attestations_by_dist) -def _sanitize_url(url) -> str: - """ +def _sanitize_url(url: str) -> str: + """Sanitize a URL. + Sanitize URLs, removing any user:password combinations and replacing them with asterisks. Returns the original URL if the string is a non-matching pattern. @@ -160,7 +161,7 @@ def _sanitize_url(url) -> str: return: str either sanitized or as entered depending on pattern match. """ - pattern = "(.*https?://)(\w+:\w+)@(\w+\..*)" + pattern = r"(.*https?://)(\w+:\w+)@(\w+\..*)" m = re.match(pattern, url) if m: newurl = f"{m.group(1)}*****:*****@{m.group(3)}" From e0ed8088fc872f449376d6d8e4fbf1b71b1a504f Mon Sep 17 00:00:00 2001 From: scheel Date: Thu, 9 May 2024 10:04:40 -0600 Subject: [PATCH 3/4] Changelog entry --- changelog/1104.misc.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/1104.misc.rst diff --git a/changelog/1104.misc.rst b/changelog/1104.misc.rst new file mode 100644 index 00000000..d2f4031b --- /dev/null +++ b/changelog/1104.misc.rst @@ -0,0 +1 @@ +This change will remove sensitive output in URLs for private repositories require a login provided in the format of http(s)://:@domain.com, replacing the sensitive text with a sanitized "*****:*****" to prevent these details from showing up in log files. From c512bbf166ac38239e58545a39155285f8747a7b Mon Sep 17 00:00:00 2001 From: Ian Stapleton Cordasco Date: Wed, 15 May 2024 16:09:29 -0500 Subject: [PATCH 4/4] Properly handle repository URLs with auth in them --- tests/test_utils.py | 25 +++++++++++++++++++++++++ twine/commands/upload.py | 28 +++------------------------- twine/utils.py | 35 ++++++++++++++++++++++++++++++----- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 4ff31392..6ec178b9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -150,6 +150,31 @@ def test_get_repository_config_missing(config_file): assert utils.get_repository_from_config(config_file, "pypi") == exp +def test_get_repository_config_url_with_auth(config_file): + repository_url = "https://user:pass@notexisting.python.org/pypi" + exp = { + "repository": "https://notexisting.python.org/pypi", + "username": "user", + "password": "pass", + } + assert utils.get_repository_from_config(config_file, "foo", repository_url) == exp + assert utils.get_repository_from_config(config_file, "pypi", repository_url) == exp + + +@pytest.mark.parametrize( + "input_url, expected_url", + [ + ("https://upload.pypi.org/legacy/", "https://upload.pypi.org/legacy/"), + ( + "https://user:pass@upload.pypi.org/legacy/", + "https://********@upload.pypi.org/legacy/", + ), + ], +) +def test_sanitize_url(input_url: str, expected_url: str) -> None: + assert utils.sanitize_url(input_url) == expected_url + + @pytest.mark.parametrize( "repo_url, message", [ diff --git a/twine/commands/upload.py b/twine/commands/upload.py index 842ac859..0f4f3633 100644 --- a/twine/commands/upload.py +++ b/twine/commands/upload.py @@ -17,7 +17,6 @@ import fnmatch import logging import os.path -import re from typing import Dict, List, NamedTuple, cast import requests @@ -149,27 +148,6 @@ def _split_inputs( return Inputs(dists, signatures, attestations_by_dist) -def _sanitize_url(url: str) -> str: - """Sanitize a URL. - - Sanitize URLs, removing any user:password combinations and replacing them with - asterisks. Returns the original URL if the string is a non-matching pattern. - - :param url: - str containing a URL to sanitize. - - return: - str either sanitized or as entered depending on pattern match. - """ - pattern = r"(.*https?://)(\w+:\w+)@(\w+\..*)" - m = re.match(pattern, url) - if m: - newurl = f"{m.group(1)}*****:*****@{m.group(3)}" - return newurl - else: - return url - - def upload(upload_settings: settings.Settings, dists: List[str]) -> None: """Upload one or more distributions to a repository, and display the progress. @@ -211,7 +189,7 @@ def upload(upload_settings: settings.Settings, dists: List[str]) -> None: # Determine if the user has passed in pre-signed distributions or any attestations. uploads, signatures, attestations_by_dist = _split_inputs(dists) - print(f"Uploading distributions to {_sanitize_url(repository_url)}") + print(f"Uploading distributions to {utils.sanitize_url(repository_url)}") packages_to_upload = [ _make_package( @@ -272,8 +250,8 @@ def upload(upload_settings: settings.Settings, dists: List[str]) -> None: # redirects as well. if resp.is_redirect: raise exceptions.RedirectDetected.from_args( - repository_url, - resp.headers["location"], + utils.sanitize_url(repository_url), + utils.sanitize_url(resp.headers["location"]), ) if skip_upload(resp, upload_settings.skip_existing, package): diff --git a/twine/utils.py b/twine/utils.py index 484a0234..05739bc4 100644 --- a/twine/utils.py +++ b/twine/utils.py @@ -100,6 +100,24 @@ def get_config(path: str) -> Dict[str, RepositoryConfig]: return dict(config) +def sanitize_url(url: str) -> str: + """Sanitize a URL. + + Sanitize URLs, removing any user:password combinations and replacing them with + asterisks. Returns the original URL if the string is a non-matching pattern. + + :param url: + str containing a URL to sanitize. + + return: + str either sanitized or as entered depending on pattern match. + """ + uri = rfc3986.urlparse(url) + if uri.userinfo: + return cast(str, uri.copy_with(userinfo="*" * 8).unsplit()) + return url + + def _validate_repository_url(repository_url: str) -> None: """Validate the given url for allowed schemes and components.""" # Allowed schemes are http and https, based on whether the repository @@ -126,11 +144,7 @@ def get_repository_from_config( # Prefer CLI `repository_url` over `repository` or .pypirc if repository_url: _validate_repository_url(repository_url) - return { - "repository": repository_url, - "username": None, - "password": None, - } + return _config_from_repository_url(repository_url) try: config = get_config(config_file)[repository] @@ -154,6 +168,17 @@ def get_repository_from_config( } +def _config_from_repository_url(url: str) -> RepositoryConfig: + parsed = urlparse(url) + config = {"repository": url, "username": None, "password": None} + if parsed.username: + config["username"] = parsed.username + config["password"] = parsed.password + config["repository"] = urlunparse((parsed.scheme, parsed.hostname) + parsed[2:]) + config["repository"] = normalize_repository_url(cast(str, config["repository"])) + return config + + def normalize_repository_url(url: str) -> str: parsed = urlparse(url) if parsed.netloc in _HOSTNAMES: