diff --git a/docs/html/topics/authentication.md b/docs/html/topics/authentication.md index 981aab5abd7..e3cfc697f79 100644 --- a/docs/html/topics/authentication.md +++ b/docs/html/topics/authentication.md @@ -16,6 +16,33 @@ token as the "username" and do not provide a password: https://0123456789abcdef@pypi.company.com/simple ``` +When you specify several indexes, each of them can come with its own +authentication information. When the domains and schemes of multiple +indexes partially overlap, you can specify different authentication for each of them +For example you can have: + +``` +PIP_INDEX_URL=https://build:password1@pkgs.dev.azure.com/feed1 +PIP_EXTRA_INDEX_URL=https://build:password2@pkgs.dev.azure.com/feed2 +``` + +If you specify multiple identical index URLs with different authentication information, +authentication from the first index will be used. + +In compliance with [RFC7617](https://datatracker.ietf.org/doc/html/rfc7617#section-2.2) if the indexes +overlap, the authentication information from the prefix-match will be reused for the longer index if +the longer index does not contain the authentication information. In case multiple indexes are +prefix-matching, the authentication of the first of the longest matching prefixes is used. + +For example in this case, build:password authentication will be used when authenticating with the extra +index URL. + +``` +PIP_INDEX_URL=https://build:password@pkgs.dev.azure.com/ +PIP_EXTRA_INDEX_URL=https://pkgs.dev.azure.com/feed1 +``` + + ### Percent-encoding special characters ```{versionadded} 10.0 diff --git a/news/10904.feature.rst b/news/10904.feature.rst new file mode 100644 index 00000000000..b880c1e0405 --- /dev/null +++ b/news/10904.feature.rst @@ -0,0 +1,3 @@ +When you specify multiple indexes with common domains and schemes, basic authentication for each of +the indexes can be different - compliant with RFC 7617. Previous versions of pip reused basic +authentication credentials for all urls within the same domain. diff --git a/src/pip/_internal/network/auth.py b/src/pip/_internal/network/auth.py index e40ebfb2785..ef84c705e16 100644 --- a/src/pip/_internal/network/auth.py +++ b/src/pip/_internal/network/auth.py @@ -5,7 +5,7 @@ """ import urllib.parse -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, List, Optional, Tuple from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth from pip._vendor.requests.models import Request, Response @@ -76,7 +76,7 @@ def __init__( ) -> None: self.prompting = prompting self.index_urls = index_urls - self.passwords: Dict[str, AuthInfo] = {} + self.passwords: List[Tuple[str, AuthInfo]] = [] # When the user is prompted to enter credentials and keyring is # available, we will offer to save them. If the user accepts, # this value is set to the credentials they entered. After the @@ -163,6 +163,49 @@ def _get_new_credentials( return username, password + def _get_matching_credentials(self, original_url: str) -> Optional[AuthInfo]: + """ + Find matching credentials based on the longest matching prefix found. + + According to https://datatracker.ietf.org/doc/html/rfc7617#section-2.2 + the authentication scope is defined by removing the last part after + the last "/" of the resource - but in case of `pip` the end of path is always + "/project", so we can treat the `original_url` as full `authentication scope' + for the request. The RFC specifies that prefix matching of the scope is + also within the protection scope specified by the realm value, so we can + safely assume that if we find any match, the longest matching prefix is + the right authentication information we should use. + + The specification does not decide which of the matching scopes should be + used if there are more of them. Our decision is to choose the longest + matching one. In case exactly the same prefix will be added several times, + the authentication information from the first one is used. + + According to the RFC, the authentication scope includes both scheme and netloc, + Both have to match in order to share the authentication scope. + + :param original_url: original URL of the request + :return: Stored Authentication info matching the authentication scope or + None if not found + """ + max_matched_prefix_length = 0 + matched_auth_info = None + no_auth_url = remove_auth_from_url(original_url) + parsed_original = urllib.parse.urlparse(no_auth_url) + for prefix, auth_info in self.passwords: + parsed_stored = urllib.parse.urlparse(prefix) + if ( + parsed_stored.netloc != parsed_original.netloc + or parsed_stored.scheme != parsed_original.scheme + ): + # only consider match when both scheme and netloc match + continue + if no_auth_url.startswith(prefix): + if len(prefix) > max_matched_prefix_length: + matched_auth_info = auth_info + max_matched_prefix_length = len(prefix) + return matched_auth_info + def _get_url_and_credentials( self, original_url: str ) -> Tuple[str, Optional[str], Optional[str]]: @@ -184,12 +227,14 @@ def _get_url_and_credentials( # Do this if either the username or the password is missing. # This accounts for the situation in which the user has specified # the username in the index url, but the password comes from keyring. - if (username is None or password is None) and netloc in self.passwords: - un, pw = self.passwords[netloc] - # It is possible that the cached credentials are for a different username, - # in which case the cache should be ignored. - if username is None or username == un: - username, password = un, pw + if username is None or password is None: + matched_credentials = self._get_matching_credentials(original_url) + if matched_credentials: + un, pw = matched_credentials + # It is possible that the cached credentials are for a + # different username, in which case the cache should be ignored. + if username is None or username == un: + username, password = un, pw if username is not None or password is not None: # Convert the username and password if they're None, so that @@ -200,7 +245,7 @@ def _get_url_and_credentials( password = password or "" # Store any acquired credentials. - self.passwords[netloc] = (username, password) + self.passwords.append((remove_auth_from_url(url), (username, password))) assert ( # Credentials were found @@ -273,7 +318,9 @@ def handle_401(self, resp: Response, **kwargs: Any) -> Response: # Store the new username and password to use for future requests self._credentials_to_save = None if username is not None and password is not None: - self.passwords[parsed.netloc] = (username, password) + self.passwords.append( + (remove_auth_from_url(resp.url), (username, password)) + ) # Prompt to save the password to keyring if save and self._should_save_password_to_keyring(): diff --git a/tests/unit/test_network_auth.py b/tests/unit/test_network_auth.py index 5c0e5746281..eb0699a5454 100644 --- a/tests/unit/test_network_auth.py +++ b/tests/unit/test_network_auth.py @@ -1,10 +1,12 @@ import functools from typing import Any, List, Optional, Tuple +from unittest.mock import Mock, PropertyMock import pytest import pip._internal.network.auth from pip._internal.network.auth import MultiDomainBasicAuth +from src.pip._vendor.requests import PreparedRequest from tests.lib.requests_mocks import MockConnection, MockRequest, MockResponse @@ -50,40 +52,143 @@ def test_get_credentials_parses_correctly( (username is None and password is None) or # Credentials were found and "cached" appropriately - auth.passwords["example.com"] == (username, password) + (url, (username, password)) in auth.passwords ) +def test_handle_prompt_for_password_successful() -> None: + auth = MultiDomainBasicAuth() + resp = Mock() + resp.status_code = 401 + resp.url = "http://example.com" + resp.raw = Mock() + resp.content = PropertyMock() + resp.content = "" + resp.request = PreparedRequest() + resp.request.headers = {} + auth._prompt_for_password = Mock() # type: ignore[assignment] + auth._prompt_for_password.return_value = ("user", "password", True) + auth.handle_401(resp) + auth._prompt_for_password.assert_called_with("example.com") + expected = ("http://example.com", ("user", "password")) + assert len(auth.passwords) == 1 + assert auth.passwords[0] == expected + + +def test_handle_prompt_for_password_unsuccessful() -> None: + auth = MultiDomainBasicAuth() + resp = Mock() + resp.status_code = 401 + resp.url = "http://example.com" + resp.raw = Mock() + resp.content = PropertyMock() + resp.content = "" + resp.request = PreparedRequest() + resp.request.headers = {} + auth._prompt_for_password = Mock() # type: ignore[assignment] + auth._prompt_for_password.return_value = (None, None, False) + auth.handle_401(resp) + auth._prompt_for_password.assert_called_with("example.com") + assert len(auth.passwords) == 0 + + def test_get_credentials_not_to_uses_cached_credentials() -> None: auth = MultiDomainBasicAuth() - auth.passwords["example.com"] = ("user", "pass") + auth.passwords.append(("http://example.com", ("user", "pass"))) got = auth._get_url_and_credentials("http://foo:bar@example.com/path") expected = ("http://example.com/path", "foo", "bar") assert got == expected -def test_get_credentials_not_to_uses_cached_credentials_only_username() -> None: +def test_get_credentials_not_to_use_cached_credentials_only_username() -> None: + auth = MultiDomainBasicAuth() + auth.passwords.append(("https://example.com", ("user", "pass"))) + + got = auth._get_url_and_credentials("https://foo@example.com/path") + expected = ("https://example.com/path", "foo", "") + assert got == expected + + +def test_multi_domain_credentials_match() -> None: + auth = MultiDomainBasicAuth() + auth.passwords.append(("http://example.com", ("user", "pass"))) + auth.passwords.append(("http://example.com/path", ("user", "pass2"))) + + got = auth._get_url_and_credentials("http://user@example.com/path") + expected = ("http://example.com/path", "user", "pass2") + assert got == expected + + +def test_multi_domain_credentials_longest_match() -> None: + auth = MultiDomainBasicAuth() + auth.passwords.append(("http://example.com", ("user", "pass"))) + auth.passwords.append(("http://example.com/path", ("user", "pass2"))) + auth.passwords.append(("http://example.com/path/subpath", ("user", "pass3"))) + + got = auth._get_url_and_credentials("http://user@example.com/path") + expected = ("http://example.com/path", "user", "pass2") + assert got == expected + + +def test_multi_domain_credentials_partial_match_only() -> None: auth = MultiDomainBasicAuth() - auth.passwords["example.com"] = ("user", "pass") + auth.passwords.append(("http://example.com/path1", ("user", "pass"))) - got = auth._get_url_and_credentials("http://foo@example.com/path") - expected = ("http://example.com/path", "foo", "") + got = auth._get_url_and_credentials("http://example.com/path2") + expected = ("http://example.com/path2", None, None) assert got == expected def test_get_credentials_uses_cached_credentials() -> None: auth = MultiDomainBasicAuth() - auth.passwords["example.com"] = ("user", "pass") + auth.passwords.append(("https://example.com", ("user", "pass"))) + + got = auth._get_url_and_credentials("https://example.com/path") + expected = ("https://example.com/path", "user", "pass") + assert got == expected + + +def test_get_credentials_not_uses_cached_credentials_different_scheme_http() -> None: + auth = MultiDomainBasicAuth() + auth.passwords.append(("http://example.com", ("user", "pass"))) + + got = auth._get_url_and_credentials("https://example.com/path") + expected = ("https://example.com/path", None, None) + assert got == expected + + +def test_get_credentials_not_uses_cached_credentials_different_scheme_https() -> None: + auth = MultiDomainBasicAuth() + auth.passwords.append(("https://example.com", ("user", "pass"))) got = auth._get_url_and_credentials("http://example.com/path") - expected = ("http://example.com/path", "user", "pass") + expected = ("http://example.com/path", None, None) assert got == expected def test_get_credentials_uses_cached_credentials_only_username() -> None: auth = MultiDomainBasicAuth() - auth.passwords["example.com"] = ("user", "pass") + auth.passwords.append(("http://example.com", ("user", "pass"))) + + got = auth._get_url_and_credentials("http://user@example.com/path") + expected = ("http://example.com/path", "user", "pass") + assert got == expected + + +def test_get_credentials_uses_cached_credentials_wrong_username() -> None: + auth = MultiDomainBasicAuth() + auth.passwords.append(("http://example.com", ("user", "pass"))) + + got = auth._get_url_and_credentials("http://user2@example.com/path") + expected = ("http://example.com/path", "user2", "") + assert got == expected + + +def test_get_credentials_added_multiple_times() -> None: + auth = MultiDomainBasicAuth() + auth.passwords.append(("http://example.com", ("user", "pass"))) + auth.passwords.append(("http://example.com", ("user", "pass2"))) got = auth._get_url_and_credentials("http://user@example.com/path") expected = ("http://example.com/path", "user", "pass")