Skip to content

Commit

Permalink
Implement RFC7617-compliant multi-domain basic authentication
Browse files Browse the repository at this point in the history
The https://datatracker.ietf.org/doc/html/rfc7617#section-2.2
defines multi-domain authentication behaviour and authentication
scopes for basic authentication. This change improves the
implementation of the multi-domain matching to be RC7617 compliant

* path matching (including longest match)
* scheme validation matching

Closes: pypa#10902
  • Loading branch information
potiuk committed Apr 11, 2022
1 parent 3820b0e commit 5d8a1fa
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 19 deletions.
27 changes: 27 additions & 0 deletions docs/html/topics/authentication.md
Expand Up @@ -16,6 +16,33 @@ token as the "username" and do not provide a password:
https://0123456789abcdef@pypi.company.com/simple
```

When you specify several indexes, each of them can come with its own
authentication information. When the domains and schemes of multiple
indexes partially overlap, you can specify different authentication for each of them
For example you can have:

```
PIP_INDEX_URL=https://build:password1@pkgs.dev.azure.com/feed1
PIP_EXTRA_INDEX_URL=https://build:password2@pkgs.dev.azure.com/feed2
```

If you specify multiple identical index URLs with different authentication information,
authentication from the first index will be used.

In compliance with [RFC7617](https://datatracker.ietf.org/doc/html/rfc7617#section-2.2) if the indexes
overlap, the authentication information from the prefix-match will be reused for the longer index if
the longer index does not contain the authentication information. In case multiple indexes are
prefix-matching, the authentication of the first of the longest matching prefixes is used.

For example in this case, build:password authentication will be used when authenticating with the extra
index URL.

```
PIP_INDEX_URL=https://build:password@pkgs.dev.azure.com/
PIP_EXTRA_INDEX_URL=https://pkgs.dev.azure.com/feed1
```


### Percent-encoding special characters

```{versionadded} 10.0
Expand Down
3 changes: 3 additions & 0 deletions news/10904.feature.rst
@@ -0,0 +1,3 @@
When you specify multiple indexes with common domains and schemes, basic authentication for each of
the indexes can be different - compliant with RFC 7617. Previous versions of pip reused basic
authentication credentials for all urls within the same domain.
67 changes: 57 additions & 10 deletions src/pip/_internal/network/auth.py
Expand Up @@ -5,7 +5,7 @@
"""

import urllib.parse
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, List, Optional, Tuple

from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
from pip._vendor.requests.models import Request, Response
Expand Down Expand Up @@ -76,7 +76,7 @@ def __init__(
) -> None:
self.prompting = prompting
self.index_urls = index_urls
self.passwords: Dict[str, AuthInfo] = {}
self.passwords: List[Tuple[str, AuthInfo]] = []
# When the user is prompted to enter credentials and keyring is
# available, we will offer to save them. If the user accepts,
# this value is set to the credentials they entered. After the
Expand Down Expand Up @@ -163,6 +163,49 @@ def _get_new_credentials(

return username, password

def _get_matching_credentials(self, original_url: str) -> Optional[AuthInfo]:
"""
Find matching credentials based on the longest matching prefix found.
According to https://datatracker.ietf.org/doc/html/rfc7617#section-2.2
the authentication scope is defined by removing the last part after
the last "/" of the resource - but in case of `pip` the end of path is always
"/project", so we can treat the `original_url` as full `authentication scope'
for the request. The RFC specifies that prefix matching of the scope is
also within the protection scope specified by the realm value, so we can
safely assume that if we find any match, the longest matching prefix is
the right authentication information we should use.
The specification does not decide which of the matching scopes should be
used if there are more of them. Our decision is to choose the longest
matching one. In case exactly the same prefix will be added several times,
the authentication information from the first one is used.
According to the RFC, the authentication scope includes both scheme and netloc,
Both have to match in order to share the authentication scope.
:param original_url: original URL of the request
:return: Stored Authentication info matching the authentication scope or
None if not found
"""
max_matched_prefix_length = 0
matched_auth_info = None
no_auth_url = remove_auth_from_url(original_url)
parsed_original = urllib.parse.urlparse(no_auth_url)
for prefix, auth_info in self.passwords:
parsed_stored = urllib.parse.urlparse(prefix)
if (
parsed_stored.netloc != parsed_original.netloc
or parsed_stored.scheme != parsed_original.scheme
):
# only consider match when both scheme and netloc match
continue
if no_auth_url.startswith(prefix):
if len(prefix) > max_matched_prefix_length:
matched_auth_info = auth_info
max_matched_prefix_length = len(prefix)
return matched_auth_info

def _get_url_and_credentials(
self, original_url: str
) -> Tuple[str, Optional[str], Optional[str]]:
Expand All @@ -184,12 +227,14 @@ def _get_url_and_credentials(
# Do this if either the username or the password is missing.
# This accounts for the situation in which the user has specified
# the username in the index url, but the password comes from keyring.
if (username is None or password is None) and netloc in self.passwords:
un, pw = self.passwords[netloc]
# It is possible that the cached credentials are for a different username,
# in which case the cache should be ignored.
if username is None or username == un:
username, password = un, pw
if username is None or password is None:
matched_credentials = self._get_matching_credentials(original_url)
if matched_credentials:
un, pw = matched_credentials
# It is possible that the cached credentials are for a
# different username, in which case the cache should be ignored.
if username is None or username == un:
username, password = un, pw

if username is not None or password is not None:
# Convert the username and password if they're None, so that
Expand All @@ -200,7 +245,7 @@ def _get_url_and_credentials(
password = password or ""

# Store any acquired credentials.
self.passwords[netloc] = (username, password)
self.passwords.append((remove_auth_from_url(url), (username, password)))

assert (
# Credentials were found
Expand Down Expand Up @@ -273,7 +318,9 @@ def handle_401(self, resp: Response, **kwargs: Any) -> Response:
# Store the new username and password to use for future requests
self._credentials_to_save = None
if username is not None and password is not None:
self.passwords[parsed.netloc] = (username, password)
self.passwords.append(
(remove_auth_from_url(resp.url), (username, password))
)

# Prompt to save the password to keyring
if save and self._should_save_password_to_keyring():
Expand Down
123 changes: 114 additions & 9 deletions tests/unit/test_network_auth.py
@@ -1,10 +1,12 @@
import functools
from typing import Any, List, Optional, Tuple
from unittest.mock import Mock, PropertyMock

import pytest

import pip._internal.network.auth
from pip._internal.network.auth import MultiDomainBasicAuth
from src.pip._vendor.requests import PreparedRequest
from tests.lib.requests_mocks import MockConnection, MockRequest, MockResponse


Expand Down Expand Up @@ -50,40 +52,143 @@ def test_get_credentials_parses_correctly(
(username is None and password is None)
or
# Credentials were found and "cached" appropriately
auth.passwords["example.com"] == (username, password)
(url, (username, password)) in auth.passwords
)


def test_handle_prompt_for_password_successful() -> None:
auth = MultiDomainBasicAuth()
resp = Mock()
resp.status_code = 401
resp.url = "http://example.com"
resp.raw = Mock()
resp.content = PropertyMock()
resp.content = ""
resp.request = PreparedRequest()
resp.request.headers = {}
auth._prompt_for_password = Mock() # type: ignore[assignment]
auth._prompt_for_password.return_value = ("user", "password", True)
auth.handle_401(resp)
auth._prompt_for_password.assert_called_with("example.com")
expected = ("http://example.com", ("user", "password"))
assert len(auth.passwords) == 1
assert auth.passwords[0] == expected


def test_handle_prompt_for_password_unsuccessful() -> None:
auth = MultiDomainBasicAuth()
resp = Mock()
resp.status_code = 401
resp.url = "http://example.com"
resp.raw = Mock()
resp.content = PropertyMock()
resp.content = ""
resp.request = PreparedRequest()
resp.request.headers = {}
auth._prompt_for_password = Mock() # type: ignore[assignment]
auth._prompt_for_password.return_value = (None, None, False)
auth.handle_401(resp)
auth._prompt_for_password.assert_called_with("example.com")
assert len(auth.passwords) == 0


def test_get_credentials_not_to_uses_cached_credentials() -> None:
auth = MultiDomainBasicAuth()
auth.passwords["example.com"] = ("user", "pass")
auth.passwords.append(("http://example.com", ("user", "pass")))

got = auth._get_url_and_credentials("http://foo:bar@example.com/path")
expected = ("http://example.com/path", "foo", "bar")
assert got == expected


def test_get_credentials_not_to_uses_cached_credentials_only_username() -> None:
def test_get_credentials_not_to_use_cached_credentials_only_username() -> None:
auth = MultiDomainBasicAuth()
auth.passwords.append(("https://example.com", ("user", "pass")))

got = auth._get_url_and_credentials("https://foo@example.com/path")
expected = ("https://example.com/path", "foo", "")
assert got == expected


def test_multi_domain_credentials_match() -> None:
auth = MultiDomainBasicAuth()
auth.passwords.append(("http://example.com", ("user", "pass")))
auth.passwords.append(("http://example.com/path", ("user", "pass2")))

got = auth._get_url_and_credentials("http://user@example.com/path")
expected = ("http://example.com/path", "user", "pass2")
assert got == expected


def test_multi_domain_credentials_longest_match() -> None:
auth = MultiDomainBasicAuth()
auth.passwords.append(("http://example.com", ("user", "pass")))
auth.passwords.append(("http://example.com/path", ("user", "pass2")))
auth.passwords.append(("http://example.com/path/subpath", ("user", "pass3")))

got = auth._get_url_and_credentials("http://user@example.com/path")
expected = ("http://example.com/path", "user", "pass2")
assert got == expected


def test_multi_domain_credentials_partial_match_only() -> None:
auth = MultiDomainBasicAuth()
auth.passwords["example.com"] = ("user", "pass")
auth.passwords.append(("http://example.com/path1", ("user", "pass")))

got = auth._get_url_and_credentials("http://foo@example.com/path")
expected = ("http://example.com/path", "foo", "")
got = auth._get_url_and_credentials("http://example.com/path2")
expected = ("http://example.com/path2", None, None)
assert got == expected


def test_get_credentials_uses_cached_credentials() -> None:
auth = MultiDomainBasicAuth()
auth.passwords["example.com"] = ("user", "pass")
auth.passwords.append(("https://example.com", ("user", "pass")))

got = auth._get_url_and_credentials("https://example.com/path")
expected = ("https://example.com/path", "user", "pass")
assert got == expected


def test_get_credentials_not_uses_cached_credentials_different_scheme_http() -> None:
auth = MultiDomainBasicAuth()
auth.passwords.append(("http://example.com", ("user", "pass")))

got = auth._get_url_and_credentials("https://example.com/path")
expected = ("https://example.com/path", None, None)
assert got == expected


def test_get_credentials_not_uses_cached_credentials_different_scheme_https() -> None:
auth = MultiDomainBasicAuth()
auth.passwords.append(("https://example.com", ("user", "pass")))

got = auth._get_url_and_credentials("http://example.com/path")
expected = ("http://example.com/path", "user", "pass")
expected = ("http://example.com/path", None, None)
assert got == expected


def test_get_credentials_uses_cached_credentials_only_username() -> None:
auth = MultiDomainBasicAuth()
auth.passwords["example.com"] = ("user", "pass")
auth.passwords.append(("http://example.com", ("user", "pass")))

got = auth._get_url_and_credentials("http://user@example.com/path")
expected = ("http://example.com/path", "user", "pass")
assert got == expected


def test_get_credentials_uses_cached_credentials_wrong_username() -> None:
auth = MultiDomainBasicAuth()
auth.passwords.append(("http://example.com", ("user", "pass")))

got = auth._get_url_and_credentials("http://user2@example.com/path")
expected = ("http://example.com/path", "user2", "")
assert got == expected


def test_get_credentials_added_multiple_times() -> None:
auth = MultiDomainBasicAuth()
auth.passwords.append(("http://example.com", ("user", "pass")))
auth.passwords.append(("http://example.com", ("user", "pass2")))

got = auth._get_url_and_credentials("http://user@example.com/path")
expected = ("http://example.com/path", "user", "pass")
Expand Down

0 comments on commit 5d8a1fa

Please sign in to comment.