Skip to content

Commit

Permalink
Strip slash before Windows drive letter in path (#10116)
Browse files Browse the repository at this point in the history
Functions like urllib.parse.urlsplit() parse a file:// URL created from
a non-UNC Windows absolute path with a leading slash in the path
component:

>>> from pathlib import Path
>>> from urllib.parse import urlsplit
>>> path = Path("C:/Users/VssAdministrator")
>>> parsed = urlsplit(path.as_uri())
>>> parsed.path
'/C:/Users/VssAdministrator'

This value unfortunately does not play well with path functions like
open(), so we perform some additional cleanup to strip that leading
slash.

This commit also contains some minor cleanup to unify how Windows is
detected, and how a file:// URL is fetched.
  • Loading branch information
uranusjr committed Jul 22, 2021
1 parent 76cd70a commit f6b184c
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 11 deletions.
3 changes: 3 additions & 0 deletions news/10115.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Strip leading slash from a ``file://`` URL built from a path with the Windows
drive notation. This fixes bugs where the ``file://`` URL cannot be correctly
used as a requirement, constraint, or index URL on Windows.
14 changes: 5 additions & 9 deletions src/pip/_internal/req/req_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
from pip._internal.utils.encoding import auto_decode
from pip._internal.utils.urls import get_url_scheme, url_to_path
from pip._internal.utils.urls import get_url_scheme

if TYPE_CHECKING:
# NoReturn introduced in 3.6.2; imported only for type checking to maintain
Expand Down Expand Up @@ -532,20 +532,16 @@ def get_file_content(url, session):
"""
scheme = get_url_scheme(url)

if scheme in ['http', 'https']:
# FIXME: catch some errors
# Pip has special support for file:// URLs (LocalFSAdapter).
if scheme in ['http', 'https', 'file']:
resp = session.get(url)
raise_for_status(resp)
return resp.url, resp.text

elif scheme == 'file':
url = url_to_path(url)

# Assume this is a bare path.
try:
with open(url, 'rb') as f:
content = auto_decode(f.read())
except OSError as exc:
raise InstallationError(
f'Could not open requirements file: {exc}'
)
raise InstallationError(f'Could not open requirements file: {exc}')
return url, content
20 changes: 18 additions & 2 deletions src/pip/_internal/utils/urls.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import os
import sys
import string
import urllib.parse
import urllib.request
from typing import Optional

from .compat import WINDOWS


def get_url_scheme(url):
# type: (str) -> Optional[str]
Expand Down Expand Up @@ -37,7 +39,7 @@ def url_to_path(url):
if not netloc or netloc == "localhost":
# According to RFC 8089, same as empty authority.
netloc = ""
elif sys.platform == "win32":
elif WINDOWS:
# If we have a UNC path, prepend UNC share notation.
netloc = "\\\\" + netloc
else:
Expand All @@ -46,4 +48,18 @@ def url_to_path(url):
)

path = urllib.request.url2pathname(netloc + path)

# On Windows, urlsplit parses the path as something like "/C:/Users/foo".
# This creates issues for path-related functions like io.open(), so we try
# to detect and strip the leading slash.
if (
WINDOWS
and not netloc # Not UNC.
and len(path) >= 3
and path[0] == "/" # Leading slash to strip.
and path[1] in string.ascii_letters # Drive letter.
and path[2:4] in (":", ":/") # Colon + end of string, or colon + absolute path.
):
path = path[1:]

return path
16 changes: 16 additions & 0 deletions tests/unit/test_req_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,22 @@ def parse_reqfile(
)


def test_read_file_url(tmp_path):
# Write a one-line requirements file into the temporary directory, parse it,
# and check both the parsed requirement and its recorded origin.
reqs = tmp_path.joinpath("requirements.txt")
reqs.write_text("foo")
# NOTE(review): despite the test name, the argument passed here is
# reqs.as_posix() rather than a file:// URL -- confirm this is the intended
# input form.
# NOTE(review): `session` is not a parameter of this test; presumably it
# resolves to a name defined elsewhere in the module -- verify it is the
# intended session object.
result = list(parse_requirements(reqs.as_posix(), session))

# Exactly one requirement is expected; `result` doubles as the failure message.
assert len(result) == 1, result
assert result[0].requirement == "foo"

# The comes_from value has three parts: -r or -c flag, path, and line.
# The path in the middle is compared with os.path.samefile() instead of by
# string equality, because path normalization may change how it is spelled.
assert result[0].comes_from[:3] == "-r "
assert result[0].comes_from[-9:] == " (line 1)"
assert os.path.samefile(result[0].comes_from[3:-9], str(reqs))


class TestPreprocess:
"""tests for `preprocess`"""

Expand Down

0 comments on commit f6b184c

Please sign in to comment.