
repositories: add support for PEP 658 #5509

Merged 7 commits on Feb 20, 2024.
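In short: the HTML and JSON link sources learn to read the PEP 658 metadata fields (under their PEP 714 names data-core-metadata / core-metadata, with fallback to the legacy spellings), and the HTTP repository learns to fetch the standalone metadata file, verify its hash, and parse it, so dependencies can be resolved without downloading whole wheels or sdists. A rough sketch of the protocol the new code builds on, with an illustrative URL and digest that are not taken from this PR:

import hashlib

import requests

# PEP 658: the Core Metadata of a distribution file is served at the file's
# own URL plus a ".metadata" suffix (URL invented for this example).
wheel_url = "https://files.example.org/demo-1.0-py3-none-any.whl"
metadata_url = wheel_url + ".metadata"

# The index advertises the expected digest (the PEP 691 "core-metadata"
# field, or a data-core-metadata attribute on HTML pages); invented here.
expected_sha256 = "0123abcd"

response = requests.get(metadata_url)
response.raise_for_status()
if hashlib.sha256(response.content).hexdigest() != expected_sha256:
    raise ValueError("metadata hash mismatch; fall back to the archive")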
112 changes: 85 additions & 27 deletions src/poetry/repositories/http_repository.py
@@ -3,7 +3,6 @@
import functools
import hashlib

-from collections import defaultdict
from contextlib import contextmanager
from pathlib import Path
from typing import TYPE_CHECKING
@@ -13,13 +12,14 @@
import requests
import requests.adapters

from packaging.metadata import parse_email
from poetry.core.constraints.version import parse_constraint
from poetry.core.packages.dependency import Dependency
from poetry.core.packages.utils.link import Link
from poetry.core.utils.helpers import temporary_directory
from poetry.core.version.markers import parse_marker

from poetry.config.config import Config
-from poetry.inspection.info import PackageInfo
from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported
from poetry.inspection.lazy_wheel import metadata_from_wheel_url
from poetry.repositories.cached_repository import CachedRepository
@@ -36,8 +36,8 @@

if TYPE_CHECKING:
from packaging.utils import NormalizedName
-from poetry.core.packages.utils.link import Link

from poetry.inspection.info import PackageInfo
from poetry.repositories.link_sources.base import LinkSource
from poetry.utils.authenticator import RepositoryCertificateConfig

@@ -109,10 +109,9 @@ def _cached_or_downloaded_file(
)
yield filepath

-def _get_info_from_wheel(self, url: str) -> PackageInfo:
def _get_info_from_wheel(self, link: Link) -> PackageInfo:
from poetry.inspection.info import PackageInfo

-link = Link(url)
netloc = link.netloc

# If "lazy-wheel" is enabled and the domain supports range requests
@@ -147,17 +146,68 @@ def _get_info_from_wheel(self, url: str) -> PackageInfo:
level="debug",
)
self._supports_range_requests[netloc] = True
-return self._get_info_from_wheel(link.url)
return self._get_info_from_wheel(link)

-def _get_info_from_sdist(self, url: str) -> PackageInfo:
def _get_info_from_sdist(self, link: Link) -> PackageInfo:
from poetry.inspection.info import PackageInfo

-with self._cached_or_downloaded_file(Link(url)) as filepath:
with self._cached_or_downloaded_file(link) as filepath:
return PackageInfo.from_sdist(filepath)

-def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo:
def _get_info_from_metadata(self, link: Link) -> PackageInfo | None:
if link.has_metadata:
try:
assert link.metadata_url is not None
response = self.session.get(link.metadata_url)
if link.metadata_hashes and (
hash_name := get_highest_priority_hash_type(
set(link.metadata_hashes.keys()), f"{link.filename}.metadata"
)
):
metadata_hash = getattr(hashlib, hash_name)(
response.text.encode()
).hexdigest()
if metadata_hash != link.metadata_hashes[hash_name]:
self._log(
f"Metadata file hash ({metadata_hash}) does not match"
f" expected hash ({link.metadata_hashes[hash_name]})."
f" Metadata file for {link.filename} will be ignored.",
level="warning",
)
return None

metadata, _ = parse_email(response.content)
return PackageInfo.from_metadata(metadata)

except requests.HTTPError:
self._log(
f"Failed to retrieve metadata at {link.metadata_url}",
level="warning",
)

return None

def _get_info_from_links(
self,
links: list[Link],
*,
ignore_yanked: bool = True,
) -> PackageInfo:
# Sort links by distribution type
wheels: list[Link] = []
sdists: list[Link] = []
for link in links:
if link.yanked and ignore_yanked:
# drop yanked files unless the entire release is yanked
continue
if link.is_wheel:
wheels.append(link)
elif link.filename.endswith(
(".tar.gz", ".zip", ".bz2", ".xz", ".Z", ".tar")
):
sdists.append(link)

# Prefer to read data from wheels: this is faster and more reliable
-wheels = urls.get("bdist_wheel")
if wheels:
# We ought just to be able to look at any of the available wheels to read
# metadata, they all should give the same answer.
@@ -173,8 +223,7 @@ def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo:
universal_python3_wheel = None
platform_specific_wheels = []
for wheel in wheels:
-link = Link(wheel)
-m = wheel_file_re.match(link.filename)
m = wheel_file_re.match(wheel.filename)
if not m:
continue

@@ -194,13 +243,19 @@ def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo:
platform_specific_wheels.append(wheel)

if universal_wheel is not None:
-return self._get_info_from_wheel(universal_wheel)
return self._get_info_from_metadata(
universal_wheel
) or self._get_info_from_wheel(universal_wheel)

info = None
if universal_python2_wheel and universal_python3_wheel:
-info = self._get_info_from_wheel(universal_python2_wheel)
info = self._get_info_from_metadata(
universal_python2_wheel
) or self._get_info_from_wheel(universal_python2_wheel)

-py3_info = self._get_info_from_wheel(universal_python3_wheel)
py3_info = self._get_info_from_metadata(
universal_python3_wheel
) or self._get_info_from_wheel(universal_python3_wheel)

if info.requires_python or py3_info.requires_python:
info.requires_python = str(
@@ -250,35 +305,37 @@ def _get_info_from_urls(self, urls: dict[str, list[str]]) -> PackageInfo:

# Prefer non platform specific wheels
if universal_python3_wheel:
-return self._get_info_from_wheel(universal_python3_wheel)
return self._get_info_from_metadata(
universal_python3_wheel
) or self._get_info_from_wheel(universal_python3_wheel)

if universal_python2_wheel:
-return self._get_info_from_wheel(universal_python2_wheel)
return self._get_info_from_metadata(
universal_python2_wheel
) or self._get_info_from_wheel(universal_python2_wheel)

if platform_specific_wheels:
first_wheel = platform_specific_wheels[0]
-return self._get_info_from_wheel(first_wheel)
return self._get_info_from_metadata(
first_wheel
) or self._get_info_from_wheel(first_wheel)

-return self._get_info_from_sdist(urls["sdist"][0])
return self._get_info_from_metadata(sdists[0]) or self._get_info_from_sdist(
sdists[0]
)

def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any]:
if not links:
raise PackageNotFound(
f'No valid distribution links found for package: "{data.name}" version:'
f' "{data.version}"'
)
-urls = defaultdict(list)

files: list[dict[str, Any]] = []
for link in links:
if link.yanked and not data.yanked:
# drop yanked files unless the entire release is yanked
continue
-if link.is_wheel:
-urls["bdist_wheel"].append(link.url)
-elif link.filename.endswith(
-(".tar.gz", ".zip", ".bz2", ".xz", ".Z", ".tar")
-):
-urls["sdist"].append(link.url)

file_hash: str | None
for hash_name in ("sha512", "sha384", "sha256"):
@@ -299,7 +356,8 @@ def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict[str, Any]

data.files = files

-info = self._get_info_from_urls(urls)
# drop yanked files unless the entire release is yanked
info = self._get_info_from_links(links, ignore_yanked=not data.yanked)

data.summary = info.summary
data.requires_dist = info.requires_dist
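Note the recurring pattern above: _get_info_from_metadata() returns None when no metadata file is advertised, its hash does not match, or the request fails, so every `x or y` expression falls back to inspecting the archive itself. The hash check resolves the algorithm through hashlib by name; a self-contained illustration of that getattr pattern, with metadata bytes invented for the example:

import hashlib

metadata_bytes = b"Metadata-Version: 2.1\nName: demo\nVersion: 1.0\n"
# What an index might advertise; computed here so the example is consistent.
metadata_hashes = {"sha256": hashlib.sha256(metadata_bytes).hexdigest()}

hash_name = "sha256"  # get_highest_priority_hash_type picks this in the diff
digest = getattr(hashlib, hash_name)(metadata_bytes).hexdigest()
assert digest == metadata_hashes[hash_name]  # a mismatch means: ignore the file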
16 changes: 15 additions & 1 deletion src/poetry/repositories/link_sources/html.py
@@ -42,7 +42,21 @@ def _link_cache(self) -> LinkCache:
yanked = unescape(yanked_value)
else:
yanked = "data-yanked" in anchor
-link = Link(url, requires_python=pyrequire, yanked=yanked)

# see https://peps.python.org/pep-0714/#clients
# and https://peps.python.org/pep-0658/#specification
metadata: str | bool
for metadata_key in ("data-core-metadata", "data-dist-info-metadata"):
metadata_value = anchor.get(metadata_key)
if metadata_value:
metadata = unescape(metadata_value)
else:
metadata = metadata_key in anchor
if metadata:
break
link = Link(
url, requires_python=pyrequire, yanked=yanked, metadata=metadata
)

if link.ext not in self.SUPPORTED_FORMATS:
continue
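Per PEP 714, clients must prefer the data-core-metadata attribute and fall back to the legacy data-dist-info-metadata name, which is exactly what the loop above does. A minimal sketch of that preference with a plain dict standing in for the parsed anchor tag (values invented):

anchor = {
    "href": "demo-1.0-py3-none-any.whl",
    "data-dist-info-metadata": "sha256=0123abcd",  # legacy PEP 658 spelling
}

metadata: str | bool = False
for metadata_key in ("data-core-metadata", "data-dist-info-metadata"):
    if metadata_key in anchor:
        # A hash string is kept; a bare attribute just means "metadata exists".
        metadata = anchor[metadata_key] or True
        break
print(metadata)  # sha256=0123abcd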
17 changes: 16 additions & 1 deletion src/poetry/repositories/link_sources/json.py
@@ -28,7 +28,22 @@ def _link_cache(self) -> LinkCache:
url = file["url"]
requires_python = file.get("requires-python")
yanked = file.get("yanked", False)
-link = Link(url, requires_python=requires_python, yanked=yanked)

# see https://peps.python.org/pep-0714/#clients
# and https://peps.python.org/pep-0691/#project-detail
metadata: dict[str, str] | bool = False
for metadata_key in ("core-metadata", "dist-info-metadata"):
if metadata_key in file:
metadata_value = file[metadata_key]
if metadata_value and isinstance(metadata_value, dict):
metadata = metadata_value
else:
metadata = bool(metadata_value)
break

link = Link(
url, requires_python=requires_python, yanked=yanked, metadata=metadata
)

if link.ext not in self.SUPPORTED_FORMATS:
continue
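In the JSON (PEP 691) form, the field is either a boolean or a dict mapping hash names to hex digests, again under a PEP 714 alias. A small sketch of the normalization performed above, run over invented file entries:

for file in (
    {"core-metadata": {"sha256": "0123abcd"}},  # preferred key, with digest
    {"dist-info-metadata": True},               # legacy key, no digest known
    {"core-metadata": False},                   # metadata explicitly absent
):
    metadata: dict[str, str] | bool = False
    for metadata_key in ("core-metadata", "dist-info-metadata"):
        if metadata_key in file:
            metadata_value = file[metadata_key]
            if metadata_value and isinstance(metadata_value, dict):
                metadata = metadata_value
            else:
                metadata = bool(metadata_value)
            break
    print(metadata)  # {'sha256': '0123abcd'}, then True, then False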
26 changes: 9 additions & 17 deletions src/poetry/repositories/pypi_repository.py
@@ -2,7 +2,6 @@

import logging

-from collections import defaultdict
from typing import TYPE_CHECKING
from typing import Any

@@ -162,25 +161,18 @@ def _get_release_info(
data.files = files

if self._fallback and data.requires_dist is None:
self._log("No dependencies found, downloading archives", level="debug")
self._log(
"No dependencies found, downloading metadata and/or archives",
level="debug",
)
# No dependencies set (along with other information)
# This might be due to actually no dependencies
-# or badly set metadata when uploading
# or badly set metadata when uploading.
# So, we need to make sure there is actually no
-# dependencies by introspecting packages
-urls = defaultdict(list)
-for url in json_data["urls"]:
-# Only get sdist and wheels if they exist
-dist_type = url["packagetype"]
-if dist_type not in SUPPORTED_PACKAGE_TYPES:
-continue
-
-urls[dist_type].append(url["url"])
-
-if not urls:
-return data.asdict()
-
-info = self._get_info_from_urls(urls)
# dependencies by introspecting packages.
page = self.get_page(name)
links = list(page.links_for_version(name, version))
info = self._get_info_from_links(links)

data.requires_dist = info.requires_dist

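Rather than bucketing json_data["urls"] by packagetype, the fallback now walks the same simple-index Links as the legacy repositories, so the PyPI path also benefits from the PEP 658 shortcut. A hedged sketch of the wheel/sdist split that _get_info_from_links performs, using poetry-core's Link as the diff does (URLs invented):

from poetry.core.packages.utils.link import Link

links = [
    Link("https://files.example.org/demo-1.0-py3-none-any.whl"),
    Link("https://files.example.org/demo-1.0.tar.gz"),
]
wheels = [link for link in links if link.is_wheel]
sdists = [link for link in links if not link.is_wheel]
print([w.filename for w in wheels])  # ['demo-1.0-py3-none-any.whl']
print([s.filename for s in sdists])  # ['demo-1.0.tar.gz']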
28 changes: 28 additions & 0 deletions tests/repositories/conftest.py
@@ -1,12 +1,18 @@
from __future__ import annotations

import posixpath

from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any

import pytest
import requests


if TYPE_CHECKING:
from tests.types import HTMLPageGetter
from tests.types import RequestsSessionGet


@pytest.fixture
@@ -29,3 +35,25 @@ def _fixture(content: str, base_url: str | None = None) -> str:
"""

return _fixture


@pytest.fixture
def get_metadata_mock() -> RequestsSessionGet:
def metadata_mock(url: str, **__: Any) -> requests.Response:
if url.endswith(".metadata"):
response = requests.Response()
response.encoding = "application/text"
response._content = (
(
Path(__file__).parent
/ "fixtures"
/ "metadata"
/ posixpath.basename(url)
)
.read_text()
.encode()
)
return response
raise requests.HTTPError()

return metadata_mock
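The fixture fabricates a requests.Response by hand, assigning the private _content attribute instead of performing real I/O. A self-contained version of that trick, with URL and payload invented for the example:

import requests

def fake_get(url: str, **_: object) -> requests.Response:
    if not url.endswith(".metadata"):
        raise requests.HTTPError()
    response = requests.Response()
    response.status_code = 200
    response._content = b"Metadata-Version: 2.0\nName: demo\nVersion: 1.0\n"
    return response

resp = fake_get("https://files.example.org/demo-1.0-py3-none-any.whl.metadata")
assert resp.text.startswith("Metadata-Version")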
12 changes: 12 additions & 0 deletions tests/repositories/fixtures/legacy/isort-metadata.html
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<head>
<title>Links for isort</title>
</head>
<body>
<h1>Links for isort</h1>
<a href="https://files.pythonhosted.org/packages/1f/2c/non-existant/isort-metadata-4.3.4-py3-none-any.whl#sha256=1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af"
data-dist-info-metadata="sha256=e360bf0ed8a06390513d50dd5b7e9d635c789853a93b84163f9de4ae0647580c">isort-metadata-4.3.4-py3-none-any.whl</a><br/>
</body>
</html>
<!--SERIAL 3575149-->
28 changes: 28 additions & 0 deletions tests/repositories/fixtures/metadata/isort-metadata-4.3.4-py3-none-any.whl.metadata
@@ -0,0 +1,28 @@
Metadata-Version: 2.0
Name: isort-metadata
Version: 4.3.4
Summary: A Python utility / library to sort Python imports.
Home-page: https://github.com/timothycrosley/isort
Author: Timothy Crosley
Author-email: timothy.crosley@gmail.com
License: MIT
Keywords: Refactor,Python,Python2,Python3,Refactoring,Imports,Sort,Clean
Platform: UNKNOWN
Classifier: Development Status :: 6 - Mature
Classifier: Intended Audience :: Developers
Classifier: Natural Language :: English
Classifier: Environment :: Console
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: Utilities
Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*
Requires-Dist: futures; python_version=="2.7"
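For reference, a metadata file like this is what _get_info_from_metadata feeds to packaging's parse_email; a quick check of what that call yields, abridged to the fields that matter here:

from packaging.metadata import parse_email

raw = (
    "Metadata-Version: 2.0\n"
    "Name: isort-metadata\n"
    "Version: 4.3.4\n"
    'Requires-Dist: futures; python_version=="2.7"\n'
)
metadata, unparsed = parse_email(raw)
assert metadata["name"] == "isort-metadata"
assert metadata["version"] == "4.3.4"
assert metadata["requires_dist"] == ['futures; python_version=="2.7"']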