Skip to content

Commit

Permalink
Merge pull request #22 from thatch/fetch-metadata-as-bytes
Browse files Browse the repository at this point in the history
Always consider metadata as utf-8.
  • Loading branch information
jwodder committed Feb 20, 2024
2 parents 9f98c86 + 4fdb8f6 commit 7cdf45e
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 12 deletions.
70 changes: 58 additions & 12 deletions src/pypi_simple/client.py
Expand Up @@ -328,18 +328,20 @@ def download_package(
pass
raise

def get_package_metadata(
def get_package_metadata_bytes(
self,
pkg: DistributionPackage,
verify: bool = True,
timeout: float | tuple[float, float] | None = None,
) -> str:
) -> bytes:
"""
.. versionadded:: 1.3.0
.. versionadded:: 1.5.0
Retrieve the `distribution metadata`_ for the given
`DistributionPackage`. The metadata can then be parsed with, for
example, |the packaging package|_.
`DistributionPackage`. This method is lower-level than
`PyPISimple.get_package_metadata` and is most appropriate if you want
to defer interpretation of the data (e.g. if you're just writing to a
file) or want to customize the handling of non-``utf-8`` data.
Not all packages have distribution metadata available for download; the
`DistributionPackage.has_metadata` attribute can be used to check
Expand All @@ -348,12 +350,6 @@ def get_package_metadata(
of `~DistributionPackage.has_metadata`; if the server replies with a
404, a `NoMetadataError` is raised.
.. _distribution metadata:
https://packaging.python.org/en/latest/specifications/core-metadata/
.. |the packaging package| replace:: the ``packaging`` package
.. _the packaging package:
https://packaging.pypa.io/en/stable/metadata.html
:param DistributionPackage pkg:
the distribution package to retrieve the metadata of
:param bool verify:
Expand Down Expand Up @@ -383,7 +379,57 @@ def get_package_metadata(
r.raise_for_status()
digester.update(r.content)
digester.finalize()
return r.text
return r.content

def get_package_metadata(
    self,
    pkg: DistributionPackage,
    verify: bool = True,
    timeout: float | tuple[float, float] | None = None,
) -> str:
    """
    .. versionadded:: 1.3.0

    Fetch the `distribution metadata`_ of the given `DistributionPackage`
    and return it as text.  The result can then be handed to a parser
    such as |the packaging package|_.

    The download itself is delegated to
    `PyPISimple.get_package_metadata_bytes`; the bytes it returns are
    decoded as UTF-8 with the ``surrogateescape`` error handler, so bytes
    that are not valid UTF-8 come back as lone surrogate code points
    rather than causing a decoding error.

    Metadata is not available for every package.  The
    `DistributionPackage.has_metadata` attribute tells you whether the
    repository reported the metadata as available, but this method does
    not consult it: a download is always attempted regardless of the
    value of `~DistributionPackage.has_metadata`, and a 404 reply from the
    server is reported as a `NoMetadataError`.

    .. _distribution metadata:
       https://packaging.python.org/en/latest/specifications/core-metadata/

    .. |the packaging package| replace:: the ``packaging`` package
    .. _the packaging package:
       https://packaging.pypa.io/en/stable/metadata.html

    :param DistributionPackage pkg:
        the distribution package to retrieve the metadata of
    :param bool verify:
        whether to verify the metadata's digests against the retrieved data
    :param timeout: optional timeout to pass to the ``requests`` call
    :type timeout: float | tuple[float,float] | None
    :rtype: str
    :raises NoMetadataError:
        if the repository responds with a 404 error code
    :raises requests.HTTPError:
        if the repository responds with an HTTP error code other than 404
    :raises NoDigestsError:
        if ``verify`` is true and the given package's metadata does not
        have any digests with known algorithms
    :raises DigestMismatchError:
        if ``verify`` is true and the digest of the downloaded data does
        not match the expected value
    """
    # Fetching (and any digest verification) is done by the bytes-level
    # method; only the text decoding happens here.
    raw_metadata = self.get_package_metadata_bytes(pkg, verify, timeout)
    return raw_metadata.decode("utf-8", errors="surrogateescape")


class NoSuchProjectError(Exception):
Expand Down
44 changes: 44 additions & 0 deletions test/test_client.py
Expand Up @@ -844,3 +844,47 @@ def progress_cb(content_length: Optional[int]) -> ProgressTracker:
assert spy.enter_called
assert spy.exit_called
assert spy.updates == [65535] * (size // 65535) + [size % 65535]


@responses.activate
def test_metadata_encoding() -> None:
    """
    Check how metadata is returned for UTF-8 and non-UTF-8 response bodies.

    For each case, ``get_package_metadata_bytes()`` must return the served
    body unchanged and ``get_package_metadata()`` must return the expected
    text.  The second body is not valid UTF-8, so its first two bytes are
    expected to come back as lone surrogates.
    """
    index_url = "https://test.nil/simple/"
    # (version, served body, sha1 given as the metadata digest, expected text)
    cases = [
        # unicode snowman
        (
            "0.0.1",
            b"\xe2\x98\x83",
            "2686137311c038a99622242fdb662b88c221c08d",
            "\u2603",
        ),
        # unicode snowman in utf-16
        (
            "0.0.2",
            b"\xff\xfe\x03\x26",
            "7381ac0d9ddb35e4074acfd9cf72ea47314da70b",
            "\udcff\udcfe\u0003\u0026",
        ),
    ]

    # Register every mocked metadata endpoint before the client is opened.
    for version, served, _sha1, _text in cases:
        responses.add(
            method=responses.GET,
            url=f"{index_url}packages/example-{version}-py3-none-any.whl.metadata",
            body=served,
        )

    with PyPISimple(index_url) as simple:
        for version, served, sha1, text in cases:
            wheel_name = f"example-{version}-py3-none-any.whl"
            pkg = DistributionPackage(
                filename=wheel_name,
                project="example",
                version=version,
                package_type="wheel",
                url=f"{index_url}packages/{wheel_name}",
                digests={},
                requires_python=None,
                has_sig=None,
                has_metadata=True,
                metadata_digests={"sha1": sha1},
            )
            assert simple.get_package_metadata_bytes(pkg) == served
            assert simple.get_package_metadata(pkg) == text

0 comments on commit 7cdf45e

Please sign in to comment.