Skip to content

Commit

Permalink
Handle .tar.bz2 & .tgz sdists when locking.
Browse files Browse the repository at this point in the history
More generally, investigate what is out there (on PyPI) for sdists and
explicitly admit `.zip`, `.tar.gz`, `.tar.bz2` and `.tgz` as covering
99.999% of all known cases.

Fixes pex-tool#2379
  • Loading branch information
jsirois committed Feb 29, 2024
1 parent 7a69d5f commit 3b277b3
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 4 deletions.
8 changes: 8 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Release Notes

## 2.2.2

This release fixes `pex3 lock create` to handle `.tar.bz2` and `.tgz`
sdists in addition to the officially sanctioned `.tar.gz` and (less
officially so) `.zip` sdists.

* Handle `.tar.bz2` & `.tgz` sdists when locking. (#2380)

## 2.2.1

This release trims down the size of the Pex wheel on PyPI and the
Expand Down
47 changes: 46 additions & 1 deletion pex/resolve/locked_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,57 @@ def __lt__(self, other):

@attr.s(frozen=True, order=False)
class FileArtifact(Artifact):
@staticmethod
def is_zip_sdist(path):
# type: (str) -> bool

# N.B.: Windows sdists traditionally were released in zip format.
return path.endswith(".zip")

@staticmethod
def is_tar_sdist(path):
# type: (str) -> bool

# N.B.: PEP-625 (https://peps.python.org/pep-0625/) says sdists must use .tar.gz, but we
# have a known example of tar.bz2 in the wild in python-constraint 1.4.0 on PyPI:
# https://pypi.org/project/python-constraint/1.4.0/#files
# This probably all stems from the legacy `python setup.py sdist` as last described here:
# https://docs.python.org/3.11/distutils/sourcedist.html
# There was a move to reject exotic formats in PEP-527 in 2016 and the historical sdist
# formats appear to be listed here: https://peps.python.org/pep-0527/#file-extensions
# A query on the PyPI dataset shows:
#
# SELECT
# REGEXP_EXTRACT(path, r'\.([^.]+|tar\.[^.]+|tar)$') as extension,
# count(*) as count
# FROM `bigquery-public-data.pypi.distribution_metadata`
# group by extension
# order by count desc
#
# | extension | count |
# |-----------|---------|
# | whl | 6332494 |
# * | tar.gz | 5283102 |
# | egg | 135940 |
# * | zip | 108532 |
# | exe | 18452 |
# * | tar.bz2 | 3857 |
# | msi | 625 |
# | rpm | 603 |
# * | tgz | 226 |
# | dmg | 47 |
# | deb | 36 |
# * | tar.zip | 2 |
# * | ZIP | 1 |
#
return path.endswith((".tar.gz", ".tgz", ".tar.bz2"))

filename = attr.ib() # type: str

@property
def is_source(self):
    # type: () -> bool
    """Whether this artifact's filename identifies it as a source distribution.

    The answer is delegated to `is_tar_sdist` and `is_zip_sdist` so that "is a source artifact"
    always agrees with the archive formats those helpers recognize, rather than being decided
    by a separately maintained list of extensions.
    """
    return self.is_tar_sdist(self.filename) or self.is_zip_sdist(self.filename)

def parse_tags(self):
# type: () -> Iterator[tags.Tag]
Expand Down
4 changes: 2 additions & 2 deletions pex/resolve/lockfile/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,10 @@ def _prepare_project_directory(build_request):
return target, project

extract_dir = os.path.join(safe_mkdtemp(), "project")
if project.endswith(".zip"):
if FileArtifact.is_zip_sdist(project):
with open_zip(project) as zf:
zf.extractall(extract_dir)
elif project.endswith(".tar.gz"):
elif FileArtifact.is_tar_sdist(project):
with tarfile.open(project) as tf:
tf.extractall(extract_dir)
else:
Expand Down
2 changes: 1 addition & 1 deletion pex/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2015 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

# The Pex release version string.
__version__ = "2.2.2"
60 changes: 60 additions & 0 deletions tests/integration/test_issue_2739.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2024 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import os.path
import subprocess

from pex.pep_440 import Version
from pex.pep_503 import ProjectName
from pex.resolve.locked_resolve import FileArtifact
from pex.resolve.lockfile import json_codec
from pex.resolve.resolved_requirement import Pin
from pex.typing import TYPE_CHECKING
from testing import run_pex_command
from testing.cli import run_pex3

if TYPE_CHECKING:
from typing import Any


def test_tar_bz2(tmpdir):
    # type: (Any) -> None
    """Lock python-constraint 1.4.0, then build and run a PEX from that lock.

    The lock is expected to contain exactly one requirement whose sole artifact is a
    `.tar.bz2` source distribution.
    """

    workdir = str(tmpdir)
    lock = os.path.join(workdir, "lock.json")
    pex_root = os.path.join(workdir, "pex_root")

    # Step 1: create the lock.
    lock_create_args = [
        "lock",
        "create",
        "--pex-root",
        pex_root,
        "python-constraint==1.4.0",
        "-o",
        lock,
        "--indent",
        "2",
    ]
    run_pex3(*lock_create_args).assert_success()

    # Step 2: the lock holds a single resolve with a single, source-only requirement.
    lockfile = json_codec.load(lock)
    resolves = lockfile.locked_resolves
    assert len(resolves) == 1

    requirements = resolves[0].locked_requirements
    assert len(requirements) == 1

    requirement = requirements[0]
    expected_pin = Pin(ProjectName("python-constraint"), Version("1.4.0"))
    assert expected_pin == requirement.pin

    artifact = requirement.artifact
    assert isinstance(artifact, FileArtifact)
    assert artifact.is_source
    assert artifact.filename.endswith(".tar.bz2")
    assert not requirement.additional_artifacts

    # Step 3: the lock is usable; build a PEX from it and import the locked project.
    pex = os.path.join(workdir, "pex")
    build_args = ["--pex-root", pex_root, "--runtime-pex-root", pex_root, "--lock", lock, "-o", pex]
    run_pex_command(args=build_args).assert_success()

    reported_version = subprocess.check_output(
        args=[pex, "-c", "from constraint.version import __version__; print(__version__)"]
    ).strip()
    assert b"1.4.0" == reported_version

0 comments on commit 3b277b3

Please sign in to comment.