Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't explode sdists before installing them #166

Merged
merged 12 commits into from
Dec 5, 2021
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ All versions prior to 0.0.9 are untracked.
an version string that couldn't be parsed as a PEP-440 version was fixed
([#175](https://github.com/trailofbits/pip-audit/pull/175))

* Dependency sources: a crash caused by incorrect assumptions about
the structure of source distributions was fixed
([#166](https://github.com/trailofbits/pip-audit/pull/166))

### Removed

## [1.0.1] - 2021-12-02
Expand Down
82 changes: 47 additions & 35 deletions pip_audit/_dependency_source/resolvelib/pypi_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
authors under the ISC license.
"""

import os
import itertools
from email.message import EmailMessage
from email.parser import BytesParser
from io import BytesIO
from operator import attrgetter
from tarfile import TarFile
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import BinaryIO, List, Optional, Set, cast
from typing import BinaryIO, Iterator, List, Optional, Set, cast
from urllib.parse import urlparse
from zipfile import ZipFile

Expand Down Expand Up @@ -42,6 +42,7 @@ class Candidate:
def __init__(
self,
name: str,
filename: Path,
version: Version,
url: str,
extras: Set[str],
Expand All @@ -54,6 +55,7 @@ def __init__(
"""

self.name = canonicalize_name(name)
self.filename = filename
self.version = version
self.url = url
self.extras = extras
Expand All @@ -69,8 +71,8 @@ def __repr__(self): # pragma: no cover
A string representation for `Candidate`.
"""
if not self.extras:
return f"<{self.name}=={self.version}>"
return f"<{self.name}[{','.join(self.extras)}]=={self.version}>"
return f"<{self.name}=={self.version} wheel={self.is_wheel}>"
return f"<{self.name}[{','.join(self.extras)}]=={self.version} wheel={self.is_wheel}>"

@property
def metadata(self) -> EmailMessage:
Expand Down Expand Up @@ -139,35 +141,24 @@ def _get_metadata_for_sdist(self):
"""
Extracts the metadata for this candidate, if it's a source distribution.
"""

response: requests.Response = requests.get(self.url, timeout=self.timeout)
response.raise_for_status()
data = response.content
sdist_data = response.content
metadata = EmailMessage()

with TemporaryDirectory() as pkg_dir:
if self.state is not None:
self.state.update_state(
f"Extracting source distribution for {self.name} ({self.version})"
) # pragma: no cover

# Extract archive onto the disk
with TarFile.open(fileobj=BytesIO(data), mode="r:gz") as t:
# The directory is the first member in a tarball
names = t.getnames()
pkg_name = names[0]
t.extractall(pkg_dir)
sdist = Path(pkg_dir) / self.filename.name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Haha that's awesome. I didn't realise you could do that.

sdist.write_bytes(sdist_data)

if self.state is not None:
self.state.update_state(
f"Installing source distribution in isolated environment for {self.name} "
f"({self.version})"
) # pragma: no cover

# Put together a full path of where the source distribution is
pkg_path = os.path.join(pkg_dir, pkg_name)

with TemporaryDirectory() as ve_dir:
ve = VirtualEnv(["-e", pkg_path], self.state)
ve = VirtualEnv([str(sdist)], self.state)
ve.create(ve_dir)

if self.state is not None:
Expand All @@ -182,7 +173,9 @@ def _get_metadata_for_sdist(self):
return metadata


def get_project_from_pypi(project, extras, timeout: Optional[int], state: Optional[AuditState]):
def get_project_from_pypi(
project, extras, timeout: Optional[int], state: Optional[AuditState]
) -> Iterator[Candidate]:
"""Return candidates created from the project name and extras."""
url = "https://pypi.org/simple/{}".format(project)
response: requests.Response = requests.get(url, timeout=timeout)
Expand Down Expand Up @@ -214,15 +207,21 @@ def get_project_from_pypi(project, extras, timeout: Optional[int], state: Option
# which we'll then skip via the exception handler.
(name, version) = parse_sdist_filename(filename)
is_wheel = False

# TODO: Handle compatibility tags?
yield Candidate(
name,
Path(filename),
version,
url=url,
extras=extras,
is_wheel=is_wheel,
timeout=timeout,
state=state,
)
except Exception:
continue

# TODO: Handle compatibility tags?

yield Candidate(
name, version, url=url, extras=extras, is_wheel=is_wheel, timeout=timeout, state=state
)


class PyPIProvider(AbstractProvider):
"""
Expand Down Expand Up @@ -272,14 +271,27 @@ def find_matches(self, identifier, requirements, incompatibilities):
# Need to pass the extras to the search, so they
# are added to the candidate at creation - we
# treat candidates as immutable once created.
candidates = (
candidate
for candidate in get_project_from_pypi(identifier, extras, self.timeout, self.state)
if candidate.version not in bad_versions
and all(candidate.version in r.specifier for r in requirements)
candidates = sorted(
[
candidate
for candidate in get_project_from_pypi(identifier, extras, self.timeout, self.state)
if candidate.version not in bad_versions
and all(candidate.version in r.specifier for r in requirements)
],
key=attrgetter("version", "is_wheel"),
reverse=True,
)
# We want to prefer more recent versions and prioritize wheels
return sorted(candidates, key=attrgetter("version", "is_wheel"), reverse=True)

# If we have multiple candidates for a single version and some are wheels,
# yield only the wheels. This keeps us from wasting a large amount of
# dependency search time when comparing wheels against source distributions.
for _, candidates in itertools.groupby(candidates, key=attrgetter("version")):
candidate = next(candidates)
yield candidate
if candidate.is_wheel:
yield from (c for c in candidates if c.is_wheel)
else:
yield from candidates

def is_satisfied_by(self, requirement, candidate):
"""
Expand Down
2 changes: 1 addition & 1 deletion pip_audit/_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Utilities functions for `pip-audit`.
Utility functions for `pip-audit`.
"""

import sys
Expand Down
32 changes: 24 additions & 8 deletions pip_audit/_virtual_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import json
import logging
import subprocess
import venv
from typing import Iterator, List, Optional, Tuple
Expand All @@ -11,6 +12,8 @@

from ._state import AuditState

logger = logging.getLogger(__name__)


class VirtualEnv(venv.EnvBuilder):
"""
Expand Down Expand Up @@ -61,6 +64,9 @@ def post_setup(self, context):
We do a few things in our custom post-setup:
- Upgrade the `pip` version. We'll be using `pip list` with the `--format json` option which
requires a non-ancient version for `pip`.
- Install `wheel`. When our packages install their own dependencies, they might be able
to do so through wheels, which are much faster and don't require us to run
setup scripts.
- Execute the custom install command.
- Call `pip list`, and parse the output into a list of packages to be returned from when the
`installed_packages` property is queried.
Expand All @@ -71,7 +77,16 @@ def post_setup(self, context):
) # pragma: no cover

# Firstly, upgrade our `pip` versions since `ensurepip` can leave us with an old version
pip_upgrade_cmd = [context.env_exe, "-m", "pip", "install", "--upgrade", "pip"]
# and install `wheel` in case our package dependencies are offered as wheels
pip_upgrade_cmd = [
context.env_exe,
"-m",
"pip",
"install",
"--upgrade",
"pip",
"wheel",
]
try:
subprocess.run(
pip_upgrade_cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
Expand All @@ -85,14 +100,15 @@ def post_setup(self, context):
) # pragma: no cover

# Install our packages
package_install_cmd = [context.env_exe, "-m", "pip", "install", *self._install_args]
package_install_cmd = [
context.env_exe,
"-m",
"pip",
"install",
*self._install_args,
]
try:
subprocess.run(
package_install_cmd,
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
subprocess.run(package_install_cmd, check=True)
except subprocess.CalledProcessError as cpe:
raise VirtualEnvError(f"Failed to install packages: {package_install_cmd}") from cpe

Expand Down