Skip to content

Commit

Permalink
Handle duplicated package metadata when using importlib.metadata (#342)
Browse files Browse the repository at this point in the history
Fixes #341.

Using an environment similar to the one shown in the issue, the duplicate-metadata warning is now printed (to stderr) ahead of the package list:
```console
$ PYTHONPATH=/workspaces/pipdeptree/temp-site-pkgs/ pipdeptree -a -d 0
Warning!!! Duplicate package metadata found:
"/home/vscode/.local/lib/python3.9/site-packages"
  pluggy                           1.4.0            (using 1.4.0, "/workspaces/pipdeptree/temp-site-pkgs")
  packaging                        24.0             (using 24.0, "/workspaces/pipdeptree/temp-site-pkgs")
  tomli                            2.0.1            (using 2.0.1, "/workspaces/pipdeptree/temp-site-pkgs")
  pytest                           8.1.1            (using 8.1.1, "/workspaces/pipdeptree/temp-site-pkgs")
  exceptiongroup                   1.2.0            (using 1.2.0, "/workspaces/pipdeptree/temp-site-pkgs")
  iniconfig                        2.0.0            (using 2.0.0, "/workspaces/pipdeptree/temp-site-pkgs")
"/usr/local/lib/python3.9/site-packages"
  pip                              23.0.1           (using 24.0, "/home/vscode/.local/lib/python3.9/site-packages")
------------------------------------------------------------------------
chardet==5.2.0
covdefaults==2.3.0
coverage==7.4.4
diff_cover==8.0.3
distlib==0.3.8
exceptiongroup==1.2.0
filelock==3.13.3
gitdb==4.0.11
GitPython==3.1.41
iniconfig==2.0.0
Jinja2==3.1.3
MarkupSafe==2.1.5
packaging==24.0
pip==24.0
pipdeptree==2.16.3.dev3+g91d21e3.d20240403
platformdirs==4.2.0
pluggy==1.4.0
Pygments==2.17.2
pytest==8.1.1
pytest-cov==5.0.0
pytest-mock==3.14.0
setuptools==69.0.3
smmap==5.0.1
tomli==2.0.1
virtualenv==20.25.1
wheel==0.42.0
```
  • Loading branch information
kemzeb committed Apr 4, 2024
1 parent 91d21e3 commit ff31dc4
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 21 deletions.
67 changes: 57 additions & 10 deletions src/pipdeptree/_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,75 @@

import site
import sys
from importlib.metadata import distributions
from typing import TYPE_CHECKING
from importlib.metadata import Distribution, distributions
from typing import Iterable, Tuple

if TYPE_CHECKING:
from importlib.metadata import Distribution
from packaging.utils import canonicalize_name


# Collect installed distributions and keep only the first one seen for each canonical package name.
#
# local_only: combined with the in-venv check below to restrict discovery to the virtual environment's site-packages.
# user_only: restricts discovery to the directory returned by site.getusersitepackages().
# Returns the list of kept distributions; when duplicates were skipped they are handed to
# render_duplicated_dist_metadata_text before returning.
#
# NOTE(review): this span is a rendered diff whose +/- markers and indentation were lost, so a few statements
# (the early `if user_only:` return, the doubled venv comment, `return list(distributions(path=venv_site_packages))`)
# look like pre-change lines sitting next to their replacements — confirm against the repository before relying on it.
def get_installed_distributions(
local_only: bool = False, # noqa: FBT001, FBT002
user_only: bool = False, # noqa: FBT001, FBT002
) -> list[Distribution]:
if user_only:
return list(distributions(path=[site.getusersitepackages()]))

# NOTE: See https://docs.python.org/3/library/venv.html#how-venvs-work for more details.
# See https://docs.python.org/3/library/venv.html#how-venvs-work for more details.
in_venv = sys.prefix != sys.base_prefix
original_dists: Iterable[Distribution] = []

# Pick the search path: the venv's site-packages, the user site-packages, or the default distributions() lookup.
if local_only and in_venv:
venv_site_packages = site.getsitepackages([sys.prefix])
return list(distributions(path=venv_site_packages))
original_dists = distributions(path=venv_site_packages)
elif user_only:
original_dists = distributions(path=[site.getusersitepackages()])
else:
original_dists = distributions()

# Since importlib.metadata.distributions() can return duplicate packages, we need to handle this. pip's approach is
# to keep track of each package metadata it finds, and if it encounters one again it will simply just ignore it. We
# take it one step further and warn the user that there are duplicate packages in their environment.
# See https://github.com/pypa/pip/blob/7c49d06ea4be4635561f16a524e3842817d1169a/src/pip/_internal/metadata/importlib/_envs.py#L34
# seen_dists maps a canonical name to the first distribution found under that name.
seen_dists: dict[str, Distribution] = {}
# Maps each kept (first-seen) distribution to the later distributions that share its canonical name.
first_seen_to_already_seen_dists_dict: dict[Distribution, list[Distribution]] = {}
dists = []
for dist in original_dists:
# NOTE(review): assumes every distribution exposes a "Name" metadata entry — confirm how missing or
# corrupt metadata should be handled before it reaches canonicalize_name.
normalized_name = canonicalize_name(dist.metadata["Name"])
if normalized_name not in seen_dists:
seen_dists[normalized_name] = dist
dists.append(dist)
continue
# Name already seen: record this distribution as a duplicate of the first one instead of returning it.
already_seen_dists = first_seen_to_already_seen_dists_dict.setdefault(seen_dists[normalized_name], [])
already_seen_dists.append(dist)

# Only emit the warning when at least one duplicate was recorded.
if first_seen_to_already_seen_dists_dict:
render_duplicated_dist_metadata_text(first_seen_to_already_seen_dists_dict)

return dists


FirstSeenWithDistsPair = Tuple[Distribution, Distribution]


# Print a warning to stderr listing every duplicate distribution, grouped by the location of the duplicate.
#
# first_seen_to_already_seen_dists_dict: maps a first-seen distribution to the later distributions that share its name.
# Returns nothing; all output goes to sys.stderr.
def render_duplicated_dist_metadata_text(
first_seen_to_already_seen_dists_dict: dict[Distribution, list[Distribution]],
) -> None:
# Re-key the data by str(dist.locate_file("")) of each duplicate so duplicates from the same location print together.
entries_to_pairs_dict: dict[str, list[FirstSeenWithDistsPair]] = {}
for first_seen, dists in first_seen_to_already_seen_dists_dict.items():
for dist in dists:
entry = str(dist.locate_file(""))
dist_list = entries_to_pairs_dict.setdefault(entry, [])
dist_list.append((first_seen, dist))

# NOTE(review): the next `return` does not fit a function annotated `-> None`; it looks like a pre-change line
# from the rendered diff (markers lost) rather than part of this function — confirm against the repository.
return list(distributions())
print("Warning!!! Duplicate package metadata found:", file=sys.stderr) # noqa: T201
for entry, pairs in entries_to_pairs_dict.items():
# One quoted header line per location, followed by one line per duplicate found there.
print(f'"{entry}"', file=sys.stderr) # noqa: T201
for first_seen, dist in pairs:
# Columns: duplicate's name (left-aligned, width 32), duplicate's version (width 16), then the version and
# location of the distribution that is actually used.
# NOTE(review): the sample output in the commit message shows a two-space indent; the single leading space
# in the f-string below may be an artifact of whitespace being collapsed — confirm.
print( # noqa: T201
(
f" {dist.metadata['Name']:<32} {dist.version:<16} (using {first_seen.version},"
f" \"{first_seen.locate_file('')}\")"
),
file=sys.stderr,
)
# A 72-character rule closes the warning.
print("-" * 72, file=sys.stderr) # noqa: T201


__all__ = [
Expand Down
12 changes: 7 additions & 5 deletions src/pipdeptree/_models/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
from itertools import chain
from typing import TYPE_CHECKING, Iterator, List, Mapping

from packaging.utils import canonicalize_name

if TYPE_CHECKING:
from importlib.metadata import Distribution


from .package import DistPackage, ReqPackage, pep503_normalize
from .package import DistPackage, ReqPackage


class PackageDAG(Mapping[DistPackage, List[ReqPackage]]):
Expand Down Expand Up @@ -43,7 +45,7 @@ def from_pkgs(cls, pkgs: list[Distribution]) -> PackageDAG:
for p in dist_pkgs:
reqs = []
for r in p.requires():
d = idx.get(pep503_normalize(r.name))
d = idx.get(canonicalize_name(r.name))
# Distribution.requires only return the name of requirements in metadata file, which may not be
# the same with the capitalized one in pip. We should retain the casing of required package name.
# see https://github.com/tox-dev/pipdeptree/issues/242
Expand Down Expand Up @@ -112,8 +114,8 @@ def filter_nodes(self, include: list[str] | None, exclude: set[str] | None) -> P
include_with_casing_preserved: list[str] = []
if include:
include_with_casing_preserved = include
include = [pep503_normalize(i) for i in include]
exclude = {pep503_normalize(s) for s in exclude} if exclude else set()
include = [canonicalize_name(i) for i in include]
exclude = {canonicalize_name(s) for s in exclude} if exclude else set()

# Check for mutual exclusion of show_only and exclude sets
# after normalizing the values to lowercase
Expand Down Expand Up @@ -159,7 +161,7 @@ def filter_nodes(self, include: list[str] | None, exclude: set[str] | None) -> P
continue

non_existent_includes = [
i for i in include_with_casing_preserved if pep503_normalize(i) not in matched_includes
i for i in include_with_casing_preserved if canonicalize_name(i) not in matched_includes
]
if non_existent_includes:
raise ValueError("No packages matched using the following patterns: " + ", ".join(non_existent_includes))
Expand Down
8 changes: 2 additions & 6 deletions src/pipdeptree/_models/package.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,28 @@
from __future__ import annotations

import re
from abc import ABC, abstractmethod
from importlib import import_module
from importlib.metadata import Distribution, PackageNotFoundError, metadata, version
from inspect import ismodule
from typing import TYPE_CHECKING

from packaging.requirements import Requirement
from packaging.utils import canonicalize_name

if TYPE_CHECKING:
from importlib.metadata import Distribution

from pipdeptree._adapter import PipBaseDistributionAdapter


def pep503_normalize(name: str) -> str:
    """Return *name* in PEP 503 normalized form.

    Every run of ``-``, ``_`` and ``.`` characters is collapsed into a single ``-`` and the result is
    lowercased, so differently spelled variants of one project name compare equal.
    """
    # Raw string keeps the character class literal; "." needs no escaping inside [...].
    return re.sub(r"[-_.]+", "-", name).lower()


class Package(ABC):
"""Abstract class for wrappers around objects that pip returns."""

UNKNOWN_LICENSE_STR = "(Unknown license)"

def __init__(self, project_name: str) -> None:
self.project_name = project_name
self.key = pep503_normalize(project_name)
self.key = canonicalize_name(project_name)

def licenses(self) -> str:
try:
Expand Down
26 changes: 26 additions & 0 deletions tests/test_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
import virtualenv

from pipdeptree.__main__ import main
from pipdeptree._discovery import get_installed_distributions

if TYPE_CHECKING:
import pytest
from pytest_mock import MockerFixture


def test_local_only(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capfd: pytest.CaptureFixture[str]) -> None:
Expand Down Expand Up @@ -53,3 +55,27 @@ def test_user_only(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capfd: pytes
expected = {"foo"}

assert found == expected


def test_duplicate_metadata(mocker: MockerFixture, capfd: pytest.CaptureFixture[str]) -> None:
    """Check that a repeated package name is returned once and reported on stderr."""
    # Two fake distributions that share the name "foo" but differ in version and location.
    first = Mock(metadata={"Name": "foo"}, version="1.2.5", locate_file=Mock(return_value="/path/1"))
    duplicate = Mock(metadata={"Name": "foo"}, version="5.9.0", locate_file=Mock(return_value="/path/2"))
    mocker.patch(
        "pipdeptree._discovery.distributions",
        Mock(return_value=[first, duplicate]),
    )

    dists = get_installed_distributions()
    assert len(dists) == 1
    # we expect it to use the first distribution found
    assert dists[0].version == "1.2.5"

    _, err = capfd.readouterr()
    # NOTE(review): the runs of spaces in this expected text look collapsed compared with the width-32 /
    # width-16 padded columns the warning is formatted with — confirm against the real stderr output.
    expected = (
        'Warning!!! Duplicate package metadata found:\n"/path/2"\n foo 5.9.0 '
        ' (using 1.2.5, "/path/1")\n------------------------------------------------------------------------\n'
    )
    assert err == expected

0 comments on commit ff31dc4

Please sign in to comment.