Skip to content

Commit

Permalink
building: ensure TOC de-duplication when dest_name contains pardir loops
Browse files Browse the repository at this point in the history
Ensure that TOC is properly de-duplicated even if dest_name
contains loops with parent directory path components. For example,
`numpy/core/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0` and
`numpy/linalg/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0`
should be considered duplicates, as they are both normalized to
`numpy.libs/libquadmath-2d0c479f.so.0.0.0`.

Therefore, the TOC normalization helpers now always sanitize
the `dest_name` using `os.path.normpath` (`pathlib` lacks
equivalent functionality), so that the entries are properly
de-duplicated and the destination name is always in its
compact/normalized form.

We should probably also look into path normalization in the
`bindepend.getImports` function, but at the end of the day,
the TOC normalization serves as the last guard against problematic
entries.
  • Loading branch information
rokm committed May 10, 2023
1 parent cf95497 commit 97bc98e
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
10 changes: 6 additions & 4 deletions PyInstaller/building/datastruct.py
Expand Up @@ -337,8 +337,10 @@ def normalize_pyz_toc(toc):

def _normalize_toc(toc, toc_type_priorities, type_case_normalization_fcn=lambda typecode: False):
tmp_toc = dict()
for entry in toc:
dest_name, src_name, typecode = entry
for dest_name, src_name, typecode in toc:
# Always sanitize the dest_name with `os.path.normpath` to remove any local loops with parent directory path
# components. `pathlib` does not seem to offer equivalent functionality.
dest_name = os.path.normpath(dest_name)

# Normalize the destination name for uniqueness. Use `pathlib.PurePath` to ensure that keys are both
# case-normalized (on OSes where applicable) and directory-separator normalized (just in case).
Expand All @@ -350,12 +352,12 @@ def _normalize_toc(toc, toc_type_priorities, type_case_normalization_fcn=lambda
existing_entry = tmp_toc.get(entry_key)
if existing_entry is None:
# Entry does not exist - insert
tmp_toc[entry_key] = entry
tmp_toc[entry_key] = (dest_name, src_name, typecode)
else:
# Entry already exists - replace if its typecode has higher priority
_, _, existing_typecode = existing_entry
if toc_type_priorities.get(typecode, 0) > toc_type_priorities.get(existing_typecode, 0):
tmp_toc[entry_key] = entry
tmp_toc[entry_key] = (dest_name, src_name, typecode)

# Return the items as list. The order matches the original order due to python dict maintaining the insertion order.
return list(tmp_toc.values())
31 changes: 30 additions & 1 deletion tests/unit/test_toc_normalization.py
Expand Up @@ -11,6 +11,7 @@

# Tests for explicit TOC list normalization that replaced the implicit normalization with class:``TOC``.
import copy
import pathlib

from PyInstaller import compat
from PyInstaller.building.datastruct import normalize_pyz_toc, normalize_toc
Expand All @@ -21,7 +22,7 @@
('libpython3.10.so', '/usr/lib64/libpython3.10.so', 'BINARY'),
('libsomething.so', '/usr/local/lib64/libsomething.so', 'BINARY'),
('README', '/home/user/tmp/README', 'DATA'),
('data/data.csv', '/home/user/tmp/data/data.csv', 'DATA'),
(str(pathlib.PurePath('data/data.csv')), '/home/user/tmp/data/data.csv', 'DATA'),
('dependency.bin', 'other_multipackage:dependency.bin', 'DEPENDENCY'),
('myextension.so', 'myextension.so', 'EXTENSION'),
]
Expand Down Expand Up @@ -125,6 +126,34 @@ def test_normalize_toc_multipackage_dependency():
assert sorted(normalized_toc) == sorted(expected_toc)


def test_normalize_toc_with_parent_pardir_loops():
    # Destination names that reach the same file through different parent-directory (..) detours must be treated as
    # duplicates. The surviving entry is expected to be the first one, with its dest_name replaced by the normalized
    # path; its src_name is left untouched.
    via_core = (
        str(pathlib.PurePath('numpy/core/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0')),
        '/path/to/venv/lib/python3.11/site-packages/numpy/core/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0',
        'BINARY',
    )
    via_linalg = (
        str(pathlib.PurePath('numpy/linalg/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0')),
        '/path/to/venv/lib/python3.11/site-packages/numpy/linalg/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0',
        'BINARY',
    )

    # Single entry that both inputs above should collapse into.
    collapsed = (
        str(pathlib.PurePath('numpy.libs/libquadmath-2d0c479f.so.0.0.0')),
        '/path/to/venv/lib/python3.11/site-packages/numpy/core/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0',
        'BINARY',
    )

    result = normalize_toc([via_core, via_linalg])
    assert sorted(result) == sorted([collapsed])


# Tests for PYZ TOC normalization.
_BASE_PYZ_TOC = [
('copy', '/usr/lib64/python3.11/copy.py', 'PYMODULE'),
Expand Down

0 comments on commit 97bc98e

Please sign in to comment.