diff --git a/PyInstaller/building/datastruct.py b/PyInstaller/building/datastruct.py index c5b961d3d6..078b592101 100644 --- a/PyInstaller/building/datastruct.py +++ b/PyInstaller/building/datastruct.py @@ -337,8 +337,10 @@ def normalize_pyz_toc(toc): def _normalize_toc(toc, toc_type_priorities, type_case_normalization_fcn=lambda typecode: False): tmp_toc = dict() - for entry in toc: - dest_name, src_name, typecode = entry + for dest_name, src_name, typecode in toc: + # Always sanitize the dest_name with `os.path.normpath` to remove any local loops with parent directory path + # components. `pathlib` does not seem to offer equivalent functionality. + dest_name = os.path.normpath(dest_name) # Normalize the destination name for uniqueness. Use `pathlib.PurePath` to ensure that keys are both # case-normalized (on OSes where applicable) and directory-separator normalized (just in case). @@ -350,12 +352,12 @@ def _normalize_toc(toc, toc_type_priorities, type_case_normalization_fcn=lambda existing_entry = tmp_toc.get(entry_key) if existing_entry is None: # Entry does not exist - insert - tmp_toc[entry_key] = entry + tmp_toc[entry_key] = (dest_name, src_name, typecode) else: # Entry already exists - replace if its typecode has higher priority _, _, existing_typecode = existing_entry if toc_type_priorities.get(typecode, 0) > toc_type_priorities.get(existing_typecode, 0): - tmp_toc[entry_key] = entry + tmp_toc[entry_key] = (dest_name, src_name, typecode) # Return the items as list. The order matches the original order due to python dict maintaining the insertion order. return list(tmp_toc.values()) diff --git a/tests/unit/test_toc_normalization.py b/tests/unit/test_toc_normalization.py index 0a090bfb87..4e861a6263 100644 --- a/tests/unit/test_toc_normalization.py +++ b/tests/unit/test_toc_normalization.py @@ -11,6 +11,7 @@ # Tests for explicit TOC list normalization that replaced the implicit normalization with class:``TOC``. 
import copy +import pathlib from PyInstaller import compat from PyInstaller.building.datastruct import normalize_pyz_toc, normalize_toc @@ -21,7 +22,7 @@ ('libpython3.10.so', '/usr/lib64/libpython3.10.so', 'BINARY'), ('libsomething.so', '/usr/local/lib64/libsomething.so', 'BINARY'), ('README', '/home/user/tmp/README', 'DATA'), - ('data/data.csv', '/home/user/tmp/data/data.csv', 'DATA'), + (str(pathlib.PurePath('data/data.csv')), '/home/user/tmp/data/data.csv', 'DATA'), ('dependency.bin', 'other_multipackage:dependency.bin', 'DEPENDENCY'), ('myextension.so', 'myextension.so', 'EXTENSION'), ] @@ -125,6 +126,34 @@ def test_normalize_toc_multipackage_dependency(): assert sorted(normalized_toc) == sorted(expected_toc) +def test_normalize_toc_with_parent_pardir_loops(): + # Check that de-duplication works even if destination paths contain local loops with parent directory (..) + # components but can be normalized to the same path. Furthermore, we expect TOC normalization to replace the + # dest_name with its normalized version. + toc = [ + ( + str(pathlib.PurePath('numpy/core/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0')), + '/path/to/venv/lib/python3.11/site-packages/numpy/core/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0', + 'BINARY', + ), + ( + str(pathlib.PurePath('numpy/linalg/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0')), + '/path/to/venv/lib/python3.11/site-packages/numpy/linalg/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0', + 'BINARY', + ), + ] + expected_toc = [ + ( + str(pathlib.PurePath('numpy.libs/libquadmath-2d0c479f.so.0.0.0')), + '/path/to/venv/lib/python3.11/site-packages/numpy/core/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0', + 'BINARY', + ), + ] + + normalized_toc = normalize_toc(toc) + assert sorted(normalized_toc) == sorted(expected_toc) + + # Tests for PYZ TOC normalization. _BASE_PYZ_TOC = [ ('copy', '/usr/lib64/python3.11/copy.py', 'PYMODULE'),