diff --git a/PyInstaller/building/api.py b/PyInstaller/building/api.py index 21b1649fe0..1e477d86d8 100644 --- a/PyInstaller/building/api.py +++ b/PyInstaller/building/api.py @@ -24,7 +24,7 @@ from PyInstaller import HOMEPATH, PLATFORM from PyInstaller import log as logging from PyInstaller.archive.writers import CArchiveWriter, ZlibArchiveWriter -from PyInstaller.building.datastruct import TOC, Target, _check_guts_eq +from PyInstaller.building.datastruct import Target, _check_guts_eq, normalize_pyz_toc, normalize_toc from PyInstaller.building.utils import ( _check_guts_toc, _make_clean_directory, _rmtree, checkCache, get_code_object, strip_paths_in_code, compile_pymodule ) @@ -51,7 +51,7 @@ class PYZ(Target): def __init__(self, *tocs, **kwargs): """ tocs - One or more TOCs (Tables of Contents), usually an `Analysis.pure` and an `Analysis.zipped_data`. + One or more TOC (Table of Contents) lists, usually an `Analysis.pure` and an `Analysis.zipped_data`. If the passed TOC has an attribute `_code_cache`, it is expected to be a dictionary of module code objects from ModuleGraph. @@ -103,10 +103,14 @@ def __init__(self, *tocs, **kwargs): # Merge input TOC(s) and their code object dictionaries (if available). Skip the bootstrap modules, which will # be passed on to CArchive. bootstrap_module_names = set(name for name, _, typecode in self.dependencies if typecode == 'PYMODULE') - self.toc = TOC() + self.toc = [] self.code_dict = {} for toc in tocs: - self.code_dict.update(getattr(toc, '_code_cache', {})) + # Check if code cache association exists for the given TOC list + code_cache = CONF['code_cache'].get(id(toc)) + if code_cache is not None: + self.code_dict.update(code_cache) + for entry in toc: name, _, typecode = entry # PYZ expects PYMODULE entries (python code objects) and DATA entries (data collected from zipped eggs). 
@@ -116,6 +120,9 @@ def __init__(self, *tocs, **kwargs): continue self.toc.append(entry) + # Normalize TOC + self.toc = normalize_pyz_toc(self.toc) + # Alphabetically sort the TOC to enable reproducible builds. self.toc.sort() @@ -187,7 +194,7 @@ def __init__( ): """ toc - A TOC (Table of Contents) + A TOC (Table of Contents) list. name An optional filename for the PKG. cdict @@ -203,7 +210,7 @@ def __init__( """ super().__init__() - self.toc = toc + self.toc = normalize_toc(toc) # Ensure guts contain normalized TOC self.cdict = cdict self.name = name if name is None: @@ -322,7 +329,7 @@ class EXE(Target): def __init__(self, *args, **kwargs): """ args - One or more arguments that are either instances of TOC or Target. + One or more arguments that are either an instance of `Target` or an iterable representing TOC list. kwargs Possible keyword arguments: @@ -469,43 +476,23 @@ def __init__(self, *args, **kwargs): # file already exists. self.pkgname = os.path.join(CONF['workpath'], base_name + '.pkg') - self.toc = TOC() - - _deps_toc = TOC() # See the note below + self.toc = [] for arg in args: - # Valid arguments: PYZ object, Splash object, and TOC-like iterables + # Valid arguments: PYZ object, Splash object, and TOC-list iterables if isinstance(arg, (PYZ, Splash)): # Add object as an entry to the TOC, and merge its dependencies TOC if isinstance(arg, PYZ): self.toc.append((os.path.basename(arg.name), arg.name, "PYZ")) else: self.toc.append((os.path.basename(arg.name), arg.name, "SPLASH")) - # See the note below (and directly extend self.toc once this workaround is not necessary anymore). 
- # self.toc.extend(arg.dependencies) - for entry in arg.dependencies: - _, _, typecode = entry - if typecode in ('EXTENSION', 'BINARY', 'DATA'): - _deps_toc.append(entry) - else: - self.toc.append(entry) + self.toc.extend(arg.dependencies) elif miscutils.is_iterable(arg): # TOC-like iterable self.toc.extend(arg) else: raise TypeError(f"Invalid argument type for EXE: {type(arg)!r}") - # NOTE: this is an ugly work-around that ensures that when MERGE is used, the EXE's TOC is first populated with - # MERGE'd `binaries` and `datas` entries (which should be DEPENDENCY references for shared resources, and BINARY - # or DATA entries for non-shared resources), and that `PYZ.dependencies` is merged last. The latter may contain - # entries for `_struct` and `zlib` extensions, and if they end up in the TOC first, they will block the - # corresponding DEPENDENCY entries (if they are available) from being added to TOC. Which will in turn result in - # missing extensions with certain onefile/onedir referencing combinations. And even if not, the result would be - # but sub-optimal, as the extensions could be shared via DEPENDENCY mechanism. This work-around can be removed - # once we replace the TOC class with mechanism that implements a typecode-based priority system for the entries. - self.toc.extend(_deps_toc) - del _deps_toc - if self.runtime_tmpdir is not None: self.toc.append(("pyi-runtime-tmpdir " + self.runtime_tmpdir, "", "OPTION")) @@ -573,6 +560,9 @@ def makeabs(path): else: raise TypeError(f"Unsupported type for version info argument: {type(self.versrsrc)!r}") + # Normalize TOC + self.toc = normalize_toc(self.toc) + self.pkg = PKG( self.toc, name=self.pkgname, @@ -589,7 +579,7 @@ def makeabs(path): # Get the path of the bootloader and store it in a TOC, so it can be checked for being changed. 
exe = self._bootloader_file('run', '.exe' if is_win or is_cygwin else '') - self.exefiles = TOC([(os.path.basename(exe), exe, 'EXECUTABLE')]) + self.exefiles = [(os.path.basename(exe), exe, 'EXECUTABLE')] self.__postinit__() @@ -879,7 +869,7 @@ class COLLECT(Target): def __init__(self, *args, **kwargs): """ args - One or more arguments that are either TOCs or Targets. + One or more arguments that are either an instance of `Target` or an iterable representing TOC list. kwargs Possible keyword arguments: @@ -906,7 +896,7 @@ def __init__(self, *args, **kwargs): # DISTPATH). Old .spec formats included parent path, so strip it away. self.name = os.path.join(CONF['distpath'], os.path.basename(kwargs.get('name'))) - self.toc = TOC() + self.toc = [] for arg in args: # Valid arguments: EXE object and TOC-like iterables if isinstance(arg, EXE): @@ -931,6 +921,9 @@ def __init__(self, *args, **kwargs): else: raise TypeError(f"Invalid argument type for COLLECT: {type(arg)!r}") + # Normalize TOC + self.toc = normalize_toc(self.toc) + self.__postinit__() _GUTS = ( @@ -1035,8 +1028,8 @@ def __init__(self, *args): # onedir, `a.datas` and `a.binaries` need to be passed to `COLLECT` (as they were before the MERGE), while # `a.dependencies` needs to be passed to `EXE`. This split requires DEPENDENCY entries to be in a separate # TOC. 
- analysis.binaries = TOC(binaries) - analysis.datas = TOC(datas) + analysis.binaries = normalize_toc(binaries) + analysis.datas = normalize_toc(datas) analysis.dependencies += binaries_refs + datas_refs def _process_toc(self, toc, path_to_exe): diff --git a/PyInstaller/building/build_main.py b/PyInstaller/building/build_main.py index a9514810d8..48f526c614 100644 --- a/PyInstaller/building/build_main.py +++ b/PyInstaller/building/build_main.py @@ -26,7 +26,7 @@ from PyInstaller import log as logging from PyInstaller.archive import pyz_crypto from PyInstaller.building.api import COLLECT, EXE, MERGE, PYZ -from PyInstaller.building.datastruct import TOC, Target, Tree, _check_guts_eq +from PyInstaller.building.datastruct import TOC, Target, Tree, _check_guts_eq, normalize_toc, normalize_pyz_toc from PyInstaller.building.osx import BUNDLE from PyInstaller.building.splash import Splash from PyInstaller.building.toc_conversion import DependencyProcessor @@ -255,7 +255,7 @@ class Analysis(Target): """ Class that performs analysis of the user's main Python scripts. - An Analysis has five outputs, all TOCs (Table of Contents) accessed as attributes of the analysis. + An Analysis has five outputs, all TOC (Table of Contents) lists accessed as attributes of the analysis. scripts The scripts you gave Analysis as input, with any runtime hook scripts prepended. 
@@ -397,13 +397,13 @@ def __init__( self.hiddenimports.append('tinyaes') self.excludes = excludes or [] - self.scripts = TOC() - self.pure = TOC() - self.binaries = TOC() - self.zipfiles = TOC() - self.zipped_data = TOC() - self.datas = TOC() - self.dependencies = TOC() + self.scripts = [] + self.pure = [] + self.binaries = [] + self.zipfiles = [] + self.zipped_data = [] + self.datas = [] + self.dependencies = [] self.binding_redirects = CONF['binding_redirects'] = [] self.win_no_prefer_redirects = win_no_prefer_redirects self.win_private_assemblies = win_private_assemblies @@ -412,14 +412,18 @@ def __init__( self.module_collection_mode = module_collection_mode or {} # Initialize 'binaries' and 'datas' with lists specified in .spec file. + # Ensure the lists are normalized before guts comparison. if binaries: logger.info("Appending 'binaries' from .spec") for name, pth in format_binaries_and_datas(binaries, workingdir=spec_dir): self.binaries.append((name, pth, 'BINARY')) + self.binaries = normalize_toc(self.binaries) + if datas: logger.info("Appending 'datas' from .spec") for name, pth in format_binaries_and_datas(datas, workingdir=spec_dir): self.datas.append((name, pth, 'DATA')) + self.datas = normalize_toc(self.datas) self.__postinit__() @@ -486,13 +490,14 @@ def _check_guts(self, data, last_build): logger.info("Building because %s changed", filename) return True # Now we know that none of the input parameters and none of the input files has changed. So take the values - # calculated resp. analysed in the last run and store them in `self`. - self.scripts = TOC(data['scripts']) - self.pure = TOC(data['pure']) - self.binaries = TOC(data['binaries']) - self.zipfiles = TOC(data['zipfiles']) - self.zipped_data = TOC(data['zipped_data']) - self.datas = TOC(data['datas']) + # that were calculated / analyzed in the last run and store them in `self`. These TOC lists should already + # be normalized. 
+ self.scripts = data['scripts'] + self.pure = data['pure'] + self.binaries = data['binaries'] + self.zipfiles = data['zipfiles'] + self.zipped_data = data['zipped_data'] + self.datas = data['datas'] # Store previously found binding redirects in CONF for later use by PKG/COLLECT from PyInstaller.config import CONF @@ -517,7 +522,7 @@ def assemble(self): libzip_filename = os.path.join(CONF['workpath'], 'base_library.zip') create_py3_base_library(libzip_filename, graph=self.graph) # Bundle base_library.zip as data file. - # Data format of TOC item: ('relative_path_in_dist_dir', 'absolute_path_on_disk', 'DATA') + # Data format of TOC item: ('relative_path_in_dist_dir', 'absolute_path_on_disk', 'DATA') self.datas.append((os.path.basename(libzip_filename), libzip_filename, 'DATA')) # Expand sys.path of module graph. The attribute is the set of paths to use for imports: sys.path, plus our @@ -583,9 +588,11 @@ def assemble(self): # Update 'binaries' TOC and 'datas' TOC. deps_proc = DependencyProcessor(self.graph, self.graph._additional_files_cache) + self.binaries.extend(deps_proc.make_binaries_toc()) self.datas.extend(deps_proc.make_datas_toc()) - self.zipped_data.extend(deps_proc.make_zipped_data_toc()) + + self.zipped_data = deps_proc.make_zipped_data_toc() # Already normalized # Note: zipped eggs are collected below # -- Look for dlls that are imported by Python 'ctypes' module. -- @@ -617,13 +624,36 @@ def assemble(self): # Initialize the scripts list with priority scripts in the proper order. self.scripts = self.graph.nodes_to_toc(priority_scripts) + self.scripts = normalize_toc(self.scripts) # Should not really contain duplicates, but just in case... # Extend the binaries list with all the Extensions modulegraph has found. - self.binaries = self.graph.make_binaries_toc(self.binaries) + self.binaries += self.graph.make_binaries_toc() + + # Convert extension module names into full filenames, and append suffix. 
Ensure that extensions that come from + # the lib-dynload are collected into _MEIPASS/lib-dynload instead of directly into _MEIPASS. + for idx, (dest, source, typecode) in enumerate(self.binaries): + if typecode != 'EXTENSION': + continue + + # Convert to full filename and append suffix + dest, source, typecode = add_suffix_to_extension(dest, source, typecode) + + # Divert into lib-dynload, if necessary (i.e., if file comes from lib-dynload directory) and its destination + # path does not already have a directory prefix. + src_parent = os.path.basename(os.path.dirname(source)) + if src_parent == 'lib-dynload' and not os.path.dirname(os.path.normpath(dest)): + dest = os.path.join('lib-dynload', dest) + + # Update + self.binaries[idx] = (dest, source, typecode) + + # Perform initial normalization of `datas` and `binaries` + self.datas = normalize_toc(self.datas) + self.binaries = normalize_toc(self.binaries) # Post-process GLib schemas self.datas = compile_glib_schema_files(self.datas, os.path.join(CONF['workpath'], "_pyi_gschema_compilation")) - self.datas = TOC(self.datas) + self.datas = normalize_toc(self.datas) # Process the pure-python modules list. Depending on the collection mode, these entries end up either in "pure" # list for collection into the PYZ archive, or in the "datas" list for collection as external data files. @@ -678,8 +708,18 @@ def assemble(self): self.datas.append((dest_path, obj_path, "DATA")) - # And get references to module code objects constructed by ModuleGraph to avoid writing .pyc files to hdd. - self.pure._code_cache = code_cache + # Normalize list of pure-python modules (these will end up in PYZ archive, so use specific normalization). 
+ self.pure = normalize_pyz_toc(self.pure) + + # Associate the `pure` TOC list instance with code cache in the global `CONF`; this is used by `PYZ` writer + # to obtain modules' code from cache instead + # + # (NOTE: back when `pure` was an instance of `TOC` class, the code object was passed by adding an attribute + # to the `pure` itself; now that `pure` is plain `list`, we cannot do that anymore. But the association via + # object ID should have the same semantics as the added attribute). + from PyInstaller.config import CONF + global_code_cache_map = CONF['code_cache'] + global_code_cache_map[id(self.pure)] = code_cache # Add remaining binary dependencies - analyze Python C-extensions and what DLLs they depend on. # @@ -693,39 +733,25 @@ def assemble(self): collected_packages = self.graph.get_collected_packages() self.binaries.extend( - isolated.call(find_binary_dependencies, list(self.binaries), self.binding_redirects, collected_packages) + isolated.call(find_binary_dependencies, self.binaries, self.binding_redirects, collected_packages) ) # Include zipped Python eggs. logger.info('Looking for eggs') - self.zipfiles.extend(deps_proc.make_zipfiles_toc()) + self.zipfiles = deps_proc.make_zipfiles_toc() # Already normalized + + # Final normalization of datas and binaries + self.datas = normalize_toc(self.datas) + self.binaries = normalize_toc(self.binaries) # Verify that Python dynamic library can be found. Without dynamic Python library PyInstaller cannot continue. self._check_python_library(self.binaries) if is_win: # Remove duplicate redirects self.binding_redirects[:] = list(set(self.binding_redirects)) logger.info("Found binding redirects: \n%s", self.binding_redirects) - # Convert extension module names into full filenames, and append suffix. Ensure that extensions that come from - # the lib-dynload are collected into _MEIPASS/lib-dynload instead of directly into _MEIPASS. 
- for idx, (dest, source, typecode) in enumerate(self.binaries): - if typecode != 'EXTENSION': - continue - - # Convert to full filename and append suffix - dest, source, typecode = add_suffix_to_extension(dest, source, typecode) - - # Divert into lib-dyload, if necessary (i.e., if file comes from lib-dynload directory) and its destination - # path does not already have a directory prefix. - src_parent = os.path.basename(os.path.dirname(source)) - if src_parent == 'lib-dynload' and not os.path.dirname(os.path.normpath(dest)): - dest = os.path.join('lib-dynload', dest) - - # Update - self.binaries[idx] = (dest, source, typecode) - # Write warnings about missing modules. self._write_warnings() # Write debug information about the graph @@ -842,6 +868,8 @@ def build(spec, distpath, workpath, clean_build): CONF['dot-file'] = os.path.join(workpath, 'graph-%s.dot' % CONF['specnm']) CONF['xref-file'] = os.path.join(workpath, 'xref-%s.html' % CONF['specnm']) + CONF['code_cache'] = dict() + # Clean PyInstaller cache (CONF['cachedir']) and temporary files (workpath) to be able start a clean build. if clean_build: logger.info('Removing temporary files and cleaning cache in %s', CONF['cachedir']) @@ -876,7 +904,7 @@ def build(spec, distpath, workpath, clean_build): 'WARNFILE': CONF['warnfile'], 'workpath': CONF['workpath'], # PyInstaller classes for .spec. - 'TOC': TOC, + 'TOC': TOC, # Kept for backward compatibility even though `TOC` class is deprecated. 
'Analysis': Analysis, 'BUNDLE': BUNDLE, 'COLLECT': COLLECT, diff --git a/PyInstaller/building/datastruct.py b/PyInstaller/building/datastruct.py index 64368946c9..078b592101 100644 --- a/PyInstaller/building/datastruct.py +++ b/PyInstaller/building/datastruct.py @@ -10,6 +10,8 @@ #----------------------------------------------------------------------------- import os +import pathlib +import warnings from PyInstaller import log as logging from PyInstaller.building.utils import _check_guts_eq @@ -37,8 +39,9 @@ def unique_name(entry): return name +# This class is deprecated and has been replaced by plain lists with explicit normalization (de-duplication) via +# `normalize_toc` and `normalize_pyz_toc` helper functions. class TOC(list): - # TODO: simplify the representation and use directly Modulegraph objects. """ TOC (Table of Contents) class is a list of tuples of the form (name, path, typecode). @@ -58,6 +61,14 @@ class TOC(list): """ def __init__(self, initlist=None): super().__init__() + + # Deprecation warning + warnings.warn( + "TOC class is deprecated. Use a plain list of 3-element tuples instead.", + DeprecationWarning, + stacklevel=2, + ) + self.filenames = set() if initlist: for entry in initlist: @@ -146,7 +157,7 @@ def __init__(self): self.__class__.invcnum += 1 self.tocfilename = os.path.join(CONF['workpath'], '%s-%02d.toc' % (self.__class__.__name__, self.invcnum)) self.tocbasename = os.path.basename(self.tocfilename) - self.dependencies = TOC() + self.dependencies = [] def __postinit__(self): """ @@ -198,9 +209,9 @@ def _save_guts(self): misc.save_py_data_struct(self.tocfilename, data) -class Tree(Target, TOC): +class Tree(Target, list): """ - This class is a way of creating a TOC (Table of Contents) that describes some or all of the files within a + This class is a way of creating a TOC (Table of Contents) list that describes some or all of the files within a directory. 
""" def __init__(self, root=None, prefix=None, excludes=None, typecode='DATA'): @@ -221,7 +232,7 @@ def __init__(self, root=None, prefix=None, excludes=None, typecode='DATA'): the typcodes. """ Target.__init__(self) - TOC.__init__(self) + list.__init__(self) self.root = root self.prefix = prefix self.excludes = excludes @@ -293,3 +304,60 @@ def assemble(self): else: result.append((resfilename, fullfilename, self.typecode)) self[:] = result + + +def normalize_toc(toc): + # Default priority: 0 + _TOC_TYPE_PRIORITIES = { + # DEPENDENCY entries need to replace original entries, so they need the highest priority. + 'DEPENDENCY': 2, + # BINARY/EXTENSION entries undergo additional processing, so give them precedence over DATA and other entries. + 'BINARY': 1, + 'EXTENSION': 1, + } + + def _type_case_normalization_fcn(typecode): + # Case-normalize all entries except OPTION. + return typecode not in { + "OPTION", + } + + return _normalize_toc(toc, _TOC_TYPE_PRIORITIES, _type_case_normalization_fcn) + + +def normalize_pyz_toc(toc): + # Default priority: 0 + _TOC_TYPE_PRIORITIES = { + # Ensure that modules are never shadowed by PYZ-embedded data files. + 'PYMODULE': 1, + } + + return _normalize_toc(toc, _TOC_TYPE_PRIORITIES) + + +def _normalize_toc(toc, toc_type_priorities, type_case_normalization_fcn=lambda typecode: False): + tmp_toc = dict() + for dest_name, src_name, typecode in toc: + # Always sanitize the dest_name with `os.path.normpath` to remove any local loops with parent directory path + # components. `pathlib` does not seem to offer equivalent functionality. + dest_name = os.path.normpath(dest_name) + + # Normalize the destination name for uniqueness. Use `pathlib.PurePath` to ensure that keys are both + # case-normalized (on OSes where applicable) and directory-separator normalized (just in case). 
+ if type_case_normalization_fcn(typecode): + entry_key = pathlib.PurePath(dest_name) + else: + entry_key = dest_name + + existing_entry = tmp_toc.get(entry_key) + if existing_entry is None: + # Entry does not exist - insert + tmp_toc[entry_key] = (dest_name, src_name, typecode) + else: + # Entry already exists - replace if its typecode has higher priority + _, _, existing_typecode = existing_entry + if toc_type_priorities.get(typecode, 0) > toc_type_priorities.get(existing_typecode, 0): + tmp_toc[entry_key] = (dest_name, src_name, typecode) + + # Return the items as list. The order matches the original order due to python dict maintaining the insertion order. + return list(tmp_toc.values()) diff --git a/PyInstaller/building/osx.py b/PyInstaller/building/osx.py index 7209a30011..f804f2e523 100644 --- a/PyInstaller/building/osx.py +++ b/PyInstaller/building/osx.py @@ -14,7 +14,7 @@ import shutil from PyInstaller.building.api import COLLECT, EXE -from PyInstaller.building.datastruct import TOC, Target, logger +from PyInstaller.building.datastruct import Target, logger, normalize_toc from PyInstaller.building.utils import _check_path_overlap, _rmtree, checkCache from PyInstaller.compat import is_darwin from PyInstaller.building.icon import normalize_icon_type @@ -53,7 +53,7 @@ def __init__(self, *args, **kwargs): self.appname = os.path.splitext(base_name)[0] self.version = kwargs.get("version", "0.0.0") - self.toc = TOC() + self.toc = [] self.strip = False self.upx = False self.console = True @@ -109,6 +109,9 @@ def __init__(self, *args, **kwargs): else: raise ValueError("No EXECUTABLE entry found in the TOC!") + # Normalize TOC + self.toc = normalize_toc(self.toc) + self.__postinit__() _GUTS = ( diff --git a/PyInstaller/building/splash.py b/PyInstaller/building/splash.py index 75ed7e9319..76728c3b5d 100644 --- a/PyInstaller/building/splash.py +++ b/PyInstaller/building/splash.py @@ -12,11 +12,12 @@ import os import re import struct +import pathlib from PyInstaller 
import log as logging from PyInstaller.archive.writers import SplashWriter from PyInstaller.building import splash_templates -from PyInstaller.building.datastruct import TOC, Target +from PyInstaller.building.datastruct import Target from PyInstaller.building.utils import _check_guts_eq, _check_guts_toc, misc from PyInstaller.compat import is_darwin, is_win, is_cygwin from PyInstaller.utils.hooks import tcl_tk as tcltk_utils @@ -66,11 +67,11 @@ def __init__(self, image_file, binaries, datas, **kwargs): .. note:: If PIL (Pillow) is installed and the image is bigger than max_img_size, the image will be resized to fit into the specified area. - :param TOC binaries: - The TOC of binaries the Analysis build target found. This TOC includes all extensionmodules and their - dependencies. This is required to figure out, if the users program uses tkinter. - :param TOC datas: - The TOC of data the Analysis build target found. This TOC includes all data-file dependencies of the + :param list binaries: + The TOC list of binaries the Analysis build target found. This TOC includes all extension modules and their + binary dependencies. This is required to determine whether the user's program uses `tkinter`. + :param list datas: + The TOC list of data the Analysis build target found. This TOC includes all data-file dependencies of the modules. This is required to check if all splash screen requirements can be bundled. 
:keyword text_pos: @@ -186,7 +187,8 @@ def __init__(self, image_file, binaries, datas, **kwargs): ) # Calculated / analysed values - self.uses_tkinter = self._uses_tkinter(binaries) + self.uses_tkinter = self._uses_tkinter(self._tkinter_file, binaries) + logger.debug("Program uses tkinter: %r", self.uses_tkinter) self.script = self.generate_script() self.tcl_lib, self.tk_lib = tcltk_utils.find_tcl_tk_shared_libs(self._tkinter_file) if is_darwin: @@ -198,16 +200,16 @@ def __init__(self, image_file, binaries, datas, **kwargs): # Check if tcl/tk was found assert all(self.tcl_lib) assert all(self.tk_lib) - logger.debug("Use Tcl Library from %s and Tk From %s" % (self.tcl_lib, self.tk_lib)) + logger.debug("Use Tcl Library from %s and Tk From %s", self.tcl_lib, self.tk_lib) self.splash_requirements = set([self.tcl_lib[0], self.tk_lib[0]] + splash_requirements) logger.info("Collect tcl/tk binaries for the splash screen") tcltk_tree = tcltk_utils.collect_tcl_tk_files(self._tkinter_file) if self.full_tk: # The user wants a full copy of tk, so make all tk files a requirement. - self.splash_requirements.update(toc[0] for toc in tcltk_tree) + self.splash_requirements.update(entry[0] for entry in tcltk_tree) - self.binaries = TOC() + self.binaries = [] if not self.uses_tkinter: # The user's script does not use tkinter, so we need to provide a TOC of all necessary files add the shared # libraries to the binaries. @@ -216,7 +218,7 @@ def __init__(self, image_file, binaries, datas, **kwargs): # Only add the intersection of the required and the collected resources, or add all entries if full_tk is # true. - self.binaries.extend(toc for toc in tcltk_tree if toc[0] in self.splash_requirements) + self.binaries.extend(entry for entry in tcltk_tree if entry[0] in self.splash_requirements) # Handle extra requirements of Tcl/Tk shared libraries (e.g., vcruntime140.dll on Windows - see issue #6284). 
# These need to be added to splash requirements, so they are extracted into the initial runtime directory in @@ -237,26 +239,26 @@ def __init__(self, image_file, binaries, datas, **kwargs): self.splash_requirements.update([name for name, *_ in binaries if name.lower() in EXTRA_REQUIREMENTS]) # Check if all requirements were found. - fnames = [toc[0] for toc in (binaries + datas + self.binaries)] + collected_files = set(entry[0] for entry in (binaries + datas + self.binaries)) - def _filter(_item): - if _item not in fnames: + def _filter_requirement(filename): + if filename not in collected_files: # Item is not bundled, so warn the user about it. This actually may happen on some tkinter installations # that are missing the license.terms file. logger.warning( "The local Tcl/Tk installation is missing the file %s. The behavior of the splash screen is " - "therefore undefined and may be unsupported." % _item + "therefore undefined and may be unsupported.", filename ) return False return True # Remove all files which were not found. - self.splash_requirements = set(filter(_filter, self.splash_requirements)) + self.splash_requirements = set(filter(_filter_requirement, self.splash_requirements)) # Test if the tcl/tk version is supported by the bootloader. self.test_tk_version() - logger.debug("Splash Requirements: %s" % self.splash_requirements) + logger.debug("Splash Requirements: %s", self.splash_requirements) self.__postinit__() @@ -300,7 +302,7 @@ def _check_guts(self, data, last_build): return False def assemble(self): - logger.info("Building Splash %s" % self.name) + logger.info("Building Splash %s", self.name) # Function to resize a given image to fit into the area defined by max_img_size. 
def _resize_image(_image, _orig_size): @@ -331,16 +333,14 @@ def _resize_image(_image, _orig_size): _img.close() _img_resized.close() _image_data = _image_stream.getvalue() - logger.info( - "Resized image %s from dimensions %s to (%d, %d)" % (self.image_file, str(_orig_size), _w, _h) - ) + logger.info("Resized image %s from dimensions %s to (%d, %d)", self.image_file, str(_orig_size), _w, _h) return _image_data else: raise ValueError( "The splash image dimensions (w: %d, h: %d) exceed max_img_size (w: %d, h:%d), but the image " "cannot be resized due to missing PIL.Image! Either install the Pillow package, adjust the " - "max_img_size, or use an image of compatible dimensions." % - (_orig_size[0], _orig_size[1], self.max_img_size[0], self.max_img_size[1]) + "max_img_size, or use an image of compatible dimensions.", _orig_size[0], _orig_size[1], + self.max_img_size[0], self.max_img_size[1] ) # Open image file @@ -368,11 +368,11 @@ def _resize_image(_image, _orig_size): img.save(image_data, format='PNG') img.close() image = image_data.getvalue() - logger.info("Converted image %s to PNG format" % self.image_file) + logger.info("Converted image %s to PNG format", self.image_file) else: raise ValueError( "The image %s needs to be converted to a PNG file, but PIL.Image is not available! Either install the " - "Pillow package, or use a PNG image for you splash screen." % self.image_file + "Pillow package, or use a PNG image for you splash screen.", self.image_file ) image_file.close() @@ -396,15 +396,15 @@ def test_tk_version(self): if tcl_version < 8.6 or tk_version < 8.6: logger.warning( "The installed Tcl/Tk (%s/%s) version might not work with the splash screen feature of the bootloader. 
" - "The bootloader is tested against Tcl/Tk 8.6" % - (self._tkinter_module.TCL_VERSION, self._tkinter_module.TK_VERSION) + "The bootloader is tested against Tcl/Tk 8.6", self._tkinter_module.TCL_VERSION, + self._tkinter_module.TK_VERSION ) # This should be impossible, since tcl/tk is released together with the same version number, but just in case if tcl_version != tk_version: logger.warning( "The installed version of Tcl (%s) and Tk (%s) do not match. PyInstaller is tested against matching " - "versions" % (self._tkinter_module.TCL_VERSION, self._tkinter_module.TK_VERSION) + "versions", self._tkinter_module.TCL_VERSION, self._tkinter_module.TK_VERSION ) # Ensure that Tcl is built with multi-threading support. @@ -450,9 +450,14 @@ def generate_script(self): return script @staticmethod - def _uses_tkinter(binaries): - # Test for _tkinter instead of tkinter, because a user might use a different wrapping library for tk. - return '_tkinter' in binaries.filenames + def _uses_tkinter(tkinter_file, binaries): + # Test for _tkinter extension instead of tkinter module, because user might use a different wrapping library for + # Tk. Use `pathlib.PurePath` in comparisons to account for case normalization and separator normalization. 
+ tkinter_file = pathlib.PurePath(tkinter_file) + for dest_name, src_name, typecode in binaries: + if pathlib.PurePath(src_name) == tkinter_file: + return True + return False @staticmethod def _find_rundir(structure): diff --git a/PyInstaller/building/toc_conversion.py b/PyInstaller/building/toc_conversion.py index 0a372aff7b..ce43f7aa15 100644 --- a/PyInstaller/building/toc_conversion.py +++ b/PyInstaller/building/toc_conversion.py @@ -15,7 +15,7 @@ import pkg_resources from PyInstaller import log as logging -from PyInstaller.building.datastruct import TOC, Tree +from PyInstaller.building.datastruct import Tree, normalize_toc from PyInstaller.compat import ALL_SUFFIXES from PyInstaller.depend.utils import get_path_to_egg @@ -97,11 +97,11 @@ def _get_distribution_for_node(self, node): # Public methods. def make_binaries_toc(self): - # TODO create a real TOC when handling of more files is added. - return [(x, y, 'BINARY') for x, y in self._binaries] + toc = [(x, y, 'BINARY') for x, y in self._binaries] + return normalize_toc(toc) def make_datas_toc(self): - toc = TOC((x, y, 'DATA') for x, y in self._datas) + toc = [(x, y, 'DATA') for x, y in self._datas] for dist in self._distributions: if ( dist._pyinstaller_info['egg'] and not dist._pyinstaller_info['zipped'] @@ -110,7 +110,7 @@ def make_datas_toc(self): # this is a un-zipped, not-zip-safe egg tree = Tree(dist.location, excludes=PY_IGNORE_EXTENSIONS) toc.extend(tree) - return toc + return normalize_toc(toc) def make_zipfiles_toc(self): # TODO create a real TOC when handling of more files is added. @@ -119,7 +119,7 @@ def make_zipfiles_toc(self): if dist._pyinstaller_info['zipped'] and not dist._pyinstaller_info['egg']: # Hmm, this should never happen as normal zip-files are not associated with a distribution, are they? 
toc.append(("eggs/" + os.path.basename(dist.location), dist.location, 'ZIPFILE')) - return toc + return normalize_toc(toc) @staticmethod def __collect_data_files_from_zip(zipfilename): @@ -138,7 +138,7 @@ def __collect_data_files_from_zip(zipfilename): return Tree(workpath, excludes=PY_IGNORE_EXTENSIONS) def make_zipped_data_toc(self): - toc = TOC() + toc = [] logger.debug('Looking for egg data files...') for dist in self._distributions: if dist._pyinstaller_info['egg']: @@ -153,4 +153,4 @@ def make_zipped_data_toc(self): else: # this is an un-zipped, not-zip-safe egg, handled in make_datas_toc() pass - return toc + return normalize_toc(toc) diff --git a/PyInstaller/config.py b/PyInstaller/config.py index 80b18186ef..18db9d8a75 100644 --- a/PyInstaller/config.py +++ b/PyInstaller/config.py @@ -44,6 +44,8 @@ workpath tests_modgraph - cached PyiModuleGraph object to speed up tests + +code_cache - dictionary associating `Analysis.pure` list instances with code cache dictionaries. Used by PYZ writer. """ # NOTE: Do not import other PyInstaller modules here. Just define constants here. diff --git a/PyInstaller/depend/analysis.py b/PyInstaller/depend/analysis.py index 4ea44c5241..5d68cee302 100644 --- a/PyInstaller/depend/analysis.py +++ b/PyInstaller/depend/analysis.py @@ -43,7 +43,6 @@ from PyInstaller import HOMEPATH, PACKAGEPATH from PyInstaller import log as logging -from PyInstaller.building.datastruct import TOC from PyInstaller.building.utils import add_suffix_to_extension from PyInstaller.compat import ( BAD_MODULE_TYPES, BINARY_MODULE_TYPES, MODULE_TYPES_TO_TOC_DICT, PURE_PYTHON_MODULE_TYPES, PY3_BASE_MODULES, @@ -541,9 +540,9 @@ def get_code_objects(self): code_dict[node.identifier] = node.code return code_dict - def _make_toc(self, typecode=None, existing_TOC=None): + def _make_toc(self, typecode=None): """ - Return the name, path and type of selected nodes as a TOC, or appended to a TOC. 
The selection is via a list + Return the name, path and type of selected nodes as a TOC. The selection is determined by the given list of PyInstaller TOC typecodes. If that list is empty we return the complete flattened graph as a TOC with the ModuleGraph note types in place of typecodes -- meant for debugging only. Normally we return ModuleGraph nodes whose types map to the requested PyInstaller typecode(s) as indicated in the MODULE_TYPES_TO_TOC_DICT. @@ -559,16 +558,17 @@ def _make_toc(self, typecode=None, existing_TOC=None): regex_str = '(' + '|'.join(PY3_BASE_MODULES) + r')(\.|$)' module_filter = re.compile(regex_str) - result = existing_TOC or TOC() + toc = list() for node in self.iter_graph(start=self._top_script_node): # Skip modules that are in base_library.zip. if module_filter.match(node.identifier): continue entry = self._node_to_toc(node, typecode) + # Append the entry. We do not check for duplicates here; the TOC normalization is left to caller. + # However, as entries are obtained from modulegraph, there should not be any duplicates at this stage. if entry is not None: - # TOC.append the data. This checks for a pre-existing name and skips it if it exists. - result.append(entry) - return result + toc.append(entry) + return toc def make_pure_toc(self): """ @@ -577,11 +577,11 @@ def make_pure_toc(self): # PyInstaller should handle special module types without code object. return self._make_toc(PURE_PYTHON_MODULE_TYPES) - def make_binaries_toc(self, existing_toc): + def make_binaries_toc(self): """ Return all binary Python modules formatted as TOC. 
""" - return self._make_toc(BINARY_MODULE_TYPES, existing_toc) + return self._make_toc(BINARY_MODULE_TYPES) def make_missing_toc(self): """ @@ -624,16 +624,13 @@ def _node_to_toc(node, typecode=None): toc_type = MODULE_TYPES_TO_TOC_DICT[mg_type] return name, path, toc_type - def nodes_to_toc(self, node_list, existing_TOC=None): + def nodes_to_toc(self, nodes): """ Given a list of nodes, create a TOC representing those nodes. This is mainly used to initialize a TOC of scripts with the ones that are runtime hooks. The process is almost the same as _make_toc(), but the caller guarantees the nodes are valid, so minimal checking. """ - result = existing_TOC or TOC() - for node in node_list: - result.append(self._node_to_toc(node)) - return result + return [self._node_to_toc(node) for node in nodes] # Return true if the named item is in the graph as a BuiltinModule node. The passed name is a basename. def is_a_builtin(self, name): @@ -921,7 +918,7 @@ def get_bootstrap_modules(): # Import 'struct' modules to get real paths to module file names. mod_struct = __import__('struct') # Basic modules necessary for the bootstrap process. - loader_mods = TOC() + loader_mods = list() loaderpath = os.path.join(HOMEPATH, 'PyInstaller', 'loader') # On some platforms (Windows, Debian/Ubuntu) '_struct' and zlib modules are built-in modules (linked statically) # and thus does not have attribute __file__. 'struct' module is required for reading Python bytecode from diff --git a/PyInstaller/depend/imphookapi.py b/PyInstaller/depend/imphookapi.py index 68e48a9d80..38a35efba2 100644 --- a/PyInstaller/depend/imphookapi.py +++ b/PyInstaller/depend/imphookapi.py @@ -17,7 +17,6 @@ modules will be frozen into the executable. 
""" -from PyInstaller.building.datastruct import TOC from PyInstaller.building.utils import format_binaries_and_datas from PyInstaller.lib.modulegraph.modulegraph import (RuntimeModule, RuntimePackage) @@ -430,29 +429,37 @@ def del_imports(self, *module_names): """ self._deleted_imports.extend(module_names) - def add_binaries(self, list_of_tuples): + def add_binaries(self, binaries): """ - Add all external dynamic libraries in the passed list of `(name, path)` 2-tuples as dependencies of the + Add all external dynamic libraries in the passed list of `(src_name, dest_name)` 2-tuples as dependencies of the current module. This is equivalent to adding to the global `binaries` hook attribute. - For convenience, the `list_of_tuples` may also be a single TOC or TREE instance. + For convenience, the `binaries` may also be a list of TOC-style 3-tuples `(dest_name, src_name, typecode)`. """ - if isinstance(list_of_tuples, TOC): - self._added_binaries.extend(i[:2] for i in list_of_tuples) + + # Detect TOC 3-tuple list by checking the length of the first entry + if binaries and len(binaries[0]) == 3: + self._added_binaries.extend(entry[:2] for entry in binaries) else: - self._added_binaries.extend(format_binaries_and_datas(list_of_tuples)) + # NOTE: `format_binaries_and_datas` changes tuples from input format `(src_name, dest_name)` to output + # format `(dest_name, src_name)`. + self._added_binaries.extend(format_binaries_and_datas(binaries)) - def add_datas(self, list_of_tuples): + def add_datas(self, datas): """ - Add all external data files in the passed list of `(name, path)` 2-tuples as dependencies of the current - module. This is equivalent to adding to the global `datas` hook attribute. + Add all external data files in the passed list of `(src_name, dest_name)` 2-tuples as dependencies of the + current module. This is equivalent to adding to the global `datas` hook attribute. - For convenience, the `list_of_tuples` may also be a single TOC or TREE instance. 
+ For convenience, the `datas` may also be a list of TOC-style 3-tuples `(dest_name, src_name, typecode)`. """ - if isinstance(list_of_tuples, TOC): - self._added_datas.extend(i[:2] for i in list_of_tuples) + + # Detect TOC 3-tuple list by checking the length of the first entry + if datas and len(datas[0]) == 3: + self._added_datas.extend(entry[:2] for entry in datas) else: - self._added_datas.extend(format_binaries_and_datas(list_of_tuples)) + # NOTE: `format_binaries_and_datas` changes tuples from input format `(src_name, dest_name)` to output + # format `(dest_name, src_name)`. + self._added_datas.extend(format_binaries_and_datas(datas)) def set_module_collection_mode(self, name, mode): """" diff --git a/doc/advanced-topics.rst b/doc/advanced-topics.rst index 2a0a2b99a4..70c7ce17b2 100644 --- a/doc/advanced-topics.rst +++ b/doc/advanced-topics.rst @@ -267,144 +267,159 @@ Functions .. _the toc and tree classes: -The TOC and Tree Classes -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -PyInstaller manages lists of files using the ``TOC`` -(Table Of Contents) class. -It provides the ``Tree`` class as a convenient way to build a ``TOC`` -from a folder path. - -TOC Class (Table of Contents) ---------------------------------- - -Objects of the ``TOC`` class are used as input to the classes created in -a spec file. -For example, the ``scripts`` member of an Analysis object is a TOC -containing a list of scripts. -The ``pure`` member is a TOC with a list of modules, and so on. - -Basically a ``TOC`` object contains a list of tuples of the form - - ``(``\ *name*\ ``,``\ *path*\ ``,``\ *typecode*\ ``)`` - -In fact, it acts as an ordered set of tuples; -that is, it contains no duplicates -(where uniqueness is based on the *name* element of each tuple). -Within this constraint, a TOC preserves the order of tuples added to it. - -A TOC behaves like a list and supports the same methods -such as appending, indexing, etc. 
-A TOC also behaves like a set, and supports taking differences and intersections. -In all of these operations a list of tuples can be used as one argument. -For example, the following expressions are equivalent ways to -add a file to the ``a.datas`` member:: - - a.datas.append( [ ('README', 'src/README.txt', 'DATA' ) ] ) - a.datas += [ ('README', 'src/README.txt', 'DATA' ) ] - -Set-difference makes excluding modules quite easy. For example:: - - a.binaries - [('badmodule', None, None)] - -is an expression that produces a new ``TOC`` that is a copy of -``a.binaries`` from which any tuple named ``badmodule`` has been removed. -The right-hand argument to the subtraction operator -is a list that contains one tuple -in which *name* is ``badmodule`` and the *path* and *typecode* elements -are ``None``. -Because set membership is based on the *name* element of a tuple only, -it is not necessary to give accurate *path* and *typecode* elements when subtracting. - -In order to add files to a TOC, you need to know the *typecode* values -and their related *path* values. -A *typecode* is a one-word string. -PyInstaller uses a number of *typecode* values internally, -but for the normal case you need to know only these: - - -+---------------+--------------------------------------+-----------------------+--------------------------------------+ -| **typecode** | **description** | **name** | **path** | -+===============+======================================+=======================+======================================+ -| 'DATA' | Arbitrary files. | Run-time name. | Full path name in build. | -+---------------+--------------------------------------+-----------------------+--------------------------------------+ -| 'BINARY' | A shared library. | Run-time name. | Full path name in build. | -+---------------+--------------------------------------+-----------------------+--------------------------------------+ -| 'EXTENSION' | A binary extension to Python. | Run-time name. 
| Full path name in build. | -+---------------+--------------------------------------+-----------------------+--------------------------------------+ -| 'OPTION' | A Python run-time option. | Option code | ignored. | -+---------------+--------------------------------------+-----------------------+--------------------------------------+ - -The run-time name of a file will be used in the final bundle. +The Table of Contents (TOC) lists and the Tree Class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +PyInstaller manages lists of files that are to be collected in the +so-called Table of Contents (TOC) list format. These lists contain +three-element tuples that encapsulate information about a file's +destination name, the file's full source path, and its type. + +As part of utilities for managing the TOC lists, PyInstaller provides +a ``Tree`` class as a convenient way to build a TOC list from the +contents of the given directory. This utility class can be used either +in the :ref:`.spec file <using spec files>` or from custom hooks. + + +Table of Contents (TOC) lists +----------------------------- + +The ``Analysis`` object produces several TOC lists that provide information +about files to be collected. The files are grouped into distinct lists +based on their type or function, for example: +- ``Analysis.scripts``: program script(s) +- ``Analysis.pure``: pure-python modules +- ``Analysis.binaries``: binary extension modules and shared libraries +- ``Analysis.datas``: data files + +The generated TOC lists are passed to various build targets within the +:ref:`spec file <using spec files>`, such as ``PYZ``, ``EXE``, and +``COLLECT``. 
+ +Each TOC list contains three-element tuples, + + ``(dest_name, src_name , typecode)`` + +where ``dest_name`` is the destination file name (i.e., file name within +the frozen application; as such, it must always be a relative name), +``src_name`` is the source file name (the path from where the file is +collected), and ``typecode`` is a string that denotes the type of the +file (or entry). + +Internally, PyInstaller uses a number of *typecode* values, but for the +normal case you need to know only these: + ++---------------+---------------------------------------+----------------------------------+---------------------------------------------+ +| **typecode** | **description** | **dest_name** | **src_name** | ++===============+=======================================+==================================+=============================================+ +| 'DATA' | Arbitrary (data) files. | Name in the frozen application. | Full path to the file on the build system. | ++---------------+---------------------------------------+----------------------------------+---------------------------------------------+ +| 'BINARY' | A shared library. | Name in the frozen application. | Full path to the file on the build system. | ++---------------+---------------------------------------+----------------------------------+---------------------------------------------+ +| 'EXTENSION' | A Python binary extension. | Name in the frozen application. | Full path to the file on the build system. | ++---------------+---------------------------------------+----------------------------------+---------------------------------------------+ +| 'OPTION' | A PyInstaller/Python run-time option. | Option name (and optional value, | Ignored. | +| | | separated by a whitespace). 
| | ++---------------+---------------------------------------+----------------------------------+---------------------------------------------+ + +The destination name corresponds to the name of the file in the +frozen application, relative to the top-level application directory. It may include path elements, for example :file:`extras/mydata.txt`. -A ``BINARY`` file or an ``EXTENSION`` file is assumed to be loadable, executable code, -for example a dynamic library. -The types are treated the same. -``EXTENSION`` is generally used for a Python extension module, -for example a module compiled by Cython_. -PyInstaller will examine either type of file for dependencies, -and if any are found, they are also included. +Entries of type ``BINARY`` and ``EXTENSION`` are assumed to represent a +file containing loadable executable code, such as a dynamic library. +Generally, ``EXTENSION`` is used to denote Python extension modules, +such as modules compiled by Cython_. The two file types are treated in +the same way; PyInstaller scans them for additional link-time +dependencies and collects any dependencies that are discovered. On some +operating systems, binaries and extensions undergo additional processing +(such as path rewriting for link-time dependencies and code-signing +on macOS). + +The TOC lists produced by ``Analysis`` can be modified in the +:ref:`spec file <using spec files>` before they are passed on to +the build targets to either include additional entries (although it is +preferable to pass extra files to be included via ``binaries`` or ``datas`` +arguments of ``Analysis``) or remove unwanted entries. + +.. versionchanged:: 5.11 + + In PyInstaller versions prior to 5.11, the TOC lists were in fact + instances of the :class:`TOC` class, which internally performed + implicit entry de-duplication; i.e., trying to insert an entry with + existing target name would result in no changes to the list. 
+ + However, due to the shortcomings of the ``TOC`` class that resulted from + loosely-defined and conflicting semantics, the use of the ``TOC`` class + has been deprecated. The TOC lists are now instances of plain ``list``, + and PyInstaller performs explicit list normalization (entry de-duplication). + The explicit normalization is performed at the end of ``Analysis`` + instantiation, when the lists are stored in the class' properties (such + as ``Analysis.datas`` and ``Analysis.binaries``). Similarly, explicit + list normalization is also performed once the build targets (``EXE``, + ``PYZ``, ``PKG``, ``COLLECT``, ``BUNDLE``) consolidate the input TOC + lists into the final list. + The Tree Class ------------------- +-------------- -The Tree class is a way of creating a TOC that describes some or all of the -files within a directory: +The ``Tree`` class offers a convenient way of creating a TOC list that +describes contents of the given directory: ``Tree(``\ *root*\ ``, prefix=``\ *run-time-folder*\ ``, excludes=``\ *string_list*\ ``, typecode=``\ *code* | ``'DATA' )`` -* The *root* argument is a path string to a directory. +* The *root* argument is a string denoting the path to the directory. It may be absolute or relative to the spec file directory. -* The *prefix* argument, if given, is a name for a subfolder - within the run-time folder to contain the tree files. - If you omit *prefix* or give ``None``, - the tree files will be at - the top level of the run-time folder. +* The optional *prefix* argument is a name for a sub-directory + in the application directory into which files are to be collected. + If not specified or set to ``None``, the files will be collected + into the top-level application directory. -* The *excludes* argument, if given, is a list of one or more +* The optional *excludes* argument is a list of one or more strings that match files in the *root* that should be omitted from the Tree. 
An item in the list can be either: - a name, which causes files or folders with this basename to be excluded - - ``*.ext``, which causes files with this extension to be excluded + - a glob pattern (e.g., ``*.ext``), which causes matching files to be excluded -* The *typecode* argument, if given, specifies the TOC typecode string - that applies to all items in the Tree. - If omitted, the default is ``DATA``, which is appropriate for most cases. +* The optional *typecode* argument specifies the TOC typecode string + that is assigned to all entries in the TOC list. + The default value is ``DATA``, which is appropriate for most cases. For example:: - extras_toc = Tree('../src/extras', prefix='extras', excludes=['tmp','*.pyc']) + extras_toc = Tree('../src/extras', prefix='extras', excludes=['tmp', '*.pyc']) -This creates ``extras_toc`` as a TOC object that lists +This creates ``extras_toc`` as a TOC list that contains entries for all files from the relative path :file:`../src/extras`, omitting those that have the basename (or are in a folder named) ``tmp`` -or that have the type ``.pyc``. +or have the ``.pyc`` extension. Each tuple in this TOC has: -* A *name* composed of :file:`extras/{filename}`. +* A *dest_name* in the form of :file:`extras/{filename}`. -* A *path* consisting of a complete, absolute path to that file in the +* A *src_name* that corresponds to the full absolute path to that file in the :file:`../src/extras` folder (relative to the location of the spec file). -* A *typecode* of ``DATA`` (by default). +* A *typecode* of ``DATA`` (the default). 
An example of creating a TOC listing some binary modules:: - cython_mods = Tree( '..src/cy_mods', excludes=['*.pyx','*.py','*.pyc'], typecode='EXTENSION' ) + cython_mods = Tree('../src/cy_mods', excludes=['*.pyx', '*.py', '*.pyc'], typecode='EXTENSION') -This creates a TOC with a tuple for every file in the :file:`cy_mods` folder, -excluding any with the ``.pyx``, ``.py`` or ``.pyc`` suffixes -(so presumably collecting the ``.pyd`` or ``.so`` modules created by Cython). +This creates a TOC list with entries for each file in the :file:`cy_mods` directory, +excluding files with the ``.pyx``, ``.py``, or ``.pyc`` extension +(so presumably collecting only the ``.pyd`` or ``.so`` modules created by Cython). Each tuple in this TOC has: -* Its own filename as *name* (no prefix; the file will be at the top level of the bundle). +* A *dest_name* that corresponds to the file's basename (all files are collected + in the top-level application directory). -* A *path* as an absolute path to that file in :file:`../src/cy_mods` - relative to the spec file. +* A *src_name* that corresponds to the full absolute path to that file in + :file:`../src/cy_mods` relative to the spec file. * A *typecode* of ``EXTENSION`` (``BINARY`` could be used as well). diff --git a/news/7615.core.rst b/news/7615.core.rst new file mode 100644 index 0000000000..b421f7a0d6 --- /dev/null +++ b/news/7615.core.rst @@ -0,0 +1,9 @@ +Remove the use of the ``TOC`` class in the analysis / build process, +and use plain ``list`` instances instead. The implicit normalization +(de-duplication) of TOC entries performed by the ``TOC`` class has been +replaced with explicit normalization. The TOC lists produced by ``Analysis`` +are explicitly normalized at the end of Analysis instantiation, before +they are stored in the Analysis properties (e.g., ``Analysis.pure``, +``Analysis.binaries``, ``Analysis.datas``). 
Similarly, the TOC lists +passed to the build targets (e.g., ``PYZ``, ``EXE``, ``COLLECT``) are +explicitly normalized as part of the targets' instantiation process. diff --git a/news/7615.deprecation.rst b/news/7615.deprecation.rst new file mode 100644 index 0000000000..691ef3cca2 --- /dev/null +++ b/news/7615.deprecation.rst @@ -0,0 +1,5 @@ +The ``TOC`` class is now deprecated; use a plain ``list`` with the same +three-element tuples instead. PyInstaller now performs explicit +normalization (i.e., entry de-duplication) of the TOC lists passed +to the build targets (e.g., ``PYZ``, ``EXE``, ``COLLECT``) during their +instantiation. diff --git a/news/7615.doc.rst b/news/7615.doc.rst new file mode 100644 index 0000000000..38ac071635 --- /dev/null +++ b/news/7615.doc.rst @@ -0,0 +1,2 @@ +Update the documentation on TOC lists and ``Tree`` class to reflect the +deprecation of the ``TOC`` class. diff --git a/tests/functional/test_regression.py b/tests/functional/test_regression.py index 3b184626b5..96aa3a7d28 100644 --- a/tests/functional/test_regression.py +++ b/tests/functional/test_regression.py @@ -33,7 +33,8 @@ def test_issue_2492(monkeypatch, tmpdir): 'dot-file': str(tmpdir.join('imports.dot')), 'xref-file': str(tmpdir.join('imports.xref')), 'hiddenimports': [], - 'specnm': 'issue_2492_script' + 'specnm': 'issue_2492_script', + 'code_cache': dict(), } ) # Speedup: avoid analyzing base_library.zip @@ -87,7 +88,8 @@ def getImports(*args, **kwargs): 'dot-file': str(tmpdir.join('imports.dot')), 'xref-file': str(tmpdir.join('imports.xref')), 'hiddenimports': [], - 'specnm': 'issue_5131_script' + 'specnm': 'issue_5131_script', + 'code_cache': dict(), } ) # Speedup: avoid analyzing base_library.zip diff --git a/tests/unit/test_TOC.py b/tests/unit/test_TOC.py index 15f32e9d5c..10c658af3f 100644 --- a/tests/unit/test_TOC.py +++ b/tests/unit/test_TOC.py @@ -29,6 +29,9 @@ ELEMS3 = (('PIL.Image.py', '/usr/lib/python2.7/encodings/__init__.py', 'PYMODULE'),) +# Ignore 
deprecation warnings for the TOC class +pytestmark = pytest.mark.filterwarnings("ignore:TOC class is deprecated.") + def test_init_empty(): toc = TOC() diff --git a/tests/unit/test_toc_normalization.py b/tests/unit/test_toc_normalization.py new file mode 100644 index 0000000000..4e861a6263 --- /dev/null +++ b/tests/unit/test_toc_normalization.py @@ -0,0 +1,215 @@ +#----------------------------------------------------------------------------- +# Copyright (c) 2023, PyInstaller Development Team. +# +# Distributed under the terms of the GNU General Public License (version 2 +# or later) with exception for distributing the bootloader. +# +# The full license is in the file COPYING.txt, distributed with this software. +# +# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception) +#----------------------------------------------------------------------------- + +# Tests for explicit TOC list normalization, which replaced the implicit normalization performed by the ``TOC`` class. +import copy +import pathlib + +from PyInstaller import compat +from PyInstaller.building.datastruct import normalize_pyz_toc, normalize_toc + +# Tests for regular TOC normalization. + +_BASE_TOC = [ + ('libpython3.10.so', '/usr/lib64/libpython3.10.so', 'BINARY'), + ('libsomething.so', '/usr/local/lib64/libsomething.so', 'BINARY'), + ('README', '/home/user/tmp/README', 'DATA'), + (str(pathlib.PurePath('data/data.csv')), '/home/user/tmp/data/data.csv', 'DATA'), + ('dependency.bin', 'other_multipackage:dependency.bin', 'DEPENDENCY'), + ('myextension.so', 'myextension.so', 'EXTENSION'), +] + + +def test_normalize_toc_no_duplicates(): + # No duplicates. We expect the output list to match the input list. + toc = copy.copy(_BASE_TOC) + expected_toc = _BASE_TOC + + normalized_toc = normalize_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_toc_duplicate_binary(): + # A duplicated BINARY entry. We expect the (second) duplicate to be removed.
+ toc = copy.copy(_BASE_TOC) + toc.insert(2, ('libsomething.so', '/opt/something/lib/libsomething.so', 'BINARY')) + expected_toc = _BASE_TOC + + normalized_toc = normalize_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_toc_duplicate_binary_case_sensitive(): + # A BINARY entry that is a duplicate only on case-insensitive OSes (expectation branches on compat.is_win). + toc = copy.copy(_BASE_TOC) + toc.insert(2, ('libSoMeThInG.so', '/opt/something/lib/libSoMeThInG.so', 'BINARY')) + expected_toc = _BASE_TOC + + if compat.is_win: + expected_toc = _BASE_TOC + else: + expected_toc = toc + + normalized_toc = normalize_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_toc_duplicate_data(): + # A duplicated DATA entry. We expect the (second) duplicate to be removed. + toc = copy.copy(_BASE_TOC) + toc.insert(3, ('README', '/home/user/tmp/README', 'DATA')) + expected_toc = _BASE_TOC + + normalized_toc = normalize_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_toc_duplicate_data_case_sensitive(): + # A DATA entry that is a duplicate only on case-insensitive OSes (expectation branches on compat.is_win). + toc = copy.copy(_BASE_TOC) + toc.insert(-1, ('readme', '/home/user/tmp-other/readme', 'DATA')) + expected_toc = _BASE_TOC + + if compat.is_win: + expected_toc = _BASE_TOC + else: + expected_toc = toc + + normalized_toc = normalize_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_toc_conflicting_binary_and_data1(): + # An entry that's duplicated as both BINARY and DATA. + # BINARY entry should be kept. + toc = copy.copy(_BASE_TOC) + toc.insert(2, ('libsomething.so', '/usr/local/lib64/libsomething.so', 'DATA')) # Insert after BINARY entry + expected_toc = _BASE_TOC + + normalized_toc = normalize_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_toc_conflicting_binary_and_data2(): + # An entry that's duplicated as both BINARY and DATA, in reverse order. + # BINARY entry should be kept.
+ toc = copy.copy(_BASE_TOC) + toc.insert(1, ('libsomething.so', '/usr/local/lib64/libsomething.so', 'DATA')) # Insert before BINARY entry + expected_toc = _BASE_TOC + + normalized_toc = normalize_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_toc_multipackage_dependency(): + # An entry that's duplicated as BINARY, DATA, EXTENSION, and DEPENDENCY. + # DEPENDENCY should have the highest priority of the four. + # The priority-based replacement during normalization might not preserve the order, so we need to sort the + # resulting and expected TOC before comparing them. In this particular case, we insert duplicates at the + # start of the list, so de-duplication effectively moves the DEPENDENCY entry to the first place in the + # output list. + toc = copy.copy(_BASE_TOC) + toc.insert(0, ('dependency.bin', '/mnt/somewhere/dependency.bin', 'EXTENSION')) + toc.insert(0, ('dependency.bin', '/mnt/somewhere/dependency.bin', 'BINARY')) + toc.insert(0, ('dependency.bin', '/mnt/somewhere/dependency.bin', 'DATA')) + expected_toc = _BASE_TOC + + normalized_toc = normalize_toc(toc) + assert sorted(normalized_toc) == sorted(expected_toc) + + +def test_normalize_toc_with_parent_pardir_loops(): + # Check that de-duplication works even if destination paths contain local loops with parent directory (..) + # components but can be normalized to the same path. Furthermore, we expect TOC normalization to replace the + # dest_name with its normalized version.
+ toc = [ + ( + str(pathlib.PurePath('numpy/core/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0')), + '/path/to/venv/lib/python3.11/site-packages/numpy/core/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0', + 'BINARY', + ), + ( + str(pathlib.PurePath('numpy/linalg/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0')), + '/path/to/venv/lib/python3.11/site-packages/numpy/linalg/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0', + 'BINARY', + ), + ] + expected_toc = [ + ( + str(pathlib.PurePath('numpy.libs/libquadmath-2d0c479f.so.0.0.0')), + '/path/to/venv/lib/python3.11/site-packages/numpy/core/../../numpy.libs/libquadmath-2d0c479f.so.0.0.0', + 'BINARY', + ), + ] + + normalized_toc = normalize_toc(toc) + assert sorted(normalized_toc) == sorted(expected_toc) + + +# Tests for PYZ TOC normalization. +_BASE_PYZ_TOC = [ + ('copy', '/usr/lib64/python3.11/copy.py', 'PYMODULE'), + ('csv', '/usr/lib64/python3.11/csv.py', 'PYMODULE'), + ('dataclasses', '/usr/lib64/python3.11/dataclasses.py', 'PYMODULE'), + ('datetime', '/usr/lib64/python3.11/datetime.py', 'PYMODULE'), + ('decimal', '/usr/lib64/python3.11/decimal.py', 'PYMODULE'), + ('mymodule1', 'mymodule1.py', 'PYMODULE'), + ('mymodule2', 'mymodule2.py', 'PYMODULE'), +] + + +def test_normalize_pyz_toc_no_duplicates(): + # No duplicates. We expect the output list to match the input list. + toc = copy.copy(_BASE_PYZ_TOC) + expected_toc = _BASE_PYZ_TOC + + normalized_toc = normalize_pyz_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_pyz_toc_duplicates(): + # Duplicated entry. We expect the (second) duplicate to be removed. + toc = copy.copy(_BASE_PYZ_TOC) + toc.insert(6, ('mymodule1', 'some-other-path/mymodule1.py', 'PYMODULE')) + expected_toc = _BASE_PYZ_TOC + + normalized_toc = normalize_pyz_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_pyz_toc_case_sensitivity(): + # Duplicated entry with different case. In PYZ, the entries are case-sensitive, so the list is not modified.
+ toc = copy.copy(_BASE_PYZ_TOC) + toc.insert(6, ('MyMoDuLe1', 'some-other-path/MyMoDuLe1.py', 'PYMODULE')) + expected_toc = toc + + normalized_toc = normalize_pyz_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_pyz_toc_module_and_data1(): + # In PYZ TOC, a DATA entry should not mask a PYMODULE one. Variant with the DATA entry listed first. + toc = copy.copy(_BASE_PYZ_TOC) + toc.insert(5, ('mymodule1', 'data-dir/mymodule1', 'DATA')) + expected_toc = _BASE_PYZ_TOC + + normalized_toc = normalize_pyz_toc(toc) + assert normalized_toc == expected_toc + + +def test_normalize_pyz_toc_module_and_data2(): + # In PYZ TOC, a DATA entry should not mask a PYMODULE one. Variant with the PYMODULE entry listed first. + toc = copy.copy(_BASE_PYZ_TOC) + toc.insert(6, ('mymodule1', 'data-dir/mymodule1', 'DATA')) + expected_toc = _BASE_PYZ_TOC + + normalized_toc = normalize_pyz_toc(toc) + assert normalized_toc == expected_toc