Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

building: macOS: use @rpath to rewrite binaries' dependency paths #7664

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 13 additions & 5 deletions PyInstaller/building/utils.py
Expand Up @@ -29,7 +29,6 @@
from PyInstaller import log as logging
from PyInstaller.compat import (EXTENSION_SUFFIXES, is_cygwin, is_darwin, is_win)
from PyInstaller.config import CONF
from PyInstaller.depend import dylib
from PyInstaller.depend.bindepend import match_binding_redirect
from PyInstaller.utils import misc

Expand Down Expand Up @@ -356,15 +355,22 @@ def checkCache(
cache_index[basenm] = digest
misc.save_py_data_struct(cacheindexfn, cache_index)

# On Mac OS we need relative paths to dll dependencies starting with @executable_path. While modifying
# the headers invalidates existing signatures, we avoid removing them in order to speed things up (and
# to avoid potential bugs in the codesign utility, like the one reported on Mac OS 10.13 in #6167).
# On macOS, we need to modify the given binary's paths to the dependent libraries, in order to ensure they are
# relocatable and always refer to location within the frozen application. Specifically, we make all dependent
# library paths relative to @rpath, and set @rpath to point to the top-level application directory, relative to
# the binary's location (i.e., @loader_path).
#
# While modifying the headers invalidates existing signatures, we avoid removing them in order to speed things up
# (and to avoid potential bugs in the codesign utility, like the one reported on Mac OS 10.13 in #6167).
# The forced re-signing at the end should take care of the invalidated signatures.
if is_darwin:
try:
osxutils.binary_to_target_arch(cachedfile, target_arch, display_name=fnm)
#osxutils.remove_signature_from_binary(cachedfile) # Disabled as per comment above.
dylib.mac_set_relative_dylib_deps(cachedfile, dist_nm)
target_rpath = str(
pathlib.PurePath('@loader_path', *['..' for level in pathlib.PurePath(dist_nm).parent.parts])
)
osxutils.set_dylib_dependency_paths(cachedfile, target_rpath)
osxutils.sign_binary(cachedfile, codesign_identity, entitlements_file)
except osxutils.InvalidBinaryError:
# Raised by osxutils.binary_to_target_arch when the given file is not a valid macOS binary (for example,
Expand All @@ -383,6 +389,8 @@ def checkCache(
if strict_arch_validation:
raise
logger.debug("File %s failed optional architecture validation - collecting as-is!", fnm)
except Exception as e:
raise SystemError(f"Failed to process binary {cachedfile!r}!") from e

return cachedfile

Expand Down
75 changes: 0 additions & 75 deletions PyInstaller/depend/dylib.py
Expand Up @@ -26,8 +26,6 @@

logger = logging.getLogger(__name__)

_BOOTLOADER_FNAMES = {'run', 'run_d', 'runw', 'runw_d'}

# Ignoring some system libraries speeds up packaging process
_excludes = {
# Ignore annoying warnings with Windows system DLLs.
Expand Down Expand Up @@ -378,76 +376,3 @@ def warn_missing_lib(libname):
Check if a missing-library warning should be displayed for the given library name (or full path).
"""
return not missing_lib_warning_suppression_list.search(libname)


def mac_set_relative_dylib_deps(libname, distname):
    """
    On Mac OS set relative paths to dynamic library dependencies of `libname`.

    Relative paths make it unnecessary to rely on the environment variable DYLD_LIBRARY_PATH, which has known issues;
    relative paths are a more flexible mechanism.

    The location of dependent libraries is derived from the location of the library itself (the rewritten paths start
    with '@loader_path').

    `libname` is the path to the binary file that is modified in place.

    `distname` is the path of the library relative to the dist directory of the frozen executable. It is used to
    determine how many directory levels ('../') need to be added to '@loader_path' for binaries that are not located
    directly in the dist directory.

    For example, Qt5 plugins are not in the same directory as Qt*.dylib files. Without using
    '@loader_path/../..' for Qt plugins, Mac OS would not be able to resolve shared library dependencies,
    and Qt plugins will not be loaded.

    The write-back of modified headers is best-effort: a failure is logged, but not propagated to the caller.
    """

    from macholib import util
    from macholib.MachO import MachO

    # Ignore bootloader; otherwise PyInstaller fails with exception like
    # 'ValueError: total_size > low_offset (288 > 0)'
    if os.path.basename(libname) in _BOOTLOADER_FNAMES:
        return

    # Determine how many directories up ('../') is the directory with shared dynamic libraries.
    # E.g., ./qt4_plugins/images/ -> ./../../
    parent_dir = ''
    dist_dir = os.path.dirname(distname)
    # Check if distname is not only base filename.
    if dist_dir:
        parent_level = len(dist_dir.split(os.sep))
        parent_dir = parent_level * (os.pardir + os.sep)

    # The older python.org builds that use system Tcl/Tk framework have their _tkinter.cpython-*-darwin.so
    # library linked against /Library/Frameworks/Tcl.framework/Versions/8.5/Tcl and
    # /Library/Frameworks/Tk.framework/Versions/8.5/Tk, although the actual frameworks are located in
    # /System/Library/Frameworks. Therefore, they slip through the in_system_path() check, and we need to
    # exempt them manually.
    _exemptions = (
        '/Library/Frameworks/Tcl.framework/',
        '/Library/Frameworks/Tk.framework/',
    )

    def match_func(pth):
        """
        Return the rewritten path for the given load command path, or None to leave the path unchanged.
        """
        # Leave system dynamic libraries unchanged; they keep their absolute paths.
        if util.in_system_path(pth):
            return None

        if any(x in pth for x in _exemptions):
            return None

        # Use relative path to dependent dynamic libraries based on the location of the executable.
        return os.path.join('@loader_path', parent_dir, os.path.basename(pth))

    # Rewrite mach headers with @loader_path.
    dll = MachO(libname)
    dll.rewriteLoadCommands(match_func)

    # Write changes into file. Write code is based on macholib example.
    try:
        with open(dll.filename, 'rb+') as f:
            for _ in dll.headers:
                f.seek(0)
                dll.write(f)
            f.seek(0, 2)
            f.flush()
    except Exception:
        # Keep the original best-effort semantics (do not abort the build), but do not hide the failure.
        logger.warning("Failed to write modified dylib dependency paths into %r!", libname, exc_info=True)
178 changes: 174 additions & 4 deletions PyInstaller/utils/osx.py
Expand Up @@ -14,11 +14,23 @@

import math
import os
import pathlib
import subprocess
import shutil

from macholib.mach_o import LC_BUILD_VERSION, LC_CODE_SIGNATURE, LC_SEGMENT_64, LC_SYMTAB, LC_VERSION_MIN_MACOSX
import tempfile

from macholib.mach_o import (
LC_BUILD_VERSION,
LC_CODE_SIGNATURE,
LC_ID_DYLIB,
LC_LOAD_DYLIB,
LC_RPATH,
LC_SEGMENT_64,
LC_SYMTAB,
LC_VERSION_MIN_MACOSX,
)
from macholib.MachO import MachO
import macholib.util

import PyInstaller.log as logging
from PyInstaller.compat import base_prefix
Expand Down Expand Up @@ -288,11 +300,22 @@ def get_binary_architectures(filename):
return bool(executable.fat), [_get_arch_string(hdr.header) for hdr in executable.headers]


def convert_binary_to_thin_arch(filename, thin_arch, output_filename=None):
    """
    Convert the given fat binary into thin one with the specified target architecture.

    The thin slice for `thin_arch` is extracted from `filename` using the `lipo` utility and written to
    `output_filename`. If `output_filename` is not given (or is empty), the input file is overwritten in place.

    Raises SystemError if the `lipo` command exits with a non-zero return code.
    """
    output_filename = output_filename or filename
    cmd_args = ['lipo', '-thin', thin_arch, filename, '-output', output_filename]
    # stderr is merged into stdout, so that the error message below contains the complete output of the tool.
    p = subprocess.run(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
    if p.returncode:
        raise SystemError(f"lipo command ({cmd_args}) failed with error code {p.returncode}!\noutput: {p.stdout}")


def merge_into_fat_binary(output_filename, *slice_filenames):
    """
    Stitch the given single-arch thin binaries together into one fat binary.

    Runs `lipo -create` with all `slice_filenames` as inputs and `output_filename` as the output.

    Raises SystemError if the `lipo` command exits with a non-zero return code.
    """
    cmd_args = ['lipo', '-create', '-output', output_filename]
    cmd_args.extend(slice_filenames)

    # stderr is folded into stdout, so the captured text carries everything the tool printed.
    proc = subprocess.run(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
    if proc.returncode == 0:
        return

    raise SystemError(f"lipo command ({cmd_args}) failed with error code {proc.returncode}!\noutput: {proc.stdout}")
Expand Down Expand Up @@ -357,3 +380,150 @@ def sign_binary(filename, identity=None, entitlements_file=None, deep=False):
p = subprocess.run(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
if p.returncode:
raise SystemError(f"codesign command ({cmd_args}) failed with error code {p.returncode}!\noutput: {p.stdout}")


def set_dylib_dependency_paths(filename, target_rpath):
    """
    Modify the given dylib's identity (in LC_ID_DYLIB command) and the paths to dependent dylibs (in LC_LOAD_DYLIB
    commands) into `@rpath/<basename>` format, remove any existing rpaths (LC_RPATH commands), and add a new rpath
    (LC_RPATH command) with the specified path.

    Uses the `install_name_tool` utility to make the changes.

    The system libraries (e.g., the ones found in /usr/lib) are exempted from path rewrite.

    For multi-arch fat binaries, this function extracts each slice into a temporary file, processes it separately,
    and then merges all processed slices back into a fat binary. This is necessary because `install_name_tool`
    cannot modify rpaths in cases when an existing rpath is present only in one slice.

    The file at `filename` is modified in place.
    """

    # Check if we are dealing with a fat binary; the `install_name_tool` seems to be unable to remove an rpath that
    # is present only in one slice, so we need to extract each slice, process it separately, and then stitch the
    # processed slices back into a fat binary.
    is_fat, archs = get_binary_architectures(filename)

    if not is_fat:
        # Thin binary - we can process it directly
        _set_dylib_dependency_paths(filename, target_rpath)
        return

    with tempfile.TemporaryDirectory() as tmpdir:
        slice_filenames = []
        for arch in archs:
            # Each slice is stored in a temporary file named after its architecture.
            slice_filename = os.path.join(tmpdir, arch)
            convert_binary_to_thin_arch(filename, arch, output_filename=slice_filename)
            _set_dylib_dependency_paths(slice_filename, target_rpath)
            slice_filenames.append(slice_filename)
        # Merge while the temporary directory (and thus the processed slices) still exists.
        merge_into_fat_binary(filename, *slice_filenames)


def _set_dylib_dependency_paths(filename, target_rpath):
    """
    The actual implementation of set_dylib_dependency_paths functionality.

    Implicitly assumes that a single-arch thin binary is given.

    The binary at `filename` is modified in place by a single `install_name_tool` invocation, which:
     - changes the dylib identifier (if present) to `@rpath/<basename>`,
     - changes paths of linked non-system libraries to `@rpath/<basename>`,
     - removes all rpaths other than `target_rpath`,
     - adds `target_rpath`, if at least one linked library is referenced via `@rpath` and it is not already present.

    If no changes are necessary, `install_name_tool` is not run at all.

    Raises SystemError if the `install_name_tool` command exits with a non-zero return code.
    """

    # The older python.org builds that use system Tcl/Tk framework have their _tkinter.cpython-*-darwin.so
    # library linked against /Library/Frameworks/Tcl.framework/Versions/8.5/Tcl and
    # /Library/Frameworks/Tk.framework/Versions/8.5/Tk, although the actual frameworks are located in
    # /System/Library/Frameworks. Therefore, they slip through the in_system_path() check, and we need to
    # exempt them manually.
    _exemptions = (
        '/Library/Frameworks/Tcl.framework/',
        '/Library/Frameworks/Tk.framework/',
    )

    # Parse dylib's header to extract the following commands:
    #  - LC_LOAD_DYLIB: dylib load commands (dependent libraries)
    #  - LC_RPATH: rpath definitions
    #  - LC_ID_DYLIB: dylib's identity
    binary = MachO(filename)

    dylib_id = None
    rpaths = set()
    linked_libs = set()

    for header in binary.headers:
        for cmd in header.commands:
            lc_type = cmd[0].cmd
            if lc_type not in {LC_LOAD_DYLIB, LC_RPATH, LC_ID_DYLIB}:
                continue

            # Decode path, strip trailing NULL characters
            path = cmd[2].decode('utf-8').rstrip('\x00')

            if lc_type == LC_LOAD_DYLIB:
                linked_libs.add(path)
            elif lc_type == LC_RPATH:
                rpaths.add(path)
            elif lc_type == LC_ID_DYLIB:
                dylib_id = path

    del binary

    # If dylib has identifier set, compute the normalized version, in form of `@rpath/basename`.
    normalized_dylib_id = None
    if dylib_id:
        normalized_dylib_id = str(pathlib.PurePath('@rpath') / pathlib.PurePath(dylib_id).name)

    # Find dependent libraries that should have their prefix path changed to `@rpath`. If any dependent libraries
    # end up using `@rpath` (originally or due to rewrite), set the `rpath_required` boolean to True, so we know
    # that we need to add our rpath.
    # The set is iterated in sorted order, so that the generated command line is the same from run to run.
    changed_lib_paths = []
    rpath_required = False
    for linked_lib in sorted(linked_libs):
        # Leave system dynamic libraries unchanged.
        if macholib.util.in_system_path(linked_lib):
            continue

        # Leave the manually-exempted Tcl/Tk frameworks unchanged.
        if any(x in linked_lib for x in _exemptions):
            continue

        # This linked library will end up using `@rpath`, whether modified or not...
        rpath_required = True

        new_path = str(pathlib.PurePath('@rpath') / pathlib.PurePath(linked_lib).name)
        if linked_lib == new_path:
            continue

        changed_lib_paths.append((linked_lib, new_path))

    # Gather arguments for `install_name_tool`
    install_name_tool_args = []

    # Modify the dylib identifier if necessary
    if normalized_dylib_id and normalized_dylib_id != dylib_id:
        install_name_tool_args += ["-id", normalized_dylib_id]

    # Changed libs
    for original_path, new_path in changed_lib_paths:
        install_name_tool_args += ["-change", original_path, new_path]

    # Remove all existing rpaths except for the target rpath (if it already exists). `install_name_tool` disallows
    # using `-delete_rpath` and `-add_rpath` with the same argument.
    for rpath in sorted(rpaths):
        if rpath == target_rpath:
            continue
        install_name_tool_args += ["-delete_rpath", rpath]

    # If any of linked libraries use @rpath now and our target rpath is not already added, add it.
    # NOTE: @rpath in the dylib identifier does not actually require the rpath to be set on the binary...
    if rpath_required and target_rpath not in rpaths:
        install_name_tool_args += ["-add_rpath", target_rpath]

    # If we have no arguments, finish immediately.
    if not install_name_tool_args:
        return

    # Run `install_name_tool`
    cmd_args = ["install_name_tool", *install_name_tool_args, filename]
    p = subprocess.run(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
    if p.returncode:
        raise SystemError(
            f"install_name_tool command ({cmd_args}) failed with error code {p.returncode}!\noutput: {p.stdout}"
        )
9 changes: 9 additions & 0 deletions news/7664.bugfix.rst
@@ -0,0 +1,9 @@
(macOS) When rewriting the dylib identifier and paths to linked
libraries in a collected binary, use ``@rpath``-based paths instead
of ``@loader_path``-based paths, and replace the rpaths in the
binary with a single rpath that points to the top-level application
directory, relative to ``@loader_path``. This ensures that the
library identifiers of collected shared libraries and their
references in referring binaries always match, which allows packages
to pre-load a library from an arbitrary location via, for example,
``ctypes``.
8 changes: 8 additions & 0 deletions news/7664.core.rst
@@ -0,0 +1,8 @@
(macOS) Use macOS-provided ``install_name_tool`` utility to modify headers
on collected binaries: change the dylib identifier to ``@rpath/<name>.dylib``,
rewrite paths to linked non-system shared libraries to ``@rpath/<dependency>``,
remove any additional rpaths and add an rpath pointing to the application's
top-level directory, relative to the ``@loader_path``. Previously, the
header modification was performed using ``macholib`` and was limited
only to modification of dylib identifier and paths to linked non-system
shared libraries.
3 changes: 3 additions & 0 deletions news/7664.feature.rst
@@ -0,0 +1,3 @@
(macOS) PyInstaller now removes all rpaths from collected binaries
and replaces them with a single rpath pointing to the top-level
application directory, relative to ``@loader_path``.