-
-
Notifications
You must be signed in to change notification settings - Fork 210
/
darwintools.py
665 lines (585 loc) · 25.1 KB
/
darwintools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
# ruff: noqa
from __future__ import annotations
import os
import platform
import shutil
import stat
import subprocess
import tempfile
from collections.abc import Iterable
from pathlib import Path
from cx_Freeze.exception import PlatformError
# In a MachO file, need to deal specially with links that use @executable_path,
# @loader_path, @rpath
#
# @executable_path - where ultimate calling executable is
# @loader_path - directory of current object
# @rpath - list of paths to check
# (earlier rpaths have higher priority, i believe)
#
# Resolving these variables (particularly @rpath) requires tracing through the
# sequence of linked MachO files leading to the current file, to determine
# which directories are included in the current rpath.
def isMachOFile(path: Path) -> bool:
    """Report whether ``path`` is a regular file that ``file`` calls Mach-O.

    Anything that is not an existing regular file yields False without
    launching a subprocess. Otherwise the output of the ``file`` utility is
    searched for the marker ``Mach-O``.
    """
    # Short-circuit keeps the subprocess from running for non-files.
    return path.is_file() and (
        b"Mach-O" in subprocess.check_output(("file", path))
    )
class MachOReference:
    """One load reference found in a MachO file, pointing at another file."""

    def __init__(
        self,
        source_file: DarwinFile,
        raw_path: str,
        resolved_path: Path | None,
    ):
        """Store the reference; it starts out not associated with a copy.

        :param source_file: DarwinFile object for the file in which the
            reference was found
        :param raw_path: The load path exactly as it appears in the file
            (may include @rpath, etc.)
        :param resolved_path: The explicit path of the referenced file on the
            system, or None if the path could not be resolved at the time the
            DarwinFile was processed.
        """
        # Target DarwinFile; only set (via setTargetFile) when the referenced
        # file is copied into the app.
        self.target_file: DarwinFile | None = None
        # True once the referenced file is known to be copied into the frozen
        # package (i.e., it is not a non-copied system file).
        self.is_copied = False

        self.resolved_path: Path | None = resolved_path
        self.raw_path: str = raw_path
        self.source_file: DarwinFile = source_file

    def isResolved(self) -> bool:
        """Return True when an explicit path is known for this reference."""
        resolved = self.resolved_path
        return resolved is not None

    def setTargetFile(self, darwin_file: DarwinFile):
        """Attach the copied target file and mark the reference as copied."""
        self.is_copied, self.target_file = True, darwin_file
class DarwinFile:
    """A DarwinFile object represents a file that will be copied into the
    application, and records where it was ultimately moved to in the
    application bundle. Mostly used to provide special handling for copied
    files that are Mach-O files.
    """

    def __init__(
        self,
        path: str | Path,
        referencing_file: DarwinFile | None = None,
        strict: bool = False,
    ):
        """:param path: The original path of the DarwinFile
            (before copying into app)
        :param referencing_file: DarwinFile object representing the referencing
            source file
        :param strict: Do not make guesses about rpath resolution. If the
            load does not resolve, throw an Exception.
        """
        self.path = Path(path).resolve()
        # Only recorded further down, and only when the file is Mach-O.
        self.referencing_file: DarwinFile | None = None
        self.strict = strict

        # path to file in build directory (set as part of freeze process)
        self._build_path: Path | None = None

        # commands in a Mach-O file
        self.commands: list[MachOCommand] = []
        self.loadCommands: list[MachOLoadCommand] = []
        self.rpathCommands: list[MachORPathCommand] = []

        # note: if file gets referenced twice (or more), it will only be the
        # first reference that gets recorded.

        # mapping of raw load paths to absolute resolved paths
        # (or None, if no resolution was determined)
        self.libraryPathResolution: dict[str, Path | None] = {}

        # the list of entries in the rpath in effect for this file.
        self._rpath: list[Path] | None = None

        # dictionary of MachOReference objects, by their paths.
        # Path used is the resolved path, if available, and otherwise the
        # unresolved load path.
        self.machOReferenceForTargetPath: dict[Path, MachOReference] = {}

        if not isMachOFile(self.path):
            self.isMachO = False
            return

        # if this is a MachO file, extract linking information from it
        self.isMachO = True
        self.commands = MachOCommand._getMachOCommands(self.path)
        self.loadCommands = [
            c for c in self.commands if isinstance(c, MachOLoadCommand)
        ]
        self.rpathCommands = [
            c for c in self.commands if isinstance(c, MachORPathCommand)
        ]
        self.referencing_file = referencing_file

        # Computes and caches the rpath before load paths are resolved.
        self.getRPath()
        self.resolveLibraryPaths()

        # Create MachOReference objects for all the binaries referenced from
        # this file.
        for raw_path, resolved_path in self.libraryPathResolution.items():
            # the path to use for storing in dictionary
            dict_path = (
                Path(raw_path) if resolved_path is None else resolved_path
            )
            if dict_path in self.machOReferenceForTargetPath:
                if self.strict:
                    raise PlatformError(
                        f"ERROR: Multiple dynamic libraries from {self.path}"
                        f" resolved to the same file ({dict_path})."
                    )
                print(
                    f"WARNING: Multiple dynamic libraries from {self.path}"
                    f" resolved to the same file ({dict_path})."
                )
                # keep the first reference recorded for this path
                continue
            self.machOReferenceForTargetPath[dict_path] = MachOReference(
                source_file=self,
                raw_path=raw_path,
                resolved_path=resolved_path,
            )

    def __str__(self):
        parts = [f"Mach-O File: {self.path}", "Resolved rpath:"]
        parts.extend(f" {rpath}" for rpath in self.getRPath())
        parts.append("Loaded libraries:")
        parts.extend(
            f" {raw_path} -> {resolved_path}"
            for raw_path, resolved_path in self.libraryPathResolution.items()
        )
        return "\n".join(parts)

    def fileReferenceDepth(self) -> int:
        """Returns how deep this Mach-O file is in the dynamic load order."""
        if self.referencing_file is not None:
            return self.referencing_file.fileReferenceDepth() + 1
        return 0

    def printFileInformation(self):
        """Prints information about the Mach-O file, followed by the same
        information for the chain of files that referenced it.
        """
        print(f"[{self.fileReferenceDepth()}] File: {self.path}")
        print(" Commands:")
        if self.commands:
            for cmd in self.commands:
                print(f" {cmd}")
        else:
            print(" [None]")

        # This can be included for even more detail on the problem file.
        # print(" Load commands:")
        # if len(self.loadCommands) > 0:
        #     for cmd in self.loadCommands: print(f' {cmd}')
        # else: print(" [None]")

        print(" RPath commands:")
        if self.rpathCommands:
            for rpc in self.rpathCommands:
                print(f" {rpc}")
        else:
            print(" [None]")
        print(" Calculated RPath:")
        rpath = self.getRPath()
        if rpath:
            for path in rpath:
                print(f" {path}")
        else:
            print(" [None]")
        if self.referencing_file is not None:
            print("Referenced from:")
            self.referencing_file.printFileInformation()

    def setBuildPath(self, path: Path):
        """Record the path of this file in the build directory."""
        self._build_path = path

    def getBuildPath(self) -> Path | None:
        """Return the recorded build path, or None if none was set."""
        return self._build_path

    @staticmethod
    def isExecutablePath(path: str) -> bool:
        return path.startswith("@executable_path")

    @staticmethod
    def isLoaderPath(path: str) -> bool:
        return path.startswith("@loader_path")

    @staticmethod
    def isRPath(path: str) -> bool:
        return path.startswith("@rpath")

    def resolveLoader(self, path: str) -> Path | None:
        """Resolve a path that includes @loader_path. @loader_path represents
        the directory in which the DarwinFile is located.

        Raises PlatformError if the path does not start with @loader_path.
        """
        if self.isLoaderPath(path):
            return self.path.parent / Path(path).relative_to("@loader_path")
        raise PlatformError(f"resolveLoader() called on bad path: {path}")

    def resolveExecutable(self, path: str) -> Path:
        """@executable_path should resolve to the directory where the original
        executable was located. By default, we set that to the directory of
        the library, so it would resolve in the same way as if linked from an
        executable in the same directory.

        Raises PlatformError if the path does not start with
        @executable_path.
        """
        # consider making this resolve to the directory of the python
        # interpreter? Apparently not a big issue in practice, since the
        # code has been like this forever.
        if self.isExecutablePath(path):
            return self.path.parent / Path(path).relative_to(
                "@executable_path/"
            )
        raise PlatformError(f"resolveExecutable() called on bad path: {path}")

    def resolveRPath(self, path: str) -> Path | None:
        """Resolve an @rpath load path against the rpath in effect.

        Returns the first candidate that is a Mach-O file. If none matches,
        returns None when not strict; when strict, prints diagnostics and
        raises PlatformError.
        """
        for rpath in self.getRPath():
            test_path = rpath / Path(path).relative_to("@rpath")
            if isMachOFile(test_path):
                return test_path
        if not self.strict:
            # If not strictly enforcing rpath, return None here, and leave any
            # error to .finalizeReferences() instead.
            return None
        print(f"\nERROR: Problem resolving RPath [{path}] in file:")
        self.printFileInformation()
        raise PlatformError(f"resolveRPath() failed to resolve path: {path}")

    def getRPath(self) -> list[Path]:
        """Returns the rpath in effect for this file. Determined by rpath
        commands in this file and (recursively) the chain of files that
        referenced this file.

        The result is cached after the first call. Entries inherited from the
        referencing file come first; entries that do not exist on disk are
        dropped.
        """
        if self._rpath is not None:
            return self._rpath
        candidates: list[Path] = []
        for command in self.rpathCommands:
            raw_path = command.rpath
            if raw_path is None:
                # The rpath command could not be parsed, so there is nothing
                # to add (and Path(None) would raise TypeError).
                continue
            test_rp = Path(raw_path)
            if test_rp.is_absolute():
                candidates.append(test_rp)
            elif self.isLoaderPath(raw_path):
                candidates.append(self.resolveLoader(raw_path).resolve())
            elif self.isExecutablePath(raw_path):
                candidates.append(self.resolveExecutable(raw_path).resolve())
        rpath = [entry for entry in candidates if entry.exists()]
        if self.referencing_file is not None:
            rpath = self.referencing_file.getRPath() + rpath
        self._rpath = rpath
        return rpath

    def resolvePath(self, path: str) -> Path | None:
        """Resolves any @executable_path, @loader_path, and @rpath references
        in a path.

        Absolute paths are returned unchanged. Other paths are tried relative
        to the directory of this file. If that fails, a PlatformError is
        raised when strict; otherwise a warning is printed and None returned.
        """
        if self.isLoaderPath(path):  # replace @loader_path
            return self.resolveLoader(path)
        if self.isExecutablePath(path):  # replace @executable_path
            return self.resolveExecutable(path)
        if self.isRPath(path):  # replace @rpath
            return self.resolveRPath(path)
        test_path = Path(path)
        if test_path.is_absolute():  # just use the path, if it is absolute
            return test_path
        test_path = self.path.parent / path
        if isMachOFile(test_path):
            return test_path.resolve()
        if self.strict:
            raise PlatformError(
                f"Could not resolve path: {path} from file {self.path}."
            )
        print(
            f"WARNING: Unable to resolve reference to {path} from "
            f"file {self.path}. Frozen application may not "
            f"function correctly."
        )
        return None

    def resolveLibraryPaths(self):
        """Resolve the load path of every load command and record the result
        in self.libraryPathResolution.

        A load command whose path could not be parsed (load_path is None)
        cannot be resolved: PlatformError is raised when strict, otherwise a
        warning is printed and the command is skipped.
        """
        for cmd in self.loadCommands:
            raw_path = cmd.load_path
            if raw_path is None:
                if self.strict:
                    raise PlatformError(
                        f"Could not determine load path of {cmd!r} "
                        f"in file {self.path}."
                    )
                print(
                    f"WARNING: Could not determine load path of {cmd!r} "
                    f"in file {self.path}. Skipping it."
                )
                continue
            self.libraryPathResolution[raw_path] = self.resolvePath(raw_path)

    def getDependentFilePaths(self) -> set[Path]:
        """Returns the set of resolved paths to dependencies; references that
        could not be resolved are left out.
        """
        return {
            ref.resolved_path
            for ref in self.machOReferenceForTargetPath.values()
            if ref.isResolved()
        }

    def getMachOReferenceList(self) -> list[MachOReference]:
        """Return the recorded MachOReference objects as a new list."""
        return list(self.machOReferenceForTargetPath.values())

    def getMachOReferenceForPath(self, path: Path) -> MachOReference:
        """Returns the reference pointing to the specified path, based on
        paths stored in self.machOReferenceForTargetPath. Raises
        PlatformError if not available.
        """
        try:
            return self.machOReferenceForTargetPath[path]
        except KeyError:
            raise PlatformError(
                f"Path {path} is not a path referenced from DarwinFile"
            ) from None
class MachOCommand:
    """Represents a load command in a MachO file."""

    def __init__(self, lines: list[str]):
        """:param lines: the output lines that make up this command (as
        collected by _getMachOCommands, each line is already stripped).
        """
        self.lines = lines

    def displayString(self) -> str:
        """Return the first two lines of the command joined by " / "."""
        return " / ".join(line.strip() for line in self.lines[:2])

    def __repr__(self):
        return f"<MachOCommand ({self.displayString()})>"

    @staticmethod
    def _getMachOCommands(path: Path) -> list[MachOCommand]:
        """Returns a list of load commands in the specified file, using
        otool.
        """
        shell_command = ("otool", "-l", path)
        commands: list[MachOCommand] = []
        # None until the first "Load command" header is seen, so any output
        # before that header is ignored.
        current_command_lines: list[str] | None = None

        # split the output into separate load commands
        out = subprocess.check_output(shell_command, encoding="utf-8")
        for raw_line in out.splitlines():
            line = raw_line.strip()
            if line.startswith("Load command"):
                if current_command_lines is not None:
                    commands.append(
                        MachOCommand.parseLines(current_command_lines)
                    )
                current_command_lines = []
            if current_command_lines is not None:
                current_command_lines.append(line)
        # flush the final command, which has no following header
        if current_command_lines is not None:
            commands.append(MachOCommand.parseLines(current_command_lines))
        return commands

    @staticmethod
    def parseLines(lines: list[str]) -> MachOCommand:
        """Build the command object matching the ``cmd`` line in ``lines``.

        The second line is expected to look like ``cmd <NAME>``.
        LC_LOAD_DYLIB and LC_RPATH get their dedicated subclasses; anything
        else, including input too short or malformed to classify, becomes a
        plain MachOCommand.
        """
        if len(lines) < 2:
            return MachOCommand(lines)
        # Split on any run of whitespace so that repeated spaces or tabs do
        # not produce empty fields, and make sure a value follows "cmd"
        # before indexing it.
        parts = lines[1].split()
        if len(parts) < 2 or parts[0] != "cmd":
            return MachOCommand(lines)
        if parts[1] == "LC_LOAD_DYLIB":
            return MachOLoadCommand(lines)
        if parts[1] == "LC_RPATH":
            return MachORPathCommand(lines)
        return MachOCommand(lines)
class MachOLoadCommand(MachOCommand):
    """A load command whose fourth line names the library to load."""

    def __init__(self, lines: list[str]):
        """Extract the load path from the ``name <path> (offset N)`` line.

        ``load_path`` stays None when there are fewer than four lines or the
        fourth line does not start with ``name ``.
        """
        super().__init__(lines)
        self.load_path = None
        if len(self.lines) >= 4:
            name_line = self.lines[3].strip()
            if name_line.startswith("name "):
                # drop the "name" keyword, then everything from "(offset" on
                remainder = name_line[len("name"):].strip()
                self.load_path = remainder.partition("(offset")[0].strip()

    def getPath(self):
        """Return the parsed load path (None if it could not be parsed)."""
        return self.load_path

    def __repr__(self):
        return f"<LoadCommand path={self.load_path!r}>"
class MachORPathCommand(MachOCommand):
    """An rpath command whose fourth line carries the rpath entry."""

    def __init__(self, lines: list[str]):
        """Extract the rpath from the ``path <path> (offset N)`` line.

        ``rpath`` stays None when there are fewer than four lines or the
        fourth line does not start with ``path ``.
        """
        super().__init__(lines)
        self.rpath = None
        if len(self.lines) >= 4:
            path_line = self.lines[3].strip()
            if path_line.startswith("path "):
                # drop the "path" keyword, then everything from "(offset" on
                remainder = path_line[len("path"):].strip()
                self.rpath = remainder.partition("(offset")[0].strip()

    def __repr__(self):
        return f"<RPath path={self.rpath!r}>"
def _printFile(
darwinFile: DarwinFile,
seenFiles: set[DarwinFile],
level: int,
noRecurse=False,
):
"""Utility function to prints details about a DarwinFile and (optionally)
recursively any other DarwinFiles that it references.
"""
print("{}{}".format(level * "| ", os.fspath(darwinFile.path)), end="")
print(" (already seen)" if noRecurse else "")
if noRecurse:
return
for ref in darwinFile.machOReferenceForTargetPath.values():
if not ref.is_copied:
continue
file = ref.target_file
_printFile(
file,
seenFiles=seenFiles,
level=level + 1,
noRecurse=(file in seenFiles),
)
seenFiles.add(file)
return
def printMachOFiles(fileList: list[DarwinFile]):
    """Print each file in fileList, and the copied files it references, via
    _printFile. Files already printed or reached earlier are not printed
    again at the top level.
    """
    seenFiles = set()
    for darwin_file in fileList:
        if darwin_file in seenFiles:
            continue
        # mark before printing so the file counts as seen while it is expanded
        seenFiles.add(darwin_file)
        _printFile(darwin_file, seenFiles=seenFiles, level=0)
def changeLoadReference(
    fileName: str, oldReference: str, newReference: str, VERBOSE: bool = True
):
    """Utility function that uses install_name_tool to change oldReference to
    newReference in the machO file specified by fileName.

    The file is made user-writable for the duration of the call and its
    original mode is restored afterwards, even if install_name_tool cannot
    be launched. The exit status of install_name_tool is not checked.

    :param fileName: path of the file to modify
    :param oldReference: the load path currently stored in the file
    :param newReference: the load path to store instead
    :param VERBOSE: when true, print a line describing the redirection
    """
    if VERBOSE:
        print(
            "Redirecting load reference for "
            f"<{fileName}> {oldReference} -> {newReference}"
        )
    original = os.stat(fileName).st_mode
    os.chmod(fileName, original | stat.S_IWUSR)
    try:
        subprocess.call(
            (
                "install_name_tool",
                "-change",
                oldReference,
                newReference,
                fileName,
            )
        )
    finally:
        # Always put the permissions back; otherwise a failure to launch the
        # tool would leave a read-only file writable.
        os.chmod(fileName, original)
def applyAdHocSignature(fileName: str):
    """Ad-hoc sign fileName with codesign; does nothing unless
    platform.machine() reports "arm64".

    If the first codesign call returns a non-zero status, the file is copied
    into a temporary directory, moved back over the original, and codesign
    is run once more. The status of that second call is not checked.
    """
    if platform.machine() != "arm64":
        return

    print("Applying AdHocSignature")
    codesign_command = (
        "codesign",
        "--sign",
        "-",
        "--force",
        "--preserve-metadata=entitlements,requirements,flags,runtime",
        fileName,
    )
    if subprocess.call(codesign_command) != 0:
        # It may be a bug in Apple's codesign utility.
        # The workaround is to copy the file to another inode, then move it
        # back, erasing the previous file. Then sign again.
        with tempfile.TemporaryDirectory() as scratch_dir:
            scratch_copy = os.path.join(
                scratch_dir, os.path.basename(fileName)
            )
            shutil.copy(fileName, scratch_copy)
            shutil.move(scratch_copy, fileName)
        subprocess.call(codesign_command)
class DarwinFileTracker:
    """Keeps track of the DarwinFiles added in the course of a freeze."""

    def __init__(self, strict: bool = False):
        """:param strict: when true, finalizeReferences() raises instead of
        warning if a reference cannot be matched to any copied file.
        """
        self.strict = strict

        # DarwinFile objects for files being copied into the project, in the
        # order they were recorded
        self._copied_file_list: list[DarwinFile] = []

        # (build directory) target path -> DarwinFile
        self._darwin_file_for_build_path: dict[Path, DarwinFile] = {}

        # (source location) path -> DarwinFile
        self._darwin_file_for_source_path: dict[Path, DarwinFile] = {}

        # cache of MachOReference objects pointing to a given source path
        self._reference_cache: dict[Path, MachOReference] = {}

    def __iter__(self) -> Iterable[DarwinFile]:
        return iter(self._copied_file_list)

    def pathIsAlreadyCopiedTo(self, target_path: Path) -> bool:
        """Tell whether a file has already been recorded as copied to
        target_path.
        """
        return target_path in self._darwin_file_for_build_path

    def getDarwinFile(
        self, source_path: Path, target_path: Path
    ) -> DarwinFile:
        """Return the DarwinFile recorded for target_path.

        Raises a PlatformError if nothing was recorded for target_path. If
        the recorded file came from a different source than source_path, a
        warning is printed and the recorded file is returned anyway.
        """
        targetDarwinFile: DarwinFile | None
        targetDarwinFile = self._darwin_file_for_build_path.get(target_path)
        if targetDarwinFile is None:
            raise PlatformError(
                f"File {target_path} already copied to, "
                "but no DarwinFile object found for it."
            )

        # check that the target file came from the specified source
        real_source = source_path.resolve()
        target_real_source = targetDarwinFile.path.resolve()
        if real_source != target_real_source:
            # A mismatch is reported as a warning rather than an error.
            print(
                "*** WARNING ***\n"
                f"Attempting to copy two files to {target_path}\n"
                f"source 1: {targetDarwinFile.path} "
                f"(real: {target_real_source})\n"
                f"source 2: {source_path} (real: {real_source})\n"
                "(This may be caused by including modules in the zip file "
                "that rely on binary libraries with the same name.)"
                "\nUsing only source 1."
            )
        return targetDarwinFile

    def recordCopiedFile(self, target_path: Path, darwin_file: DarwinFile):
        """Register darwin_file as the file copied to target_path. Raises a
        PlatformError if something was already recorded for that path.
        """
        if target_path in self._darwin_file_for_build_path:
            raise PlatformError(
                "addFile() called with target_path already copied to "
                f"(target_path={target_path})"
            )

        self._copied_file_list.append(darwin_file)
        self._darwin_file_for_build_path[target_path] = darwin_file
        self._darwin_file_for_source_path[darwin_file.path] = darwin_file

    def cacheReferenceTo(self, source_path: Path, reference: MachOReference):
        """Remember reference as the cached reference to source_path."""
        self._reference_cache[source_path] = reference

    def getCachedReferenceTo(self, source_path: Path) -> MachOReference | None:
        """Return the cached reference to source_path, or None."""
        return self._reference_cache.get(source_path)

    def findDarwinFileForFilename(self, filename: str) -> DarwinFile | None:
        """Return the first recorded DarwinFile whose file name equals the
        final component of filename, or None if there is no such file.
        """
        basename = Path(filename).name
        matches = (
            candidate
            for candidate in self._copied_file_list
            if candidate.path.name == basename
        )
        return next(matches, None)

    def finalizeReferences(self):
        """Final pass over the references of all copied DarwinFiles, trying
        to connect every reference that is not yet marked as copied. Two
        situations can leave a reference unmarked:

        1) Files where _CopyFile was called without copyDependentFiles=True
           (in which case the information would not have been added to the
           references at that time).
        2) Files with broken @rpath references. We try to fix that up here by
           seeing if the relevant file was located *anywhere* as part of the
           freeze process.
        """
        copied_file: DarwinFile
        reference: MachOReference
        for copied_file in self._copied_file_list:
            for reference in copied_file.getMachOReferenceList():
                if reference.is_copied:
                    continue

                if reference.isResolved():
                    # Resolved reference: simply check whether the resolved
                    # path was otherwise copied, and if so attach its
                    # DarwinFile object.
                    target_path = reference.resolved_path.resolve()
                    known = self._darwin_file_for_source_path.get(target_path)
                    if known is not None:
                        reference.setTargetFile(known)
                    continue

                # Unresolved reference: look through the copied files for a
                # candidate with the same file name, and use it if found.
                potential_target = self.findDarwinFileForFilename(
                    reference.raw_path
                )
                if potential_target is not None:
                    print(
                        f"WARNING: In file [{copied_file.path}]"
                        f" guessing that {reference.raw_path} "
                        f"resolved to {potential_target.path}."
                    )
                    reference.resolved_path = potential_target.path
                    reference.setTargetFile(potential_target)
                    continue

                # If we cannot find any likely candidate, fail.
                if self.strict:
                    copied_file.printFileInformation()
                    raise PlatformError(
                        f"finalizeReferences() failed to resolve"
                        f" path [{reference.raw_path}] in file "
                        f"[{copied_file.path}]."
                    )
                print(
                    "\nWARNING: Could not resolve dynamic link to "
                    f"[{reference.raw_path}] in file "
                    f"[{copied_file.path}], and could "
                    "not find any likely intended target."
                )