Skip to content

Commit

Permalink
bindepend: use multiprocessing to speed up assemblies (win32) and dependency search
Browse files Browse the repository at this point in the history
  • Loading branch information
Giuseppe Corbelli authored and htgoebel committed Dec 26, 2020
1 parent a9b2999 commit 6649425
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 12 deletions.
75 changes: 63 additions & 12 deletions PyInstaller/depend/bindepend.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
"""

import ctypes.util
import collections
import functools
import itertools
import multiprocessing
import os
import platform
import re
import sys
from glob import glob
Expand Down Expand Up @@ -217,19 +222,63 @@ def Dependencies(lTOC, xtrapath=None, manifest=None, redirects=None):
# directly with PyInstaller.
lTOC = _extract_from_egg(lTOC)

for nm, pth, typ in lTOC:
if nm.upper() in seen:
continue
logger.debug("Analyzing %s", pth)
seen.add(nm.upper())
if is_win:
for ftocnm, fn in getAssemblyFiles(pth, manifest, redirects):
lTOC.append((ftocnm, fn, 'BINARY'))
for lib, npth in selectImports(pth, xtrapath):
if lib.upper() in seen or npth.upper() in seen:
# 4 processes may yield up to +40% speed on 2 CPUs
# 2 processes may yield up to +30% speed on 2 CPUs
processes = 2 * multiprocessing.cpu_count()
pool = multiprocessing.Pool(processes)

if is_win:
# Search for required assemblies and add them to the TOC
paths = set(os.path.normpath(os.path.normcase(path))
for name, path, typ in lTOC)
assemblies = pool.map(
functools.partial(getAssemblyFiles, manifest=manifest,
redirects=redirects),
paths)
# getAssemblyFiles returns a list of tuples, so assemblies is a
# list of list of tuples
for ftocnm, fn in itertools.chain(*assemblies):
lTOC.append((ftocnm, fn, 'BINARY'))

dataset = collections.deque(lTOC)
while True:
# Breakdown the data-set in chunks as big as the chosen number
# of processes instead of just feeding the whole data-set into
# process pool so that we can keep the "seen" cache in main
# process only.

# It's true that elements in two different chunks may find
# common dependencies and since processes don't share a common
# cache these same common dependencies will be inspected
# multiple times. But we accept this trade-off (at least for
# now) since implementing a common shared cache would bring in
# far more complexity.
chunk = collections.deque()
while dataset and len(chunk) < processes:
name, path, typ = dataset.pop()
name = name.upper()
if name not in seen:
chunk.append(path)
seen.add(name)

if not chunk:
break # From while True, no more data

imports = pool.map(
functools.partial(selectImports, xtrapath=xtrapath),
chunk)
# selectImports returns a list of pairs, so 'imports' is
# a list of lists of pairs
for lib, npath in itertools.chain(*imports):
npath = os.path.normpath(os.path.normcase(npath))
if lib in seen or npath in seen:
continue
seen.add(npth.upper())
lTOC.append((lib, npth, 'BINARY'))
seen.add(lib)
seen.add(npath)
lTOC.append((lib, npath, 'BINARY'))

pool.close()
pool.join()

return lTOC

Expand Down Expand Up @@ -396,6 +445,7 @@ def getAssemblyFiles(pth, manifest=None, redirects=None):
Return a list of pairs (name, fullpath)
"""
logger.debug("Analyzing %s for assembly dependencies", pth)
rv = []
if manifest:
_depNames = set(dep.name for dep in manifest.dependentAssemblies)
Expand Down Expand Up @@ -490,6 +540,7 @@ def selectImports(pth, xtrapath=None):
Return a list of pairs (name, fullpath)
"""
logger.debug("Analyzing %s for binary dependencies", pth)
rv = []
if xtrapath is None:
xtrapath = [os.path.dirname(pth)]
Expand Down
1 change: 1 addition & 0 deletions news/3735.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Shortened the time required to build the binary dependency graph by using multiple processes.

0 comments on commit 6649425

Please sign in to comment.