Skip to content

Commit

Permalink
Hooks: numpy: Simplify numpy hook.
Browse files Browse the repository at this point in the history
Whilst I'm here also:

- Exclude numpy's testing and C/Fortran compiling code, and
  everything else it drags with it (namely scipy).
- Try to fix Conda support.
- Avoid dragging 400MB of unused MKL DLLs on Conda.

They're all closed due to our no-conda policy but, (for now) fixes
 #4935, #4968, #5075, #5082, #5019 and #5019. Although likely at
the expense of junk DLLs being dragged in.
  • Loading branch information
bwoodsend committed Sep 17, 2020
1 parent ad39eb8 commit 0ea8912
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 96 deletions.
47 changes: 0 additions & 47 deletions PyInstaller/hooks/hook-numpy.core.py

This file was deleted.

193 changes: 144 additions & 49 deletions PyInstaller/hooks/hook-numpy.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,149 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2013-2020, PyInstaller Development Team.
#!/usr/bin/env python3

# --- Copyright Disclaimer ---
#
# In order to support PyInstaller with numpy<1.20.0 this file will be
# duplicated for a short period inside PyInstaller's repository [1]. However
# this file is the intellectual property of the NumPy team and is under the
# terms and conditions outlined in their repository [2].
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
# .. refs:
#
# The full license is in the file COPYING.txt, distributed with this software.
# [1] PyInstaller: https://github.com/pyinstaller/pyinstaller/
# [2] NumPy's license: https://github.com/numpy/numpy/blob/master/LICENSE.txt
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
# -----------------------------------------------------------------------------

"""
This hook should collect all binary files and any hidden modules that numpy
needs.
Our (somewhat inadequate) docs for writing PyInstaller hooks are kept here:
https://pyinstaller.readthedocs.io/en/stable/hooks.html
PyInstaller has a lot of numpy users so we'd consider maintaining this hook to
be high priority. Feel free to @mention either bwoodsend or Legorooj on Github
for help keeping it working.
"""

import os
import glob
from PyInstaller.compat import is_win, is_venv, base_prefix
from PyInstaller.utils.hooks import get_module_file_attribute

# numpy.testing is unconditionally imported by numpy, thus we can not exclude
# .testing (which would be preferred). Anyway, this only saves about 7
# modules. See also https://github.com/numpy/numpy/issues/17183
#excludedimports = ["numpy.testing"]

# FIXME check if this workaround is still necessary!
if is_win:
from PyInstaller.utils.win32.winutils import extend_system_path
from distutils.sysconfig import get_python_lib
# SciPy/Numpy Windows builds from http://www.lfd.uci.edu/~gohlke/pythonlibs
# contain some dlls in directory like C:\Python27\Lib\site-packages\numpy\core\
numpy_core_paths = [os.path.join(get_python_lib(), 'numpy', 'core')]
# In virtualenv numpy might be installed directly in real prefix path.
# Then include this path too.
if is_venv:
numpy_core_paths.append(
os.path.join(base_prefix, 'Lib', 'site-packages', 'numpy', 'core')
import re
from pathlib import Path

from PyInstaller.utils.hooks import collect_dynamic_libs, exec_statement, logger
from PyInstaller import compat

# --- Plain official numpy from PyPI ---

# Supporting regular numpy is actually dead easy.

# We need to collect all used dll/so/dylibs. As these are all inside numpy's
# `site-packages/numpy` directory we can just use the following:
binaries = collect_dynamic_libs("numpy", ".")
# Note: this does not collect Python extension modules. They are found using
# Python's native import scheme.

# PyInstaller can't detect imports made from a Cython or C extension module.
# If a submodule is *only* ever `import`ed by a Cython or C module then it
# won't be found. Numpy currently has none of these, but should one be added,
# its dotted name should be listed here, e.g.
# "numpy.core._hidden._guts._module".
# These issues should be very easy to track: you'll get a ModuleNotFoundError
# for that hidden module at runtime if you forget to include one.
hiddenimports = []


# --- Additional support for less official mkl builds ---

# Check if MKL is being used.
# We avoid using `import numpy` directly in hooks in case doing so alters either
# sys.path or PATH, which could confuse the build. Instead the probe below is
# handed to `exec_statement()` (expected to run it in a separate interpreter
# and return what it printed) and its output is compared with "True".
is_mkl = exec_statement("""
# XXX: Numpy devs - is this a good way to test if using MKL?
import numpy
# `blas_mkl_info` is not guaranteed to exist in every numpy build, so a
# missing attribute is treated as "not MKL" instead of making the probe fail.
print(bool(getattr(numpy.__config__, "blas_mkl_info", None)))
""") == "True"

# The MKL binaries themselves are included inside the numpy folder and will
# therefore already have been found by `collect_dynamic_libs()` above.

def find_library(name):
"""Glob-find and include a dll (like) binary file which is usually found by
searching PATH.
"""
# We'll hopefully include this in later versions of PyInstaller. So that
# NumPy remains compatible with PyInstaller 4.0, I'm copying it here.

# `name` is used as a glob pattern against every folder listed in PATH.
# `names` remembers the file names already collected, so a library that is
# present in several PATH folders is added only once (first PATH hit wins).
names = set()
# Local result list of `(path, ".")` tuples; it shadows the module-level
# `binaries` only inside this function.
binaries = []
# NOTE(review): `os.environ["PATH"]` raises KeyError if PATH is not set.
for folder in os.environ["PATH"].split(os.pathsep):
for path in Path(folder).glob(name):
if not path.name in names:
# NOTE(review): "." is presumably the destination folder inside the bundle,
# as in PyInstaller's `binaries` format - confirm.
binaries.append((str(path), "."))
names.add(path.name)
# Nothing matched in any PATH folder: warn, but still return the empty list.
if not binaries:
logger.warning(
"Failed to find '%s' DLL in PATH. Your app will likely crash if run"
" on a different machine that doesn't already have it.", name
)
# NOTE(review): everything from here down to `binaries.append((dll_glob, "."))`
# looks like removed-side lines of the diff (the old hook's Windows DLL
# handling) that were interleaved with this function when the page was
# captured, rather than part of `find_library` - confirm against the
# committed file before relying on them.
extend_system_path(numpy_core_paths)
del numpy_core_paths

# if we bundle the testing module, this will cause
# `scipy` to be pulled in unintentionally but numpy imports
# numpy.testing at the top level for historical reasons.
# excludedimports = collect_submodules('numpy.testing')

binaries = []

# package the DLL bundle that official numpy wheels for Windows ship
# The DLL bundle will either be in extra-dll on windows proper
# and in .libs if installed on a virtualenv created from MinGW (Git-Bash
# for example)
if is_win:
extra_dll_locations = ['extra-dll', '.libs']
for location in extra_dll_locations:
dll_glob = os.path.join(os.path.dirname(
get_module_file_attribute('numpy')), location, "*.dll")
if glob.glob(dll_glob):
binaries.append((dll_glob, "."))
# End of the apparently interleaved old lines; this `return` closes
# `find_library` and hands back the `(path, ".")` tuples gathered from PATH.
return binaries


if is_mkl:
    # MKL relies on a few extra shared libraries that PyInstaller cannot
    # detect by itself. The patterns below were determined empirically using
    # dynamic dependency sniffing, so they may need revisiting should MKL's
    # dependencies change.
    binaries.extend(
        entry
        for pattern in ("libcrypto*", "libffi*", "libssl*")
        for entry in find_library(pattern)
    )


# --- A vain attempt at Conda's numpy support ---

# Regular numpy, even with unofficial mkl builds, is pretty trivial to support
# with PyInstaller. Unfortunately Conda's numpy is the opposite. We need
# their help to maintain this because our own attempts have been a disaster.

if compat.is_conda:
    hiddenimports.append("six")

    # There are many hidden binary dependencies. This list depends heavily on
    # the OS, Python and NumPy versions. Omitting any of these can lead to
    # obscure and often traceback-less crashes.
    # XXX: As you can see, this is really not a scalable solution. Needs help!
    #
    # Each entry is a regular expression applied with `re.match()`, so it is
    # anchored at the start of the file name only.
    conda_dll_patterns = [re.compile(i) for i in (
        r'apphelp.*', r'crypt32.*', r'imagehlp.*', r'libblas.*',
        r'libcblas.*', r'libcrypto.*', r'libffi.*', r'libgcc_.*',
        r'libgfortran.*', r'libifcoremd.*', r'libiomp\d+md.*', r'liblapack.*',
        r'libmmd.*', r'libomp.*',
        r'libopenblas.*', r'libquadmath.*', r'libssl.*', r'libuuid.*',
        r'libz.*', r'mkl_avx.*', r'mkl_core.*',
        r'mkl_intel_thread.*', r'mkl_rt.*', r'mkl_vml_avx.*',
        r'msasn.*', r'mswsock.*', r'ole.*',
        r'oleaut.*', r'tbbmalloc.*', r'urandom',
    )]

    # Folder searched for the shared libraries, relative to the environment's
    # base prefix.
    if compat.is_win:
        lib_dir = os.path.join(compat.base_prefix, "Library", "bin")
    else:
        lib_dir = os.path.join(compat.base_prefix, "lib")

    def _is_required(name):
        """Return True if the file name `name` matches any Conda DLL pattern."""
        return any(pattern.match(name) for pattern in conda_dll_patterns)

    if os.path.isdir(lib_dir):
        # Sorted so that the order of the collected binaries is the same on
        # every run instead of depending on set iteration order.
        _to_add = sorted(filter(_is_required, os.listdir(lib_dir)))
    else:
        # A missing folder should not abort the whole hook; report it and
        # carry on without the extra Conda libraries.
        logger.warning(
            "Conda library directory '%s' does not exist. No Conda DLLs were "
            "collected for numpy.", lib_dir
        )
        _to_add = []

    for name in _to_add:
        binaries.append((os.path.join(lib_dir, name), "."))


# --- Remove testing and building code ---

# Modules belonging to numpy's testing and C/Fortran build tooling, plus what
# they would drag in with them (namely scipy), are excluded from the bundle.
excludedimports = ["scipy", "pytest", "nose", "distutils", "f2py", "setuptools",
"numpy.f2py", "numpy.distutils"]

# I would suggest using the following to remove all the `tests` submodules but
# we don't need it. They will only be included if a module that is itself
# included contains an explicit `import numpy.xxx.tests`. Should your tests
# structure change so that they start to get sucked in, uncomment the lines
# below.

# from PyInstaller.utils.hooks import collect_submodules
# is_tests = lambda x: "tests" in x.split(".")
# excludedimports += collect_submodules("numpy", filter=is_tests)

0 comments on commit 0ea8912

Please sign in to comment.