# import magic
* **Raúl Cumplido**
* **@raulcumplido**
* **PyConES 2015 - Valencia**

![caption](img/te_chie_la.png)

# Versions

* Almost everything **Python 3.4**
* Some bits **Python 3.5**
* Extra **Python 3.6**

# Covered
* Diving deep on how "straightforward" import works

# Not covered
* Packages
* NamespacePaths
* Import lock
* Lots of other stuff going on behind the scenes (corner cases)

# Command Line Time

![caption](img/te_chie_la.png)

# Easy Peasy

![caption](img/import_1.png)

# Command Line Time

![caption](img/te_chie_la.png)

# Import machinery

* Importlib is the implementation of import

In [25]:
import importlib
print(importlib.machinery.SOURCE_SUFFIXES)
print(importlib.machinery.BYTECODE_SUFFIXES)
print(importlib._bootstrap_external.MAGIC_NUMBER)
importlib.import_module("collections")

['.py']
['.pyc']
b'\x16\r\r\n'


<module 'collections' from '/home/raulcd/.virtualenvs/import_magic/lib/python3.5/collections/__init__.py'>

In [21]:
# cpython/Lib/importlib/_bootstrap.py

def _gcd_import(name, package=None, level=0):
    # Excerpt: `...` marks lines elided from the original source.
    ...
    # Cache miss: `name` is not in sys.modules yet, so go find and load it.
    if name not in sys.modules:
        return _find_and_load(name, _gcd_import)
    # Cache hit: reuse the module object already stored in sys.modules.
    module = sys.modules[name]
    ...
    return module

# sys.modules: Caching modules

![caption](img/import_3.png)

In [None]:
# cpython/Lib/importlib/_bootstrap.py

def _find_and_load_unlocked(name, import_):
    # Excerpt: `...` marks elided lines; `path` is assigned in the elided part.
    ...
    # Step 1: look for a spec describing the module called `name`.
    spec = _find_spec(name, path)
    if spec is None:
        # No spec was found, so the import fails with ImportError.
        raise ImportError(_ERR_MSG.format(name), name=name)
    else:
        # Step 2: build the module from the spec that was found.
        module = _load_unlocked(spec)
    ...
    return module

# Find spec and load module

![caption](img/import_4.png)

In [None]:
# cpython/Lib/importlib/_bootstrap.py

def _find_spec(name, path, target=None):
    """Find a module's loader."""
    # Excerpt: `...` marks elided lines, so the indentation is not contiguous.
    ...
    # Ask each finder registered on sys.meta_path, in order, for a spec.
    for finder in sys.meta_path:
        ...
            find_spec = finder.find_spec
            ...
            spec = find_spec(name, path, target)
        if spec is not None:
            ...
                return spec
    else:
        # for/else: only reached when the loop ends without returning a spec.
        return None

# sys.meta_path: Importers

![caption](img/import_5.png)

In [26]:
import sys
sys.meta_path

[_frozen_importlib.BuiltinImporter,
 _frozen_importlib.FrozenImporter,
 _frozen_importlib_external.PathFinder]

In [None]:
@host - ~$ python -vv
import _frozen_importlib # frozen
import imp # builtin
import sys # builtin
import '_warnings' # <class '_frozen_importlib.BuiltinImporter'>
import '_thread' # <class '_frozen_importlib.BuiltinImporter'>
import '_weakref' # <class '_frozen_importlib.BuiltinImporter'>
import '_frozen_importlib_external' # <class '_frozen_importlib.FrozenImporter'>
import '_io' # <class '_frozen_importlib.BuiltinImporter'>
import 'marshal' # <class '_frozen_importlib.BuiltinImporter'>
import 'posix' # <class '_frozen_importlib.BuiltinImporter'>
import _thread # previously loaded ('_thread')
import '_thread' # <class '_frozen_importlib.BuiltinImporter'>
import _weakref # previously loaded ('_weakref')
import '_weakref' # <class '_frozen_importlib.BuiltinImporter'>
# installing zipimport hook
import 'zipimport' # <class '_frozen_importlib.BuiltinImporter'>


In [None]:
# cpython/Lib/importlib/_bootstrap.py
class BuiltinImporter:

    """Meta path import for built-in modules.
    All methods are either class or static methods to avoid the need to
    instantiate the class.
    """
    ...
    @classmethod
    def find_spec(cls, fullname, path=None, target=None):
        # Return a spec for `fullname` if it is a built-in module, else None.
        # `target` is accepted but not used in this excerpt.
        if path is not None:
            # A lookup restricted to a `path` never matches here.
            return None
        if _imp.is_builtin(fullname):
            # The class itself is passed as the loader of the spec.
            return spec_from_loader(fullname, cls, origin='built-in')
        else:
            return None

# sys.meta_path: BuiltinImporter

![caption](img/import_6.png)

In [None]:
# cpython/Lib/importlib/_bootstrap.py
# Frozen modules: Python modules compiled and built into the python executable itself

class FrozenImporter:

    """Meta path import for frozen modules.
    All methods are either class or static methods to avoid the need to
    instantiate the class.
    """
    ...
    @classmethod
    def find_spec(cls, fullname, path=None, target=None):
        # Return a spec for `fullname` if it is a frozen module, else None.
        # `path` and `target` are accepted but not used in this excerpt.
        if _imp.is_frozen(fullname):
            # The class itself is passed as the loader of the spec.
            return spec_from_loader(fullname, cls, origin='frozen')
        else:
            return None

# sys.meta_path: FrozenImporter

![caption](img/import_7.png)

In [None]:
# cpython/Lib/importlib/_bootstrap_external.py
class PathFinder:

    """Meta path finder for sys.path and package __path__ attributes."""

    @classmethod
    def find_spec(cls, fullname, path=None, target=None):
        """find the module on sys.path or 'path' based on sys.path_hooks and
        sys.path_importer_cache."""
        # Excerpt: `...` marks elided lines.
        ...
        # The actual search is delegated to _get_spec().
        spec = cls._get_spec(fullname, path, target)
        ...
        return spec # it may return None
    
    @classmethod
    def _get_spec(cls, fullname, path, target=None):
        """Find the loader or namespace_path for this module/package name."""
        # Excerpt: `...` marks elided lines, so the indentation is not
        # contiguous; `entry` is bound in the elided part (presumably one
        # entry of `path` -- confirm against the full source).
        ...
            # Get the finder responsible for this entry (cached per entry).
            finder = cls._path_importer_cache(entry)
            ...
            # Ask that finder for the spec of `fullname`.
            spec = finder.find_spec(fullname, target)
            ...
            # A spec that carries a loader is returned as the result.
            if spec.loader is not None:
                    return spec
            
            ...    
            raise ImportError('spec missing loader')
            ...
        # A namespace path can be returned

# PathFinder: The finder searches for a finder

![caption](img/import_8.png)

In [None]:
    @classmethod
    def _path_importer_cache(cls, path):
        """Get the finder for the path entry from sys.path_importer_cache.
        If the path entry is not in the cache, find the appropriate finder
        and cache it. If no finder is available, store None.
        """
        ...
        try:
            # Cache hit: a finder was already stored for this path entry.
            finder = sys.path_importer_cache[path]
        except KeyError:
            # Cache miss: ask _path_hooks() for a finder and remember whatever
            # it returns, so the lookup is not repeated for this entry.
            finder = cls._path_hooks(path)
            sys.path_importer_cache[path] = finder
        return finder

In [33]:
    @classmethod
    def _path_hooks(cls, path):
        """Search sys.path_hooks for a finder for 'path'.
        Return the result of the first hook that accepts 'path', or None
        if every hook raises ImportError (or there are no hooks).
        """
        # An empty (but not None) sys.path_hooks only triggers a warning.
        if sys.path_hooks is not None and not sys.path_hooks:
            _warnings.warn('sys.path_hooks is empty', ImportWarning)
        for hook in sys.path_hooks:
            try:
                # The first hook that does not raise ImportError wins.
                return hook(path)
            except ImportError:
                # This hook rejected the path; try the next one.
                continue
        else:
            # for/else: reached when no hook returned a finder.
            return None

# PathFinder: The fun begins

![caption](img/import_9.png)

In [None]:
>>> import sys
>>> sys.path_importer_cache
{'/usr/lib/python35.zip': None, '/usr/lib/python3.5/plat-linux': FileFinder('/usr/lib/python3.5/plat-linux'), 
 '/usr/lib/python3.5/site-packages': FileFinder('/usr/lib/python3.5/site-packages'), 
 '/usr/lib/python3.5': FileFinder('/usr/lib/python3.5'), '/usr/lib/python3.5/': FileFinder('/usr/lib/python3.5/'), 
 '/usr/lib/python3.5/lib-dynload': FileFinder('/usr/lib/python3.5/lib-dynload'), 
 '/usr/lib/python3.5/encodings': FileFinder('/usr/lib/python3.5/encodings'), 
 '/usr/lib/python3.5/collections': FileFinder('/usr/lib/python3.5/collections')}

In [34]:
import sys
sys.path_hooks

[zipimport.zipimporter,
 <function _frozen_importlib_external.FileFinder.path_hook.<locals>.path_hook_for_FileFinder>]

In [43]:
# /test.py must exist; otherwise None is returned
sys.path_hooks[1]('/').find_spec('test')

ModuleSpec(name='test', loader=<_frozen_importlib_external.SourceFileLoader object at 0x7f43f4035e80>, origin='/test.py')

In [None]:
>>> spec = sys.meta_path[2].find_spec('collections')
# trying /home/raulcd/collections.cpython-35m-x86_64-linux-gnu.so
# trying /home/raulcd/collections.abi3.so
# trying /home/raulcd/collections.so
# trying /home/raulcd/collections.py
# trying /home/raulcd/collections.pyc
>>> dir(spec)
[... 'cached', 'has_location', 'loader', 'loader_state', 'name', 'origin', 'parent', 'submodule_search_locations']
>>> spec.name
'collections'
>>> spec.parent
'collections'
>>> spec.cached
'/usr/lib/python3.5/collections/__pycache__/__init__.cpython-35.pyc'
>>> spec.loader
<_frozen_importlib_external.SourceFileLoader object at 0x7f8240091be0>
>>> 

# Recap: find_spec

![caption](img/import_10.png)

# Find spec and load module

![caption](img/import_4.png)

In [None]:
def _load_unlocked(spec):
    # A helper for direct use by the import system.
    # Excerpt: `...` marks elided lines, so the indentation is not contiguous.
    ...
    # Create the module object described by the spec.
    module = module_from_spec(spec)
    # The module's code is executed inside the _installed_safely context.
    with _installed_safely(module):
       ...
            spec.loader.exec_module(module)
    ...
    # The result is read back from sys.modules rather than returning the
    # local `module` variable.
    return sys.modules[spec.name]

# Load module

![caption](img/import_11.png)

In [None]:
def module_from_spec(spec):
    """Create a module based on the provided spec."""
    # Typically loaders will not implement create_module().
    module = None
    if hasattr(spec.loader, 'create_module'):
        # If create_module() returns `None` then it means default
        # module creation should be used.
        module = spec.loader.create_module(spec)
    elif hasattr(spec.loader, 'exec_module'):
        # The loader has exec_module() but no create_module(): still
        # accepted here, but flagged with a DeprecationWarning.
        _warnings.warn('starting in Python 3.6, loaders defining exec_module() '
                       'must also define create_module()',
                       DeprecationWarning, stacklevel=2)
    if module is None:
        # Default module creation: a fresh module named after the spec.
        module = _new_module(spec.name)
    # NOTE(review): presumably sets import-related attributes on the module
    # from the spec -- confirm against _init_module_attrs.
    _init_module_attrs(spec, module)
    return module

In [None]:
def _new_module(name):
    # type(sys) is the type of the `sys` module object; calling it with
    # `name` creates a new object of that same type.
    return type(sys)(name)

# Create module object

![caption](img/import_12.png)

# Load module: Context Manager and exec_module

![caption](img/import_11.png)

In [None]:
class _installed_safely:
    # Excerpt: `...` marks elided lines; self._spec and self._module are
    # set in the elided part.
    ...

    def __enter__(self):
        ...
        # On entering the `with` block, register the module in sys.modules
        # under the name given by its spec.
        sys.modules[self._spec.name] = self._module

In [None]:
# _LoaderBasics
    def exec_module(self, module):
        """Execute the module."""
        # Get the code object for the module, looked up by its __name__.
        code = self.get_code(module.__name__)
        if code is None:
            raise ImportError('cannot load module {!r} when get_code() '
                              'returns None'.format(module.__name__))
        # Run the code with the module's __dict__ as its namespace, so the
        # names it defines end up in module.__dict__.
        _bootstrap._call_with_frames_removed(exec, code, module.__dict__)

In [None]:
    def get_code(self, fullname):
        """Concrete implementation of InspectLoader.get_code.
        Reading of bytecode requires path_stats to be implemented. To write
        bytecode, set_data must also be implemented.
        """
        # Excerpt: `...` marks elided lines, so the indentation is not
        # contiguous; `source_path` is bound in the elided part.
        ...
            # Derive the bytecode cache path from the source path.
            bytecode_path = cache_from_source(source_path)
        ...

# Load module: Context Manager and exec_module

![caption](img/import_11.png)

# get_code

![caption](img/import_13.png)

In [44]:
from importlib import _bootstrap_external
_bootstrap_external.cache_from_source('/tomato.py')

'__pycache__/tomato.cpython-35.pyc'

In [46]:
_bootstrap_external.cache_from_source('/tomato.py', optimization=1)

'__pycache__/tomato.cpython-35.opt-1.pyc'

In [47]:
_bootstrap_external.cache_from_source('/tomato.py', optimization=2)

'__pycache__/tomato.cpython-35.opt-2.pyc'

In [None]:
python -O
python -OO
def test():
    """
    This is my test function --> Removed on optimization 2
    """
    assert False == True  # --> Removed on optimization 1 and 2

# get_code

![caption](img/import_13.png)

In [None]:
    def get_code(self, fullname):
        """Concrete implementation of InspectLoader.get_code.
        Reading of bytecode requires path_stats to be implemented. To write
        bytecode, set_data must also be implemented.
        """
        # Excerpt: `...` marks elided lines, so the indentation is not
        # contiguous and some statements appear without their full context.
        ...
            # Derive the bytecode cache path from the source path.
            bytecode_path = cache_from_source(source_path)
        ...
                # First attempt: reuse the cached bytecode file.
                try:
                    data = self.get_data(bytecode_path)
                except OSError:
                    # Cached bytecode could not be read; fall through to the
                    # source compilation below.
                    pass
                else:
                    try:
                        bytes_data = _validate_bytecode_header(data,
                                source_stats=st, name=fullname,
                                path=bytecode_path)
                    except (ImportError, EOFError):
                        # Header validation failed; fall through to the
                        # source compilation below.
                        pass
                    else:
                        ...
                        # Cached bytecode was accepted: build the code object
                        # from it and return without reading the source.
                        return _compile_bytecode(bytes_data, name=fullname,
                                                 bytecode_path=bytecode_path,
                                                 source_path=source_path)
        # Fallback: read the source and compile it into a code object.
        source_bytes = self.get_data(source_path)
        code_object = self.source_to_code(source_bytes, source_path)
        ...
        # When bytecode writing is not disabled, serialize the code object
        # and store it at bytecode_path.
        if (not sys.dont_write_bytecode ...):
            data = _code_to_bytecode(code_object, source_mtime,
                    len(source_bytes))
            try:
                self._cache_bytecode(source_path, bytecode_path, data)
                ...
        return code_object

In [63]:
compile("def my_func():\n    print(__name__)", "string", "exec")

<code object <module> at 0x7f43e77d30c0, file "string", line 1>

In [71]:
exec(code, globals())
my_func()

__main__


In [72]:
import dis
dis.dis(code)

  1           0 LOAD_CONST               0 (<code object my_func at 0x7f43f400c150, file "string", line 1>)
              3 LOAD_CONST               1 ('my_func')
              6 MAKE_FUNCTION            0
              9 STORE_NAME               0 (my_func)
             12 LOAD_CONST               2 (None)
             15 RETURN_VALUE


# Load code

![caption](img/import_14.png)

# Starting point

![caption](img/import_1.png)

# End

![caption](img/import_15.png)

# To remember

* Specs
 * Definition of how to load a module and metadata of the module
* Finders
 * Strategies to find the module and specify a Loader.
 * Creation of Spec
* Loaders
 * Load a module
 * Insert it in sys.modules

# Extend import

* sys.meta_path
 * New Finders (e.g. HTTPFinder, FTPFinder)
* sys.path_hooks
 * New PathFinders (e.g. XMLFinder, GoFinder)
* sys.modules
 * Cache of modules
* sys.path
 * Where the finders are going to look