From 1949fab99ec0c11e7a9926685fda20ee3b5b5274 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 3 Dec 2023 18:07:17 +0700 Subject: [PATCH] detect submodules not found via `pkgutil` --- CHANGELOG.md | 2 + pdoc/doc.py | 9 +- pdoc/extract.py | 82 ++++++--- pyproject.toml | 1 + test/test_extract.py | 8 + test/test_snapshot.py | 1 + test/testdata/pyo3_sample_library.html | 245 +++++++++++++++++++++++++ test/testdata/pyo3_sample_library.txt | 1 + 8 files changed, 320 insertions(+), 29 deletions(-) create mode 100644 test/testdata/pyo3_sample_library.html create mode 100644 test/testdata/pyo3_sample_library.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index f1d1e824..1535c48a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ + - pdoc now documents PyO3 or pybind11 submodules that are not picked up by Python's builtin pkgutil module. + ([#633](https://github.com/mitmproxy/pdoc/issues/633), @mhils) - Add support for `code-block` ReST directives ([#624](https://github.com/mitmproxy/pdoc/pull/624), @JCGoran) - If a variable's value meets certain entropy criteria and matches an environment variable value, diff --git a/pdoc/doc.py b/pdoc/doc.py index 2f00b1c8..352a0a33 100644 --- a/pdoc/doc.py +++ b/pdoc/doc.py @@ -28,7 +28,6 @@ import inspect import os from pathlib import Path -import pkgutil import re import sys import textwrap @@ -454,9 +453,6 @@ def own_members(self) -> list[Doc]: @cached_property def submodules(self) -> list[Module]: """A list of all (direct) submodules.""" - if not self.is_package: - return [] - include: Callable[[str], bool] mod_all = _safe_getattr(self.obj, "__all__", False) if mod_all is not False: @@ -471,9 +467,8 @@ def include(name: str) -> bool: # (think of OS-specific modules, e.g. _linux.py failing to import on Windows). return not name.startswith("_") - submodules = [] - for mod in pkgutil.iter_modules(self.obj.__path__, f"{self.fullname}."): - _, _, mod_name = mod.name.rpartition(".") + submodules: list[Module] = [] + for mod_name, mod in extract.iter_modules2(self.obj).items(): if not include(mod_name): continue try: diff --git a/pdoc/extract.py b/pdoc/extract.py index 2f32dd04..7d885035 100644 --- a/pdoc/extract.py +++ b/pdoc/extract.py @@ -229,22 +229,71 @@ def load_module(module: str) -> types.ModuleType: """ +def iter_modules2(module: types.ModuleType) -> dict[str, pkgutil.ModuleInfo]: + """ + Returns all direct child modules of a given module. + This function is similar to `pkgutil.iter_modules`, but + + 1. Respects a package's `__all__` attribute if specified. + If `__all__` is defined, submodules not listed in `__all__` are excluded. + 2. It will try to detect submodules that are not findable with iter_modules, + but are present in the module object. + """ + mod_all = getattr(module, "__all__", None) + + submodules = {} + + for submodule in pkgutil.iter_modules( + getattr(module, "__path__", []), f"{module.__name__}." + ): + name = submodule.name.rpartition(".")[2] + if mod_all is None or name in mod_all: + submodules[name] = submodule + + # 2023-12: PyO3 and pybind11 submodules are not detected by pkgutil + # This is a hacky workaround to register them. + members = dir(module) if mod_all is None else mod_all + for name in members: + if name in submodules or name == "__main__": + continue + member = getattr(module, name, None) + is_wild_child_module = ( + isinstance(member, types.ModuleType) + # the name is either just "bar", but can also be "foo.bar", + # see https://github.com/PyO3/pyo3/issues/759#issuecomment-1811992321 + and ( + member.__name__ == f"{module.__name__}.{name}" + or ( + member.__name__ == name + and sys.modules.get(member.__name__, None) is not member + ) + ) + ) + if is_wild_child_module: + # fixup the module name so that the rest of pdoc does not break + assert member + member.__name__ = f"{module.__name__}.{name}" + sys.modules[f"{module.__name__}.{name}"] = member + submodules[name] = pkgutil.ModuleInfo( + None, # type: ignore + name=f"{module.__name__}.{name}", + ispkg=True, + ) + + submodules.pop("__main__", None) # https://github.com/mitmproxy/pdoc/issues/438 + + return submodules + + def walk_packages2( modules: Iterable[pkgutil.ModuleInfo], ) -> Iterator[pkgutil.ModuleInfo]: """ For a given list of modules, recursively yield their names and all their submodules' names. - This function is similar to `pkgutil.walk_packages`, but respects a package's `__all__` attribute if specified. - If `__all__` is defined, submodules not listed in `__all__` are excluded. + This function is similar to `pkgutil.walk_packages`, but based on `iter_modules2`. """ - - # noinspection PyDefaultArgument - def seen(p, m={}): # pragma: no cover - if p in m: - return True - m[p] = True - + # the original walk_packages implementation has a recursion check for path, but that does not seem to be needed? for mod in modules: yield mod @@ -255,19 +304,8 @@ def seen(p, m={}): # pragma: no cover warnings.warn(f"Error loading {mod.name}:\n{traceback.format_exc()}") continue - mod_all = getattr(module, "__all__", None) - # don't traverse path items we've seen before - path = [p for p in (getattr(module, "__path__", None) or []) if not seen(p)] - - submodules = [] - for submodule in pkgutil.iter_modules(path, f"{mod.name}."): - name = submodule.name.rpartition(".")[2] - if name == "__main__": - continue # https://github.com/mitmproxy/pdoc/issues/438 - if mod_all is None or name in mod_all: - submodules.append(submodule) - - yield from walk_packages2(submodules) + submodules = iter_modules2(module) + yield from walk_packages2(submodules.values()) def module_mtime(modulename: str) -> float | None: diff --git a/pyproject.toml b/pyproject.toml index 5dcad820..3e2448c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ dev = [ "pytest-timeout", "hypothesis", "pygments >= 2.14.0", + "pdoc-pyo3-sample-library==1.0.11", ] [build-system] diff --git a/test/test_extract.py b/test/test_extract.py index 7da44761..800ec4ae 100644 --- a/test/test_extract.py +++ b/test/test_extract.py @@ -60,6 +60,14 @@ def test_walk_specs(): "test.mod_with_main.__main__", ] + assert walk_specs(["pdoc_pyo3_sample_library"]) == [ + "pdoc_pyo3_sample_library", + "pdoc_pyo3_sample_library.submodule", + "pdoc_pyo3_sample_library.submodule.subsubmodule", + "pdoc_pyo3_sample_library.explicit_submodule", + "pdoc_pyo3_sample_library.correct_name_submodule", + ] + def test_parse_spec(monkeypatch): p = sys.path diff --git a/test/test_snapshot.py b/test/test_snapshot.py index ae2dd132..23721d14 100755 --- a/test/test_snapshot.py +++ b/test/test_snapshot.py @@ -160,6 +160,7 @@ def outfile(self, format: str) -> Path: }, with_output_directory=True, ), + Snapshot("pyo3_sample_library", specs=["pdoc_pyo3_sample_library"]), Snapshot("top_level_reimports", ["top_level_reimports"]), Snapshot("type_checking_imports"), Snapshot("type_stub", min_version=(3, 10)), diff --git a/test/testdata/pyo3_sample_library.html b/test/testdata/pyo3_sample_library.html new file mode 100644 index 00000000..4b0107ce --- /dev/null +++ b/test/testdata/pyo3_sample_library.html @@ -0,0 +1,245 @@ + + + + + + + pdoc_pyo3_sample_library API documentation + + + + + + + + + +
+
+

+pdoc_pyo3_sample_library

+ +

This is a PyO3 demo library used to verify that submodules are handled properly.

+
+ + + + + +
1from .pdoc_pyo3_sample_library import *
+2
+3__doc__ = pdoc_pyo3_sample_library.__doc__
+4if hasattr(pdoc_pyo3_sample_library, "__all__"):
+5    __all__ = pdoc_pyo3_sample_library.__all__
+
+ + +
+
+ + \ No newline at end of file diff --git a/test/testdata/pyo3_sample_library.txt b/test/testdata/pyo3_sample_library.txt new file mode 100644 index 00000000..5a429b70 --- /dev/null +++ b/test/testdata/pyo3_sample_library.txt @@ -0,0 +1 @@ + \ No newline at end of file