Skip to content

Commit

Permalink
refactor: Improve stats code and performance
Browse files Browse the repository at this point in the history
  • Loading branch information
pawamoy committed May 12, 2024
1 parent 721ce7d commit eeb497f
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 167 deletions.
5 changes: 3 additions & 2 deletions src/griffe/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
from griffe.git import get_latest_tag, get_repo_root
from griffe.loader import GriffeLoader, load, load_git
from griffe.logger import get_logger
from griffe.stats import _format_stats

if TYPE_CHECKING:
from griffe.extensions.base import Extensions, ExtensionType
Expand Down Expand Up @@ -411,7 +410,9 @@ def dump(
elapsed = datetime.now(tz=timezone.utc) - started

if stats:
logger.info(_format_stats({"time_spent_serializing": elapsed.microseconds, **loader.stats()}))
loader_stats = loader.stats()
loader_stats.time_spent_serializing = elapsed.microseconds
logger.info(loader_stats.as_text())

return 0 if len(data_packages) == len(packages) else 1

Expand Down
9 changes: 6 additions & 3 deletions src/griffe/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from griffe.importer import dynamic_import
from griffe.logger import get_logger
from griffe.merger import merge_stubs
from griffe.stats import stats
from griffe.stats import Stats

if TYPE_CHECKING:
from griffe.enumerations import Parser
Expand Down Expand Up @@ -506,13 +506,16 @@ def resolve_module_aliases(

return resolved, unresolved

def stats(self) -> dict:
def stats(self) -> Stats:
"""Compute some statistics.
Returns:
Some statistics.
"""
return {**stats(self), **self._time_stats}
stats = Stats(self)
stats.time_spent_visiting = self._time_stats["time_spent_visiting"]
stats.time_spent_inspecting = self._time_stats["time_spent_inspecting"]
return stats

def _load_package(self, package: Package | NamespacePackage, *, submodules: bool = True) -> Module:
top_module = self._load_module(package.name, package.path, submodules=submodules)
Expand Down
294 changes: 132 additions & 162 deletions src/griffe/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,172 +3,142 @@
from __future__ import annotations

from collections import defaultdict
from typing import TYPE_CHECKING, Iterable, Union, cast
from pathlib import Path
from typing import TYPE_CHECKING

from griffe.dataclasses import Class, Module
from griffe.exceptions import BuiltinModuleError
from griffe.enumerations import Kind

if TYPE_CHECKING:
from griffe.dataclasses import Alias, Object
from griffe.loader import GriffeLoader


def _direct(objects: Iterable[Object | Alias]) -> list[Object | Alias]:
return [obj for obj in objects if not obj.is_alias]


def _n_modules(module: Module) -> int:
    """Count the non-alias submodules of a module, recursively.

    Parameters:
        module: The module whose submodules are counted (the module itself is not counted).

    Returns:
        The number of direct and nested non-alias submodules.
    """
    children = _direct(module.modules.values())
    total = len(children)
    for child in children:
        total += _n_modules(cast(Module, child))
    return total


def _n_classes(module_or_class: Module | Class) -> int:
    """Count the non-alias classes found under a module or class, recursively.

    Parameters:
        module_or_class: The module or class to explore.

    Returns:
        The number of non-alias classes declared in it, in its submodules and in its nested classes.
    """
    child_modules = _direct(module_or_class.modules.values())
    child_classes = _direct(module_or_class.classes.values())
    # Same alias check as `_direct`, applied once more to the combined scopes.
    scopes = [scope for scope in (*child_modules, *child_classes) if not scope.is_alias]
    total = len(child_classes)
    for scope in scopes:
        total += _n_classes(cast(Union[Module, Class], scope))
    return total


def _n_functions(module_or_class: Module | Class) -> int:
    """Count the non-alias functions found under a module or class, recursively.

    Parameters:
        module_or_class: The module or class to explore.

    Returns:
        The number of non-alias functions declared in it, in its submodules and in its nested classes.
    """
    child_modules = _direct(module_or_class.modules.values())
    child_classes = _direct(module_or_class.classes.values())
    own_functions = _direct(module_or_class.functions.values())
    total = len(own_functions)
    for scope in (*child_modules, *child_classes):
        total += _n_functions(cast(Union[Module, Class], scope))
    return total


def _n_attributes(module_or_class: Module | Class) -> int:
    """Count the non-alias attributes found under a module or class, recursively.

    Parameters:
        module_or_class: The module or class to explore.

    Returns:
        The number of non-alias attributes declared in it, in its submodules and in its nested classes.
    """
    child_modules = _direct(module_or_class.modules.values())
    child_classes = _direct(module_or_class.classes.values())
    own_attributes = _direct(module_or_class.attributes.values())
    total = len(own_attributes)
    for scope in (*child_modules, *child_classes):
        total += _n_attributes(cast(Union[Module, Class], scope))
    return total


def _merge_exts(exts1: dict[str, int], exts2: dict[str, int]) -> dict[str, int]:
for ext, value in exts2.items():
exts1[ext] += value
return exts1


def _sum_extensions(exts: dict[str, int], module: Module) -> None:
    """Add to `exts` the per-extension counts of a module and its non-alias submodules.

    Parameters:
        exts: The mapping of file extension to module count, updated in place.
        module: The module to start counting from.
    """
    local_counts = defaultdict(int)
    try:
        extension = module.filepath.suffix  # type: ignore[union-attr]
    except BuiltinModuleError:
        # Such modules are recorded under the empty extension.
        local_counts[""] = 1
    except AttributeError:
        # The file path exposes no `suffix`: this module is not counted.
        pass
    else:
        if extension:
            local_counts[extension] = 1
    for child in _direct(module.modules.values()):
        _sum_extensions(local_counts, cast(Module, child))
    _merge_exts(exts, local_counts)


def stats(loader: GriffeLoader) -> dict:
    """Compute loading statistics for a loader.

    Parameters:
        loader: The loader to compute stats from.

    Returns:
        A dictionary with the `packages`, `modules`, `classes`, `functions`,
        `attributes`, `modules_by_extension` and `lines` entries.
    """
    # Pre-seed the well-known extensions so they are always present in the result.
    known_extensions = ("", ".py", ".pyi", ".pyc", ".pyo", ".pyd", ".so")
    modules_by_extension = defaultdict(int, dict.fromkeys(known_extensions, 0))
    top_modules = loader.modules_collection.members.values()
    for top_module in top_modules:
        _sum_extensions(modules_by_extension, top_module)
    n_lines = 0
    for source_lines in loader.lines_collection.values():
        n_lines += len(source_lines)
    result = {}
    result["packages"] = len(top_modules)
    # Top-level modules count as modules too, on top of their submodules.
    result["modules"] = len(top_modules) + sum(_n_modules(mod) for mod in top_modules)
    result["classes"] = sum(_n_classes(mod) for mod in top_modules)
    result["functions"] = sum(_n_functions(mod) for mod in top_modules)
    result["attributes"] = sum(_n_attributes(mod) for mod in top_modules)
    result["modules_by_extension"] = modules_by_extension
    result["lines"] = n_lines
    return result


def _format_stats(stats: dict) -> str:
lines = []
packages = stats["packages"]
modules = stats["modules"]
classes = stats["classes"]
functions = stats["functions"]
attributes = stats["attributes"]
objects = sum((modules, classes, functions, attributes))
lines.append("Statistics")
lines.append("---------------------")
lines.append("Number of loaded objects")
lines.append(f" Modules: {modules}")
lines.append(f" Classes: {classes}")
lines.append(f" Functions: {functions}")
lines.append(f" Attributes: {attributes}")
lines.append(f" Total: {objects} across {packages} packages")
per_ext = stats["modules_by_extension"]
builtin = per_ext[""]
regular = per_ext[".py"]
stubs = per_ext[".pyi"]
compiled = modules - builtin - regular - stubs
lines.append("")
lines.append(f"Total number of lines: {stats['lines']}")
lines.append("")
lines.append("Modules")
lines.append(f" Builtin: {builtin}")
lines.append(f" Compiled: {compiled}")
lines.append(f" Regular: {regular}")
lines.append(f" Stubs: {stubs}")
lines.append(" Per extension:")
for ext, number in sorted(per_ext.items()):
if ext:
lines.append(f" {ext}: {number}")
visit_time = stats["time_spent_visiting"] / 1000
inspect_time = stats["time_spent_inspecting"] / 1000
total_time = visit_time + inspect_time
visit_percent = visit_time / total_time * 100
inspect_percent = inspect_time / total_time * 100
try:
visit_time_per_module = visit_time / regular
except ZeroDivisionError:
visit_time_per_module = 0
inspected_modules = builtin + compiled
try:
inspect_time_per_module = visit_time / inspected_modules
except ZeroDivisionError:
inspect_time_per_module = 0
lines.append("")
lines.append(
f"Time spent visiting modules ({regular}): "
f"{visit_time}ms, {visit_time_per_module:.02f}ms/module ({visit_percent:.02f}%)",
)
lines.append(
f"Time spent inspecting modules ({inspected_modules}): "
f"{inspect_time}ms, {inspect_time_per_module:.02f}ms/module ({inspect_percent:.02f}%)",
)
serialize_time = stats["time_spent_serializing"] / 1000
serialize_time_per_module = serialize_time / modules
lines.append(f"Time spent serializing: {serialize_time}ms, {serialize_time_per_module:.02f}ms/module")
return "\n".join(lines)


# Names exported by a star-import of this module: only `stats`.
__all__ = ["stats"]
class Stats:
    """Load statistics for a Griffe loader."""

    def __init__(self, loader: GriffeLoader) -> None:
        """Initialize the stats object.

        Parameters:
            loader: The loader to compute stats for.
        """
        # Kept because `as_text` reads `loader.force_inspection`.
        self.loader = loader
        # Pre-seed the well-known extensions so they are always present.
        modules_by_extension = defaultdict(
            int,
            {
                "": 0,
                ".py": 0,
                ".pyi": 0,
                ".pyc": 0,
                ".pyo": 0,
                ".pyd": 0,
                ".so": 0,
            },
        )
        top_modules = loader.modules_collection.members.values()
        # Number of non-alias objects, per kind.
        self.by_kind = {
            Kind.MODULE: 0,
            Kind.CLASS: 0,
            Kind.FUNCTION: 0,
            Kind.ATTRIBUTE: 0,
        }
        # Number of top-level modules.
        self.packages = len(top_modules)
        # Number of modules, per file extension ("" for modules without a file path).
        self.modules_by_extension = modules_by_extension
        # Total number of lines held by the loader's lines collection.
        self.lines = sum(len(lines) for lines in loader.lines_collection.values())
        # Time counters: left at zero here and filled in by callers.
        # `as_text` divides them by 1000 and reports milliseconds (presumably microseconds in).
        self.time_spent_visiting = 0
        self.time_spent_inspecting = 0
        self.time_spent_serializing = 0
        for module in top_modules:
            self._itercount(module)

    def _itercount(self, root: Object | Alias) -> None:
        """Count `root` and all its non-alias members, recursively.

        Parameters:
            root: The object to count from. Aliases are ignored entirely.
        """
        if root.is_alias:
            return
        self.by_kind[root.kind] += 1
        if root.is_module:
            if isinstance(root.filepath, Path):
                self.modules_by_extension[root.filepath.suffix] += 1
            elif root.filepath is None:
                self.modules_by_extension[""] += 1
            # Any other kind of file path is not counted per extension.
        for member in root.members.values():
            self._itercount(member)

    def as_text(self) -> str:
        """Format the statistics as text.

        Returns:
            Text stats.
        """
        lines = []
        packages = self.packages
        modules = self.by_kind[Kind.MODULE]
        classes = self.by_kind[Kind.CLASS]
        functions = self.by_kind[Kind.FUNCTION]
        attributes = self.by_kind[Kind.ATTRIBUTE]
        objects = sum((modules, classes, functions, attributes))
        lines.append("Statistics")
        lines.append("---------------------")
        lines.append("Number of loaded objects")
        lines.append(f" Modules: {modules}")
        lines.append(f" Classes: {classes}")
        lines.append(f" Functions: {functions}")
        lines.append(f" Attributes: {attributes}")
        lines.append(f" Total: {objects} across {packages} packages")
        per_ext = self.modules_by_extension
        builtin = per_ext[""]
        regular = per_ext[".py"]
        stubs = per_ext[".pyi"]
        # Every module that is neither builtin, regular nor a stub is reported as compiled.
        compiled = modules - builtin - regular - stubs
        lines.append("")
        lines.append(f"Total number of lines: {self.lines}")
        lines.append("")
        lines.append("Modules")
        lines.append(f" Builtin: {builtin}")
        lines.append(f" Compiled: {compiled}")
        lines.append(f" Regular: {regular}")
        lines.append(f" Stubs: {stubs}")
        lines.append(" Per extension:")
        for ext, number in sorted(per_ext.items()):
            # The empty extension is already reported as "Builtin" above.
            if ext:
                lines.append(f" {ext}: {number}")

        visit_time = self.time_spent_visiting / 1000
        inspect_time = self.time_spent_inspecting / 1000
        total_time = visit_time + inspect_time
        # Guarded: the time counters may still be at their zero default.
        visit_percent = visit_time / total_time * 100 if total_time else 0
        inspect_percent = inspect_time / total_time * 100 if total_time else 0

        # With forced inspection, regular modules are counted as inspected rather than visited.
        force_inspection = self.loader.force_inspection
        visited_modules = 0 if force_inspection else regular
        try:
            visit_time_per_module = visit_time / visited_modules
        except ZeroDivisionError:
            visit_time_per_module = 0

        inspected_modules = builtin + compiled + (regular if force_inspection else 0)
        try:
            inspect_time_per_module = inspect_time / inspected_modules
        except ZeroDivisionError:
            inspect_time_per_module = 0

        lines.append("")
        lines.append(
            f"Time spent visiting modules ({visited_modules}): "
            f"{visit_time}ms, {visit_time_per_module:.02f}ms/module ({visit_percent:.02f}%)",
        )
        lines.append(
            f"Time spent inspecting modules ({inspected_modules}): "
            f"{inspect_time}ms, {inspect_time_per_module:.02f}ms/module ({inspect_percent:.02f}%)",
        )

        serialize_time = self.time_spent_serializing / 1000
        # Guarded: the loader may not have loaded any module.
        try:
            serialize_time_per_module = serialize_time / modules
        except ZeroDivisionError:
            serialize_time_per_module = 0
        lines.append(f"Time spent serializing: {serialize_time}ms, {serialize_time_per_module:.02f}ms/module")

        return "\n".join(lines)


# Alias kept under the old name: `stats` is bound to the `Stats` class itself.
stats = Stats
"""Deprecated. Use `Stats` instead."""

# Names exported by a star-import of this module: only `Stats` (the `stats` alias is not listed).
__all__ = ["Stats"]

0 comments on commit eeb497f

Please sign in to comment.