# Advanced Module Patterns

**Chapter 7 - Learning Python, 5th Edition**

Beyond basic imports, Python's module system supports powerful patterns for
lazy loading, plugin architectures, conditional dependencies, and module-level
customization. These techniques are essential for building performant, extensible,
and maintainable applications.

## Lazy Imports for Performance

Heavy modules (e.g., `pandas`, `numpy`) can slow down application startup.
Lazy imports defer the actual import until the module is first used,
improving startup time for CLI tools and web servers.

In [None]:
import importlib
import time
from types import ModuleType
from typing import Any


class LazyModule:
    """A proxy that delays importing a module until first attribute access.

    Creating the proxy only records the module name. The first attribute
    that is not found on the proxy itself triggers
    ``importlib.import_module`` and is then forwarded to the real module;
    later accesses reuse the module object stored on the proxy.
    """

    def __init__(self, module_name: str) -> None:
        """Remember *module_name* (dotted path) without importing it."""
        self._module_name = module_name
        self._module: ModuleType | None = None

    def _load(self) -> ModuleType:
        """Import the target module on first call and return it."""
        if self._module is None:
            print(f"  [LazyModule] Importing '{self._module_name}' now...")
            self._module = importlib.import_module(self._module_name)
        return self._module

    def __getattr__(self, name: str) -> Any:
        """Forward attributes that are missing on the proxy to the real module.

        Raises:
            AttributeError: If the module has no such attribute, or if the
                proxy's own state has not been initialised yet.
            ImportError: If the deferred import fails.
        """
        # __getattr__ only runs when normal lookup fails. If one of the
        # proxy's own slots is what is missing (instance created without
        # __init__, as copy/pickle do), delegating would call _load(), which
        # reads self._module and re-enters here until RecursionError.
        if name in ("_module", "_module_name"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self._load(), name)

    def __repr__(self) -> str:
        """Show the target name and whether the import has happened yet."""
        status = "loaded" if self._module is not None else "deferred"
        return f"LazyModule({self._module_name!r}, {status})"


# Demo: constructing the proxies only stores the module names, so these
# "imports" are instant - no actual loading happens
start = time.perf_counter()
json_lazy = LazyModule("json")
csv_lazy = LazyModule("csv")
xml_lazy = LazyModule("xml.etree.ElementTree")
elapsed = time.perf_counter() - start
print(f"Lazy setup time: {elapsed * 1000:.3f}ms")
# The repr reports "deferred" for a proxy that has not imported its module yet
print(f"json_lazy: {json_lazy}")
print(f"csv_lazy:  {csv_lazy}")

# Module is loaded only when first accessed: `dumps` is not an attribute of
# the proxy, so the lookup falls through to __getattr__, which imports json
print(f"\nFirst access triggers import:")
result = json_lazy.dumps({"lazy": True})
print(f"json_lazy.dumps result: {result}")
# The same proxy now reports "loaded"
print(f"json_lazy: {json_lazy}")

# Subsequent access reuses the module object cached on the proxy
print(f"\nSecond access (no import message):")
result2 = json_lazy.loads(result)
print(f"json_lazy.loads result: {result2}")

## Plugin/Registry Pattern Using `importlib`

The plugin pattern allows extending an application without modifying its core code.
Using `importlib`, plugins can be discovered and loaded dynamically at runtime
based on configuration, file system scanning, or entry points.

In [None]:
import importlib
import sys
import tempfile
from pathlib import Path
from typing import Protocol, Any


# Define the plugin interface
class Serializer(Protocol):
    """Structural interface that every serializer plugin is expected to match.

    A plugin does not need to inherit from this class; it only needs a
    ``name`` attribute and the two methods below.
    """

    # Identifier of the plugin.
    name: str

    def serialize(self, data: dict[str, Any]) -> str:
        """Encode *data* as text."""
        ...

    def deserialize(self, raw: str) -> dict[str, Any]:
        """Decode *raw* text into a dictionary."""
        ...


class PluginRegistry:
    """Discover and manage plugins dynamically.

    Plugins live in an in-memory mapping from plugin name to plugin object.
    They can be registered directly, loaded from a named module, or
    discovered by scanning a directory of ``.py`` files.
    """

    def __init__(self) -> None:
        # dict keeps insertion order, so list_plugins() reports names in the
        # order they were first registered.
        self._plugins: dict[str, Any] = {}

    def register(self, name: str, plugin: Any) -> None:
        """Register *plugin* under *name*, replacing any existing entry."""
        self._plugins[name] = plugin
        print(f"  Registered plugin: {name!r}")

    def load_from_module(self, module_name: str, attr_name: str) -> None:
        """Import *module_name* and register the object bound to *attr_name*.

        The plugin is registered under its own ``name`` attribute.
        ``ImportError`` and ``AttributeError`` (module missing, attribute
        missing, or plugin without ``name``) are reported on stdout instead
        of being raised, so one broken plugin does not abort discovery.
        Any other exception raised while the module executes propagates.
        """
        try:
            module = importlib.import_module(module_name)
            plugin = getattr(module, attr_name)
            self.register(plugin.name, plugin)
        except (ImportError, AttributeError) as e:
            print(f"  Failed to load plugin from {module_name}: {e}")

    def load_plugins_from_dir(self, plugin_dir: Path) -> None:
        """Auto-discover plugins from .py files in a directory.

        Every ``*.py`` file whose name does not start with ``_`` is imported
        by its stem, in sorted order, and its module-level ``plugin`` object
        is registered.

        Side effect: *plugin_dir* is inserted at the front of ``sys.path``
        (once) and left there; the caller is responsible for removing it.
        """
        if str(plugin_dir) not in sys.path:
            sys.path.insert(0, str(plugin_dir))

        # Plugin files may have been written after the import system last
        # listed this directory; without this the finders can serve a stale
        # listing and report a freshly created module as missing.
        importlib.invalidate_caches()

        for py_file in sorted(plugin_dir.glob("*.py")):
            if py_file.name.startswith("_"):
                continue
            module_name = py_file.stem
            self.load_from_module(module_name, "plugin")

    def get(self, name: str) -> Any:
        """Retrieve a registered plugin.

        Raises:
            KeyError: If no plugin is registered under *name*.
        """
        return self._plugins[name]

    def list_plugins(self) -> list[str]:
        """List all registered plugin names."""
        return list(self._plugins.keys())


# Create plugin files on disk. The TemporaryDirectory handle is kept so the
# directory can be deleted in the clean-up step below (a bare mkdtemp() would
# leave it behind on disk after the demo).
_plugin_tmp = tempfile.TemporaryDirectory()
plugin_dir = Path(_plugin_tmp.name)

# Each plugin module exposes a module-level `plugin` object; that is the
# attribute name load_plugins_from_dir() looks up.
(plugin_dir / "json_serializer.py").write_text(
    'import json\n'
    'from typing import Any\n'
    '\n'
    'class JsonSerializer:\n'
    '    name: str = "json"\n'
    '\n'
    '    def serialize(self, data: dict[str, Any]) -> str:\n'
    '        return json.dumps(data, indent=2)\n'
    '\n'
    '    def deserialize(self, raw: str) -> dict[str, Any]:\n'
    '        return json.loads(raw)\n'
    '\n'
    'plugin = JsonSerializer()\n'
)

(plugin_dir / "csv_serializer.py").write_text(
    'from typing import Any\n'
    '\n'
    'class CsvSerializer:\n'
    '    name: str = "csv"\n'
    '\n'
    '    def serialize(self, data: dict[str, Any]) -> str:\n'
    '        keys = list(data.keys())\n'
    '        vals = [str(data[k]) for k in keys]\n'
    '        return ",".join(keys) + "\\n" + ",".join(vals)\n'
    '\n'
    '    def deserialize(self, raw: str) -> dict[str, Any]:\n'
    '        lines = raw.strip().split("\\n")\n'
    '        keys = lines[0].split(",")\n'
    '        vals = lines[1].split(",")\n'
    '        return dict(zip(keys, vals))\n'
    '\n'
    'plugin = CsvSerializer()\n'
)

# Auto-discover and load plugins
registry = PluginRegistry()
print("Auto-discovering plugins:")
registry.load_plugins_from_dir(plugin_dir)

print(f"\nRegistered: {registry.list_plugins()}")

# Use plugins dynamically: the loop only relies on the serialize/deserialize
# methods, not on which concrete plugin class it received
test_data: dict[str, Any] = {"name": "Alice", "score": 95}
for plugin_name in registry.list_plugins():
    plugin = registry.get(plugin_name)
    serialized = plugin.serialize(test_data)
    deserialized = plugin.deserialize(serialized)
    print(f"\n[{plugin_name}] serialize: {serialized!r}")
    print(f"[{plugin_name}] deserialize: {deserialized}")

# Clean up: forget the imported plugin modules, undo the sys.path change made
# during discovery, and delete the temporary plugin directory
for name in ["json_serializer", "csv_serializer"]:
    sys.modules.pop(name, None)
sys.path.remove(str(plugin_dir))
_plugin_tmp.cleanup()

## Module as Singleton (Module-Level State)

Because Python caches imported modules in `sys.modules`, a module is effectively
a singleton: all importers share the same module object and its state. This
makes modules a natural place for application-wide configuration and registries.

In [None]:
import sys
import types
from typing import Any, Callable


# Simulate a module that acts as a singleton configuration store.
# The module object is built in memory instead of being loaded from a file.
config_module = types.ModuleType("app_settings")
config_module.__doc__ = "Application-wide configuration (singleton pattern)."

# Module-level state acts as a singleton.
# The source text is executed with the module's own __dict__ as its globals,
# so every name it defines (_settings, configure, freeze, ...) becomes an
# attribute of config_module, and `global _frozen` rebinds the module attribute.
exec(
    'from typing import Any\n'
    '\n'
    '_settings: dict[str, Any] = {}\n'
    '_frozen: bool = False\n'
    '\n'
    'def configure(**kwargs: Any) -> None:\n'
    '    """Set configuration values. Raises if config is frozen."""\n'
    '    if _frozen:\n'
    '        raise RuntimeError("Configuration is frozen")\n'
    '    _settings.update(kwargs)\n'
    '\n'
    'def freeze() -> None:\n'
    '    """Prevent further configuration changes."""\n'
    '    global _frozen\n'
    '    _frozen = True\n'
    '\n'
    'def get(key: str, default: Any = None) -> Any:\n'
    '    """Retrieve a configuration value."""\n'
    '    return _settings.get(key, default)\n'
    '\n'
    'def as_dict() -> dict[str, Any]:\n'
    '    """Return a copy of all settings."""\n'
    '    return dict(_settings)\n',
    config_module.__dict__,
)

# Register the module so the `import app_settings` statements below resolve
# to this object from the sys.modules cache
sys.modules["app_settings"] = config_module

# Simulate two different parts of the application importing the same module
import app_settings

# Module A configures the settings
app_settings.configure(database="postgresql://localhost/mydb", debug=True)
print(f"After Module A configures: {app_settings.as_dict()}")

# Module B also imports and sees the same state
import app_settings as settings_b  # Same object from sys.modules
print(f"Module B sees same state: {settings_b.as_dict()}")
print(f"Same object? {app_settings is settings_b}")

# Module B adds more configuration; it lands in the same _settings dict
settings_b.configure(cache_ttl=300, max_retries=3)
print(f"\nAfter Module B configures: {app_settings.as_dict()}")

# Freeze to prevent accidental changes: configure() raises RuntimeError
# once _frozen is True
app_settings.freeze()
try:
    app_settings.configure(debug=False)
except RuntimeError as e:
    print(f"\nFrozen config: {e}")

print(f"Final config: {app_settings.as_dict()}")

# Clean up: remove the simulated module from the import cache
del sys.modules["app_settings"]

## Conditional Imports (`try/except ImportError`)

Conditional imports allow code to work with optional dependencies, provide
fallbacks for missing packages, and adapt to different Python versions.
This is the standard pattern for optional dependencies.

In [None]:
from typing import Any

# Pattern 1: prefer the fast C implementation, fall back to pure Python.
# The flag is assigned in the `else` clause so the `try` body contains only
# the import that can actually fail.
try:
    from collections import OrderedDict  # C implementation
except ImportError:
    # Hypothetical fallback
    _ORDERED_DICT_IMPL = "pure Python fallback"
else:
    _ORDERED_DICT_IMPL = "C (collections)"

print(f"OrderedDict implementation: {_ORDERED_DICT_IMPL}")


# Pattern 2: optional dependency with graceful degradation.
# HAS_TOML records whether the import succeeded so callers can branch on it.
try:
    import tomllib  # Python 3.11+ built-in TOML parser
except ImportError:
    HAS_TOML = False
else:
    HAS_TOML = True


def load_config(path: str) -> dict[str, Any]:
    """Load configuration from a TOML file, or from JSON for any other path.

    Args:
        path: File to read. A path ending in ``.toml`` is parsed as TOML;
            every other path is parsed as JSON.

    Returns:
        The parsed document.

    Raises:
        ImportError: If a ``.toml`` path is given but ``HAS_TOML`` is false,
            i.e. ``tomllib`` could not be imported.
        OSError: If the file cannot be opened.
        ValueError: If the file content is not valid TOML/JSON (both parsers
            raise ``ValueError`` subclasses).
    """
    if path.endswith(".toml"):
        if not HAS_TOML:
            # Only the built-in tomllib is ever imported here, so the message
            # must not suggest installing a package this code would not use.
            raise ImportError(
                "TOML support requires Python 3.11+ "
                "(the built-in 'tomllib' module)."
            )
        # tomllib.load() requires a binary file object.
        with open(path, "rb") as f:
            return tomllib.load(f)

    # Imported lazily so the JSON parser is only loaded when it is needed.
    import json
    # JSON interchange files are UTF-8; do not depend on the platform's
    # locale encoding (which is not UTF-8 on every system).
    with open(path, encoding="utf-8") as f:
        return json.load(f)


print(f"TOML support available: {HAS_TOML}")


# Pattern 3: Version-dependent imports
import sys

# Older interpreters are handled first: try the backport package, and as a
# last resort bind Self to a plain TypeVar. Newer interpreters take the
# standard-library name directly.
if sys.version_info < (3, 11):
    try:
        from typing_extensions import Self
    except ImportError:
        from typing import TypeVar
        Self = TypeVar("Self")  # type: ignore[misc]
        _SELF_SOURCE = "TypeVar fallback"
    else:
        _SELF_SOURCE = "typing_extensions"
else:
    from typing import Self  # Added in 3.11
    _SELF_SOURCE = "typing (3.11+)"

print(f"Self type source: {_SELF_SOURCE}")


# Pattern 4: TYPE_CHECKING for annotation-only imports
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only static analysers (mypy, pyright) follow this branch. At runtime
    # the import is never executed, which avoids circular imports and
    # unnecessary dependencies.
    from pathlib import PurePosixPath


def process_path(path: "PurePosixPath") -> str:
    """Return the string form of *path*.

    The annotation is a string literal, so ``PurePosixPath`` does not have
    to be importable when the function is defined.
    """
    text = str(path)
    return text


print(
    f"\nTYPE_CHECKING at runtime: {TYPE_CHECKING}",  # Always False at runtime
    "Annotations are strings at runtime, avoiding import overhead",
    sep="\n",
)

## Module-Level `__getattr__` (PEP 562)

PEP 562 (Python 3.7+) allows defining `__getattr__` and `__dir__` at the module
level. When normal lookup does not find an attribute in the module's namespace,
the module's `__getattr__` is called with the attribute name. This enables lazy
attribute computation, deprecation warnings, and dynamic module APIs.

In [None]:
import sys
import types
import warnings
from typing import Any


# Create a module that uses __getattr__ for deprecation warnings.
# The module object is built in memory instead of being loaded from a file.
demo_mod = types.ModuleType("smart_module")
demo_mod.__doc__ = "Module with __getattr__ for smart attribute access."

# The source text is executed with the module's own __dict__ as its globals,
# so the __getattr__ and __dir__ functions it defines become module-level
# hooks of demo_mod, and globals() inside them is the module namespace.
exec(
    'import warnings\n'
    'from typing import Any\n'
    '\n'
    '# Current API\n'
    'TIMEOUT: int = 30\n'
    'MAX_RETRIES: int = 3\n'
    '\n'
    '# Deprecated names mapped to their replacements\n'
    '_DEPRECATED: dict[str, str] = {\n'
    '    "RETRY_COUNT": "MAX_RETRIES",\n'
    '    "DEFAULT_TIMEOUT": "TIMEOUT",\n'
    '}\n'
    '\n'
    '# Lazy-computed attributes\n'
    '_LAZY_CACHE: dict[str, Any] = {}\n'
    '\n'
    'def _compute_expensive_value() -> dict[str, int]:\n'
    '    """Simulate an expensive computation."""\n'
    '    print("  [Computing expensive value...]"  )\n'
    '    return {"computed": 42, "cost": 999}\n'
    '\n'
    'def __getattr__(name: str) -> Any:\n'
    '    # Handle deprecated attributes\n'
    '    if name in _DEPRECATED:\n'
    '        new_name = _DEPRECATED[name]\n'
    '        warnings.warn(\n'
    '            f"{name} is deprecated, use {new_name} instead",\n'
    '            DeprecationWarning,\n'
    '            stacklevel=2,\n'
    '        )\n'
    '        return globals()[new_name]\n'
    '\n'
    '    # Handle lazy-computed attributes\n'
    '    if name == "EXPENSIVE_DATA":\n'
    '        if name not in _LAZY_CACHE:\n'
    '            _LAZY_CACHE[name] = _compute_expensive_value()\n'
    '        return _LAZY_CACHE[name]\n'
    '\n'
    '    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")\n'
    '\n'
    'def __dir__() -> list[str]:\n'
    '    """Include deprecated and lazy names in dir()."""\n'
    '    public = [n for n in globals() if not n.startswith("_")]\n'
    '    return public + list(_DEPRECATED.keys()) + ["EXPENSIVE_DATA"]\n',
    demo_mod.__dict__,
)

# Register the module so the `import smart_module` below resolves to this
# object from the sys.modules cache
sys.modules["smart_module"] = demo_mod

import smart_module

# Normal attribute access works as usual: these names exist in the module
# namespace, so the module-level __getattr__ is not consulted
print(f"TIMEOUT: {smart_module.TIMEOUT}")
print(f"MAX_RETRIES: {smart_module.MAX_RETRIES}")

# Deprecated attribute triggers a warning but still works.
# record=True collects the warnings in `caught` instead of displaying them;
# the "always" filter makes sure the DeprecationWarning is not suppressed.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    val = smart_module.RETRY_COUNT  # Deprecated name
    print(f"\nRETRY_COUNT (deprecated): {val}")
    print(f"Warning: {caught[0].message}")

# Lazy attribute is computed on first access
print(f"\nFirst access to EXPENSIVE_DATA:")
data = smart_module.EXPENSIVE_DATA
print(f"Result: {data}")

# The value was stored in _LAZY_CACHE, so the computation is not repeated
print(f"\nSecond access (cached):")
data2 = smart_module.EXPENSIVE_DATA
print(f"Result: {data2}")

# Missing attribute still raises AttributeError (the final raise in the
# module-level __getattr__)
try:
    _ = smart_module.NONEXISTENT
except AttributeError as e:
    print(f"\nMissing attribute: {e}")

# dir() includes all public names, plus the deprecated and lazy names added
# by the module-level __dir__
print(f"\ndir(smart_module): {dir(smart_module)}")

# Clean up: remove the simulated module from the import cache
del sys.modules["smart_module"]

## Package Distribution Basics (`pyproject.toml`)

Modern Python packaging uses `pyproject.toml` (PEP 517/518/621) as the single
configuration file for package metadata, build system, and tool settings.
It supersedes the older `setup.py` / `setup.cfg` approach for most projects.

In [None]:
# Sample texts shown by this cell: a pyproject.toml for distributing a Python
# package, and the directory tree that goes with it. Both are defined first
# and printed afterwards.
pyproject_example: str = '''
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "my-analytics"
version = "1.0.0"
description = "Analytics toolkit for data processing"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.11"
authors = [
    {name = "Your Name", email = "you@example.com"},
]
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
dependencies = [
    "httpx>=0.24",
]

[project.optional-dependencies]
dev = [
    "pytest>=7.0",
    "mypy>=1.0",
    "ruff>=0.1",
]

[project.scripts]
my-analytics = "my_analytics.cli:main"

[tool.mypy]
strict = true

[tool.ruff]
line-length = 88
'''

structure: str = '''
my-analytics/
    pyproject.toml           # Package metadata and build config
    README.md                # Package documentation
    src/
        my_analytics/        # The actual package (note underscores)
            __init__.py      # Package init with __version__
            cli.py           # CLI entry point
            core.py          # Core functionality
            transforms/
                __init__.py
                normalize.py
    tests/
        test_core.py
        test_transforms.py
'''

# Each print emits a heading followed by its body, one item per line.
print("Modern pyproject.toml for package distribution:", pyproject_example, sep="\n")
print("Recommended project layout (src layout):", structure, sep="\n")

# Build and install commands
print(
    "Common commands:",
    "  pip install -e '.[dev]'   # Editable install with dev deps",
    "  python -m build           # Build sdist and wheel",
    "  twine upload dist/*       # Upload to PyPI",
    sep="\n",
)

In [None]:
# Reading package metadata at runtime using importlib.metadata
from importlib.metadata import metadata, packages_distributions, version

# Get metadata for an installed package
pip_meta = metadata("pip")
print("pip package metadata:")
print(f"  Name:     {pip_meta['Name']}")
print(f"  Version:  {pip_meta['Version']}")
print(f"  Summary:  {pip_meta['Summary']}")
print(f"  License:  {pip_meta['License']}")
print(f"  Author:   {pip_meta.get('Author-email', 'N/A')}")

# Get just the version string
pip_version: str = version("pip")
print(f"\npip version (shortcut): {pip_version}")

# Discover which packages provide which top-level modules
pkg_map = packages_distributions()
print(f"\nSample package-to-distribution mapping:")
for module_name in ["json", "pip", "email"]:
    distributions = pkg_map.get(module_name, ["(stdlib)"])
    print(f"  {module_name} -> {distributions}")

## Summary

### Key Patterns
1. **Lazy imports** defer heavy module loading until first use, improving startup time
2. **Plugin registries** use `importlib.import_module()` for runtime-discovered extensions
3. **Module-as-singleton** leverages `sys.modules` caching for shared state
4. **Conditional imports** (`try/except ImportError`) handle optional dependencies gracefully
5. **Module `__getattr__`** (PEP 562) enables deprecation warnings and lazy attributes
6. **`pyproject.toml`** is the modern standard for package metadata and distribution

### When to Use Each Pattern
- **Lazy imports**: CLI tools, web apps, any startup-time-sensitive application
- **Plugin registry**: Extensible systems where users add functionality
- **Module singleton**: Application configuration, service registries
- **Conditional imports**: Libraries supporting multiple optional backends
- **Module `__getattr__`**: API evolution, lazy computation, deprecation management
- **`pyproject.toml`**: Any package intended for distribution or installation