# Packages and Imports

**Chapter 7 - Learning Python, 5th Edition**

Packages are directories of modules organized under a common namespace.
They provide hierarchical structure for large codebases, control import
behavior through `__init__.py`, and support both absolute and relative
import styles.

## Package Structure and `__init__.py`

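A regular package is a directory containing an `__init__.py` file (which can be empty).
The `__init__.py` file is executed when the package is first imported and can
be used to set up package-level initialization. Directories without an
`__init__.py` can still be imported as namespace packages, which are covered
later in this chapter.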

```
mypackage/
    __init__.py          # Makes this a package; runs on import
    module_a.py          # mypackage.module_a
    module_b.py          # mypackage.module_b
    subpackage/
        __init__.py      # Nested package
        module_c.py      # mypackage.subpackage.module_c
```

In [None]:
import sys
import tempfile
from pathlib import Path

# Create a real package structure on disk to demonstrate.
# The temporary directory is intentionally left on disk and on sys.path:
# the import and relative-import cells below read and rewrite files in it.
pkg_root = Path(tempfile.mkdtemp())
pkg_dir = pkg_root / "analytics"
sub_dir = pkg_dir / "transforms"
sub_dir.mkdir(parents=True)

# analytics/__init__.py - package initialization
(pkg_dir / "__init__.py").write_text(
    '"""Analytics package for data processing."""\n'
    '\n'
    '# Re-export load_data so that every name listed in __all__ resolves;\n'
    '# without this, "from analytics import *" raises AttributeError.\n'
    'from .loader import load_data\n'
    '\n'
    '__version__: str = "1.0.0"\n'
    '__all__: list[str] = ["load_data", "transforms"]\n'
    '\n'
    'print(f"analytics __init__.py executed (v{__version__})")\n'
)

# analytics/loader.py - data loading
(pkg_dir / "loader.py").write_text(
    '"""Data loading utilities."""\n'
    '\n'
    'def load_data(path: str) -> list[dict[str, object]]:\n'
    '    """Load data from a file path."""\n'
    '    return [{"source": path, "rows": 100}]\n'
)

# analytics/stats.py - statistics
(pkg_dir / "stats.py").write_text(
    '"""Statistical analysis functions."""\n'
    '\n'
    'def mean(values: list[float]) -> float:\n'
    '    """Calculate arithmetic mean."""\n'
    '    return sum(values) / len(values)\n'
    '\n'
    'def median(values: list[float]) -> float:\n'
    '    """Calculate median value."""\n'
    '    sorted_vals = sorted(values)\n'
    '    n = len(sorted_vals)\n'
    '    mid = n // 2\n'
    '    if n % 2 == 0:\n'
    '        return (sorted_vals[mid - 1] + sorted_vals[mid]) / 2\n'
    '    return sorted_vals[mid]\n'
)

# analytics/transforms/__init__.py - marks the nested directory as a subpackage
(sub_dir / "__init__.py").write_text(
    '"""Data transformation subpackage."""\n'
    'print("analytics.transforms __init__.py executed")\n'
)

# analytics/transforms/normalize.py - subpackage module
(sub_dir / "normalize.py").write_text(
    '"""Normalization transforms."""\n'
    '\n'
    'def min_max(values: list[float]) -> list[float]:\n'
    '    """Min-max normalization to [0, 1] range."""\n'
    '    lo, hi = min(values), max(values)\n'
    '    span = hi - lo\n'
    '    return [(v - lo) / span for v in values] if span else [0.0] * len(values)\n'
)

# Put the directory that CONTAINS the package (not the package itself) at the
# front of sys.path so that "import analytics" can find it.
sys.path.insert(0, str(pkg_root))

# Show the created structure
print("Package structure:")
for p in sorted(pkg_root.rglob("*.py")):
    print(f"  {p.relative_to(pkg_root)}")

In [None]:
# The first import of a package runs its __init__.py
import analytics

# Report the metadata attributes of the freshly imported package
pkg_version = analytics.__version__
pkg_search_path = analytics.__path__
pkg_init_file = analytics.__file__
print(f"\nPackage version: {pkg_version}")
print(f"Package path: {pkg_search_path}")
print(f"Package file: {pkg_init_file}")

# Bring in one whole submodule and one function from another submodule
from analytics import stats
from analytics.loader import load_data

data = load_data("/tmp/sample.csv")
print(f"\nLoaded data: {data}")

values = [10.0, 20.0, 30.0, 40.0, 50.0]
average = stats.mean(values)
middle = stats.median(values)
print(f"Mean: {average}")
print(f"Median: {middle}")

# Reach into the nested subpackage with a fully dotted path
from analytics.transforms.normalize import min_max
normalized = min_max(values)
print(f"\nNormalized: {normalized}")

## Absolute vs Relative Imports

- **Absolute imports** use the full package path from the project root
- **Relative imports** use dots to navigate the package hierarchy

```python
# Inside analytics/stats.py:

# Absolute import (preferred in most cases)
from analytics.loader import load_data

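# Relative imports (useful within packages)
from . import loader              # Same package
from .loader import load_data     # Same package, specific name

# Inside analytics/transforms/normalize.py (one level deeper):
from ..loader import load_data    # Parent package (analytics)
# Note: ".." cannot be used in analytics/stats.py itself, because analytics
# is the top-level package and has no parent package to climb to.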
```

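Relative imports only work in modules that are imported as part of a package
(that is, modules whose `__package__` is a non-empty string). They fail in a
file that is run directly as a top-level script.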

In [None]:
# Demonstrate relative imports by adding them to our package
import importlib

# Rewrite stats.py so that it obtains load_data through a relative import.
# mean() and median() are kept so that code already using analytics.stats
# still finds them after the file is replaced.
(pkg_dir / "stats.py").write_text(
    '"""Statistical analysis with relative imports."""\n'
    '\n'
    '# Relative import: . means "current package" (analytics)\n'
    'from .loader import load_data  # Same as: from analytics.loader import load_data\n'
    '\n'
    'def mean(values: list[float]) -> float:\n'
    '    """Calculate arithmetic mean."""\n'
    '    return sum(values) / len(values)\n'
    '\n'
    'def median(values: list[float]) -> float:\n'
    '    """Calculate median value."""\n'
    '    sorted_vals = sorted(values)\n'
    '    n = len(sorted_vals)\n'
    '    mid = n // 2\n'
    '    if n % 2 == 0:\n'
    '        return (sorted_vals[mid - 1] + sorted_vals[mid]) / 2\n'
    '    return sorted_vals[mid]\n'
    '\n'
    'def analyze_file(path: str) -> dict[str, object]:\n'
    '    """Load data and compute statistics using relative import."""\n'
    '    data = load_data(path)\n'
    '    return {"source": path, "records": len(data)}\n'
)

# Reload to pick up the changes: the module object is reused and its source
# is executed again, so existing references to analytics.stats see the update.
importlib.reload(analytics.stats)

# The relative import works
result = analytics.stats.analyze_file("/tmp/test.csv")
print(f"Analysis result: {result}")

# Show import resolution: __package__ is what the leading dot is resolved against
print(f"\nstats.__package__ = {analytics.stats.__package__!r}")
print(f"stats.__name__ = {analytics.stats.__name__!r}")

# Relative imports CANNOT be used in top-level scripts
# This would fail in a script run directly:
try:
    # exec() runs the statement in this cell's own namespace, which does not
    # belong to any package, so there is nothing for the dot to refer to.
    exec("from . import something")  # No parent package
except ImportError as e:
    print(f"\nRelative import in top-level: {e}")

## `__all__` for Controlling Star Imports

`__all__` is a list of strings that defines what `from module import *` exports.
Without `__all__`, star imports export all names that do not start with an underscore.
With `__all__`, only the listed names are exported.

In [None]:
import types
import sys


def simulate_star_import(module: types.ModuleType) -> list[str]:
    """Return the names that ``from module import *`` would bind.

    If the module defines ``__all__``, a list copy of it is returned as-is.
    Otherwise the result is every name reported by ``dir(module)`` that does
    not begin with an underscore.
    """
    try:
        declared = module.__all__
    except AttributeError:
        # No explicit export list: fall back to the "no leading underscore" rule.
        return [attr for attr in dir(module) if attr[:1] != "_"]
    return list(declared)


# Case 1: a module that does not define __all__.
# Every name without a leading underscore is picked up by a star import.
source_without_all = (
    'PUBLIC_CONST = 42\n'
    '_PRIVATE_CONST = 99\n'
    'def public_func(): pass\n'
    'def _private_func(): pass\n'
    'class PublicClass: pass\n'
)
mod_no_all = types.ModuleType("mod_no_all")
exec(source_without_all, mod_no_all.__dict__)

exported_without_all = simulate_star_import(mod_no_all)
print("Without __all__:")
print(f"  Star import gets: {exported_without_all}")

# Case 2: a module that defines __all__.
# Only the names it lists are picked up by a star import.
source_with_all = (
    '__all__ = ["public_func", "PublicClass"]\n'
    'PUBLIC_CONST = 42\n'
    '_PRIVATE_CONST = 99\n'
    'def public_func(): pass\n'
    'def _private_func(): pass\n'
    'class PublicClass: pass\n'
    'class InternalClass: pass\n'
)
mod_with_all = types.ModuleType("mod_with_all")
exec(source_with_all, mod_with_all.__dict__)

exported_with_all = simulate_star_import(mod_with_all)
print("\nWith __all__ = ['public_func', 'PublicClass']:")
print(f"  Star import gets: {exported_with_all}")
print("  Note: PUBLIC_CONST and InternalClass are excluded")

# Real-world example: the json module's __all__ versus its non-underscore names
import json
print(f"\njson.__all__ = {json.__all__}")
listed_count = len(json.__all__)
non_underscore_count = len([n for n in dir(json) if not n.startswith('_')])
print(f"Star import would get {listed_count} names vs {non_underscore_count} public names")

## Circular Import Problems and Solutions

Circular imports occur when module A imports module B, and module B imports
module A. This can cause `ImportError` or `AttributeError` because the
module's code hasn't finished executing when the circular import happens.

In [None]:
import tempfile
import importlib
from pathlib import Path

circ_root = Path(tempfile.mkdtemp())
circ_pkg = circ_root / "circular_demo"
circ_pkg.mkdir()
(circ_pkg / "__init__.py").write_text("")

models_file = circ_pkg / "models.py"
services_file = circ_pkg / "services.py"


def _forget_circular_demo() -> None:
    """Remove every sys.modules entry whose name starts with 'circular_demo'."""
    stale_names = [name for name in list(sys.modules) if name.startswith("circular_demo")]
    for name in stale_names:
        del sys.modules[name]


# --- Problem: both modules import each other at the top level ---
models_file.write_text(
    '# models.py imports services at the top level\n'
    'from circular_demo.services import validate_user  # Circular!\n'
    '\n'
    'class User:\n'
    '    def __init__(self, name: str) -> None:\n'
    '        self.name = name\n'
)

services_file.write_text(
    '# services.py imports models at the top level\n'
    'from circular_demo.models import User  # Circular!\n'
    '\n'
    'def validate_user(user: User) -> bool:\n'
    '    return len(user.name) > 0\n'
)

sys.path.insert(0, str(circ_root))

# Importing models starts services, which asks for User before models has
# finished executing; the ImportError is reported instead of propagating.
try:
    import circular_demo.models
except ImportError as exc:
    print(f"Circular import error: {exc}")

# Discard whatever the failed attempt left in the module cache
_forget_circular_demo()

# --- Solution 1: defer the import into the function that needs it ---
models_file.write_text(
    'class User:\n'
    '    def __init__(self, name: str) -> None:\n'
    '        self.name = name\n'
    '\n'
    '    def is_valid(self) -> bool:\n'
    '        # Deferred import breaks the cycle\n'
    '        from circular_demo.services import validate_user\n'
    '        return validate_user(self)\n'
)

services_file.write_text(
    'from circular_demo.models import User\n'
    '\n'
    'def validate_user(user: "User") -> bool:\n'
    '    return len(user.name) > 0\n'
)

# With no top-level import left in models.py, the import completes
import circular_demo.models
user = circular_demo.models.User("Alice")
print(f"\nSolution - deferred import: user.is_valid() = {user.is_valid()}")

# Clean up: drop the cached modules and take the temp directory off sys.path
_forget_circular_demo()
sys.path.remove(str(circ_root))

for line in (
    "\nOther solutions for circular imports:",
    "  1. Deferred imports (import inside function) - shown above",
    "  2. Restructure: move shared code to a third module",
    "  3. Use TYPE_CHECKING for type-annotation-only imports",
    "  4. Import the module, not the attribute: import x (then x.attr)",
):
    print(line)

## Namespace Packages (PEP 420)

Namespace packages allow a package to be split across multiple directories
without requiring `__init__.py` files. This enables independent distribution
of sub-packages that share a common top-level namespace.

**Note:** PEP 420 ("Implicit Namespace Packages") was accepted and implemented
in Python 3.3. It replaced the earlier, rejected proposals PEP 382 and PEP 402.
Before Python 3.3, namespace packages had to be emulated with
`pkgutil.extend_path()` or setuptools' `pkg_resources.declare_namespace()`.

In [None]:
import sys
import tempfile
from pathlib import Path

# Create two separate directories that contribute to the same namespace
dir_a = Path(tempfile.mkdtemp())
dir_b = Path(tempfile.mkdtemp())

# Directory A: provides mypkg.utils
ns_a = dir_a / "mypkg" / "utils"
ns_a.mkdir(parents=True)
(ns_a / "__init__.py").write_text(
    'def helper() -> str:\n'
    '    return "from dir_a: mypkg.utils.helper"\n'
)
# No __init__.py in mypkg/ - this makes it a namespace package

# Directory B: provides mypkg.core
ns_b = dir_b / "mypkg" / "core"
ns_b.mkdir(parents=True)
(ns_b / "__init__.py").write_text(
    'def engine() -> str:\n'
    '    return "from dir_b: mypkg.core.engine"\n'
)
# No __init__.py in mypkg/ - also a namespace package

# Add both directories to sys.path
sys.path.insert(0, str(dir_a))
sys.path.insert(0, str(dir_b))

# Import from the namespace package - both sub-packages are accessible!
import mypkg.utils
import mypkg.core

print(f"mypkg.utils.helper(): {mypkg.utils.helper()}")
print(f"mypkg.core.engine():  {mypkg.core.engine()}")

# The namespace package has multiple paths
print(f"\nmypkg.__path__: {list(mypkg.__path__)}")

# A namespace package has no __init__.py and therefore no file of its own.
# Since Python 3.7 the attribute exists but is None (before that it was
# missing), so test the value rather than using hasattr(), which would
# report True on current interpreters.
ns_file = getattr(mypkg, "__file__", None)
has_file = ns_file is not None
print(f"Has __file__: {has_file}")
print(f"__file__ value: {ns_file if has_file else 'None (namespace pkg)'}")

# Clean up: forget mypkg and its submodules (exact name or "mypkg." prefix,
# so an unrelated module such as "mypkg_tools" is left alone), then restore
# sys.path.
for key in list(sys.modules):
    if key == "mypkg" or key.startswith("mypkg."):
        del sys.modules[key]
sys.path.remove(str(dir_a))
sys.path.remove(str(dir_b))

## Package-Level Initialization Patterns

The `__init__.py` file runs when a package is first imported. Common patterns
include re-exporting key names, setting up logging, and version management.

In [None]:
import sys
import tempfile
from pathlib import Path

init_root = Path(tempfile.mkdtemp())
init_pkg = init_root / "webapp"
init_pkg.mkdir()

# Source text for each file of the package, keyed by file name.
# The two sub-modules come first, then the __init__.py that re-exports them.
webapp_sources = {
    "config.py": (
        'from typing import Final\n'
        '\n'
        'DEFAULT_HOST: Final[str] = "0.0.0.0"\n'
        'DEFAULT_PORT: Final[int] = 8000\n'
        '\n'
        'class Settings:\n'
        '    def __init__(self, host: str = DEFAULT_HOST, port: int = DEFAULT_PORT) -> None:\n'
        '        self.host = host\n'
        '        self.port = port\n'
        '\n'
        '    def __repr__(self) -> str:\n'
        '        return f"Settings(host={self.host!r}, port={self.port})"\n'
    ),
    "routing.py": (
        'from typing import Callable, Any\n'
        '\n'
        'class Router:\n'
        '    def __init__(self) -> None:\n'
        '        self.routes: dict[str, Callable[..., Any]] = {}\n'
        '\n'
        '    def add_route(self, path: str, handler: Callable[..., Any]) -> None:\n'
        '        self.routes[path] = handler\n'
        '\n'
        '    def __repr__(self) -> str:\n'
        '        return f"Router(routes={list(self.routes.keys())})"\n'
    ),
    # __init__.py with professional initialization patterns
    "__init__.py": (
        '"""Web application framework."""\n'
        '\n'
        '# Pattern 1: Version metadata\n'
        '__version__: str = "2.1.0"\n'
        '__author__: str = "Development Team"\n'
        '\n'
        '# Pattern 2: Re-export key classes for convenient access\n'
        '# Users can do: from webapp import Settings, Router\n'
        '# Instead of: from webapp.config import Settings\n'
        'from webapp.config import Settings\n'
        'from webapp.routing import Router\n'
        '\n'
        '# Pattern 3: Control star imports\n'
        '__all__: list[str] = ["Settings", "Router", "create_app"]\n'
        '\n'
        '# Pattern 4: Package-level factory function\n'
        'def create_app(host: str = "0.0.0.0", port: int = 8000) -> dict:\n'
        '    """Create and configure an application instance."""\n'
        '    settings = Settings(host=host, port=port)\n'
        '    router = Router()\n'
        '    return {"settings": settings, "router": router}\n'
    ),
}
for file_name, source_text in webapp_sources.items():
    (init_pkg / file_name).write_text(source_text)

sys.path.insert(0, str(init_root))

# Importing the package runs __init__.py, which pulls Settings and Router
# up to the package level.
import webapp

print(f"Version: {webapp.__version__}")
print(f"Author: {webapp.__author__}")

# The re-exported classes are reachable directly on the package
settings = webapp.Settings(host="localhost", port=3000)
router = webapp.Router()
print(f"\nSettings: {settings}")
print(f"Router: {router}")

# The factory function defined in __init__.py
app = webapp.create_app(port=5000)
print(f"\nApp: {app}")

# The export list that a star import would honor
print(f"\n__all__: {webapp.__all__}")

# Clean up: drop the cached webapp modules and restore sys.path
webapp_module_names = [name for name in list(sys.modules) if name.startswith("webapp")]
for name in webapp_module_names:
    del sys.modules[name]
sys.path.remove(str(init_root))

## Summary

### Key Concepts
1. **Packages** are directories that group related modules; regular packages contain an `__init__.py`
2. **`__init__.py`** runs on import and can re-export names for a clean public API
3. **Absolute imports** (`from pkg.mod import name`) are explicit and preferred
4. **Relative imports** (`from . import mod`) are useful within packages
5. **`__all__`** controls what `from module import *` exports
6. **Circular imports** can be resolved by deferring imports or restructuring
7. **Namespace packages** allow splitting a package across multiple directories

### Best Practices
- Keep `__init__.py` lightweight; avoid heavy computation on import
- Define `__all__` in every public module to make the API explicit
- Prefer absolute imports for clarity; use relative imports within packages
- Avoid circular imports by separating concerns into distinct modules
- Re-export key names in `__init__.py` so users have a clean import path