In [14]:
from pathlib import Path

import pandas as pd
import plotly.express as px
from pathspec import PathSpec


def treemap_dir(path=".", use_gitignore=False):
    """Display an interactive Plotly treemap of on-disk sizes below ``path``.

    Every file, and every empty directory, found recursively under ``path``
    becomes one leaf. Its area and colour are its ``st_size`` expressed in
    "MB" (computed as bytes / 1024**2); its path components, relative to
    ``path``, form the levels of the hierarchy.

    Args:
        path: Directory to scan. It is resolved to an absolute path first.
        use_gitignore: When true and ``<path>/.gitignore`` exists, entries
            matching its patterns are left out. Only that single top-level
            file is read; nested ``.gitignore`` files are not consulted.

    Returns:
        None. The figure is rendered with ``fig.show()``. When the scan finds
        nothing to plot, a short message is printed and no figure is drawn.
    """
    base = Path(path).resolve()
    spec = None
    if use_gitignore:
        gitignore = base / ".gitignore"
        if gitignore.exists():
            patterns = gitignore.read_text().splitlines()
            spec = PathSpec.from_lines("gitwildmatch", patterns)

    data = []
    for p in base.rglob("*"):
        rel = p.relative_to(base)
        is_dir = p.is_dir()

        if spec is not None:
            # Directory-only patterns ("build/") match only when the candidate
            # ends with a slash, so mark directories explicitly.
            candidate = rel.as_posix() + ("/" if is_dir else "")
            if spec.match_file(candidate):
                continue

        if is_dir:
            # Non-empty directories are represented by their contents; only
            # empty ones become leaves of their own.
            try:
                if any(p.iterdir()):
                    continue
            except OSError:
                # Unreadable directory: its contents are unknown, skip it.
                continue

        try:
            size_mb = p.stat().st_size / (1024**2)
        except OSError:
            # Broken symlink, or the entry vanished mid-scan: nothing to measure.
            continue

        data.append({"size_MB": size_mb, **{f"level{i}": part for i, part in enumerate(rel.parts)}})

    if not data:
        # An empty frame has no "size_MB" column, so building the title below
        # would raise KeyError; report the situation instead.
        print(f"No files found under {base}; nothing to plot.")
        return

    df = pd.DataFrame(data)
    fig = px.treemap(
        df,
        path=[col for col in df.columns if col.startswith("level")],
        values="size_MB",
        color="size_MB",
        color_continuous_scale="Viridis",
        title=f"Filesize treemap, total size = {df['size_MB'].sum():.2f} MB",
    )
    fig.update_layout(coloraxis_colorbar=dict(title="Size (MB)"))
    fig.show()


In [17]:
# Treemap of the current working directory (path defaults to "."); .gitignore is not consulted.
treemap_dir()


In [16]:
# Same scan of the current working directory, but skip entries matched by its top-level .gitignore (if one exists).
treemap_dir(use_gitignore=True)