# In [4]:
from pathlib import Path, PurePosixPath
import json

# Cap on how much of any single file's text is copied into the report.
MAX_READ_BYTES = 200_000

# Entry names that are hidden from the tree and whose contents are not dumped.
SKIP_DIRS = {
    '.git',
    '.vscode',
    '__pycache__',
    'env',
    'node_modules',
    'tmp_streamlit',
    'venv',
}

# Suffixes (compared lower-cased) of files left out of the report entirely.
IGNORED_EXTS = {
    '.bmp', '.gif', '.jpeg', '.jpg', '.png', '.svg', '.tiff', '.webp',
    '.joblib', '.pdf',
}

# Suffixes of files that are listed by name only; their content is not dumped.
PATHS_ONLY_EXT = {'.csv', '.json', '.txt'}

def _tree(root: Path) -> str:
    """Render an ASCII tree of *root*.

    Entries whose name is in SKIP_DIRS, and files whose suffix (lower-cased)
    is in IGNORED_EXTS, are left out.  Inside each directory, sub-directories
    come before files and each group is sorted case-insensitively by name.

    A directory that cannot be listed is shown with an inline
    ``[unreadable: ...]`` note instead of aborting the whole tree.  Symlinked
    directories are shown but not descended into, so a link cycle cannot
    recurse without bound.

    Args:
        root: Directory to render.

    Returns:
        The tree as one newline-joined string; its first line is ``root.name``.
    """
    lines = [root.name]

    def visible(entry: Path) -> bool:
        """Tell whether *entry* should appear in the tree."""
        if entry.name in SKIP_DIRS:
            return False
        return not (entry.is_file() and entry.suffix.lower() in IGNORED_EXTS)

    def walk(directory: Path, prefix: str) -> None:
        """Append the rendered children of *directory* to ``lines``."""
        try:
            entries = [e for e in directory.iterdir() if visible(e)]
        except OSError as exc:
            # One unreadable directory must not cost the whole report.
            lines.append(f"{prefix}└── [unreadable: {exc.strerror or exc}]")
            return

        # False sorts before True, so directories precede files.
        entries.sort(key=lambda e: (e.is_file(), e.name.lower()))
        last = len(entries) - 1
        for pos, entry in enumerate(entries):
            is_last = pos == last
            lines.append(prefix + ("└── " if is_last else "├── ") + entry.name)
            # is_dir() follows symlinks; skip linked directories to avoid cycles.
            if entry.is_dir() and not entry.is_symlink():
                walk(entry, prefix + ("    " if is_last else "│   "))

    walk(root, "")
    return '\n'.join(lines)

def _extract_ipynb_code(nb_path: Path) -> str:
    """Return the concatenated source of a notebook's code cells.

    Each code cell is preceded by a ``# ── cell N ──`` banner, where N is the
    cell's 1-based position among *all* cells (non-code cells are counted but
    not emitted).  Cell blocks are separated by one blank line.

    This is best-effort: any failure while reading or parsing the file is
    returned as a ``[Could not read notebook: ...]`` string, never raised.

    Args:
        nb_path: Path of the ``.ipynb`` file (JSON, read as UTF-8).

    Returns:
        The joined code, ``[Notebook contains no code cells]`` when there are
        none, or the error placeholder described above.
    """
    try:
        # ASCII 'utf-8' on purpose: a typographic hyphen (U+2011) in the codec
        # name is not a registered encoding alias.
        notebook = json.loads(nb_path.read_text(encoding='utf-8'))
        blocks = []
        for number, cell in enumerate(notebook.get('cells', []), 1):
            if cell.get('cell_type') != 'code':
                continue
            # 'source' may be a list of lines or a single string; join copes
            # with both.
            body = ''.join(cell.get('source', ''))
            blocks.append(f"# ── cell {number} ──\n{body}")
    except Exception as exc:  # deliberate catch-all: report, don't abort
        return f"[Could not read notebook: {exc}]"
    return '\n\n'.join(blocks) or '[Notebook contains no code cells]'

def _dump(root: Path) -> str:
    """Return the "file contents" section of the report for *root*.

    Files below *root* are visited in sorted path order so the output is
    reproducible.  Each file is handled by the first matching rule:

    * a path component, relative to *root*, named in SKIP_DIRS: skipped
      (the same entries ``_tree`` hides);
    * suffix in IGNORED_EXTS: skipped;
    * suffix in PATHS_ONLY_EXT: only the relative path is recorded, and all
      such paths are appended in a trailing "names only" section;
    * ``.ipynb``: code cells only, via ``_extract_ipynb_code``;
    * anything else: decoded as UTF-8 (undecodable bytes replaced) and
      included, cut off after MAX_READ_BYTES characters.

    A file that cannot be read is reported inline as
    ``[Could not read file: ...]``.

    Args:
        root: Directory whose files are dumped.

    Returns:
        All sections joined with newlines.
    """
    sections, names_only = [], []

    for path in sorted(root.rglob('*')):
        if path.is_dir():
            continue

        rel = path.relative_to(root)
        # Test only the components below root: an ancestor of root that
        # happens to be called e.g. "env" must not hide the whole project.
        if not SKIP_DIRS.isdisjoint(rel.parts):
            continue

        ext = path.suffix.lower()
        if ext in IGNORED_EXTS:
            continue                      # ignore images/PDFs/etc.

        # .txt / .csv / .json  → list name only
        if ext in PATHS_ONLY_EXT:
            names_only.append(rel)
            continue

        # .ipynb  → dump only code cells
        if ext == '.ipynb':
            sections.append(f"\n# {rel}  (code cells only)\n")
            sections.append(_extract_ipynb_code(path))
            continue

        # everything else → dump file content
        sections.append(f"\n# {rel}\n")
        try:
            # Read one character past the cap: enough to detect truncation
            # without loading an arbitrarily large file into memory.
            with path.open('r', encoding='utf-8', errors='replace') as handle:
                text = handle.read(MAX_READ_BYTES + 1)
        except OSError as exc:
            sections.append(f"[Could not read file: {exc}]")
            continue
        if len(text) > MAX_READ_BYTES:
            text = text[:MAX_READ_BYTES] + "\n[...TRUNCATED...]"
        sections.append(text)

    if names_only:
        sections.append("\n## .txt / .csv / .json files (names only)\n")
        sections.extend(map(str, names_only))

    return '\n'.join(sections)

def build_dir_report(root='.', out_file='dir_report.txt', show=True) -> str:
    """Build a text snapshot of a directory and write it to *out_file*.

    The report is the ASCII tree produced by ``_tree`` followed by the
    per-file dump produced by ``_dump``, each under its own banner line.

    Args:
        root: Directory to snapshot.  A falsy value means the current
            directory; ``~`` is expanded and the path is resolved.
        out_file: Path the report is written to, encoded as UTF-8.
        show: When true, also print the report and where it was saved.

    Returns:
        The full report text.

    Raises:
        NotADirectoryError: If *root* does not resolve to a directory.
    """
    root_dir = Path(root or '.').expanduser().resolve()
    if not root_dir.is_dir():
        raise NotADirectoryError(root_dir)

    report = ''.join((
        "================  DIRECTORY TREE  ================\n",
        _tree(root_dir),
        "\n\n================  FILE CONTENTS  =================\n",
        _dump(root_dir),
    ))

    # ASCII 'utf-8' on purpose: a typographic hyphen (U+2011) in the codec
    # name is not a registered encoding alias.
    Path(out_file).write_text(report, encoding='utf-8')
    if show:
        print(report)
        print(f"\nReport saved to {out_file}")
    return report


# Snapshot the current working directory into snapshot.txt without printing
# it; the report text stays available in rep_text.
rep_text = build_dir_report(
    root='.',
    out_file="snapshot.txt",
    show=False,
)
