Nantucket.ipynb, the interactive command-centre notebook: when run, the notebook reads the single YAML manifest that captures the Moby-Dick-themed directory plan, creates any folders and placeholder notebooks that are missing so your local OneDrive copy always matches that plan, checks that everything on disk still lines up with the manifest, and then regenerates human-readable documentation—a Markdown outline that includes an auto-rendered Mermaid diagram—so the project structure appears clearly in both GitHub (where the Markdown is rendered) and OneNote (with either the YAML or the diagram for easy reference). Because the whole Leviathan repository is cloned into Databricks Repos, Nantucket.ipynb travels with it; open the notebook inside an Azure Databricks workspace, run the same cells to scaffold or validate the structure there, and commit any changes straight back to GitHub. Edit the YAML manifest locally in JupyterLab, directly in Databricks, or in any text editor, rerunning Nantucket keeps every environment—OneDrive, GitHub, and Databricks—in lockstep.


In [5]:
# ── Cell 1 ── initialise structure + scaffold folders/notebooks
import yaml, json, pathlib, datetime as _dt

NOTEBOOK_DIR = pathlib.Path.cwd()
ROOT = NOTEBOOK_DIR.parent                              # expect notebook sits in C:\Users\mikek\OneDrive\Leviathan
MANIFEST = ROOT / "leviathan_directory_manifest.yml"

structure = {
    "Leviathan": {
        "type": "directory",
        "metaphor": "Leviathan",
        "role": "Project root",
        "notes": "",
        "children": {
            "Pequod": {
                "type": "directory",
                "metaphor": "Pequod",
                "role": "Docs & notebooks hub",
                "notes": "",
                "children": {
                    "Chartroom": {"type": "directory", "metaphor": "Chartroom", "role": "High-level overviews + env files", "notes": "", "children": {}},
                    "Maps":      {"type": "directory", "metaphor": "Maps",      "role": "Detailed architecture diagrams",   "notes": "", "children": {}},
                    "Logbook":   {"type": "directory", "metaphor": "Logbook",   "role": "Project diary & research notes",  "notes": "", "children": {}},
                },
            },
            "Ocean": {
                "type": "directory",
                "metaphor": "Ocean",
                "role": "Data lake",
                "notes": "",
                "children": {
                    "England_1": {"type": "directory", "metaphor": "England_1", "role": "Premier League data", "notes": "", "children": {}},
                    "England_2": {"type": "directory", "metaphor": "England_2", "role": "Championship data",   "notes": "", "children": {}},
                },
            },
            "Armory": {
                "type": "directory",
                "metaphor": "Armory",
                "role": "Feature-engineering pipelines",
                "notes": "",
                "children": {
                    "Gam": {"type": "directory", "metaphor": "Gam", "role": "Cross-league utilities", "notes": "", "children": {}},
                    "England_1": {
                        "type": "directory",
                        "metaphor": "England_1 (Armory)",
                        "role": "League-specific transforms",
                        "notes": "",
                        "children": {
                            "Spouter_inn": {"type": "directory", "metaphor": "Spouter_inn", "role": "Head-to-head & league tables", "notes": "", "children": {}},
                            "Crows_nest":  {"type": "directory", "metaphor": "Crows_nest",  "role": "Rolling-form metrics",          "notes": "", "children": {}},
                            "Whaleboat":   {"type": "directory", "metaphor": "Whaleboat",   "role": "Expected-points models",        "notes": "", "children": {}},
                            "Tackle":      {"type": "directory", "metaphor": "Tackle",      "role": "Composite rankings",            "notes": "", "children": {}},
                        },
                    },
                },
            },
        },
    }
}

def _ensure_ipynb(p: pathlib.Path):
    if p.exists(): return
    blank = {"cells": [], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
    p.write_text(json.dumps(blank, indent=2))

def _create(node: dict, base: pathlib.Path):
    for name, meta in node.get("children", {}).items():
        path = base / name
        if meta["type"] == "directory":
            path.mkdir(parents=True, exist_ok=True)
            _create(meta, path)
        elif meta["type"] == "file":
            if path.suffix == ".ipynb": _ensure_ipynb(path)
            else:                       path.touch(exist_ok=True)

# write manifest (overwrite if empty)
if MANIFEST.stat().st_size == 0:
    MANIFEST.write_text(yaml.safe_dump(structure, sort_keys=False))

# scaffold folders/notebooks
_create(structure["Leviathan"], ROOT)
print("🟢  Structure created & manifest written.")



FileNotFoundError: [WinError 2] The system cannot find the file specified: 'C:\\Users\\mikek\\OneDrive\\Leviathan\\leviathan_directory_manifest.yml'

In [6]:
# ── Cell 2 ── quick validation check (run anytime)
import yaml, pathlib, sys, json

NOTEBOOK_DIR = pathlib.Path.cwd()
ROOT = NOTEBOOK_DIR.parent 
MANIFEST = ROOT / "leviathan_directory_manifest.yml"
manifest = yaml.safe_load(MANIFEST.read_text())

def _validate(node, base):
    ok = True
    for name, meta in node.get("children", {}).items():
        path = base / name
        if meta["type"] == "directory":
            if not path.is_dir():
                print(f"❌ missing folder {path.relative_to(ROOT)}"); ok = False
            ok &= _validate(meta, path)
        elif meta["type"] == "file":
            if not path.is_file():
                print(f"❌ missing file {path.relative_to(ROOT)}"); ok = False
    return ok

passed = _validate(manifest["Leviathan"], ROOT)
print("✅ structure matches manifest." if passed else "⚠️  mismatch detected.")


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\mikek\\OneDrive\\Leviathan\\leviathan_directory_manifest.yml'

In [8]:
# ── Cell 3 ── generate docs (outline + table + Mermaid)
import yaml, pathlib, textwrap, json

NOTEBOOK_DIR = pathlib.Path.cwd()        # Leviathan/Nantucket
ROOT = NOTEBOOK_DIR.parent               # Leviathan
MANIFEST = NOTEBOOK_DIR / "leviathan_directory_manifest.yml"
DOCS_DIR = NOTEBOOK_DIR                  # still write the docs here
DOCS_DIR.mkdir(exist_ok=True)

manifest  = yaml.safe_load(MANIFEST.read_text())["Leviathan"]

# -------------------------------- helpers --------------------------------
def flatten(node, base=""):
    rows = []
    for name, meta in node.get("children", {}).items():
        path = f"{base}/{name}".lstrip("/")
        rows.append((path, meta["metaphor"], meta["role"], meta["notes"]))
        rows += flatten(meta, path)
    return rows

rows = flatten(manifest)

# 1. Markdown outline
outline_lines = ["# Leviathan Directory Map\n"]
for path, metaphor, role, notes in rows:
    depth = path.count("/")
    indent = "  " * depth
    outline_lines.append(f"{indent}- **{path.split('/')[-1]}/** – {role}  ")
DOCS_DIR.joinpath("directory_map.md").write_text("\n".join(outline_lines))

# 2. Markdown table with Mermaid diagram
table_lines = [
    "| Path | Metaphor | Intended Role | Notes |",
    "|------|----------|---------------|-------|",
] + [
    f"| {p}/ | {m} | {r} | {n} |" for p, m, r, n in rows
]
mermaid = ["```mermaid", "graph TD"]
for p, m, _, _ in rows:
    parts = p.split("/")
    if len(parts) == 1:
        continue
    parent = parts[-2] or "Leviathan"
    mermaid.append(f"  {parent.replace('/', '_')} --> {parts[-1].replace('/', '_')}")
mermaid.append("```")

DOCS_DIR.joinpath("directory_glossary.md").write_text(
    "\n".join(table_lines) + "\n\n" + "\n".join(mermaid)
)

print("📝  docs/directory_map.md and docs/directory_glossary.md regenerated.")


📝  docs/directory_map.md and docs/directory_glossary.md regenerated.
