## <center>Nantucket</center>

Nantucket.ipynb is the interactive command-centre notebook. When run, it defines the Moby-Dick-themed project structure directly in code, writes a matching YAML manifest, scaffolds any missing folders and placeholder notebooks to bring your OneDrive into alignment, validates that the actual directory structure matches the declared plan, and regenerates human-readable documentation: a Markdown table and a Mermaid diagram that appear in both GitHub and OneNote.
Because the entire Leviathan repository is cloned into Databricks Repos, Nantucket.ipynb travels with it. You can open and run the notebook in Azure Databricks to scaffold, validate, and document the same structure there. Edit the structure in the notebook, rerun the cells, and every environment—OneDrive, GitHub, Databricks—stays in lockstep.

In [27]:
import yaml, pathlib

# All locations are derived from the working directory the notebook runs in.
NOTEBOOK_DIR = pathlib.Path.cwd()  # expected: Leviathan/Nantucket
ROOT = NOTEBOOK_DIR.parent  # expected: Leviathan
MANIFEST = NOTEBOOK_DIR.joinpath("leviathan_directory_manifest.yml")

def _folder(name, role="", children=()):
    """Return a ``(name, entry)`` pair describing one directory of the plan.

    The entry carries the five manifest keys in their fixed order (type,
    metaphor, role, notes, children); the metaphor always equals the folder
    name. ``children`` is an iterable of pairs produced by this same helper and
    is copied into a fresh dict, so no two entries share a children mapping.
    """
    entry = {
        "type": "directory",
        "metaphor": name,
        "role": role,
        "notes": "",
        "children": dict(children),
    }
    return name, entry


# Folder names created under Awhalin/England_1, in manifest order.
_CREW_NAMES = [
    "Mapple", "Peleg", "Bildad", "Flask", "Stubb", "Fedallah", "Tashtego",
    "Daggoo", "Elijah", "Ahab", "Starbuck", "Ishmael", "Queequeg", "Rachel",
]

# Simplified project structure as Python dict
structure = dict([
    _folder("Leviathan", "Project root", [
        _folder("Pequod", "Docs & notebooks hub", [
            _folder("Chartroom", "High-level overviews + config"),
            _folder("Maps", "Architecture diagrams + specs"),
            _folder("Logbook", "Project diary & notes"),
        ]),
        _folder("Ocean", "Data lake", [
            _folder("England_1", "Premier League data"),
            _folder("England_2", "Championship data"),
        ]),
        _folder("Armory", "Feature-engineering pipelines", [
            _folder("Gam", "Cross-league utilities"),
            _folder("England_1", "League-specific transforms", [
                _folder("Blubber_room", "Head-to-head & tables"),
                _folder("Crows_nest", "Rolling-form metrics"),
                _folder("Whaleboat", "Expected-points models"),
                _folder("Tackle", "Composite rankings"),
                _folder("Line", "Line analytics"),
                _folder("Harpoon", "Harpoon analytics"),
            ]),
        ]),
        _folder("Awhalin", "Model staging", [
            _folder("Gam"),
            _folder("England_1", children=[_folder(crew) for crew in _CREW_NAMES]),
        ]),
    ]),
])

def _create(node, base):
    base.mkdir(exist_ok=True)
    for name, meta in node.get("children", {}).items():
        path = base / name
        if meta["type"] == "directory":
            path.mkdir(exist_ok=True)
            _create(meta, path)
        elif meta["type"] == "file":
            path.parent.mkdir(parents=True, exist_ok=True)
            path.touch(exist_ok=True)

# Write YAML and create folders.
# sort_keys=False keeps the declaration order of the dict in the manifest;
# the encoding is explicit so the file does not depend on the platform default
# and matches the other files this notebook writes.
MANIFEST.write_text(yaml.dump(structure, sort_keys=False), encoding="utf-8")
_create(structure["Leviathan"], ROOT)

print("✅ Manifest written and folders created under", ROOT)


✅ Manifest written and folders created under C:\Users\mikek\OneDrive\Leviathan


In [28]:
def _validate(node, base):
    ok = True
    for name, meta in node.get("children", {}).items():
        path = base / name
        if meta["type"] == "directory":
            if not path.is_dir():
                print(f"❌ missing folder {path.relative_to(ROOT)}")
                ok = False
            ok &= _validate(meta, path)
        elif meta["type"] == "file":
            if not path.is_file():
                print(f"❌ missing file {path.relative_to(ROOT)}")
                ok = False
    return ok

# Compare the on-disk tree with the declared structure and report the verdict.
passed = _validate(structure["Leviathan"], ROOT)
if passed:
    print("✅ structure matches manifest.")
else:
    print("⚠️  mismatch detected.")

✅ structure matches manifest.


In [29]:
import re

def _safe_id(name):
    return re.sub(r"[^a-zA-Z0-9_]", "_", name)

def _build_subtree(name, node):
    lines = []
    children = node.get("children", {})
    if children:
        lines.append(f"  subgraph {_safe_id(name)} [{name}]")
        for child_name, child_meta in children.items():
            child_id = _safe_id(child_name)
            lines.append(f"    {_safe_id(name)} --> {child_id}")
            lines.extend(_build_subtree(child_name, child_meta))
        lines.append("  end")
    return lines

# Reload the structure from the manifest so the diagram reflects what was
# actually written to disk rather than the in-memory dict.
structure = yaml.safe_load(MANIFEST.read_text(encoding="utf-8"))
root_children = structure["Leviathan"]["children"]

mermaid_lines = [
    "```mermaid",
    "flowchart TB",
    "  classDef default fill:#3BA17B,stroke:#3BA17B,color:gold;",
    "  Leviathan:::default",
]

# Top-level edges come from the manifest itself, in manifest order, so adding
# or removing a top-level folder in the structure is reflected in the diagram
# without touching this cell.
mermaid_lines.extend(f"  Leviathan --> {_safe_id(phase)}" for phase in root_children)

# Each top-level folder then contributes its own subgraph (if it has children).
for phase, phase_meta in root_children.items():
    mermaid_lines.extend(_build_subtree(phase, phase_meta))

mermaid_lines.append("```")

# Generate glossary table
rows = []


def _gather(path, node, prefix=""):
    """Append one glossary row per descendant of ``node`` to the module-level ``rows``.

    Each row is a ``(relative path, metaphor, role, notes)`` tuple; missing
    metadata keys become empty strings. ``prefix`` is the slash-separated path
    of ``node`` relative to the root entry. ``path`` is extended on every
    recursive call but is not otherwise read by this function.
    """
    for child_name, child_meta in node.get("children", {}).items():
        relative = f"{prefix}/{child_name}".strip("/")
        entry = (
            relative,
            child_meta.get("metaphor", ""),
            child_meta.get("role", ""),
            child_meta.get("notes", ""),
        )
        rows.append(entry)
        _gather(path / child_name, child_meta, relative)


_gather(ROOT, structure["Leviathan"])

# Markdown table: fixed header and separator, then one line per gathered row.
table_lines = [
    "| Path | Metaphor | Intended Role | Notes |",
    "|------|----------|---------------|-------|",
]
table_lines.extend(f"| {p}/ | {m} | {r} | {n} |" for p, m, r, n in rows)

DOCS_DIR = NOTEBOOK_DIR

# Glossary file: the table followed by the fenced Mermaid diagram.
glossary_text = "\n".join([*table_lines, "", "## Diagram", "", *mermaid_lines])
(DOCS_DIR / "directory_glossary.md").write_text(glossary_text, encoding="utf-8")

# Stand-alone diagram file: same diagram without the opening/closing fence lines.
diagram_text = "\n".join(mermaid_lines[1:-1])
(DOCS_DIR / "directory_diagram.mmd").write_text(diagram_text, encoding="utf-8")

print("✅ Simplified glossary and Mermaid diagram written.")


✅ Simplified glossary and Mermaid diagram written.
