In [None]:
# NOTE(review): `updated_topics` is not defined in any cell visible in this notebook;
# presumably it is left over from an earlier kernel session. Confirm before "Run All".
len(updated_topics)

In [None]:
import os, json, pathlib, re
from flask import Flask, jsonify, send_from_directory, request, abort
from markdown import markdown
from dotenv import load_dotenv

# Load .env settings first so the DATA_DIR lookup below can see them.
load_dotenv()

# Path(".").parent is still ".", so ROOT is simply the resolved working directory.
ROOT = pathlib.Path(".").resolve()

# DATA_DIR environment variable when set, otherwise <ROOT>/data.
DATA_DIR = pathlib.Path(os.environ.get("DATA_DIR", ROOT / "data")).resolve()

app = Flask(__name__, static_folder=str(ROOT.joinpath("static")))  # assets served at /static


# ---------- helpers ----------
def _load_json(path: pathlib.Path, default=None):
    try:
        with path.open("r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return default


def _resolve_outlines_dir():
    # 1) explicit env wins
    env_path = os.getenv("OUTLINES_DIR")
    if env_path:
        return pathlib.Path(env_path).resolve()
    # 2) data/outlines if present
    data_out = DATA_DIR / "outlines"
    if data_out.exists():
        return data_out.resolve()
    # 3) fallback project-root /outlines
    return (ROOT / "outlines").resolve()


OUTLINES_DIR = _resolve_outlines_dir()

# Snapshot of every dataset, read once when this cell runs.
# The second argument is the fallback used when the file is missing.
CACHE = dict(
    topics=_load_json(DATA_DIR.joinpath("topics.json"), []),
    theologians=_load_json(DATA_DIR.joinpath("theologians.json"), []),
    works=_load_json(DATA_DIR.joinpath("works.json"), []),
    by_topic=_load_json(DATA_DIR.joinpath("indices", "by_topic.json"), {}),
    by_theologian=_load_json(DATA_DIR.joinpath("indices", "by_theologian.json"), {}),
    by_work=_load_json(DATA_DIR.joinpath("indices", "by_work.json"), {}),
    search=_load_json(DATA_DIR.joinpath("indices", "search_index.json"), []),
    topic_mapping=_load_json(ROOT.joinpath("topic_mapping_updated.json"), {}),
)

In [None]:
# Seed an identity canon map: each work id maps to itself as its canonical id.
works = CACHE["works"]
work_canon_map = [
    {"work_id": entry["id"], "canonical_id": entry["id"]}
    for entry in works
]


# with open("data/work_canon_map.json", "w", encoding="utf-8") as f:
#     json.dump(work_canon_map, f, ensure_ascii=False, indent=2)

In [None]:
# Read the canon map back from disk, then show (total rows, distinct canonical ids).
work_canon_map = json.loads(
    pathlib.Path("data/work_canon_map.json").read_text(encoding="utf-8")
)

(
    len(work_canon_map),
    len(set(row["canonical_id"] for row in work_canon_map)),
)

In [None]:
# Short aliases for three of the cached datasets.
topic_mapping, by_work, works_json = (
    CACHE[key] for key in ("topic_mapping", "by_work", "works")
)

In [34]:
#!/usr/bin/env python3
import os, json, pathlib, datetime, shutil

# Load .env again, then recompute the project root and data directory for this cell.
load_dotenv()
ROOT = pathlib.Path(".").resolve()  # Path(".").parent is still ".", i.e. the working directory
DATA_DIR = pathlib.Path(os.environ.get("DATA_DIR", ROOT / "data")).resolve()

def _load_json(path: pathlib.Path, default=None):
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return default


def _read_jsonl(p):
    try:
        lines = p.read_text(encoding="utf-8").splitlines()
    except FileNotFoundError:
        return []
    out = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        try:
            out.append(json.loads(line))
        except Exception:
            pass
    return out


def _write_json(path: pathlib.Path, obj):
    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    if path.exists():
        backups = path.parent / "backups"
        backups.mkdir(parents=True, exist_ok=True)
        shutil.copy2(path, backups / f"{path.name}.bak-{ts}")
    path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")


def _write_jsonl(path: pathlib.Path, obj):
    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    if path.exists():
        backups = path.parent / "backups"
        backups.mkdir(parents=True, exist_ok=True)
        shutil.copy2(path, backups / f"{path.name}.bak-{ts}")
    path.write_text("\n".join(json.dumps(r, ensure_ascii=False) for r in obj) + "\n", encoding="utf-8")


# Shared in-memory store for the datasets; starts empty in this cell.
CACHE = dict()

# Top-level datasets under DATA_DIR.
WORK_FILE = DATA_DIR.joinpath("works.json")
WORK_CANON_MAP = DATA_DIR.joinpath("work_canon_map.json")
TOPIC_FILE = DATA_DIR.joinpath("topics.json")
THEO_FILE = DATA_DIR.joinpath("theologians.json")
AUTHORS_REGISTRY = DATA_DIR.joinpath("authors_registry.json")
OUTLINES_JSONL = DATA_DIR.joinpath("outlines.jsonl")

# Index files under DATA_DIR/indices.
TOPIC_WORK_EDGES = DATA_DIR / "indices" / "topic_work_edges.json"
SEARCH_INDEX = DATA_DIR / "indices" / "search_index.json"
BY_WORK = DATA_DIR / "indices" / "by_work.json"
BY_TOPIC = DATA_DIR / "indices" / "by_topic.json"
BY_TOPIC_KEYWORKS = DATA_DIR / "indices" / "by_topic_key_works.json"
BY_THEO = DATA_DIR / "indices" / "by_theologian.json"

# The topic mapping sits in the project root rather than in DATA_DIR.
TOPIC_MAPPING = ROOT.joinpath("topic_mapping_updated.json")

def _reload():
    """Refresh every entry of the module-level ``CACHE`` from the files on disk.

    Each dataset falls back to an empty list/dict when its file is missing
    (see the default passed to ``_load_json``). ``CACHE`` is updated in place.
    """
    CACHE["works"] = _load_json(WORK_FILE, [])
    # The canon map is stored under "work_canon_map", matching WORK_CANON_MAP and
    # the file name. The historical "works_canon_map" key stays as an alias so
    # code that reads the old spelling keeps working.
    CACHE["work_canon_map"] = _load_json(WORK_CANON_MAP, [])
    CACHE["works_canon_map"] = CACHE["work_canon_map"]
    CACHE["topics"] = _load_json(TOPIC_FILE, [])
    CACHE["theologians"] = _load_json(THEO_FILE, [])
    CACHE["authors_registry"] = _load_json(AUTHORS_REGISTRY, {})
    CACHE["outlines"] = _read_jsonl(OUTLINES_JSONL)
    CACHE["topic_work_edges"] = _load_json(TOPIC_WORK_EDGES, [])
    CACHE["by_work"] = _load_json(BY_WORK, {})
    CACHE["by_topic"] = _load_json(BY_TOPIC, {})
    CACHE["by_topic_key_works"] = _load_json(BY_TOPIC_KEYWORKS, {})
    CACHE["by_theologian"] = _load_json(BY_THEO, {})
    CACHE["search_index"] = _load_json(SEARCH_INDEX, [])
    CACHE["topic_mapping"] = _load_json(TOPIC_MAPPING, {})


def _slugify(s: str) -> str:
    s = unicodedata.normalize("NFKD", s or "")
    s = s.encode("ascii", "ignore").decode("ascii").lower()
    s = re.sub(r"[^a-z0-9]+", "-", s).strip("-")
    return re.sub(r"-{2,}", "-", s)

_reload()


# Module-level aliases for the freshly loaded datasets.
works = CACHE["works"]
# _reload() stores the canon map under "works_canon_map" (plural "works"). The
# previous lookup used "work_canon_map", a key that is never set, and had been
# commented out instead of corrected.
work_canon_map = CACHE["works_canon_map"]
topics = CACHE["topics"]
theologians = CACHE["theologians"]
authors_registry = CACHE["authors_registry"]
topic_work_edges = CACHE["topic_work_edges"]
outlines = CACHE["outlines"]
by_work = CACHE["by_work"]
by_topic = CACHE["by_topic"]
by_topic_key_works = CACHE["by_topic_key_works"]
by_theologian = CACHE["by_theologian"]
search_index = CACHE["search_index"]
topic_mapping = CACHE["topic_mapping"]

In [38]:
# Print id and name of every theologian whose name contains "Johnson".
for t in theologians:
    if "Johnson" not in t["name"]:
        continue
    print(t["id"], t["name"])

theo_421f6f1cf18d Adam C. Johnson
theo_3698ee84fe2f Adam J. Johnson
theo_15f0416eb31e Keith E. Johnson
theo_7fb613bf3367 Keith L. Johnson
theo_c015821b2f38 Marcus Peter Johnson


In [41]:
import json, pathlib, textwrap, itertools, collections
# Folder holding the JSON datasets, given relative to the current working directory.
DATA = pathlib.Path("data")  # adjust if different

def load_json_maybe_txt(path: pathlib.Path):
    """Load JSON from ``path`` or, failing that, from the same path with a ``.txt`` suffix.

    Args:
        path: preferred location of the JSON document.

    Returns:
        The decoded JSON of the first candidate that exists and parses, or
        ``None`` when no candidate exists or none of the existing ones parses.

    A diagnostic is printed for every candidate that exists but cannot be read.
    "Missing" is printed only when no candidate exists at all.
    """
    candidates = [path]
    txt_variant = path.with_suffix(".txt")
    if txt_variant != path:  # avoid trying the same file twice when path is already .txt
        candidates.append(txt_variant)

    found_any = False
    for p in candidates:
        if not p.exists():
            continue
        found_any = True
        try:
            return json.loads(p.read_text(encoding="utf-8"))
        except Exception as e:
            print(f"Failed to parse {p.name}: {e}")

    # Only claim the file is missing when that is true; a file that exists but
    # failed to parse has already been reported above.
    if not found_any:
        print(f"Missing: {path.name} (and {path.with_suffix('.txt').name})")
    return None

def head(x, n=3):
    """Preview helper: the first ``n`` items of a list or dict; any other value is returned unchanged."""
    if isinstance(x, dict):
        return {key: x[key] for key in itertools.islice(x, n)}
    return x[:n] if isinstance(x, list) else x


In [43]:
# Load each dataset from its primary path. When that lookup is falsy, retry with
# a suffix-less path directly under DATA.
topics = (
    load_json_maybe_txt(DATA / "topics.json")
    or load_json_maybe_txt(DATA / "topics")
)
theologians = (
    load_json_maybe_txt(DATA / "theologians.json")
    or load_json_maybe_txt(DATA / "theologians")
)
works = (
    load_json_maybe_txt(DATA / "works.json")
    or load_json_maybe_txt(DATA / "works")
)
by_topic = (
    load_json_maybe_txt(DATA / "indices" / "by_topic.json")
    or load_json_maybe_txt(DATA / "by_topic")
)
by_theologian = (
    load_json_maybe_txt(DATA / "indices" / "by_theologian.json")
    or load_json_maybe_txt(DATA / "by_theologian")
)
by_work = (
    load_json_maybe_txt(DATA / "indices" / "by_work.json")
    or load_json_maybe_txt(DATA / "by_work")
)
search_index = (
    load_json_maybe_txt(DATA / "indices" / "search_index.json")
    or load_json_maybe_txt(DATA / "search_index")
)
canon_map_raw = (
    load_json_maybe_txt(DATA / "work_canon_map.json")
    or load_json_maybe_txt(DATA / "work_canon_map")
)

# (type name, item count) per dataset; a dataset that failed to load counts as 0.
summary = {
    label: (type(loaded).__name__, len(loaded) if loaded is not None else 0)
    for label, loaded in (
        ("topics", topics),
        ("theologians", theologians),
        ("works", works),
        ("by_topic", by_topic),
        ("by_theologian", by_theologian),
        ("by_work", by_work),
        ("search_index", search_index),
        ("canon_map_raw", canon_map_raw),
    )
}

summary


{'topics': ('list', 109),
 'theologians': ('list', 514),
 'works': ('list', 8225),
 'by_topic': ('dict', 109),
 'by_theologian': ('dict', 249),
 'by_work': ('dict', 8225),
 'search_index': ('list', 8982),
 'canon_map_raw': ('list', 8225)}

In [54]:
def check_required_keys():
    """Validate the shape of the loaded datasets and return a list of problems.

    Reads the module-level ``topics``, ``theologians``, ``works`` and
    ``by_theologian``. Each of the three lists must be a ``list`` of ``dict``
    records carrying its required keys. For ``by_theologian`` only the first
    three entries are spot-checked.

    Returns:
        A list of human-readable problem strings, or ``["OK"]`` when none were found.
    """
    problems = []

    def _check_records(label, records, required):
        # One shared implementation for the three list datasets.
        if not isinstance(records, list) or not all(isinstance(r, dict) for r in records):
            problems.append(f"{label} should be a list[dict].")
            return
        missing = [r.get("id") for r in records if not required.issubset(r)]
        if missing:
            problems.append(f"{label} missing required keys for ids: {missing[:5]}...")

    _check_records("topics", topics, {"id", "slug", "title"})
    _check_records("theologians", theologians, {"id", "slug", "full_name"})
    _check_records("works", works, {"id", "title"})

    if not isinstance(by_theologian, dict):
        problems.append("by_theologian should be a dict[theologian_id → {...}].")
    else:
        # spot-check outlines shape
        for tid, blob in list(by_theologian.items())[:3]:
            if not isinstance(blob, dict):
                # Report a malformed entry instead of crashing on blob.get(...).
                problems.append(f"by_theologian[{tid}] should be a dict, got {type(blob).__name__}.")
                continue
            if blob.get("outlines_by_topic_category") is None:
                problems.append(f"by_theologian[{tid}] missing 'outlines_by_topic_category'.")

    return problems or ["OK"]

check_required_keys()


['OK']

In [55]:
def theologians_with_outline_counts():
    """Return ``(theologian_id, outline_count)`` pairs, largest count first.

    The count is the total length of all lists under the entry's
    ``outlines_by_topic_category``. Returns ``[]`` when the module-level
    ``by_theologian`` is not a dict.
    """
    if not isinstance(by_theologian, dict):
        return []

    def _total(blob):
        groups = (blob or {}).get("outlines_by_topic_category") or {}
        return sum(len(group or []) for group in groups.values())

    pairs = [(tid, _total(blob)) for tid, blob in by_theologian.items()]
    return sorted(pairs, key=lambda pair: -pair[1])

outline_counts = theologians_with_outline_counts()
outline_counts[:10]


[('theo_09edae13e8c2', 109),
 ('theo_aff4ec250119', 109),
 ('theo_124906e9ce1f', 109),
 ('theo_f0740853d304', 109),
 ('theo_3fc45f8c5a0a', 109),
 ('theo_ea6658b8fe68', 109),
 ('theo_1c0566e3d363', 109),
 ('theo_888da193d4d0', 109),
 ('theo_2a4f308c7e0f', 109),
 ('theo_0967b706d70a', 109)]

In [53]:
# Build the work_id → canonical_id lookup that the following cells read as `canon_map`.
# This cell used to only compare that dict against a `canon_map` which no visible cell
# defines, so on a fresh kernel it (and every later cell) failed with NameError.
canon_map = {row["work_id"]: row["canonical_id"] for row in canon_map_raw}

# Sanity check: True when no work_id occurs more than once in the raw list.
len(canon_map) == len(canon_map_raw)

True

In [56]:
# Maps for lookup
# id → record lookups; entries that are not dicts are left out.
WORK_MAP = dict((rec.get("id"), rec) for rec in (works or []) if isinstance(rec, dict))
THEO_MAP = dict((rec.get("id"), rec) for rec in (theologians or []) if isinstance(rec, dict))
TOPIC_MAP = dict((rec.get("id"), rec) for rec in (topics or []) if isinstance(rec, dict))

from collections import Counter, defaultdict

def canon_counts_by_theologian(by_work_index, canon_map):
    """Count canonical works per primary-author theologian.

    Args:
        by_work_index: mapping work id → record; the record's
            ``primary_author_theologian_id`` names the author. Records without one are skipped.
        canon_map: mapping work id → canonical id; ids absent from it stand for themselves.

    Returns:
        dict of theologian id (as ``str``) → list of ``(canonical_id, count)`` pairs,
        ordered by descending count and then by the title found in ``WORK_MAP``
        (the canonical id itself when no title is available).
    """
    def _title_of(cid):
        return (WORK_MAP.get(cid) or {}).get("title", cid)

    tallies = defaultdict(Counter)
    for work_id, record in (by_work_index or {}).items():
        author_id = (record or {}).get("primary_author_theologian_id")
        if author_id:
            tallies[str(author_id)][canon_map.get(work_id, work_id)] += 1

    return {
        author_id: sorted(tally.items(), key=lambda kv: (-kv[1], _title_of(kv[0])))
        for author_id, tally in tallies.items()
    }

def canon_counts_by_topic(topics_list, canon_map):
    """Count canonical key works per topic, separately for the two key-work lists.

    Args:
        topics_list: iterable of topic records; each may carry ``key_works`` with
            ``wts_old_princeton`` and ``recent`` lists of work ids.
        canon_map: mapping work id → canonical id; ids absent from it stand for themselves.

    Returns:
        dict of topic id → ``{"WTS": [...], "Recent": [...]}``, each a list of
        ``(canonical_id, count)`` pairs ordered by descending count and then by the
        title found in ``WORK_MAP`` (the canonical id itself when no title is available).
    """
    def _ranked(work_ids):
        tally = Counter(canon_map.get(wid, wid) for wid in (work_ids or []))
        return sorted(
            tally.items(),
            key=lambda kv: (-kv[1], (WORK_MAP.get(kv[0]) or {}).get("title", kv[0])),
        )

    result = {}
    for topic in topics_list or []:
        key_works = topic.get("key_works") or {}
        result[topic["id"]] = {
            "WTS": _ranked(key_works.get("wts_old_princeton")),
            "Recent": _ranked(key_works.get("recent")),
        }
    return result

# Tally canonical works per theologian (from by_work) and per topic (from topics' key_works).
theo_counts = canon_counts_by_theologian(by_work, canon_map)
topic_counts = canon_counts_by_topic(topics, canon_map)

# Cell result: both sizes plus the first entry of each mapping as a preview.
len(theo_counts), len(topic_counts), head(theo_counts, 1), head(topic_counts, 1)


(514,
 109,
 {'theo_38bdde67d604': [('work_e23a86f8e59e', 7),
   ('work_00a7ffbf4f1f', 7),
   ('work_164359e1d5eb', 7),
   ('work_6e3ad522c1d9', 6),
   ('work_4f5d932ee86d', 6),
   ('work_7891d069bc45', 3),
   ('work_b14d7e28c087', 3),
   ('work_3b9bf5607d5b', 2),
   ('work_0df6a3008d62', 2),
   ('work_43953e5ff021', 2),
   ('work_c5945fb05040', 1),
   ('work_f73153fa5c01', 1),
   ('work_2bd9af54437f', 1),
   ('work_df16c5ba1c1e', 1),
   ('work_74239522b287', 1),
   ('work_5e45c2279463', 1),
   ('work_d164ab57d411', 1),
   ('work_df0b2de70103', 1),
   ('work_bcefacb5aa84', 1),
   ('work_ca9641222d99', 1),
   ('work_81541116dcf0', 1),
   ('work_b14a49c7a811', 1),
   ('work_487d92ef2be1', 1),
   ('work_2dfd45bba5b6', 1)]},
 {'top_73dd29f4e3dd': {'WTS': [('work_163d78b8375b', 1),
    ('work_db4c1732a6ae', 1),
    ('work_ee43657ddf33', 1),
    ('work_d10c4faf2802', 1),
    ('work_11c15342b809', 1)],
   'Recent': [('work_08823315cc21', 1),
    ('work_851dd8bc2ea0', 1),
    ('work_ade473659f

In [57]:
problems = []

# Theologian ids that by_work names as primary author but the theologians list lacks.
theo_ids_in_by_work = set(
    (entry or {}).get("primary_author_theologian_id")
    for entry in (by_work or {}).values()
)
theo_ids_in_by_work.discard(None)
missing_theos = sorted(theo_ids_in_by_work.difference(THEO_MAP))
if missing_theos:
    problems.append(f"Theologian IDs referenced in by_work but not found in theologians: {missing_theos[:10]}...")

# Canonical ids that have no matching record in works.
canon_ids = set(canon_map.values())
missing_canon_works = sorted(canon_ids.difference(WORK_MAP))
if missing_canon_works:
    problems.append(f"Canonical work IDs missing in works: {missing_canon_works[:10]}...")

# Are topic key_works referencing unknown IDs?
def unknowns_from_topic_keyworks():
    """Return the sorted key-work ids found in neither ``WORK_MAP`` nor, via ``canon_map``, under their canonical id."""
    unknown = set()
    for topic in topics or []:
        key_works = topic.get("key_works") or {}
        for bucket in ("wts_old_princeton", "recent"):
            unknown.update(
                wid
                for wid in (key_works.get(bucket) or [])
                if wid not in WORK_MAP and canon_map.get(wid, wid) not in WORK_MAP
            )
    return sorted(unknown)

unknown_topic_wids = unknowns_from_topic_keyworks()
if len(unknown_topic_wids) > 0:
    problems.append(f"Topic key_works include unknown work ids: {unknown_topic_wids[:10]}...")

# Cell result: the collected problems, or a single all-clear message.
problems if problems else ["No linkage problems detected"]


['No linkage problems detected']

In [58]:
# Topics: title plus the size of each key-works list.
print("TOPICS (first 5):")
for t in (topics or [])[:5]:
    title = t.get("title")
    key_works = t.get("key_works") or {}
    wts = len(key_works.get("wts_old_princeton") or [])
    rec = len(key_works.get("recent") or [])
    print(f" - {title}  | WTS={wts}  Recent={rec}")

# Theologians: full name plus total outlines across all topic categories.
print("\nTHEOLOGIANS (first 5):")
for th in (theologians or [])[:5]:
    entry = (by_theologian or {}).get(th.get("id")) or {}
    ob = entry.get("outlines_by_topic_category") or {}
    outlines_total = sum(len(v or []) for v in ob.values())
    print(f" - {th.get('full_name')}  | outlines={outlines_total}")

# Works: title plus its canonical id, marked when it differs from the work's own id.
print("\nWORKS (first 5):")
for w in (works or [])[:5]:
    cid = canon_map.get(w.get("id"), w.get("id"))
    alias = "" if cid == w.get("id") else "(alias)"
    print(f" - {w.get('title')}  [{w.get('id')} → {cid}] {alias}")


TOPICS (first 5):
 - Nature and task of theology  | WTS=5  Recent=15
 - General and special revelation  | WTS=5  Recent=15
 - Self-attestation and authority of Scripture  | WTS=5  Recent=15
 - Inspiration and inerrancy  | WTS=5  Recent=15
 - Accommodation and incarnational analogy  | WTS=5  Recent=15

THEOLOGIANS (first 5):
 - A. Andrew Das  | outlines=0
 - A. Edward Siecienski  | outlines=0
 - A.A. Hodge  | outlines=1
 - A.T.B. McGowan  | outlines=0
 - Abraham Kuyper  | outlines=109

WORKS (first 5):
 - "1–2 Thessalonians" (IVP New Testament Commentary, 2003)  [work_e0fde1126f74 → work_0df6a3008d62] (alias)
 - "A Brief Declaration and Vindication of the Doctrine of the Trinity"  [work_cfa06cf99d84 → work_2aafa13e949a] (alias)
 - "A Brief Declaration and Vindication of the Doctrine of the Trinity" (for anthropological context)  [work_9a8bad95ad9e → work_2aafa13e949a] (alias)
 - "A Brief Instruction in the Worship of God" (Works, vol. 15)  [work_97263a885a5f → work_beb8536a9f2a] (alias)

In [59]:
# For each dataset, record whether it has the expected container type.
spa_datasets_shape = {
    label: isinstance(dataset, expected_type)
    for label, dataset, expected_type in (
        ("topics", topics, list),
        ("theologians", theologians, list),
        ("works", works, list),
        ("byTopic", by_topic, dict),
        ("byTheo", by_theologian, dict),
        ("byWork", by_work, dict),
        ("workCanonMap_or_canonMap", canon_map, dict),
    )
}

spa_datasets_shape


{'topics': True,
 'theologians': True,
 'works': True,
 'byTopic': True,
 'byTheo': True,
 'byWork': True,
 'workCanonMap_or_canonMap': True}

In [63]:
# Number of distinct values in canon_map, i.e. how many different canonical ids the works map onto.
len(set(canon_map.values()))

3447

In [64]:
import json, pathlib, re

def slugify(name: str) -> str:
    """Lowercase ASCII slug: non-alphanumeric runs become single hyphens, edges are trimmed.

    Accented letters are folded to their base letter via NFKD normalisation
    before the ASCII filter, so "Jürgen" becomes "jurgen" rather than "j-rgen".
    This matches the ``_slugify`` helper defined earlier in the notebook.
    ``None`` or ``""`` gives ``""``.

    Args:
        name: display name to convert.

    Returns:
        The slug, with no leading, trailing or repeated hyphens.
    """
    import unicodedata  # local import: not part of this cell's import line

    folded = unicodedata.normalize("NFKD", name or "")
    folded = folded.encode("ascii", "ignore").decode("ascii").lower()
    # `+` already collapses each separator run into one hyphen.
    return re.sub(r'[^a-z0-9]+', '-', folded).strip('-')

# load theologians
p = pathlib.Path("data/theologians.json")
theologians = json.loads(p.read_text(encoding="utf-8"))

# regenerate slugs from full_name, falling back to name, then to ""
for t in theologians:
    full = t.get("full_name") or t.get("name") or ""
    t["slug"] = slugify(full)

# overwrite file in place (no backup is taken here)
p.write_text(json.dumps(theologians, indent=2, ensure_ascii=False), encoding="utf-8")

print("Updated slugs for", len(theologians), "theologians")
# Use the same name fallback as the loop and tolerate an empty list, so the
# report cannot fail with KeyError/IndexError after the file has been rewritten.
if theologians:
    first = theologians[0]
    print("Example:", first.get("full_name") or first.get("name") or "", "→", first["slug"])


Updated slugs for 514 theologians
Example: A. Andrew Das → a-andrew-das
