In [None]:
#!/usr/bin/env python3
import os, json, pathlib, datetime, shutil
from flask import Flask, jsonify, request, send_from_directory
from dotenv import load_dotenv

load_dotenv()
ROOT = pathlib.Path(".").parent.resolve()
DATA_DIR = pathlib.Path(os.getenv("DATA_DIR", ROOT / "../data")).resolve()

THEO_FILE = DATA_DIR / "theologians.json"
WORKS_FILE = DATA_DIR / "works.json"
MAP_FILE = DATA_DIR / "work_canon_map.json"
AUTHORS_REGISTRY = DATA_DIR / "authors_registry.json"
OUTLINES_JSONL = DATA_DIR / "outlines.jsonl"
BY_WORK = DATA_DIR / "indices/by_work.json"
BY_TOPIC = DATA_DIR / "indices/by_topic.json"
BY_THEO = DATA_DIR / "indices/by_theologian.json"

app = Flask(__name__, static_folder=str(ROOT / "static"))


def _load_json(path: pathlib.Path, default=None):
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return default


def _write_json(path: pathlib.Path, obj):
    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    if path.exists():
        backups = path.parent / "backups"
        backups.mkdir(parents=True, exist_ok=True)
        shutil.copy2(path, backups / f"{path.name}.bak-{ts}")
    path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")


CACHE = {}


def _reload():
    CACHE["theologians"] = _load_json(THEO_FILE, []) or []
    CACHE["works"] = _load_json(WORKS_FILE, []) or []
    CACHE["canon_map"] = _load_map(CACHE["works"])


def _load_map(all_works):
    rows = _load_json(MAP_FILE, []) or []
    m = {}
    for r in rows:
        wid = r.get("work_id");
        cid = r.get("canonical_id")
        if wid and cid:
            m[wid] = cid
    # ensure all work ids have a mapping (identity)
    for w in (all_works or []):
        wid = w.get("id")
        if wid:
            m.setdefault(wid, wid)
    # compress
    for wid in list(m.keys()):
        m[wid] = _root(m, wid)
    return m


def _save_map(m):
    rows = [{"work_id": wid, "canonical_id": cid} for wid, cid in sorted(m.items())]
    _write_json(MAP_FILE, rows)

In [1]:
#!/usr/bin/env python3
import os, json, pathlib, datetime, shutil
from flask import Flask, jsonify, request, send_from_directory
from dotenv import load_dotenv

load_dotenv()
ROOT = pathlib.Path(".").parent.resolve()
DATA_DIR = pathlib.Path(os.getenv("DATA_DIR", ROOT / "../data")).resolve()

THEO_FILE = DATA_DIR / "theologians.json"
WORKS_FILE = DATA_DIR / "works.json"
MAP_FILE = DATA_DIR / "work_canon_map.json"

app = Flask(__name__, static_folder=str(ROOT / "static"))


def _load_json(path: pathlib.Path, default=None):
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return default


def _write_json(path: pathlib.Path, obj):
    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    if path.exists():
        backups = path.parent / "backups"
        backups.mkdir(parents=True, exist_ok=True)
        shutil.copy2(path, backups / f"{path.name}.bak-{ts}")
    path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")


CACHE = {}


def _reload():
    CACHE["theologians"] = _load_json(THEO_FILE, []) or []
    CACHE["works"] = _load_json(WORKS_FILE, []) or []
    CACHE["canon_map"] = _load_map(CACHE["works"])


def _load_map(all_works):
    rows = _load_json(MAP_FILE, []) or []
    m = {}
    for r in rows:
        wid = r.get("work_id");
        cid = r.get("canonical_id")
        if wid and cid:
            m[wid] = cid
    # ensure all work ids have a mapping (identity)
    for w in (all_works or []):
        wid = w.get("id")
        if wid:
            m.setdefault(wid, wid)
    # compress
    for wid in list(m.keys()):
        m[wid] = _root(m, wid)
    return m


def _save_map(m):
    rows = [{"work_id": wid, "canonical_id": cid} for wid, cid in sorted(m.items())]
    _write_json(MAP_FILE, rows)


def _root(m, wid):
    seen = set()
    cur = wid
    while True:
        nxt = m.get(cur, cur)
        if nxt == cur or nxt in seen:
            break
        seen.add(cur);
        cur = nxt
    for s in seen:
        m[s] = cur
    return cur


def _repoint_all_to(m, old_ids, new_id):
    old = set(old_ids)
    for wid, cid in list(m.items()):
        if cid in old:
            m[wid] = new_id
    for wid in list(m.keys()):
        m[wid] = _root(m, wid)


@app.get("/")
def index():
    return send_from_directory(app.static_folder, "index.html")


@app.get("/static/<path:p>")
def static_files(p):
    return send_from_directory(app.static_folder, p)


@app.get("/api/authors")
def authors():
    _reload()
    theos = CACHE["theologians"]
    out = [{"id": t.get("id"),
            "full_name": t.get("full_name") or t.get("name"),
            "dates": t.get("dates"),
            "key_work_ids": t.get("key_work_ids") or []}
           for t in theos]
    out.sort(key=lambda x: (x["full_name"] or ""))
    return jsonify(out)


@app.get("/api/works")
def works_by_author():
    _reload()
    author_id = request.args.get("author_id")
    if not author_id:
        return jsonify({"error": "author_id required"}), 400
    t = next((x for x in CACHE["theologians"] if x.get("id") == author_id), None)
    if not t:
        return jsonify([])

    want = set((t.get("key_work_ids") or []))
    out = []
    for w in CACHE["works"]:
        wid = w.get("id")
        if wid in want:
            out.append({"id": wid, "title": w.get("title")})
    out.sort(key=lambda x: (x.get("title") or "", x.get("id") or ""))
    return jsonify(out)


@app.get("/api/map")
def api_get_map():
    _reload()
    rows = [{"work_id": wid, "canonical_id": cid} for wid, cid in sorted(CACHE["canon_map"].items())]
    return jsonify(rows)


@app.post("/api/map/merge")
def api_merge():
    _reload()
    data = request.get_json(force=True, silent=True) or {}
    canonical_id = data.get("canonical_id")
    merge_ids = [x for x in (data.get("merge_ids") or []) if x and x != canonical_id]
    if not canonical_id or not merge_ids:
        return jsonify({"error": "canonical_id and merge_ids required"}), 400

    m = dict(CACHE["canon_map"])
    for wid in set([canonical_id] + merge_ids):
        m.setdefault(wid, wid)
    for mid in merge_ids:
        m[mid] = canonical_id
    _repoint_all_to(m, merge_ids, canonical_id)
    for wid in list(m.keys()):
        m[wid] = _root(m, wid)
    _save_map(m)
    CACHE["canon_map"] = m
    return jsonify({"ok": True, "canonical_id": canonical_id, "merged": merge_ids})



In [9]:
theologians = CACHE["theologians"] = _load_json(THEO_FILE, []) or []
works = CACHE["works"] = _load_json(WORKS_FILE, []) or []

In [4]:
len(theologians)

539

In [6]:
theo_names = [theo["name"] for theo in theologians]
unique_theo_names = set(theo_names)

In [12]:
theologians

[{'id': 'theo_b9515ffe8f41',
  'slug': 'anselm-of-canterbury',
  'full_name': 'Anselm of Canterbury',
  'name': 'Anselm of Canterbury',
  'dates': '1033–1109',
  'era_category': [],
  'traditions': [],
  'bio': '',
  'timeline': [],
  'key_work_ids': [],
  'wts_relevance': True,
  'created_at': '2025-08-16',
  'updated_at': '2025-08-16'},
 {'id': 'theo_d4aee8289faa',
  'slug': 'peter-abelard',
  'full_name': 'Peter Abelard',
  'name': 'Peter Abelard',
  'dates': '1079–1142',
  'era_category': [],
  'traditions': [],
  'bio': '',
  'timeline': [],
  'key_work_ids': [],
  'wts_relevance': True,
  'created_at': '2025-08-16',
  'updated_at': '2025-08-16'},
 {'id': 'theo_f0740853d304',
  'slug': 'thomas-aquinas',
  'full_name': 'Thomas Aquinas',
  'name': 'Thomas Aquinas',
  'dates': '1225–1274',
  'era_category': [],
  'traditions': [],
  'bio': '',
  'timeline': [],
  'key_work_ids': [],
  'wts_relevance': True,
  'created_at': '2025-08-16',
  'updated_at': '2025-08-16'},
 {'id': 'theo_49

In [31]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Build an index of work -> theologians (authors) from your datasets.

Inputs (from DATA_DIR or ../theo-site/data by default):
  - theologians.json
  - works.json
  - outlines.jsonl (optional; improves inference)

Output:
  - data/work_authors_index.json  (list of rows)
"""

import os, json, pathlib, collections

ROOT = pathlib.Path(".").parent.resolve()
DATA_DIR = pathlib.Path(os.getenv("DATA_DIR", ROOT / "../data")).resolve()

THEO_FILE = DATA_DIR / "theologians.json"
WORKS_FILE = DATA_DIR / "works.json"
OUTLINES_L = DATA_DIR / "outlines.jsonl"
OUT_FILE = DATA_DIR / "work_authors_index.json"
canonical_map_file = DATA_DIR / "work_canon_map.json"
BY_WORK_FILE = DATA_DIR / "indices/by_work.json"
BY_THEOLOGIAN_FILE = DATA_DIR / "indices/by_theologian.json"


def read_json(p, default=None):
    try:
        return json.loads(p.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return default


def read_jsonl(p):
    try:
        lines = p.read_text(encoding="utf-8").splitlines()
    except FileNotFoundError:
        return []
    out = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        try:
            out.append(json.loads(line))
        except Exception:
            pass
    return out


def to_id_list(authors):
    """Normalize a 'authors' field (list of ids or objects) -> list of ids"""
    out = []
    for a in authors or []:
        if isinstance(a, str):
            out.append(a)
        elif isinstance(a, dict):
            # prefer an explicit id if present
            if a.get("id"):
                out.append(a["id"])
            # sometimes only slug/full_name present; skip here (resolved later by name match)
    return [x for x in out if x]


def build_theo_name_maps(theologians):
    by_id = {t.get("id"): t for t in theologians if t.get("id")}
    # name/slug -> id for fallback matching if needed
    name_to_id = {}
    for t in theologians:
        tid = t.get("id")
        if not tid: continue
        for k in ("full_name", "name", "slug"):
            v = (t.get(k) or "").strip()
            if v:
                name_to_id.setdefault(v, tid)
    return by_id, name_to_id


def resolve_from_outlines(outlines):
    """Return: dict[work_id] -> Counter(theologian_id) from outline citations."""
    ctr = collections.defaultdict(collections.Counter)
    for o in outlines or []:
        tid = o.get("theologian_id")
        if not tid: continue
        for wid in (o.get("key_work_ids") or []):
            if wid:
                ctr[wid][tid] += 1
    return ctr


def main():
    theologians = read_json(THEO_FILE, []) or []
    works = read_json(WORKS_FILE, []) or []
    outlines = read_jsonl(OUTLINES_L) or []

    theo_by_id, name_to_id = build_theo_name_maps(theologians)
    outline_counts = resolve_from_outlines(outlines)  # work_id -> Counter(tid)

    rows = []
    for w in works:
        wid = w.get("id")
        title = w.get("title")
        if not wid:
            continue

        # 1) primary id
        primary_id = w.get("primary_author_theologian_id")

        # 2) authors[] → ids
        explicit_ids = to_id_list(w.get("authors"))

        # If authors[] contains only inline objects with names, try to map by name
        if not explicit_ids:
            inline_objs = [a for a in (w.get("authors") or []) if isinstance(a, dict)]
            for a in inline_objs:
                name = (a.get("full_name") or a.get("name") or "").strip()
                slug = (a.get("slug") or "").strip()
                guess = name_to_id.get(name) or name_to_id.get(slug)
                if guess:
                    explicit_ids.append(guess)

        # 3) inference from outlines (only if nothing above)
        inferred = False
        inferred_ids = []
        if not primary_id and not explicit_ids:
            counts = outline_counts.get(wid, collections.Counter())
            if counts:
                # choose the most frequent theologian; require >1 citation to reduce noise
                tid, n = counts.most_common(1)[0]
                if n >= 2:
                    primary_id = tid
                    inferred = True
                else:
                    # still keep all seen ids as secondary authors (weak signal)
                    inferred_ids = [t for t, _ in counts.most_common()]

        # Build final id list
        author_ids = []
        if primary_id: author_ids.append(primary_id)
        author_ids.extend(explicit_ids)
        author_ids.extend(inferred_ids)
        # de-dupe while preserving order
        seen = set()
        author_ids = [x for x in author_ids if not (x in seen or seen.add(x))]

        # Resolve names
        author_names = []
        for tid in author_ids:
            t = theo_by_id.get(tid)
            if t and (t.get("full_name") or t.get("name")):
                author_names.append(t.get("full_name") or t.get("name"))

        rows.append({
            "work_id": wid,
            "title": title,
            "primary_author_id": primary_id,
            "author_ids": author_ids,
            "author_names": author_names,
            "inferred_from_outlines": inferred
        })

    # Sort for stable diffs: by first author name then title/id
    rows.sort(key=lambda r: ((r["author_names"][0] if r["author_names"] else "~"), r.get("title") or "", r["work_id"]))

    OUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    OUT_FILE.write_text(json.dumps(rows, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"Wrote {len(rows)} rows → {OUT_FILE}")

# if __name__ == "__main__":
#     main()


In [32]:
theologians = read_json(THEO_FILE, []) or []
works = read_json(WORKS_FILE, []) or []
outlines = read_jsonl(OUTLINES_L) or []
by_work = read_json(BY_WORK_FILE, []) or []
by_theologian = read_json(BY_THEOLOGIAN_FILE, []) or []

theo_by_id, name_to_id = build_theo_name_maps(theologians)
outline_counts = resolve_from_outlines(outlines)  # work_id -> Counter(tid)

In [28]:
work_ids_w = [w["id"] for w in works if w.get("id")]

In [25]:
work_ids_i = list(by_work.keys())

In [29]:
set(work_ids_i) == set(work_ids_w)

True

In [30]:
by_work

{'work_e0fde1126f74': {'referenced_in': [{'topic_id': 'top_bcdbe3b27a7f',
    'theologian_id': 'theo_38bdde67d604',
    'outline_id': 'out_88289cdc8036',
    'markdown_path': 'outlines/eschatology/antichrist-and-man-of-lawlessness/gregory-k.-beale.md'}],
  'title': '"1–2 Thessalonians" (IVP New Testament Commentary, 2003)',
  'primary_author_theologian_id': None,
  'is_topic_keywork': False},
 'work_cfa06cf99d84': {'referenced_in': [{'topic_id': 'top_a76905abad5a',
    'theologian_id': 'theo_01884231e7c8',
    'outline_id': 'out_fb514773a7c0',
    'markdown_path': 'outlines/soteriology/adoption/john-owen.md'}],
  'title': '"A Brief Declaration and Vindication of the Doctrine of the Trinity"',
  'primary_author_theologian_id': None,
  'is_topic_keywork': False},
 'work_9a8bad95ad9e': {'referenced_in': [{'topic_id': 'top_0e8b884aa7cb',
    'theologian_id': 'theo_01884231e7c8',
    'outline_id': 'out_d43aba4a1c96',
    'markdown_path': 'outlines/doctrine-of-sin/nature-and-origin-of-sin/jo

In [37]:
by_theologian[list(by_theologian.keys())[5]].keys()

dict_keys(['outlines_by_topic_category'])

In [43]:
work_keys = list(by_work.keys())

In [42]:
for work_id, work in by_work.items():
    print(work["referenced_in"])
    break

TypeError: 'dict_items' object is not subscriptable

In [44]:
by_work[work_keys[-1]]

{'title': 'Defending Constantine',
 'primary_author_theologian_id': 'theo_db4c242e157e',
 'is_topic_keywork': True,
 'authors': [{'id': 'theo_db4c242e157e',
   'slug': 'peter-j.-leithart',
   'full_name': 'Peter J. Leithart'}]}

In [45]:
by_work[work_keys[0]]


{'referenced_in': [{'topic_id': 'top_bcdbe3b27a7f',
   'theologian_id': 'theo_38bdde67d604',
   'outline_id': 'out_88289cdc8036',
   'markdown_path': 'outlines/eschatology/antichrist-and-man-of-lawlessness/gregory-k.-beale.md'}],
 'title': '"1–2 Thessalonians" (IVP New Testament Commentary, 2003)',
 'primary_author_theologian_id': None,
 'is_topic_keywork': False}

In [47]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Unify by_work.json entries:
- Ensure consistent keys for topic and outline works
- Fill primary_author_theologian_id for outline works if unique
- Always include authors[], even if empty
- Always include is_topic_keywork (bool)
"""

import json, pathlib

DATA_DIR = pathlib.Path("../data")
BY_WORK_JSON = DATA_DIR / "indices/by_work.json"

by_work = json.loads(BY_WORK_JSON.read_text(encoding="utf-8"))
changed = 0

for wid, node in by_work.items():
    # Ensure title
    node.setdefault("title", "")

    # Normalize is_topic_keywork
    node["is_topic_keywork"] = bool(node.get("is_topic_keywork"))

    # Normalize authors list
    authors = node.get("authors")
    if not isinstance(authors, list):
        node["authors"] = []

    # If primary_author_theologian_id is missing, try to infer from referenced_in
    if not node.get("primary_author_theologian_id"):
        refs = node.get("referenced_in") or []
        tids = {r.get("theologian_id") for r in refs if r.get("theologian_id")}
        if len(tids) == 1:
            node["primary_author_theologian_id"] = list(tids)[0]
            # also add to authors[] if empty
            if not node["authors"]:
                node["authors"] = [{"id": node["primary_author_theologian_id"]}]
            changed += 1

BY_WORK_JSON.write_text(json.dumps(by_work, ensure_ascii=False, indent=2), encoding="utf-8")
print(f"Normalized {len(by_work)} works, filled {changed} primary_author ids")

Normalized 8225 works, filled 7010 primary_author ids


In [385]:
#!/usr/bin/env python3
import os, json, pathlib, datetime, shutil
from flask import Flask, jsonify, request, send_from_directory
from dotenv import load_dotenv

load_dotenv()
ROOT = pathlib.Path(".").parent.resolve()
DATA_DIR = pathlib.Path(os.getenv("DATA_DIR", ROOT / "../data")).resolve()

THEO_FILE = DATA_DIR / "theologians.json"
WORK_FILE = DATA_DIR / "works.json"
AUTHORS_REGISTRY = DATA_DIR / "authors_registry.json"
OUTLINES_JSONL = DATA_DIR / "outlines.jsonl"
BY_WORK = DATA_DIR / "indices/by_work.json"
BY_TOPIC = DATA_DIR / "indices/by_topic.json"
BY_THEO = DATA_DIR / "indices/by_theologian.json"
SEARCH_INDEX = DATA_DIR / "indices/search_index.json"

app = Flask(__name__, static_folder=str(ROOT / "static"))


def _load_json(path: pathlib.Path, default=None):
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return default


def _read_jsonl(p):
    try:
        lines = p.read_text(encoding="utf-8").splitlines()
    except FileNotFoundError:
        return []
    out = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        try:
            out.append(json.loads(line))
        except Exception:
            pass
    return out


def _write_json(path: pathlib.Path, obj):
    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    if path.exists():
        backups = path.parent / "backups"
        backups.mkdir(parents=True, exist_ok=True)
        shutil.copy2(path, backups / f"{path.name}.bak-{ts}")
    path.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")


CACHE = {}


def _reload():
    CACHE["theologians"] = _load_json(THEO_FILE, [])
    CACHE["works"] = _load_json(WORKS_FILE, [])
    CACHE["outlines"] = _read_jsonl(OUTLINES_JSONL)
    CACHE["authors_registry"] = _load_json(AUTHORS_REGISTRY, {})
    CACHE["by_work"] = _load_json(BY_WORK_JSON, {})
    CACHE["by_topic"] = _load_json(BY_TOPIC, {})
    CACHE["by_theologian"] = _load_json(BY_THEO, {})
    CACHE["search_index"] = _load_json(SEARCH_INDEX, [])


def _load_map(all_works):
    rows = _load_json(MAP_FILE, []) or []
    m = {}
    for r in rows:
        wid = r.get("work_id");
        cid = r.get("canonical_id")
        if wid and cid:
            m[wid] = cid
    # ensure all work ids have a mapping (identity)
    for w in (all_works or []):
        wid = w.get("id")
        if wid:
            m.setdefault(wid, wid)
    # compress
    for wid in list(m.keys()):
        m[wid] = _root(m, wid)
    return m


def _save_map(m):
    rows = [{"work_id": wid, "canonical_id": cid} for wid, cid in sorted(m.items())]
    _write_json(MAP_FILE, rows)


_reload()
outlines = CACHE["outlines"]
works = CACHE["works"]
theologians = CACHE["theologians"]
authors_registry = CACHE["authors_registry"]
by_work = CACHE["by_work"]
by_topic = CACHE["by_topic"]
by_theologian = CACHE["by_theologian"]
search_index = CACHE["search_index"]



def IDT(tid):
    for t in theologians:
        if t["id"] == tid:
            return t.get("name", "")
    else:
        return ""
def TID(name):
    """Return the theologian id for a given name, or None if not found."""
    for t in theologians:
        if t.get("name") == name or t.get("full_name") == name:
            return t["id"]
    return None
check_names = {
    "Alan Spence": "Alan J. Spence",
    "Catherine McDowell": "Catherine L. McDowell",
    "David Fergusson": "David S. Fergusson",
    "Edward J. Young": "E.J. Young",
    "Gentry": "Peter J. Gentry",
    "Gordon Fee": "Gordon D. Fee",
    "Graham Cole": "Graham A. Cole",
    "Greg K. Beale": "Gregory K. Beale",
    "J.K. Beale": "Gregory K. Beale",
    "James Hamilton": "James M. Hamilton Jr.",
    "John Frame": "John M. Frame",
    "John MacArthur": "John F. MacArthur",
    "John Kilner": "John F. Kilner",
    "John Walton": "John H. Walton",
    "John Cooper": "John W. Cooper",
    "John Zizioulas": "John D. Zizioulas",
    "John Feinberg": "John S. Feinberg",
    "Joshua Farris": "Joshua R. Farris",
    "Michael Horton": "Michael S. Horton",
    "Peter Lillback": "Peter A. Lillback",
    "R. Michael Allen": "Michael Allen",
    "Richard Muller": "Richard A. Muller",
    "Richard Gaffin": "Richard B. Gaffin Jr.",
    "Scott Oliphint": "K. Scott Oliphint",
}


check_ids = {
    'theo_62f645237cbb': 'theo_c66c309571d9',
    'theo_253aff752917': 'theo_dde2bb8d741f',
    'theo_b9954d5c121e': 'theo_45caa804f8e0',
    'theo_4fee92d32985': 'theo_258a625b4ef0',
    'theo_46c70c2943c0': 'theo_3907f7b67a12',
    'theo_97952d3f743d': 'theo_6c80a418d7d3',
    'theo_440ea3109c7d': 'theo_364cc75ee079',
    'theo_c30de6e1e9d5': 'theo_38bdde67d604',
    'theo_af1da100ed05': 'theo_38bdde67d604',
    'theo_b4bc91dbb7bb': 'theo_02d402e5475a',
    'theo_a9d7b0a5f494': 'theo_ea6658b8fe68',
    'theo_eb306afbcaaa': 'theo_95d2d502c3ed',
    'theo_3744a9f4a218': 'theo_97c5607b2a3f',
    'theo_669053ae4ac4': 'theo_59a7835435c2',
    'theo_ce6917d33274': 'theo_19f8ef8dc16d',
    'theo_978af2b7052c': 'theo_c45d52affb4c',
    'theo_0a7a3b8cebf5': 'theo_e0b69ac23733',
    'theo_17f2cdad80ea': 'theo_3bbe3faed649',
    'theo_af308ef786d9': 'theo_2c429c991f77',
    'theo_fdc11111194f': 'theo_cf2f4adad47b',
    'theo_137dccd720d4': 'theo_d04d5e20087a',
    'theo_8632e4000bfa': 'theo_0a257bf889f6',
    'theo_452e8ff3c243': 'theo_01d6e8e3cff7',
    'theo_92ae76efc1c9': 'theo_36708a47db3b'
}
for o in outlines:
    tid = o.get("theologian_id")
    t_name = IDT(tid)
    if IDT(tid) in check_names:
        new_name = check_names[t_name]
        new_id = TID(new_name)


for t in theologians:
    t_name = t["name"]
    t_full_name = t["full_name"]
    if t_full_name in check_names:
        t_new_name = check_names[t_full_name]
        t_new_id = TID(t_new_name)

import re, unicodedata
def _slugify(s: str) -> str:
    s = unicodedata.normalize("NFKD", s or "")
    s = s.encode("ascii", "ignore").decode("ascii").lower()
    s = re.sub(r"[^a-z0-9]+", "-", s).strip("-")
    return re.sub(r"-{2,}", "-", s)

updated_by_work = dict()

for wid, work in by_work.items():
    deep_copy = work.copy()
    primary_tid = work.get("primary_author_theologian_id")
    new_primary_tid = check_ids.get(primary_tid, primary_tid)
    work["primary_author_theologian_id"] = new_primary_tid



    new_refs = []
    for ref in work.get("referenced_in", []):
        rt_id = ref["theologian_id"]
        new_rt_id = check_ids.get(rt_id, rt_id)

        ref["theologian_id"] = new_rt_id
        new_refs.append(ref)

        print("updated reference", rt_id, "to", new_rt_id)

    work["referenced_in"] = new_refs

    new_authors = []
    for author in work.get("authors", []):
        author_id = author.get("id")
        new_author_id = check_ids.get(author_id, author_id)
        new_author_full_name = IDT(new_author_id)
        new_slug = _slugify(new_author_full_name)

        new_authors.append({
            "id": new_author_id,
            "slug": new_slug,
            "full_name": new_author_full_name
        })
        print("updated author", author_id, "to", new_author_id, new_slug, new_author_full_name)

    work["authors"] = new_authors

    updated_by_work[wid] = work

    if work != by_work[wid]:
        print(f"Updated {wid} from {deep_copy} to {work}")
        updated_by_work[wid] = work

keyset = set()
updated_by_topic = {}
for topic_id, topic_data in by_topic.items():
    topic_theos = topic_data["theologians"]

    new_topic_theos = []
    for theo in topic_theos:
        theologian_id = theo["theologian_id"]
        if theologian_id in check_ids:
            new_id = check_ids.get(theologian_id, theologian_id)
            print(check_ids[theologian_id])
            new_theo = {
                "full_name": IDT(new_id),
                "theologian_id": new_id,
                "slug": _slugify(IDT(new_id))
            }
            new_topic_theos.append(new_theo)
        else:
            new_topic_theos.append(theo)

    new_topic_theos = new_topic_theos if new_topic_theos else [n for n in topic_theos]

    updated_by_topic[topic_id] = {k: v for k, v in by_topic.items()}
    updated_by_topic[topic_id]["theologians"] = new_topic_theos

In [432]:
keyset = set()
updated_by_topic = {}
for topic_id, topic_data in by_topic.items():
    topic_theos = topic_data["theologians"]

    new_topic_theos = []
    for theo in topic_theos:
        theologian_id = theo["theologian_id"]
        if theologian_id in check_ids:
            new_id = check_ids.get(theologian_id, theologian_id)
            print(check_ids[theologian_id])
            new_theo = {
                "full_name": IDT(new_id),
                "theologian_id": new_id,
                "slug": _slugify(IDT(new_id))
            }
            new_topic_theos.append(new_theo)
        else:
            new_topic_theos.append(theo)

    new_topic_theos = new_topic_theos if new_topic_theos else [n for n in topic_theos]


    updated_by_topic[topic_id] = {k:v for k, v in by_topic.items()}
    updated_by_topic[topic_id]["theologians"] = new_topic_theos


In [436]:
for k, v in updated_by_topic.items():
    if v != by_topic[k]:
        bt_theos = [b["theologian_id"] for b in by_topic[k]["theologians"]]
        ut_theo = [b["theologian_id"] for b in v["theologians"]]

        print(bt_theos == ut_theo)
        print(ut_theo)
        # print(by_topic[k])

        break

True
['theo_bbaf9204ff14', 'theo_fd6d1af54939', 'theo_83f0d17280ee', 'theo_0fb2dd147a7b', 'theo_991f5a107a5c', 'theo_ff69d567b651', 'theo_81db768c415a', 'theo_5ce5eb6d4e76', 'theo_4adfd8bebd91', 'theo_592b834e385e', 'theo_3fc45f8c5a0a', 'theo_1c0566e3d363', 'theo_6d606eaa0755', 'theo_aff4ec250119', 'theo_082b15f5fc01', 'theo_a1d8cf894ef6', 'theo_ea6658b8fe68', 'theo_0967b706d70a', 'theo_09edae13e8c2', 'theo_576aa8dc749e', 'theo_58387045c6b4', 'theo_4957eb6ae45c', 'theo_124906e9ce1f', 'theo_01d6e8e3cff7', 'theo_c4bd273d811a', 'theo_888da193d4d0', 'theo_de14dfbee36e', 'theo_f0740853d304', 'theo_4d5360a0b60c', 'theo_2a4f308c7e0f', 'theo_d202f60f304c', 'theo_e482a45012cc', 'theo_fdb0163ae082']


In [400]:
ifor wid, work in by_work.items():
    if work != updated_by_work[wid]:
        print("asdf")


In [365]:
updated_by_theologian = dict()
for tid, tdata in by_theologian.items():
    outlines = tdata["outlines_by_topic_category"]
    tid = check_ids.get(tid, tid)
    ndata = by_theologian.get(tid, {})
    if ndata:
        n_outlines = ndata["outlines_by_topic_category"]
        n_topic_outlines = [(category, topic) for category, topic_data in n_outlines.items() for topic in topic_data]
        for category, topic in n_topic_outlines:
            if category not in outlines:
                outlines[category] = []
            if topic not in outlines[category]:
                outlines[category].extend(topic)

    updated_by_theologian[tid] = tdata
    updated_by_theologian[tid]["outlines_by_topic_category"] = outlines

In [439]:
for theo in theologians:
    theo_id = theo["id"]
    if theo_id in check_ids:
        canon_id = check_ids[theo_id]
        canon_theo = [t for t in theologians if t["id"] == canon_id][0]


        for key, value in theo.items():
            if canon_theo[key] != value and key == "wts_relevance":
                print(value, canon_theo[key])

        updated_theo = {
                        "id": canon_theo.get("id", ""),
            "slug": _slugify(canon_theo.get("full_name", "")),
            "full_name": canon_theo.get("full_name", ""),
            "name": canon_theo.get("name", ""),
            "dates": max(theo.get("dates", ""), canon_theo.get("dates", "")),
            "era_category": canon_theo.get("era_category", []),
            "traditions": list(set(theo.get("traditions", []) + canon_theo.get("traditions", []))),
            "bio": canon_theo.get("bio", ""),
            "timeline": canon_theo.get("timeline", []),
            "key_work_ids": list(set(theo.get("key_work_ids", []) + canon_theo.get("key_work_ids", []))),
            "wts_relevance": max(theo.get("wts_relevance", 0), canon_theo.get("wts_relevance", 0)),
            "created_at": canon_theo.get("created_at", ""),
            "updated_at": canon_theo.get("updated_at", "")
        }


{'id': 'theo_b9515ffe8f41',
 'slug': 'anselm-of-canterbury',
 'full_name': 'Anselm of Canterbury',
 'name': 'Anselm of Canterbury',
 'dates': '1033–1109',
 'era_category': [],
 'traditions': [],
 'bio': '',
 'timeline': [],
 'key_work_ids': [],
 'wts_relevance': True,
 'created_at': '2025-08-16',
 'updated_at': '2025-08-16'}

True False


AttributeError: 'NoneType' object has no attribute 'isInstance'