In [1]:
from utils import get_files, write_json
from datetime import date
from collections import defaultdict

# Fetch every project data file through utils.get_files() and bind each entry
# to a pair of module-level names. Every CACHE value is unpacked as a 2-item
# pair; presumably (parsed content, source file path) — confirm against
# utils.get_files.
# Of these, only outlines_jsonl, theologians, topics and by_work are read by
# the rest of this cell; the remaining bindings are available for ad-hoc
# inspection in later cells.
CACHE = get_files()
authors_registry, authors_registry_path = CACHE["authors_registry"]
outlines_jsonl, outlines_jsonl_path = CACHE["outlines_jsonl"]
theologians, theologians_path = CACHE["theologians"]
topic_mapping, topic_mapping_path = CACHE["topic_mapping"]
topics, topics_path = CACHE["topics"]
traditions, traditions_path = CACHE["traditions"]
eras, eras_path = CACHE["eras"]
work_canon_map, work_canon_map_path = CACHE["work_canon_map"]
works, works_path = CACHE["works"]
by_theologian, by_theologian_path = CACHE["by_theologian"]
by_topic, by_topic_path = CACHE["by_topic"]
by_topic_keyworks, by_topic_keyworks_path = CACHE["by_topic_keyworks"]
by_work, by_work_path = CACHE["by_work"]
eras_registry, eras_registry_path = CACHE["eras_registry"]
institutions_registry, institutions_registry_path = CACHE["institutions_registry"]
theologian_profiles, theologian_profiles_path = CACHE["theologian_profiles"]
topic_work_edges, topic_work_edges_path = CACHE["topic_work_edges"]
search_index, search_index_path = CACHE["search_index"]

# --- Lookup tables shared by the loops below --------------------------------

# Theologian ids in file order, and topic records keyed by topic id.
theo_ids = [t["id"] for t in theologians]
topic_data = {t["id"]: t for t in topics}

# Outlines grouped by theologian id, built in ONE pass. (The previous version
# re-filtered the whole outline list once per outline, which was quadratic.)
# Keys appear in first-seen order and each list keeps file order, exactly as
# before.
theo_outs = {}
for outline in outlines_jsonl:
    theo_outs.setdefault(outline["theologian_id"], []).append(outline)

# Work ids bucketed by primary author in ONE pass over by_work:
#   theo_all_works[tid] -> every work whose primary author is tid
#   extra_works[tid]    -> the subset whose "referenced_in" is empty
# Every known theologian gets an entry (possibly an empty list); works whose
# primary author is not a known theologian are ignored. Lists keep by_work
# iteration order.
theo_all_works = {tid: [] for tid in theo_ids}
extra_works = {tid: [] for tid in theo_ids}
for work_id, wdata in by_work.items():
    author_id = wdata["primary_author_theologian_id"]
    is_unreferenced = not wdata["referenced_in"]
    if author_id not in theo_all_works:
        continue
    theo_all_works[author_id].append(work_id)
    if is_unreferenced:
        extra_works[author_id].append(work_id)

# Output index: theologian id -> section name -> topic category -> list.
# The nested defaultdicts create each level on first access.
new_by_theologian = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

# Output list of theologian records, filled by the loop further down.
new_theologians = []


# Bucket every theologian's outlines by the category of the outline's topic.
# Entries in new_by_theologian are created on first access, so a theologian
# with no outlines gets no entry from this pass.
for theo in theologians:
    tid = theo["id"]

    for theo_outline in theo_outs.get(tid, []):
        topic_id = theo_outline["topic_id"]
        # Look the topic record up once; an unknown topic id raises KeyError.
        topic = topic_data[topic_id]
        topic_category = topic["category"]

        new_by_theologian[tid]["outlines_by_topic_category"][topic_category].append(
            {
                "topic_id": topic_id,
                "topic_slug": topic["slug"],
                "topic_category": topic_category,
                "markdown_path": theo_outline["markdown_path"],
                "updated_at": theo_outline["updated_at"],
                "key_work_ids": theo_outline["key_work_ids"],
                "outline_id": theo_outline["id"],
            }
        )


# Attach work ids to both outputs.
for theo in theologians:
    tid = theo["id"]

    # The by-theologian index receives only the works whose "referenced_in"
    # is empty (extra_works) ...
    new_by_theologian[tid]["key_work_ids"] = extra_works.get(tid, [])

    # ... while the theologian record receives every work authored by tid.
    # Build a copy rather than assigning into `theo`, so the records loaded
    # into `theologians` / CACHE are not modified as a side effect. An existing
    # "key_work_ids" key keeps its position; otherwise the key is appended.
    new_theologians.append({**theo, "key_work_ids": theo_all_works[tid]})




# Persist both rebuilt structures through the project's write_json helper.
# NOTE(review): the targets are hard-coded relative paths. by_theologian_path
# and theologians_path were unpacked from CACHE at the top of this cell and
# presumably name these same files — confirm, because if so these calls
# overwrite the very inputs this cell just read.
write_json("data/indices/by_theologian.json", new_by_theologian)
write_json("data/theologians.json", new_theologians)

In [None]:
# Display the full by-theologian index assembled in the first cell.
new_by_theologian

In [None]:
# Re-derive the topic-id -> topic-record lookup and show one specific record.
topic_data = dict((topic["id"], topic) for topic in topics)
topic_data["top_73dd29f4e3dd"]


In [None]:
from collections import defaultdict

# Demo of a three-level auto-creating mapping: every missing key on the way
# down is created on access, and the innermost level is a list.
def _leaf_lists():
    """Innermost level: missing keys become empty lists."""
    return defaultdict(list)


def _middle_level():
    """Middle level: missing keys become a mapping of lists."""
    return defaultdict(_leaf_lists)


new_dict = defaultdict(_middle_level)

innermost = new_dict["a"]["b"]["c"]
innermost.append("3")

In [None]:
# Peek at the first entry of `works`.
works[0]

In [None]:
# Show the by_work record stored under the first key.
a = [*by_work][0]
by_work[a]

In [None]:
# Print the first by_work record whose "referenced_in" is empty, then stop.
for w, wdata in by_work.items():
    if wdata["referenced_in"]:
        continue
    print(wdata)
    break

In [None]:
# Rebuild extra_works: theologian id -> ids of the works they are primary
# author of and whose "referenced_in" is empty. One pass over by_work instead
# of one full scan per theologian; every theologian still gets an entry, and
# each list keeps by_work iteration order.
extra_works = {t["id"]: [] for t in theologians}
for w, wdata in by_work.items():
    if wdata["referenced_in"]:
        continue
    author_id = wdata["primary_author_theologian_id"]
    if author_id in extra_works:
        extra_works[author_id].append(w)

In [None]:
# Display the extra_works mapping computed in the previous cell.
extra_works

In [None]:
# Display new_by_theologian again (same expression as the earlier inspection cell).
new_by_theologian

In [None]:
#!/usr/bin/env python3
import sys, json, re, shutil
from pathlib import Path
from datetime import datetime
from collections import OrderedDict

# --- helpers ---------------------------------------------------------------

# Leading one- or two-digit number followed by a dot, e.g. "4. Anthropology ..."
CAT_NUM_RE = re.compile(r'^\s*(\d{1,2})\s*\.')
# Leading number, then "." or "-", then one letter, e.g. "10-c-..." or "10.c..."
SLUG_CODE_RE = re.compile(r'^\s*(\d{1,2})[.\-]([a-zA-Z])')


def cat_sort_key(cat_name: str, original_index: int):
    """Sort key for a category name.

    Returns ``(number, original_index)`` where ``number`` is the numeric
    prefix matched by ``CAT_NUM_RE``. Names without such a prefix (including
    ``None`` or the empty string) get ``10**9`` so they sort last, and the
    original index keeps them in their existing relative order.
    """
    prefix = CAT_NUM_RE.match(cat_name or "")
    if prefix is None:
        return (10**9, original_index)
    return (int(prefix.group(1)), original_index)


def topic_sort_key(topic_obj: dict):
    """Sort key for a topic entry, derived from its ``topic_slug``.

    Returns ``(number, LETTER, slug)`` when the slug starts with a code matched
    by ``SLUG_CODE_RE``; the full slug is the final tie-breaker so the order is
    deterministic. A missing entry, a missing/empty slug, or a slug without a
    code yields ``(10**9, "Z", slug)``, which places it after all coded slugs,
    ordered by slug text.
    """
    entry = topic_obj or {}
    slug = entry.get("topic_slug", "") or ""
    code = SLUG_CODE_RE.match(slug)
    if code is None:
        return (10**9, "Z", slug)
    number, letter = code.groups()
    return (int(number), letter.upper(), slug)

def load_json_preserve_order(p: Path):
    """Parse the UTF-8 JSON file at ``p`` and return the decoded value.

    Every JSON object is built as an ``OrderedDict`` so key order from the
    file is kept and that intent is explicit in the types.
    """
    with p.open("r", encoding="utf-8") as handle:
        return json.load(handle, object_pairs_hook=OrderedDict)

# --- main -----------------------------------------------------------------

def _sort_outline_categories(obtc):
    """Return ``obtc`` as an OrderedDict with categories and topic lists sorted.

    Categories are ordered by ``cat_sort_key`` (numeric prefix first, original
    position as tie-breaker). A category whose value is a list has that list
    sorted by ``topic_sort_key``; any other value is copied through untouched.
    """
    ranked = sorted(
        enumerate(obtc.items()),
        key=lambda pair: cat_sort_key(pair[1][0], pair[0]),
    )
    sorted_obtc = OrderedDict()
    for _position, (cat_name, items) in ranked:
        if isinstance(items, list):
            items = sorted(items, key=topic_sort_key)
        sorted_obtc[cat_name] = items
    return sorted_obtc


def main(argv=None):
    """Sort the outlines inside a by-theologian JSON index and emit the result.

    For every top-level entry, the ``outlines_by_topic_category`` mapping (when
    present and a JSON object) has its categories and topic lists sorted; all
    other keys are copied through and the top-level entry order is kept.

    Args:
        argv: Command-line arguments WITHOUT the program name. Defaults to
            ``sys.argv[1:]``. The first argument that is not ``--inplace`` is
            the input path; ``--inplace`` may appear anywhere. Passing a list
            explicitly (e.g. ``main(["index.json", "--inplace"])``) lets the
            function be called from code instead of a shell.

    Side effects:
        Without ``--inplace`` the sorted JSON is written to stdout. With it,
        the input file is first copied to a timestamped ``.bak-...`` sibling
        and then overwritten.

    Raises:
        SystemExit: status 1 (after printing usage) when no input path is given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    inplace = "--inplace" in args
    positional = [arg for arg in args if arg != "--inplace"]
    if not positional:
        print("Usage: sort_by_theologian.py <path/to/input.json> [--inplace]")
        sys.exit(1)

    src = Path(positional[0]).resolve()

    data = load_json_preserve_order(src)  # top-level order preserved

    out = OrderedDict()
    for theo_id, payload in data.items():
        if not isinstance(payload, dict):
            # null / scalar / list entries have nothing to sort. Copy them
            # through unchanged instead of forcing them through dict(), which
            # raised on null and turned an empty list into {}.
            out[theo_id] = payload
            continue

        # Copy through everything; only touch outlines_by_topic_category
        new_payload = dict(payload)
        obtc = payload.get("outlines_by_topic_category")
        if isinstance(obtc, dict):
            new_payload["outlines_by_topic_category"] = _sort_outline_categories(obtc)

        out[theo_id] = new_payload  # keeps original theo_id order

    # Write result
    result = json.dumps(out, ensure_ascii=False, indent=2)
    if inplace:
        # make a timestamped backup then overwrite
        backup = src.with_suffix(src.suffix + f".bak-{datetime.now().strftime('%Y%m%d-%H%M%S')}")
        shutil.copy2(src, backup)
        src.write_text(result + "\n", encoding="utf-8")
        print(f"Updated in place. Backup written to: {backup}")
    else:
        sys.stdout.write(result + "\n")

# Script entry point: run main() when this code executes as the top-level module.
# NOTE(review): this CLI script sits in a notebook cell. A notebook kernel
# presumably also executes cells with __name__ == "__main__", in which case
# main() would read the kernel's own sys.argv rather than a user-supplied
# path — confirm before running this cell as-is.
if __name__ == "__main__":
    main()