In [1]:
import glob
import os
import pickle

# ─── configuration ────────────────────────────────────────────────────────────
# Directory holding one .h5 component file per subject.
INPUT_DIR = "/home/jovyan/narratives-project/shirer_components"
# Directory holding the pickled results that get patched.
PKL_DIR = "/home/jovyan/narratives-project/hmm-objects"
# Inserted before ".pkl" when naming the patched copy of a pickle.
SUFFIX = "_withIDs"
# ───────────────────────────────────────────────────────────────────────────────

# Subject IDs are the .h5 file names with the extension stripped, taken in
# sorted path order so the ordering is reproducible between runs.
h5_paths = glob.glob(os.path.join(INPUT_DIR, "*.h5"))
h5_paths.sort()
subject_ids = [
    os.path.splitext(file_name)[0]
    for file_name in map(os.path.basename, h5_paths)
]
n_subj = len(subject_ids)
print(f"✔ Found {n_subj} subjects in {INPUT_DIR}")

def preview(lst, n=3):
    """Return at most the first *n* items of *lst* for display.

    Slicing already clamps to the sequence length, so no explicit length
    check is needed. ``None`` (e.g. a key that is present but was never
    filled in) is shown as an empty list instead of raising ``TypeError``.

    Args:
        lst: Any sliceable sequence, or ``None``.
        n: Maximum number of leading items to keep.

    Returns:
        ``lst[:n]`` (same sequence type as *lst*), or ``[]`` when *lst* is
        ``None``.
    """
    if lst is None:
        return []
    return lst[:n]

# ─── patch every original results pickle with the subject IDs ─────────────────
# For each "hmmlearn_consensus_results_k*.pkl" in PKL_DIR, load the pickled
# results, set entry["subject_ids"] on every entry whose number of
# "subject_paths" equals n_subj, and write the result next to the original
# with SUFFIX inserted before the extension. The original file is never
# modified.
# NOTE(review): only the *count* of subject_paths is checked; that their order
# matches the sorted .h5 filename order is an assumption — confirm against the
# code that produced the pickles.
total_files, total_entries = 0, 0

# iterate over each ORIGINAL pickle (ignore already-patched ones)
pkl_pattern = os.path.join(PKL_DIR, "hmmlearn_consensus_results_k*.pkl")
for pkl_path in sorted(glob.glob(pkl_pattern)):
    # Test the file name only: matching SUFFIX against the full path would
    # skip every file if a parent directory name happened to contain it.
    if SUFFIX in os.path.basename(pkl_path):        # ← skip *_withIDs.pkl
        continue

    print(f"\n↪ Processing {os.path.basename(pkl_path)}")
    total_files += 1

    # NOTE(security): unpickling can execute arbitrary code — only run this
    # on pickle files from a trusted source.
    with open(pkl_path, "rb") as f:
        results = pickle.load(f)

    patched_this_file = 0
    for entry in results:
        n_paths = len(entry["subject_paths"])
        if n_paths != n_subj:
            print(f"  ⚠︎ k={entry['k']} skipped (mismatch: "
                  f"{n_paths} paths vs {n_subj} ids)")
            continue

        old_ids = entry.get("subject_ids", [])
        print(f"    k={entry['k']}: before → {preview(old_ids)}")

        # Shallow copy so entries do not share one list object; the IDs
        # themselves are immutable strings, so no deep copy is needed.
        entry["subject_ids"] = list(subject_ids)
        patched_this_file   += 1
        total_entries       += 1

        print(f"               after  → {preview(entry['subject_ids'])}")

    if patched_this_file:
        base, ext = os.path.splitext(pkl_path)
        new_path  = base + SUFFIX + ext          # e.g. *_withIDs.pkl
        with open(new_path, "wb") as f:
            pickle.dump(results, f)
        print(f"  📝 Patched {patched_this_file} entry(s); wrote {new_path}")
    else:
        print("  Nothing patched; original left untouched.")

print(f"\nDone. Patched {total_entries} entry(s) across {total_files} file(s).")

✔ Found 75 subjects in /home/jovyan/narratives-project/shirer_components

↪ Processing hmmlearn_consensus_results_k10.pkl
    k=10: before → []
               after  → ['sub-002_desc-shirercomponents', 'sub-003_desc-shirercomponents', 'sub-004_desc-shirercomponents']
  📝 Patched 1 entry(s); wrote /home/jovyan/narratives-project/hmm-objects/hmmlearn_consensus_results_k10_withIDs.pkl

↪ Processing hmmlearn_consensus_results_k12.pkl
    k=12: before → []
               after  → ['sub-002_desc-shirercomponents', 'sub-003_desc-shirercomponents', 'sub-004_desc-shirercomponents']
  📝 Patched 1 entry(s); wrote /home/jovyan/narratives-project/hmm-objects/hmmlearn_consensus_results_k12_withIDs.pkl

↪ Processing hmmlearn_consensus_results_k14.pkl
    k=14: before → []
               after  → ['sub-002_desc-shirercomponents', 'sub-003_desc-shirercomponents', 'sub-004_desc-shirercomponents']
  📝 Patched 1 entry(s); wrote /home/jovyan/narratives-project/hmm-objects/hmmlearn_consensus_results_k14_wit