# Extract Viterbi paths from HMM objects

extract_statepaths.py  ─  Export subject-level HMM Viterbi paths to CSV.

#### Usage

```bash
python extract_statepaths.py /path/to/hmmlearn_consensus_results_k12.pkl \
        --k 12 \
        --out /path/to/statepaths_k12.csv
```

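If the pickle contains results for only one value of k, you can omit --k.

Note: the code below does not parse command-line arguments; it reads the input path, the output path, and k from the PKL_PATH, OUT_CSV, and K constants instead.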

In [3]:
import argparse
import os
import pickle
import numpy as np
import pandas as pd

In [5]:
# Run configuration. K is the single source of truth for the state count:
# both file names embed it, so they are derived from K instead of repeating
# the number by hand (which would let the paths and K drift apart).
K = 10              # number of states in the model
PAD_VALUE = np.nan  # pad shorter runs with NaN

# Input pickle that is searched for the entry with k == K.
PKL_PATH = f"/home/jovyan/narratives-project/hmm-objects/hmmlearn_consensus_results_k{K}.pkl"
# Destination CSV for the padded state-path matrix.
OUT_CSV = f"/home/jovyan/narratives-project/viterbi-paths/statepaths_k{K}.csv"

In [6]:
def load_entry(results, k=None):
    """Return the unique entry of *results* selected by *k*.

    Parameters
    ----------
    results : iterable
        Result entries. Each entry is queried with ``.get("k")``, so the
        entries are assumed to be dict-like (TODO confirm against the code
        that writes the pickle).
    k : optional
        Value the entry's ``"k"`` field must equal. When omitted (``None``),
        *results* must hold exactly one entry, which is returned without
        inspecting its ``"k"`` field.

    Returns
    -------
    The matching entry, returned as-is (not copied).

    Raises
    ------
    ValueError
        If no entry or more than one entry has the requested ``k``, or if
        ``k`` is omitted and *results* does not hold exactly one entry.
    """
    if k is None:
        # No k requested: only unambiguous when there is a single candidate.
        entries = list(results)
        if len(entries) != 1:
            raise ValueError(
                f"Expected exactly one entry when k is omitted, "
                f"found {len(entries)}; specify k."
            )
        return entries[0]

    matches = [d for d in results if d.get("k") == k]
    if not matches:
        raise ValueError(f"No entry with k={k} in pickle.")
    if len(matches) > 1:
        raise ValueError(f"Duplicate entries with k={k}.")
    return matches[0]


def build_matrix(paths, pad_value=PAD_VALUE):
    """Stack variable-length sequences into one right-padded 2-D float array.

    Parameters
    ----------
    paths : iterable of sequences
        One sequence per row. Each must support ``len()`` and be assignable
        into a float NumPy row.
    pad_value : float, optional
        Fill value for positions past the end of a shorter sequence.
        Defaults to the module-level ``PAD_VALUE``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(number of sequences, longest length)``.
        Row ``i`` holds sequence ``i`` followed by ``pad_value``.

    Raises
    ------
    ValueError
        If *paths* is empty.
    """
    # Materialise once: the rows are counted and traversed twice, which a
    # one-shot iterable (e.g. a generator) would not survive.
    rows = list(paths)
    if not rows:
        # Fail with an explicit message instead of the opaque error that
        # max() raises on an empty iterable.
        raise ValueError("No state paths to stack: 'paths' is empty.")

    max_len = max(len(p) for p in rows)
    mat = np.full((len(rows), max_len), pad_value, dtype=float)
    for i, seq in enumerate(rows):
        # Left-align each sequence; the tail keeps pad_value.
        mat[i, :len(seq)] = seq
    return mat


def main():
    """Export the state paths of the ``k == K`` entry in PKL_PATH to OUT_CSV.

    Loads the pickle at ``PKL_PATH``, selects the entry whose ``"k"`` equals
    ``K``, pads that entry's ``"subject_paths"`` into a rectangular float
    matrix, and writes it as CSV with one row per path. Rows are labelled
    ``sub-000``, ``sub-001``, ... by position in ``"subject_paths"``.
    NOTE(review): these labels are positional, not real subject IDs -
    confirm that the ordering matches the subject list used upstream.

    Raises
    ------
    FileNotFoundError
        If ``PKL_PATH`` does not exist.
    ValueError
        Propagated from ``load_entry`` / ``build_matrix``.
    """
    if not os.path.exists(PKL_PATH):
        raise FileNotFoundError(f"Pickle not found: {PKL_PATH}")

    # NOTE(security): unpickling can execute arbitrary code. Only point
    # PKL_PATH at files produced by a trusted pipeline.
    with open(PKL_PATH, "rb") as f:
        results = pickle.load(f)

    entry = load_entry(results, k=K)
    mat = build_matrix(entry["subject_paths"])

    df = pd.DataFrame(
        mat,
        index=[f"sub-{i:03d}" for i in range(mat.shape[0])],
        dtype=float,
    )

    # to_csv does not create missing directories; make sure the target
    # folder exists so a fresh checkout does not fail at the last step.
    out_dir = os.path.dirname(OUT_CSV)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    df.to_csv(OUT_CSV, index=True)
    print(f"✅  Wrote {df.shape[0]} subjects × {df.shape[1]} TRs -> {OUT_CSV}")


# Run the export only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

✅  Wrote 75 subjects × 279 TRs -> /home/jovyan/narratives-project/viterbi-paths/statepaths_k10.csv
