In [None]:
import itertools as it
import re
import shutil as sh
from pathlib import Path

import h5py
import numpy as np
import pandas as pd
import scipy.io
import sklearn.compose as compose
import sklearn.impute as impute
import sklearn.pipeline as pipeline
import sklearn.preprocessing as preproc
from bids import BIDSLayout

In [None]:
# Dataset root and the derivatives folder that receives every export below.
root = "/scratch/knavynde/newtopsy"
out = f"{root}/derivatives/prepdwi-recon-0.1.0"

# Layout over the dataset root, derivatives included, validation switched off,
# with its database path under `{root}/.pybids`.
layout = BIDSLayout(
    root,
    validate=False,
    derivatives=True,
    database_path=f"{root}/.pybids",
)

In [None]:
# Edge-weight variant: passed as the `desc` entity when querying connectome
# files and embedded in the names of the exported connectome files.
WEIGHT="avgFA"
# Participants table at the dataset root; read by get_subj_metadata().
participant_file = f'{root}/participants.tsv'

def get_files():
    """Query the layout for the connectome files of the selected subjects.

    The query is restricted to the subjects returned by ``get_subjects()``,
    suffix "connectome", ``desc=WEIGHT`` and atlas "bn246".
    """
    query = {
        "subject": get_subjects(),
        "suffix": "connectome",
        "desc": WEIGHT,
        "atlas": "bn246",
    }
    return layout.get(**query)

def get_file_subjects():
    """Return the 'subject' entity of every file from ``get_files()``, in file order."""
    subjects = []
    for bids_file in get_files():
        subjects.append(bids_file.get_entities()['subject'])
    return subjects


def get_connectomes():
    """Load every connectome CSV from ``get_files()`` and stack them depth-wise.

    The result of ``np.dstack`` has one slice along the last axis per file,
    in the order the files are returned.
    """
    matrices = []
    for bids_file in get_files():
        matrices.append(np.loadtxt(bids_file.path, delimiter=","))
    return np.dstack(matrices)

def get_metadata(hem = None):
    """Read the atlas region table from ``resources/brainnetome.tsv``.

    Parameters
    ----------
    hem : "L", "R" or anything else
        When "L" or "R", only rows whose "hemisphere" column equals that value
        are returned; any other value returns every row.

    Returns
    -------
    pandas.DataFrame
        The selected rows with a fresh 0..n-1 index; the previous index is
        kept as an "index" column (``reset_index()`` without ``drop``).
    """
    atlas = pd.read_csv("resources/brainnetome.tsv", sep="\t")
    if hem not in ("L", "R"):
        return atlas.reset_index()
    same_side = atlas["hemisphere"] == hem
    return atlas[same_side].reset_index()

def get_subj_metadata():
    """Return the participant rows that belong to the analysis.

    A row of ``participant_file`` is kept when its "phenotype" is "HC" or
    "FEP" and its "participant_id" also appears in the snakedwi-0.1.0
    derivative's participants table.
    """
    everyone = pd.read_csv(participant_file, sep="\t")
    processed_ids = pd.read_csv(
        f'{root}/derivatives/snakedwi-0.1.0/participants.tsv',
        sep="\t",
    )["participant_id"]

    in_study_group = everyone["phenotype"].isin(["HC", "FEP"])
    was_processed = everyone["participant_id"].isin(processed_ids)
    return everyone[in_study_group & was_processed]

def get_subjects():
    """List the selected participant ids with their first four characters removed.

    # presumably the dropped characters are the "sub-" prefix — confirm against participants.tsv
    """
    return [
        participant_id[4:]
        for participant_id in get_subj_metadata()["participant_id"]
    ]
    


### Connectome Data (Left, Right, and Pan-Hemispheric)

In [None]:
# Hemisphere selectors; None means "no hemisphere filter" and is written out as 'pan'.
HEMS = ("L", "R", None)


def get_hemispheric_connectomes():
    """Yield the stacked connectomes restricted to each entry of ``HEMS``, in order.

    For every hemisphere selector the "Label ID" column of ``get_metadata(hem)``
    is used to pick the same rows and columns of every matrix; the trailing
    (per-file) axis is kept whole.

    NOTE(review): "Label ID" values are used directly as positional indices —
    confirm the atlas table is 0-based.
    """
    connectomes = get_connectomes()
    for hem in HEMS:
        index = get_metadata(hem)["Label ID"]
        yield connectomes[(*np.ix_(index, index), ...)]


# The matrices are stacked in get_files() order, so label them with the
# subjects of those same files. get_subjects() may list participants that have
# no connectome file, which would shift or truncate the pairing.
file_subjects = get_file_subjects()

for connectomes, hem in zip(get_hemispheric_connectomes(), HEMS):
    base = Path(f"{out}/nbs/{WEIGHT}_{hem or 'pan'}_connectomes")
    # Start from an empty directory so files from a previous run never linger.
    if base.exists():
        sh.rmtree(base)
    base.mkdir(exist_ok=True, parents=True)
    for connectome, sub in zip(np.moveaxis(connectomes, -1, 0), file_subjects):
        with base.joinpath(sub).with_suffix(".txt").open('w') as f:
            # One matrix row per line, whitespace separated.
            np.savetxt(f, connectome, fmt="%.10f")



In [None]:
# Mat file
# HEMS and get_hemispheric_connectomes() are defined in the text-export cell
# above; they are reused here rather than redefined as identical copies.
for connectomes, hem in zip(get_hemispheric_connectomes(), HEMS):
    scipy.io.savemat(f"{out}/nbs/{WEIGHT}_{hem or 'pan'}_connectome.mat", {"Mat": connectomes})



In [None]:
# Sanity check: `connectome` is the last matrix bound by the text-export loop
# in an earlier cell, so this cell only works after that loop has run.
connectome.shape

### Node MNI Coordinates (Left, Right, and Pan-Hemispheric)

In [None]:
for hem in HEMS:
    hem_name = hem or 'pan'
    # Parse the comma-separated "MNI" strings once and write both outputs from
    # the same integer array, so the .mat and .txt files cannot disagree
    # (int() tolerates the whitespace left around each piece after the split).
    coords = (
        get_metadata(hem)["MNI"]
        .str.split(",", expand=True)
        .astype(int)
        .to_numpy()
    )
    scipy.io.savemat(f"{out}/nbs/coords_{hem_name}.mat", {"coords": coords})
    # One node per line, coordinates separated by single spaces.
    np.savetxt(f"{out}/nbs/coords_{hem_name}.txt", coords, fmt="%d", delimiter=" ")

### Patient Diagnoses

In [None]:
# Keep only the participants for which a connectome file was actually found.
file_subjects = get_file_subjects()
partic = get_subj_metadata()[
    lambda df: df["participant_id"].map(lambda s: s[4:]).isin(file_subjects)
]
groups = ["HC", "FEP"]
# One 0/1 indicator column per group. assign() builds a new frame instead of
# writing into the filtered slice, which avoids pandas' SettingWithCopyWarning.
partic = partic.assign(
    **{group: (partic['phenotype'] == group).map(int) for group in groups}
)

# Sex: ordinal-encode, then fill gaps with the most frequent code.
sex_encode = pipeline.make_pipeline(
    preproc.OrdinalEncoder(),
    impute.SimpleImputer(strategy="most_frequent")
)
# Design columns, in order: the group indicators, encoded sex, imputed age.
column_preproc = compose.make_column_transformer(
    ("passthrough", groups),
    (sex_encode, ["sex"]),
    (impute.SimpleImputer(), ['age']),
)

# NOTE(review): rows follow the participants-table order while the connectome
# stack follows get_files() order — confirm the two orders agree.
X = column_preproc.fit_transform(partic)
# NOTE(review): the two outputs are named "dds" and "ddx" — confirm that the
# differing names are intentional.
scipy.io.savemat(f"{out}/nbs/dds.mat", {"design": X})
# NOTE(review): '%i' truncates any fractional age produced by the imputer,
# so the text file can differ from the .mat file.
np.savetxt(f"{out}/nbs/ddx.txt", X, fmt='%i')


### Node Labels

In [None]:
# Single-hemisphere labels are taken from the left-hemisphere rows only
# (assumes the right hemisphere carries the same names — TODO confirm).
hemi_names = get_metadata("L")["Name"]

# Whole-atlas labels: region name suffixed with its hemisphere.
md = get_metadata()
pan_names = md["Name"] + "_" + md["hemisphere"]

scipy.io.savemat(f"{out}/nbs/node_labels.mat", {"labels": hemi_names.to_numpy()}, oned_as="column")
scipy.io.savemat(f"{out}/nbs/node_labels_pan.mat", {"labels": pan_names.to_numpy()}, oned_as="column")

# Plain-text versions: one label per line, newline-terminated.
Path(f"{out}/nbs/node_labels.txt").write_text("\n".join(hemi_names) + "\n")
Path(f"{out}/nbs/node_labels_pan.txt").write_text("\n".join(pan_names) + "\n")

In [None]:
# Saved NBS result to convert; the HDF5 output is written next to it.
file = Path("results/nbs/atlas-bn246_weight-avgFA_model-FEP_nbs.mat")
results = scipy.io.loadmat(file)
def unpack_mat(mat, scheme):
    """Recursively extract fields of a loaded MATLAB struct into a plain dict.

    Parameters
    ----------
    mat
        Object indexable by field name; every requested field must support
        ``[0, 0]`` indexing to reach its payload.
    scheme : dict
        Maps each field name to how it is unpacked:

        * a nested ``dict`` — recurse into ``mat[key][0, 0]`` with that scheme;
        * ``"literal"`` — flatten ``mat[key][0, 0]`` and keep its first
          element, or ``''`` when it is empty;
        * ``"arr"`` — keep ``mat[key][0, 0]`` unchanged.

    Returns
    -------
    dict
        One entry per key of ``scheme``, in the same order.

    Raises
    ------
    TypeError
        If a scheme value is none of the three supported forms.
    """
    result = {}
    for key, val in scheme.items():
        if isinstance(val, dict):
            result[key] = unpack_mat(mat[key][0, 0], val)
        elif val == "literal":
            try:
                art = mat[key][0, 0].flatten()
                result[key] = art[0] if len(art) else ''
            except Exception:
                # Show which field could not be unpacked, then re-raise the
                # original exception unchanged.
                print(key)
                print(mat[key])
                raise
        elif val == "arr":
            result[key] = mat[key][0, 0]
        else:
            raise TypeError(
                f"Unsupported scheme entry for {key!r}: {val!r} "
                "(expected a dict, 'literal' or 'arr')"
            )
    return result

# Every UI entry is unpacked with the same two scalar fields.
UI_STRUC = {"ui": "literal", "ok": "literal"}

# Names of the UI entries that are read from the saved struct.
UI_FIELDS = [
    "method",
    "test",
    "size",
    "thresh",
    "perms",
    "alpha",
    "contrast",
    "design",
    "matrices",
    "node_coor",
    "node_label",
    "exchange",
]

# Unpacking scheme for the saved struct: "literal" fields are reduced to a
# single value, "arr" fields are kept as arrays.
NBS_SCHEME = {
    "NBS": {
        "n": "literal",
        "con_mat": "arr",
        "pval": "literal",
        "test_stat": "arr",
    },
    "GLM": {
        "y": "arr",
        "X": "arr",
        "contrast": "arr",
        "test": "literal",
        "perms": "literal",
    },
    "STATS": {
        "thresh": "literal",
        "alpha": "literal",
        "size": "literal",
        "N": "literal",
        "test_stat": "arr"
    },
    "UI": dict.fromkeys(UI_FIELDS, UI_STRUC),
}

nbs = unpack_mat(results["nbs"], NBS_SCHEME)

# Write the unpacked NBS result to an HDF5 file next to the .mat file.
with h5py.File(file.with_suffix(".hdf5"), 'w') as f:
    # con_mat is loaded as a 2-D array of cells. Visit every cell, whatever the
    # orientation, so no component is dropped when the cell array is 1 x n
    # (NOTE(review): NBS documents con_mat as a 1 x n cell array — confirm).
    # toarray() densifies each sparse adjacency matrix.
    conmats = [component.toarray() for component in np.ravel(nbs["NBS"]['con_mat'])]
    # No components: store an empty placeholder rather than failing in dstack.
    conmats = np.dstack(conmats) if conmats else np.empty((0,))
    NBS = f.create_group("nbs")
    NBS["con_mat"] = conmats
    NBS["test_stat"] = nbs["NBS"]["test_stat"]
    NBS.attrs['n'] = nbs["NBS"]['n']
    NBS.attrs['pval'] = nbs["NBS"]['pval']
