# Aggregate zBrains outputs

Combine zBrains outputs from multiple participants into a single DataFrame.

In [5]:
import importlib
import os
import sys

import numpy as np
import pandas as pd

import vrtx

sys.path.append('/host/verges/tank/data/daniel/')  # Replace with the path to Utils
from Utils import gen, id


In [16]:
# Re-import the project modules so that edits made to their source files are
# picked up without restarting the kernel (development convenience only).
# NOTE: `id` here is the Utils.id module imported in the first cell, which
# shadows Python's builtin id() for the rest of the notebook.
importlib.reload(vrtx)
importlib.reload(id)
importlib.reload(gen)

<module 'Utils.gen' from '/host/verges/tank/data/daniel/Utils/gen.py'>

In [7]:
# ---------------------------------------------------------------------------
# Input / output locations
# ---------------------------------------------------------------------------
# Participant sheet listing the IDs of interest (expected in long format).
dir_IDs = "/host/verges/tank/data/daniel/3T7T/z/data/pt/master_demo_19Mar2025_age_y.csv"
# Root folder under which the aggregated value tables are written.
output_dir = "/host/verges/tank/data/daniel/3T7T/z/outputs/values"
# Scratch folder handed to the ID-splitting step for its intermediate files.
dir_tmp = "/host/verges/tank/data/daniel/3T7T/z/outputs/tmp"

# ---------------------------------------------------------------------------
# Study definitions
# ---------------------------------------------------------------------------
# Each study is described by:
#   name     : label used in log messages and output file names
#   dir_root : derivatives directory of the study
#   ID_ctrl  : ID patterns of controls (presumably ID prefixes -- TODO confirm)
#   ID_pt    : ID patterns of patients; used later to split the participant sheet
MICs = dict(
    name="MICs",
    dir_root="/data/mica3/BIDS_MICs/derivatives",
    ID_ctrl=["HC"],
    ID_pt=["PX"],
)

PNI = dict(
    name="PNI",
    dir_root="/data/mica3/BIDS_PNI/derivatives",
    ID_ctrl=["PNC", "Pilot"],
    ID_pt=["PNE", "PNA", "PNF"],
)

# Order matters: downstream cells pair these studies positionally with the
# per-study split files.
studies = [MICs, PNI]


In [8]:
# ---------------------------------------------------------------------------
# zBrains output layout
# ---------------------------------------------------------------------------
# N.b. .surf.gii files are usually found in:
#   root/zBrains_output/sub-ID/ses-xx/maps/region
dir_zb = "DM_zb_37comp"  # zBrains output folder that holds the sub/ses tree
dir_sub = "norm-z"       # sub-folder (inside each session) that is read from

# ---------------------------------------------------------------------------
# Region definitions
# ---------------------------------------------------------------------------
# Keys:
#   region     : region name, also used as a folder name on disk
#   surfaces   : surface labels to collect
#   resolution : surface resolution label
#   features   : (list) features to extract
#   smoothing  : smoothing kernels to collect
# To restrict a run to a single kernel, shrink the list,
# e.g. smoothing=[10] for cortex or smoothing=[5] for hippocampus.
cortex = dict(
    region="cortex",
    surfaces=["midthickness", "white"],
    resolution="32k",
    features=["ADC", "T1map", "volume"],
    smoothing=[2, 5, 10],
)

hippocampus = dict(
    region="hippocampus",
    surfaces=["midthickness"],
    resolution="0p5mm",
    features=["ADC", "T1map", "volume"],
    smoothing=[1, 2, 5],
)

# Subcortex carries neither 'surfaces' nor 'resolution'.
subcortex = dict(
    region="subcortex",
    features=["ADC", "T1map", "volume"],
    smoothing=[2, 5, 10],
)

# Note: 'volume' is the output from zBrains z-scores.
# Subcortex is defined above but deliberately left out of the active list;
# the full list would be [cortex, hippocampus, subcortex].
regions = [cortex, hippocampus]

In [9]:
# Determine which file names to look for.
# file_ptrns is a list of lists, parallel to `regions`: file_ptrns[k] holds the
# patterns that vrtx.zbFilePtrn returns for regions[k].
file_ptrns = [vrtx.zbFilePtrn(region) for region in regions]

# Report how many files are expected per participant in each region.
print("Number of files per participant per region:")
for region, region_ptrns in zip(regions, file_ptrns):
    print(f"\t{region['region']}: {len(region_ptrns)}")

Number of files per participant per region:
	cortex: 36
	hippocampus: 18


In [10]:
# Split the participant sheet into one file per study (3T / 7T).
# IDptrn is a list of pattern groups, one group per study and in the same
# order as `studies`, e.g. [["PX"], ["PNE", "PNA", "PNF"]].
IDptrn = [study["ID_pt"] for study in studies]

# id.split writes its split files into dir_tmp; the returned value is later
# iterated in parallel with `studies`.
dir_splits = id.split(dir_IDs, "ID", IDptrn, dir_tmp)

[split] Saved: /host/verges/tank/data/daniel/3T7T/z/outputs/tmp/split_ID_1.csv (16 rows)PX: 16
[split] Saved: /host/verges/tank/data/daniel/3T7T/z/outputs/tmp/split_ID_2.csv (16 rows)PNE: 16


In [11]:
# Aggregate z-scores into DataFrames.
#
# For every (region, file pattern, study) combination this cell builds one
# table with one row per vertex and one column per "<ID>_<SES>" pair, fills
# each column from that participant's zBrains file, and writes the table to
#   <output_dir>/<study>/<region>/<study>_<pattern stem>.csv
# Columns whose file could not be loaded stay NaN and the offending path is
# recorded in `error_list`.

# Number of vertices (DataFrame rows) expected for each surface resolution.
N_VERTICES = {
    "32k": 32492,
    "5k": 4842,
    "0p5mm": 7262,
}

error_list = []  # paths for which the loader returned None

for region, region_ptrns in zip(regions, file_ptrns):
    print("≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡")
    region_name = region["region"]
    print(region_name)

    # Resolve the vertex count once per region and fail loudly on anything
    # unsupported, instead of silently reusing the value of a previous region
    # (or hitting a NameError / KeyError further down).
    resolution = region.get("resolution")
    if resolution not in N_VERTICES:
        raise ValueError(
            f"Region '{region_name}': unsupported or missing resolution "
            f"{resolution!r}; expected one of {sorted(N_VERTICES)}"
        )
    len_vertex = N_VERTICES[resolution]

    for ptrn in region_ptrns:
        print("================================")
        print(f"\t{ptrn}")

        # dir_splits and studies are paired positionally: one split per study.
        for ID, study in zip(dir_splits, studies):
            study_name = study["name"]
            print("--------------------------------")
            print(f"\t\t{study_name}")
            # Presumably loads the split file and normalizes the ID / SES
            # columns -- TODO confirm against Utils.gen.fmt.
            IDs = gen.fmt(ID, ["ID", "SES"])

            # Initialize a NaN-filled table of shape len_vertex x len(IDs).
            ID_SES = IDs["ID"] + "_" + IDs["SES"]
            df = pd.DataFrame(np.nan, index=range(len_vertex), columns=ID_SES)

            for i, s in zip(IDs["ID"], IDs["SES"]):
                i_s = i + "_" + s
                # Get the study root dir from the ID name.
                root_dir = id.get_rootDir(study, i)

                # File to read for this participant/session; its values go
                # into column `i_s` of df.
                file_path = "/".join(
                    [root_dir, dir_zb, i, s, dir_sub, region_name, i_s + "_" + ptrn]
                )

                if file_path.endswith("csv"):
                    # MUST BE REWORKED
                    vals = vrtx.load_csv(file_path)
                elif file_path.endswith("func.gii"):
                    vals = vrtx.load_gifti(file_path)
                else:
                    print(f"File extension unrecognized for file: {file_path}")
                    continue

                if vals is None:
                    # Loading failed: remember the path; the column stays NaN.
                    error_list.append(file_path)
                    continue

                df[i_s] = vals

            # Save the table, creating the study/region folder if needed.
            out_name = study_name + "_" + ptrn.split(".")[0]
            out_path = f"{output_dir}/{study_name}/{region_name}/{out_name}.csv"
            os.makedirs(os.path.dirname(out_path), exist_ok=True)
            df.to_csv(out_path)
            print(f"Saved {out_path}")

# Summarize how many files failed to load per participant.
error_IDs = [vrtx.get_ID_SES(path)[0] for path in error_list]
# Explicit dtype keeps the empty case (no errors) well-defined.
print(pd.Series(error_IDs, dtype="object").value_counts())

≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡
cortex
	hemi-L_surf-fsLR-32k_label-midthickness_feature-ADC_smooth-2mm_analysis-regional.func.gii
--------------------------------
		MICs


Saved /host/verges/tank/data/daniel/3T7T/z/outputs/values/MICs/cortex/MICs_hemi-L_surf-fsLR-32k_label-midthickness_feature-ADC_smooth-2mm_analysis-regional.csv
--------------------------------
		PNI
Saved /host/verges/tank/data/daniel/3T7T/z/outputs/values/PNI/cortex/PNI_hemi-L_surf-fsLR-32k_label-midthickness_feature-ADC_smooth-2mm_analysis-regional.csv
	hemi-L_surf-fsLR-32k_label-midthickness_feature-T1map_smooth-2mm_analysis-regional.func.gii
--------------------------------
		MICs
Saved /host/verges/tank/data/daniel/3T7T/z/outputs/values/MICs/cortex/MICs_hemi-L_surf-fsLR-32k_label-midthickness_feature-T1map_smooth-2mm_analysis-regional.csv
--------------------------------
		PNI
Saved /host/verges/tank/data/daniel/3T7T/z/outputs/values/PNI/cortex/PNI_hemi-L_surf-fsLR-32k_label-midthickness_feature-T1map_smooth-2mm_analysis-regional.csv
	hemi-L_surf-fsLR-32k_label-midthickness_feature-volume_smooth-2mm_analysis-regional.func.gii
--------------------------------
		MICs
Saved /host/ver