# Aggregate zBrains outputs

In [1]:
import importlib
import os
import sys

import numpy as np
import pandas as pd

import vrtx

sys.path.append('/host/verges/tank/data/daniel/')  # Replace with the path to Utils
from Utils import gen
from Utils import id


In [14]:
# Re-execute the already-imported local modules so that edits made to their
# source files are picked up without restarting the kernel.
# Note: `id` here is the `Utils.id` module imported above, not the builtin id().
importlib.reload(vrtx)
importlib.reload(id)
importlib.reload(gen)

<module 'Utils.id' from '/host/verges/tank/data/daniel/Utils/id.py'>

In [3]:
# Table of the IDs of interest (should be in long format)
dir_IDs = "/host/verges/tank/data/daniel/3T7T/z/data/pt/master_demo_19Mar2025_age_y.csv"
# Root folder under which the aggregated value tables are written
output_dir = "/host/verges/tank/data/daniel/3T7T/z/outputs/values"
# Scratch folder handed to the ID-splitting step
dir_tmp = "/host/verges/tank/data/daniel/3T7T/z/outputs/tmp"

# Per-study settings. `dir_root` is the derivatives folder of the study;
# `ID_ctrl` / `ID_pt` presumably list the ID prefixes of controls and
# patients respectively (TODO confirm against Utils.id).
MICs = dict(
    name="MICs",
    dir_root="/data/mica3/BIDS_MICs/derivatives",
    ID_ctrl=["HC"],
    ID_pt=["PX"],
)

PNI = dict(
    name="PNI",
    dir_root="/data/mica3/BIDS_PNI/derivatives",
    ID_ctrl=["PNC", "Pilot"],
    ID_pt=["PNE", "PNA", "PNF"],
)

# Order matters: later cells pair this list positionally with the ID splits
studies = [MICs, PNI]


In [4]:
# zBrains output layout
# N.b. the .surf.gii files usually live in:
#   root/zBrains_output/sub-ID/ses-xx/maps/region

dir_zb = "DM_zb_37comp"  # folder that holds the sub/ses tree
dir_sub = "norm-z"       # sub-folder inside each session that is read from

# Each region dict lists what to extract for that region.
# Note: 'volume' is the output from zBrains z-scores.
cortex = dict(
    region="cortex",
    surfaces=["midthickness", "white"],
    resolution="32k",
    features=["ADC", "T1map", "volume"],  # (list) features to extract
    smoothing=[2, 5, 10],                 # alternative tried earlier: [10]
)

hippocampus = dict(
    region="hippocampus",
    surfaces=["midthickness"],
    resolution="0p5mm",
    features=["ADC", "T1map", "volume"],  # (list) features to extract
    smoothing=[1, 2, 5],                  # alternative tried earlier: [5]
)

# Defined for completeness; has no surfaces/resolution entries
subcortex = dict(
    region="subcortex",
    features=["ADC", "T1map", "volume"],
    smoothing=[2, 5, 10],
)

# Regions actually processed below; subcortex is currently left out
# (full set would be [cortex, hippocampus, subcortex])
regions = [cortex, hippocampus]

In [12]:
# Determine which file names to look for

# One list of file-name patterns per region, kept in a separate sub-list
# and in the same order as `regions`
file_ptrns = [vrtx.zbFilePtrn(region) for region in regions]

print("Number of files per participant per region:")
for region, region_ptrns in zip(regions, file_ptrns):
    print(f"\t{region['region']}: {len(region_ptrns)}")

Number of files per participant per region:
	cortex: 36
	hippocampus: 18


In [6]:
# Split the ID table into 3T and 7T subsets
# Each study contributes its patient-ID patterns as one sub-list, so the
# result is a list of lists ordered like `studies`
IDptrn = [study["ID_pt"] for study in studies]
dir_splits = id.split(dir_IDs, "ID", IDptrn, dir_tmp)

[split] Saved: /host/verges/tank/data/daniel/3T7T/z/outputs/tmp/split_ID_1.csv (16 rows)PX: 16
[split] Saved: /host/verges/tank/data/daniel/3T7T/z/outputs/tmp/split_ID_2.csv (16 rows)PNE: 16


In [15]:
# Aggregate z-scores into one table per study and file pattern.
# For every region -> file pattern -> study, a DataFrame with one row per
# vertex and one column per "<ID>_<SES>" is filled from the per-participant
# files and written to <output_dir>/<study>/<region>/<study>_<pattern>.csv.

# Number of rows (one per vertex) expected for each supported resolution label
VERTEX_COUNTS = {"32k": 32492, "5k": 4842, "0p5mm": 7262}

for region, region_ptrns in zip(regions, file_ptrns):
    print("≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡")
    region_name = region["region"]
    print(region_name)

    # Resolve the row count once per region and fail loudly when it is unknown.
    # Previously an unmatched resolution silently reused the value left over
    # from an earlier iteration (or raised NameError on the first one).
    resolution = region.get("resolution")
    if resolution not in VERTEX_COUNTS:
        raise ValueError(
            f"Region '{region_name}' has unsupported or missing resolution "
            f"{resolution!r}; expected one of {sorted(VERTEX_COUNTS)}"
        )
    len_vertex = VERTEX_COUNTS[resolution]

    for ptrn in region_ptrns:
        print("================================")
        print(f"\t{ptrn}")

        # dir_splits and studies are paired positionally (one split per study)
        for ID, study in zip(dir_splits, studies):
            study_name = study["name"]
            print("--------------------------------")
            print(f"\t\t{study_name}")
            # assumes gen.fmt returns a table with string "ID" and "SES"
            # columns -- TODO confirm against Utils.gen
            IDs = gen.fmt(ID, ["ID", "SES"])

            # Start from an all-NaN table of len_vertex rows x one column per
            # ID_SES, so columns that never receive values stay empty
            ID_SES = IDs["ID"] + "_" + IDs["SES"]
            df = pd.DataFrame(np.nan, index=range(len_vertex), columns=ID_SES)

            for i, s in zip(IDs["ID"], IDs["SES"]):
                i_s = i + "_" + s
                # get study root dir from ID name
                root_dir = id.get_rootDir(study, i)

                # Full path of this participant/session file for the pattern
                file_path = "/".join(
                    [root_dir, dir_zb, i, s, dir_sub, region_name, i_s + "_" + ptrn]
                )

                if file_path.endswith("csv"):
                    # MUST BE REWORKED
                    vals = vrtx.load_csv(file_path)
                elif file_path.endswith("func.gii"):
                    vals = vrtx.load_gifti(file_path)
                else:
                    print(f"File extension unrecognized for file: {file_path}")
                    continue

                # A loader returning None leaves the column as NaN
                if vals is not None:
                    df[i_s] = vals

            # Save the table; create the target folder first so a fresh
            # output tree does not make to_csv fail
            out_name = study_name + "_" + ptrn.split(".")[0]
            out_dir = f"{output_dir}/{study_name}/{region_name}"
            os.makedirs(out_dir, exist_ok=True)
            out_path = f"{out_dir}/{out_name}.csv"
            df.to_csv(out_path)
            print(f"Saved {out_path}")


≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡
cortex
	hemi-L_surf-fsLR-32k_label-midthickness_feature-ADC_smooth-2mm_analysis-regional.func.gii
--------------------------------
		MICs
[fmt] ID formatted
[fmt] SES formatted
Saved /host/verges/tank/data/daniel/3T7T/z/outputs/values/MICs/cortex/MICs_hemi-L_surf-fsLR-32k_label-midthickness_feature-ADC_smooth-2mm_analysis-regional.csv
--------------------------------
		PNI
[fmt] ID formatted
[fmt] SES formatted
Saved /host/verges/tank/data/daniel/3T7T/z/outputs/values/PNI/cortex/PNI_hemi-L_surf-fsLR-32k_label-midthickness_feature-ADC_smooth-2mm_analysis-regional.csv
	hemi-L_surf-fsLR-32k_label-midthickness_feature-T1map_smooth-2mm_analysis-regional.func.gii
--------------------------------
		MICs
[fmt] ID formatted
[fmt] SES formatted
Saved /host/verges/tank/data/daniel/3T7T/z/outputs/values/MICs/cortex/MICs_hemi-L_surf-fsLR-32k_label-midthickness_feature-T1map_smooth-2mm_analysis-regional.csv
--------------------------------
		PNI
[fmt] ID formatted
[f

KeyboardInterrupt: 

In [8]:
# Display the DataFrame currently bound to `df`. The aggregation loop rebinds
# `df` for every study/pattern combination, so this shows whichever table was
# built last before the loop finished or was interrupted.
df

Unnamed: 0,sub-PNE002_ses-a1,sub-PNE003_ses-a1,sub-PNE004_ses-a1,sub-PNE005_ses-a1,sub-PNE006_ses-a1,sub-PNE007_ses-a1,sub-PNE008_ses-a1,sub-PNE009_ses-a1,sub-PNE010_ses-a1,sub-PNE011_ses-a1,sub-PNE012_ses-a1,sub-PNE013_ses-a1,sub-PNE014_ses-a1,sub-PNE015_ses-a1,sub-PNE016_ses-a1,sub-PNE017_ses-a1
0,0.322492,0.137700,-0.901105,2.052413,-0.767744,-0.064111,-1.434923,1.609833,-0.398836,-1.256622,-0.896521,-0.092931,-0.664800,-1.253220,0.607645,
1,0.345837,-0.161419,-0.937237,2.141358,-1.113373,-0.268190,-1.223406,0.781992,-0.638839,-1.035027,-0.655036,-0.461188,0.062052,-1.007138,0.016829,
2,0.374129,-0.165359,-0.952383,2.211402,-1.111562,-0.239493,-1.223617,0.770655,-0.596745,-1.028418,-0.645440,-0.452201,0.120501,-0.954924,0.028588,
3,0.400916,-0.127812,-0.960929,2.306104,-1.130828,-0.210748,-1.215706,0.763522,-0.563301,-1.027697,-0.637832,-0.402689,0.212376,-0.944186,0.078900,
4,0.401800,-0.129108,-0.966541,2.285334,-1.132833,-0.212661,-1.220016,0.790280,-0.556688,-1.026433,-0.638295,-0.406679,0.168830,-0.946313,0.046291,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7257,0.073855,-1.197156,-2.926531,-0.603444,-1.782282,-0.532634,-1.314034,-1.251581,-0.431579,-0.988907,-1.319041,-1.118079,-1.141255,-1.206843,0.709479,
7258,-0.032113,-1.148259,-2.983616,-0.754985,-1.784798,-0.592454,-1.477597,-1.332789,-0.478769,-0.900483,-1.358390,-1.117304,-1.248798,-1.173295,-0.273985,
7259,0.217636,-1.360154,-3.829210,1.044344,-2.263106,-0.153864,-1.710983,-1.353154,-0.410783,-2.247413,-0.626059,-0.857214,-0.683947,-1.323870,2.473974,
7260,0.269491,-1.409924,-3.698246,0.795311,-2.169745,-0.213383,-1.594934,-1.295342,-0.378990,-2.067136,-0.654045,-0.966868,-0.797620,-1.306508,2.476067,


In [9]:
import re

# Sample path used to demonstrate pulling the subject ID and session out of a
# zBrains output location
eg_dir = "/data/mica3/BIDS_PNI/derivatives/zbrains_3T7T_daniel_8Jan2025/sub-HC083/ses-02/maps/cortex/hemi-L_surf-fsLR-32k_label-pial_feature-ADC_smooth-2mm.func.gii"
print(eg_dir)

# Capture whatever follows "sub-" and "ses-" up to the next path separator
found = re.search(r"sub-([^/]+)/ses-([^/]+)", eg_dir)

if found is None:
    print("ID and session not found in the directory string.")
else:
    ID, session = found.groups()
    print(f"{ID}-{session}")

/data/mica3/BIDS_PNI/derivatives/zbrains_3T7T_daniel_8Jan2025/sub-HC083/ses-02/maps/cortex/hemi-L_surf-fsLR-32k_label-pial_feature-ADC_smooth-2mm.func.gii
HC083-02
