# count number of volumes per ID

Output:
A table with one row per UID. Columns: number of sessions per study, and number of images of interest per study.

In [None]:
import importlib
import tTsTGrpUtils as tsutil
import pandas as pd
import datetime

In [None]:
# Input table (CSV). Every column is read as text (dtype=str).
# The cells below use its 'UID', 'study', 'MICS_ID' and 'PNI_ID' columns.
df_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/02c_mapPths_QC_26Sep2025-104109.csv"
df = pd.read_csv(
    filepath_or_buffer=df_pth,
    dtype=str,
)
df

In [None]:
def nImgbyID(root, ID, voi, verbose=False):
    """
    Count, for each volume of interest, how many sessions of one participant contain it.

    Session directories are the sub-directories of ``{root}/sub-{ID}/`` (BIDS layout).
    Only the ``anat`` folder of each session is searched, with the glob pattern
    ``*{v}*.nii*``, so both ``.nii`` and ``.nii.gz`` files match.
    A session adds at most 1 to the count of a volume: when several files match in
    the same session they are counted once (and reported when ``verbose`` is True).

    Input:
        root:
            root dir of study
        ID:
            ID of participant in study (without the 'sub-' prefix)
        voi:
            volumes of interest to count (strings looked for in the file names)
        verbose:
            if True, print the sessions and files found along the way

    Returns:
        list with len(voi) with number of images for each volume, in the order of voi
        (all zeros when the participant has no session directories)
    """
    # structure of BIDS:
    ## {root}/sub-{ID}/ -> list of sessions. For each of these folders, look for the voi files. Handle just anat files for now
    import os
    import glob

    # trailing "/" restricts the match to directories; sorted only so that the
    # verbose output is reproducible (the counts do not depend on the order)
    ses_dirs = sorted(glob.glob(f"{root}/sub-{ID}/*/"))
    if verbose:
        print("-"*40)
        print(f"[nImgbyID] {ID} has {len(ses_dirs)} sessions: {ses_dirs}")

    out = []
    for v in voi:
        nImg = 0
        for ses in ses_dirs:
            anat_dir = os.path.join(ses, 'anat')
            if not os.path.exists(anat_dir):
                if verbose:
                    print(f"[nImgbyID] {ID} {ses} has no anat dir: {anat_dir}")
                continue

            # files in ses/anat whose name contains v and has a .nii* extension
            files = glob.glob(f"{anat_dir}/*{v}*.nii*")
            if verbose:
                if len(files) > 1:
                    print(f"[nImgbyID] WARNING: {ID} {ses} found {len(files)} {v} files: {files}")
                print(f"[nImgbyID] {ID} {ses} {v}: found {len(files)} files: {files}")

            if files:
                nImg += 1 # count each session only once, however many files matched
        out.append(nImg)

    if verbose:
        print(f"[nImgbyID] {ID}: {voi} = {out}")
    return out

In [None]:
# Build df_out: one row per UID.
# Cols: nSES_<study> (rows of df for that UID in that study; one row per session
# is assumed) and n<voi>_<study> (value returned by nImgbyID for each volume of interest).
save = True
save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/"
save_name = "01d_volumeCounts"

voi = ['FLAIR', 'T1w', 'T1map'] # volumes of interest

# rawdata root dir of each study
roots = {
    '3T': '/data/mica3/BIDS_MICs/rawdata',
    '7T': '/data/mica3/BIDS_PNI/rawdata'
}

# column of df holding the participant ID used by each study
id_cols = {'3T': 'MICS_ID', '7T': 'PNI_ID'}

studies = df['study'].unique()
df_out = df[['UID', 'study', 'MICS_ID', 'PNI_ID']].drop_duplicates().reset_index(drop=True)
# create col study ID for each row
df_out['study_ID'] = df_out.apply(
    lambda row: row[id_cols[row['study']]] if row['study'] in id_cols else 'UNKNOWN STUDY',
    axis=1,
)

# include 3T, 7T IDs
counted_studies = []  # studies for which count columns were actually created
for study in studies:
    if study not in roots:
        # without a root dir nothing can be counted; report it instead of failing with a KeyError
        print(f"[main] WARNING: no root dir configured for study '{study}', skipping")
        continue
    counted_studies.append(study)
    root = roots[study]

    df_study = df[df['study'] == study]
    df_study_count = df_study.groupby('UID').size().reset_index(name=f'nSES_{study}')
    df_out = df_out.merge(df_study_count, on='UID', how='left')

    # replace T1w with UNIT1 in PNI (7T); voi itself is left untouched
    voi_iterate = ['UNIT1' if (study == '7T' and v == 'T1w') else v for v in voi]

    # count number of each raw volume
    new_cols = [f'n{v}_{study}' for v in voi]
    df_out[new_cols] = 'NA' # initialize cols

    # restrict to rows of this study so that an ID string shared with another
    # study cannot receive this study's counts
    in_study = df_out['study'] == study
    for study_id in df_out.loc[in_study, 'study_ID'].unique():
        nVols = nImgbyID(root, study_id, voi_iterate, verbose=True)
        df_out.loc[in_study & (df_out['study_ID'] == study_id), new_cols] = nVols


def _sum_counts(col):
    """Sum the counts of one UID, reading 'NA' as 0.

    If any value is not a non-negative integer the first value of the group is
    returned unchanged instead.
    """
    filled = col.replace('NA', '0')
    if filled.astype(str).str.isdigit().all():
        return filled.astype(int).sum()
    return col.iloc[0]


# remove the study and study_ID columns, put all values relating to the same UID in the same row
# Reverse-sorted so that 7T columns precede 3T columns, as in earlier outputs of this notebook.
study_order = sorted(counted_studies, reverse=True)
agg_spec = {'MICS_ID': 'first', 'PNI_ID': 'first'}
agg_spec.update({f'nSES_{s}': 'first' for s in study_order})
# nVolume cols ordered as in voi, built from voi/studies so changing either needs no edit here
agg_spec.update({f'n{v}_{s}': _sum_counts for v in voi for s in study_order})

df_out = df_out.drop(columns=['study', 'study_ID'])
df_out = df_out.groupby('UID').agg(agg_spec).reset_index()

# reorder cols: IDs, nSES first then nVolume by ordered in voi
df_out = df_out[['UID'] + list(agg_spec)]

if save:
    stamp = datetime.datetime.now().strftime('%d%b%Y-%H%M%S')
    sv_pth = f"{save_pth.rstrip('/')}/{save_name}_{stamp}.csv"
    df_out.to_csv(sv_pth, index=False)
    print(f"[main] df_out saved to {sv_pth}")
df_out