# count number of missing FLAIR volumes

Output:
Table. Each UID is a row. Cols: # ses @ each study, # FLAIR images @ each study, # T1w/T1map

In [1]:
import importlib
import tTsTGrpUtils as tsutil
import pandas as pd


In [3]:
df_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/02c_mapPths_QC_26Sep2025-104109.csv"
df = pd.read_csv(df_pth, dtype=str)
df

Unnamed: 0,UID,MICS_ID,PNI_ID,study,SES,Date,gender,dob,language,sex,...,hipp_hemi-L_surf-0p5mm_label-outer_FA_unsmth,hipp_hemi-R_surf-0p5mm_label-outer_FA_unsmth,ctx_hemi-L_surf-fsLR-5k_label-swm1.0mm_T1map_unsmth,ctx_hemi-R_surf-fsLR-5k_label-swm1.0mm_T1map_unsmth,ctx_hemi-L_surf-fsLR-5k_label-swm1.0mm_flair_unsmth,ctx_hemi-R_surf-fsLR-5k_label-swm1.0mm_flair_unsmth,ctx_hemi-L_surf-fsLR-5k_label-swm1.0mm_ADC_unsmth,ctx_hemi-R_surf-fsLR-5k_label-swm1.0mm_ADC_unsmth,ctx_hemi-L_surf-fsLR-5k_label-swm1.0mm_FA_unsmth,ctx_hemi-R_surf-fsLR-5k_label-swm1.0mm_FA_unsmth
0,UID0001,HC129,Pilot013,7T,05,18.04.2024,F,,en,F,...,,,,,,,,,,
1,UID0001,HC129,Pilot013,3T,01,09.07.2024,F,,en,F,...,/host/verges/tank/data/daniel/3T7T/z/maps/sub-...,/host/verges/tank/data/daniel/3T7T/z/maps/sub-...,,,,,,,,
2,UID0002,HC082,PNC003,7T,01,06.05.2022,F,17.09.1997,English,F,...,/host/verges/tank/data/daniel/3T7T/z/maps/sub-...,/host/verges/tank/data/daniel/3T7T/z/maps/sub-...,,,,,,,,
3,UID0002,HC082,PNC003,7T,03,13.03.2023,F,17.09.1997,English,F,...,,,,,,,,,,
4,UID0002,HC082,PNC003,7T,02,13.06.2022,F,17.09.1997,English,F,...,/host/verges/tank/data/daniel/3T7T/z/maps/sub-...,/host/verges/tank/data/daniel/3T7T/z/maps/sub-...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,UID0061,PX064,PNE041,3T,03,21.01.2025,F,01.10.1997,fr,F,...,/host/verges/tank/data/daniel/3T7T/z/maps/sub-...,/host/verges/tank/data/daniel/3T7T/z/maps/sub-...,,,,,,,,
198,UID0062,PX192,PNE042,7T,a1,26.08.2025,F,04.06.1964,English,F,...,,,,,,,,,,
199,UID0062,PX192,PNE042,3T,01,20.11.2024,F,04.06.1964,en,F,...,/host/verges/tank/data/daniel/3T7T/z/maps/sub-...,/host/verges/tank/data/daniel/3T7T/z/maps/sub-...,,,,,,,,
200,UID0063,PX231,PNE043,7T,a1,27.08.2025,F,21.08.2001,French,F,...,,,,,,,,,,


In [29]:
def nImgbyID(root, ID, voi, verbose=False):
    """"
    Count number of images by ID and session for each volume of interest.
    Checks for existing files that contain the string in voi and end with '.nii.gz'

    Input:
        root:
            root dir of study
        study_id:
            ID of participant in study
        voi:
            volumes of interest to count

    Returns:
        list with len(voi) with number of images for each volume
    """
    # structure of BIDS:
    ## /sub-{ID}/ -> list of sessions. For each of these folders, look for the voi files. Handle just anat files for now
    import os
    import glob
    import pandas as pd
    out = []

    sub_dirs = glob.glob(f"{root}/sub-{ID}/*/") # get list of session dirs
    nSes = len(sub_dirs)
    if verbose:
        print("-"*40)
        print(f"[nImgbyID] {ID} has {nSes} sessions: {sub_dirs}")
    for v in voi:
        nImg = 0
        for ses in sub_dirs:
            # look for files in ses/anat that contain v and end with .nii.gz
            anat_dir = os.path.join(ses, 'anat')
            if os.path.exists(anat_dir):
                files = glob.glob(f"{anat_dir}/*{v}*.nii*")
                if len(files) > 1:
                    if verbose:
                        print(f"[nImgbyID] WARNING: {ID} {ses} found {len(files)} {v} files: {files}")
                    
                    nImg += 1 # count only once
                else: 
                    nImg += len(files)
                if verbose:
                    print(f"[nImgbyID] {ID} {ses} {v}: found {len(files)} files: {files}")
            else:
                if verbose:
                    print(f"[nImgbyID] {ID} {ses} has no anat dir: {anat_dir}")
        out.append(nImg)
    if verbose:
        print(f"[nImgbyID] {ID}: {voi} = {out}")
    return out

In [None]:
# create a df_out with Each UID is a row. Cols: # ses @ each study
studies = df['study'].unique()
df_out = df[['UID', 'study', 'MICS_ID', 'PNI_ID']].drop_duplicates().reset_index(drop=True)
voi = ['FLAIR', 'T1w', 'T1map'] # volumes of interest
# create col study ID for each row
df_out['study_ID'] = df_out.apply(lambda row: row['MICS_ID'] if row['study']=='3T' else row['PNI_ID'] if row['study']=='7T' else 'UNKNOWN STUDY', axis=1)

roots = {
    '3T': '/data/mica3/BIDS_MICs/rawdata',
    '7T': '/data/mica3/BIDS_PNI/rawdata'
}


# include 3T, 7T IDs
for study in studies:
    df_study = df[df['study'] == study]
    df_study_count = df_study.groupby('UID').size().reset_index(name=f'nSES_{study}')
    df_out = df_out.merge(df_study_count, on='UID', how='left')
    root = roots[study]
    if study == '7T' and 'T1w' in voi:
        voi_iterate = voi.copy()  # create a copy of the list
        voi_iterate[voi_iterate.index('T1w')] = 'UNIT1'  # replace T1w with UNIT1 in PNI
    else:
        voi_iterate = voi

    # count number of each raw volume 
    new_cols = [f'n{v}_{study}' for v in voi_iterate]
    df_out[new_cols] = 0 # initialize cols
    
    ids = df_out[df_out['study'] == study]['study_ID'].unique()
    for id in ids:
        nVols = nImgbyID(root, id, voi_iterate, verbose=True)
        df_out.loc[df_out['study_ID'] == id, new_cols] = nVols

if save:
    sv_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/flair_T1w_T1map_counts_byID_29Sep2025.csv"
    df_out.to_csv(sv_pth, index=False)
    print(f"[main] df_out saved to {sv_pth}")
df_out

----------------------------------------
[nImgbyID] Pilot013 has 1 sessions: ['/data/mica3/BIDS_PNI/rawdata/sub-Pilot013/ses-05/']
[nImgbyID] Pilot013 /data/mica3/BIDS_PNI/rawdata/sub-Pilot013/ses-05/ FLAIR: found 0 files: []
[nImgbyID] Pilot013 /data/mica3/BIDS_PNI/rawdata/sub-Pilot013/ses-05/ UNIT1: found 1 files: ['/data/mica3/BIDS_PNI/rawdata/sub-Pilot013/ses-05/anat/sub-Pilot013_ses-05_acq-05mm_UNIT1.nii.gz']
[nImgbyID] Pilot013 /data/mica3/BIDS_PNI/rawdata/sub-Pilot013/ses-05/ T1map: found 1 files: ['/data/mica3/BIDS_PNI/rawdata/sub-Pilot013/ses-05/anat/sub-Pilot013_ses-05_acq-05mm_T1map.nii.gz']
[nImgbyID] Pilot013: ['FLAIR', 'UNIT1', 'T1map'] = [0, 1, 1]
----------------------------------------
[nImgbyID] PNC003 has 4 sessions: ['/data/mica3/BIDS_PNI/rawdata/sub-PNC003/ses-03/', '/data/mica3/BIDS_PNI/rawdata/sub-PNC003/ses-01/', '/data/mica3/BIDS_PNI/rawdata/sub-PNC003/ses-02/', '/data/mica3/BIDS_PNI/rawdata/sub-PNC003/ses-04/']
[nImgbyID] PNC003 /data/mica3/BIDS_PNI/rawdata/su

Unnamed: 0,UID,study,MICS_ID,PNI_ID,study_ID,nSES_7T,nFLAIR_7T,nUNIT1_7T,nT1map_7T,nSES_3T,nFLAIR_3T,nT1w_3T,nT1map_3T
0,UID0001,7T,HC129,Pilot013,Pilot013,1,0,1,1,1.0,0,0,0
1,UID0001,3T,HC129,Pilot013,HC129,1,0,0,0,1.0,1,1,1
2,UID0002,7T,HC082,PNC003,PNC003,4,0,4,4,2.0,0,0,0
3,UID0002,3T,HC082,PNC003,HC082,4,0,0,0,2.0,2,2,2
4,UID0003,7T,HC081,PNC006,PNC006,4,0,4,4,3.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,UID0061,3T,PX064,PNE041,PX064,1,0,0,0,3.0,3,3,3
121,UID0062,7T,PX192,PNE042,PNE042,1,1,1,1,1.0,0,0,0
122,UID0062,3T,PX192,PNE042,PX192,1,0,0,0,1.0,1,1,1
123,UID0063,7T,PX231,PNE043,PNE043,1,1,1,1,1.0,0,0,0
