In [1]:
import json
import glob
import os
from pathlib import Path
import pandas as pd
import nibabel as nib

In [2]:
# Dataset root on the local machine (kept with its trailing "/").
DATASET_ROOT = "/home/nimhans/projects/data/PD_SERB/"
# The trailing separator is kept on purpose: the subject glob is built as f"{BIDS_DIR}sub*".
BIDS_DIR = os.path.join(DATASET_ROOT, "bids", "")
# os.path.join avoids the doubled "/" that f"{DATASET_ROOT}/tabular/..." produced.
participant_csv = os.path.join(DATASET_ROOT, "tabular", "demographics", "participants.csv")

## Basic directory checks

In [3]:
# Expected on-disk layout per subject, e.g.: bids/sub-SERB012/ses-01/<modality>

# Modality sub-directories that are checked inside every session folder.
bids_subdir_list = [
    "anat",
    "func",
    "fmap",
    "dwi",
    "perf",
]

# Session label, used to build the "ses-<label>" folder name.
session_id = "01"


In [4]:
# Load the participant table and turn each participant_id into a "sub-<id>" label.
participant_df = pd.read_csv(participant_csv)
participant_ids = "sub-" + participant_df["participant_id"].values

# Entries under BIDS_DIR whose name starts with "sub", and their base names.
subject_dir_list = glob.glob(f"{BIDS_DIR}sub*")
subject_dir_basename_list = list(map(os.path.basename, subject_dir_list))

# Participants listed in the CSV that have no matching entry on disk.
subjects_with_missing_bids_dir = set(participant_ids).difference(subject_dir_basename_list)

print(f"Number of subjects in the participant_csv: {len(participant_ids)}\nNumber of subject dirs on disk: {len(subject_dir_list)}")
print(f"participants missing bids dir:\n{subjects_with_missing_bids_dir}")

Number of subjects in the participant_csv: 76
Number of subject dirs on disk: 71
participants missing bids dir:
{'sub-SERB050', 'sub-SERB026', 'sub-SERB011', 'sub-SERB020', 'sub-SERB046'}


In [12]:
# Build a per-subject status table: one column per modality sub-directory holding
# either the number of *.nii.gz files found or False (directory missing or empty),
# plus an overall "status" (pass/fail) and a free-text "notes" column.
# os.path.join avoids the doubled "/" that f"{DATASET_ROOT}/proc/..." produced.
bids_status_csv = os.path.join(DATASET_ROOT, "proc", "logs", "bids_status.csv")
bids_status_df = pd.DataFrame(index=participant_ids, columns=bids_subdir_list + ["status"])
bids_status_df["status"] = "pass"
bids_status_df["notes"] = "<add reasons>"

# Set participants missing bids dir as failed
missing_subjects = list(subjects_with_missing_bids_dir)
bids_status_df.loc[missing_subjects, bids_subdir_list] = False
bids_status_df.loc[missing_subjects, "status"] = "fail"
bids_status_df.loc[missing_subjects, "notes"] = "bids dir not found"

# Check number of files for the rest of the subjects
# NOTE(review): a directory on disk whose name is not in participant_ids is added
# as a new row by the .loc assignments below and only receives the cells written
# here (no default status/notes) -- confirm whether such subjects should be kept.
for subject_dir in subject_dir_list:
    # Named subject_id (not id) so the builtin id() is not shadowed in the notebook.
    subject_id = os.path.basename(subject_dir)
    for bids_subdir in bids_subdir_list:
        bids_subdir_path = f"{subject_dir}/ses-{session_id}/{bids_subdir}"
        # dir_status ends up False for a missing or empty directory,
        # otherwise it holds the number of *.nii.gz files found.
        dir_status = Path(bids_subdir_path).is_dir()
        if dir_status:
            n_files = len(glob.glob(f"{bids_subdir_path}/*.nii.gz"))
            if n_files == 0:
                # Directory exists but contains no images: report it.
                print(subject_id, n_files)
                dir_status = False
            else:
                dir_status = n_files

        if dir_status is False:
            bids_status_df.loc[subject_id, "status"] = "fail"
        bids_status_df.loc[subject_id, bids_subdir] = dir_status

# Make sure the log directory exists so that to_csv does not fail on a fresh dataset tree.
Path(bids_status_csv).parent.mkdir(parents=True, exist_ok=True)
bids_status_df.to_csv(bids_status_csv)
bids_status_df.head(10)

sub-SERB024 0
sub-SERB018 0


Unnamed: 0,anat,func,fmap,dwi,perf,status,notes
sub-SERB001,6,2,4,3,1,pass,<add reasons>
sub-SERB002,4,2,False,3,2,fail,<add reasons>
sub-SERB006,6,2,4,3,2,pass,<add reasons>
sub-SERB007,9,2,4,3,2,pass,<add reasons>
sub-SERB009,6,2,4,3,2,pass,<add reasons>
sub-SERB010,11,2,4,3,2,pass,<add reasons>
sub-SERB011,False,False,False,False,False,fail,bids dir not found
sub-SERB012,9,False,2,3,False,fail,<add reasons>
sub-SERB013,6,2,4,3,2,pass,<add reasons>
sub-SERB014,6,1,4,3,2,pass,<add reasons>


### List subjects missing nii files, per modality

In [13]:
# For each modality column, report the subjects whose entry is False.
for bids_subdir in bids_subdir_list:
    failed_mask = bids_status_df[bids_subdir].eq(False)
    failed_subjects = bids_status_df.loc[failed_mask].index
    print(f"\nmodality: {bids_subdir}\nfailed subjects: {list(failed_subjects)}")


modality: anat
failed subjects: ['sub-SERB011', 'sub-SERB020', 'sub-SERB026', 'sub-SERB046', 'sub-SERB050']

modality: func
failed subjects: ['sub-SERB011', 'sub-SERB012', 'sub-SERB018', 'sub-SERB020', 'sub-SERB024', 'sub-SERB026', 'sub-SERB044', 'sub-SERB046', 'sub-SERB050', 'sub-SERB058', 'sub-SERB064', 'sub-SERB072', 'sub-SERB080', 'sub-SERB081', 'sub-SERB089', 'sub-SERB092', 'sub-SERB093', 'sub-SERB095', 'sub-SERB100', 'sub-SERB101', 'sub-SERB102']

modality: fmap
failed subjects: ['sub-SERB002', 'sub-SERB011', 'sub-SERB018', 'sub-SERB020', 'sub-SERB024', 'sub-SERB026', 'sub-SERB046', 'sub-SERB050', 'sub-SERB072']

modality: dwi
failed subjects: ['sub-SERB011', 'sub-SERB018', 'sub-SERB020', 'sub-SERB024', 'sub-SERB026', 'sub-SERB046', 'sub-SERB050', 'sub-SERB072']

modality: perf
failed subjects: ['sub-SERB011', 'sub-SERB012', 'sub-SERB018', 'sub-SERB020', 'sub-SERB024', 'sub-SERB026', 'sub-SERB046', 'sub-SERB050', 'sub-SERB053', 'sub-SERB074', 'sub-SERB077', 'sub-SERB080', 'sub-S

### Check nii header for fmap polarity

In [10]:
# Hard-coded path to one fmap magnitude image under test_data, used as the header sample.
sample_nii_file = "/home/nimhans/projects/data/PD_SERB/test_data/bids/sub-BLR03/ses-01/fmap/sub-BLR03_ses-01_acq-AXbold_run-1_magnitude1.nii.gz"

# Load the image with nibabel; its header is inspected in the next cell.
nii = nib.load(sample_nii_file)

In [12]:
# Print the header of the loaded image so its fields can be inspected by eye.
print(nii.header)

<class 'nibabel.nifti1.Nifti1Header'> object, endian='<'
sizeof_hdr      : 348
data_type       : b''
db_name         : b''
extents         : 0
session_error   : 0
regular         : b'r'
dim_info        : 57
dim             : [ 3 80 80 52  1  1  1  1]
intent_p1       : 0.0
intent_p2       : 0.0
intent_p3       : 0.0
intent_code     : none
datatype        : int16
bitpix          : 16
slice_start     : 0
pixdim          : [1.   3.   3.   3.   0.02 0.   0.   0.  ]
vox_offset      : 0.0
scl_slope       : nan
scl_inter       : nan
slice_end       : 0
slice_code      : unknown
xyzt_units      : 10
cal_max         : 0.0
cal_min         : 0.0
slice_duration  : 0.0
toffset         : 0.0
glmax           : 0
glmin           : 0
descrip         : b'TE=2.3;Time=91540.120'
aux_file        : b''
qform_code      : scanner
sform_code      : scanner
quatern_b       : 0.0
quatern_c       : 0.0
quatern_d       : 0.0
qoffset_x       : -114.10395
qoffset_y       : -119.462364
qoffset_z       : -70.16499
srow