# Check completeness of BIDS-formatted dataset
Natalia Vélez, September 2021

In [24]:
import os
import pandas as pd
import numpy as np

import sys
sys.path.append('..')
import utils

Find the BIDS directory and list its subject sub-directories:

In [2]:
# Root of the BIDS dataset, relative to this notebook's location
data_dir = '../../BIDS_data/'

# Collect every entry matching "sub*" under the root, in sorted order
sub_dirs = sorted(utils.gsearch(data_dir, 'sub*'))

print(sub_dirs)

['../../BIDS_data/sub-01', '../../BIDS_data/sub-02', '../../BIDS_data/sub-03', '../../BIDS_data/sub-04', '../../BIDS_data/sub-05', '../../BIDS_data/sub-06', '../../BIDS_data/sub-07', '../../BIDS_data/sub-08', '../../BIDS_data/sub-09', '../../BIDS_data/sub-10', '../../BIDS_data/sub-11', '../../BIDS_data/sub-12', '../../BIDS_data/sub-13', '../../BIDS_data/sub-14', '../../BIDS_data/sub-15', '../../BIDS_data/sub-16', '../../BIDS_data/sub-17', '../../BIDS_data/sub-18', '../../BIDS_data/sub-19', '../../BIDS_data/sub-20', '../../BIDS_data/sub-21', '../../BIDS_data/sub-22', '../../BIDS_data/sub-23', '../../BIDS_data/sub-24', '../../BIDS_data/sub-25', '../../BIDS_data/sub-26', '../../BIDS_data/sub-27', '../../BIDS_data/sub-28', '../../BIDS_data/sub-29', '../../BIDS_data/sub-30']


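Check subject sub-directories: count the anatomical, functional, and fieldmap images for each subject, and verify that every functional image is listed in the `IntendedFor` field of a fieldmap sidecar: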

In [25]:
# Build one summary row per subject: image counts plus two fieldmap
# assignment checks (all_assigned, no_repeats).
session_list = []
for sub in sub_dirs:
    sub_id = utils.str_extract('sub-[0-9]+', sub)

    # Count # anatomical, functional images (functionals split by task label)
    anat = utils.gsearch(sub, 'anat', '*.nii')
    func = utils.gsearch(sub, 'func', '*.nii')
    teaching_func = [f for f in func if 'task-teaching' in f]
    tomloc_func = [f for f in func if 'task-tomloc' in f]

    # Count fieldmap images (only files with "AP" in their name)
    fmap = utils.gsearch(sub, 'fmap', '*AP*.nii')

    # Check: Has every functional been assigned a fieldmap image?
    # Gather the IntendedFor entries from each fieldmap's JSON sidecar.
    assigned_funcs = []
    for fmap_file in fmap:
        fmap_meta_file = fmap_file.replace('.nii', '.json')
        fmap_meta = utils.read_json(fmap_meta_file)
        intended_for = fmap_meta['IntendedFor']
        # BIDS allows IntendedFor to be a single string as well as a list;
        # wrap it so that extending the list does not split the path into
        # individual characters.
        if isinstance(intended_for, str):
            intended_for = [intended_for]
        assigned_funcs += intended_for

    # IntendedFor entries are joined onto the subject directory so that they
    # can be compared against the paths found on disk.
    assigned_funcs = ['%s/%s' % (sub, f) for f in assigned_funcs]

    # Compare sorted copies: the check is about *which* functionals are
    # assigned, not the order in which the search or the sidecars list them.
    # NOTE(review): assumes utils.gsearch returns paths of the form
    # "<sub>/func/<file>" -- confirm against utils.
    all_assigned = sorted(func) == sorted(assigned_funcs)
    no_repeats = len(assigned_funcs) == len(set(assigned_funcs))

    # Add to dataframe
    session_list.append([sub_id, len(anat), len(teaching_func), len(tomloc_func), len(fmap), all_assigned, no_repeats])

session_df = pd.DataFrame(session_list, columns=['participant_id', 'anat', 'teaching', 'tomloc', 'fmap', 'all_assigned', 'no_repeats'])
session_df

Unnamed: 0,participant_id,anat,teaching,tomloc,fmap,all_assigned,no_repeats
0,sub-01,1,10,2,0,False,True
1,sub-02,1,10,2,0,False,True
2,sub-03,1,9,1,0,False,True
3,sub-04,1,10,2,1,True,True
4,sub-05,1,10,2,3,True,True
5,sub-06,1,10,2,3,True,True
6,sub-07,1,10,2,3,True,True
7,sub-08,1,10,2,3,True,True
8,sub-09,1,10,2,3,True,True
9,sub-10,1,10,2,3,True,True
