In [1]:
import os
import pickle
import re
import boto3
import pandas as pd
from botocore import UNSIGNED
from botocore.config import Config
DATADIR = '/ocean/projects/med220004p/shared/data_sandbox/derived/RBC-testing/data/UPenn'


def fieldmap_type(path):
    if 'fmap' not in os.listdir(path):
        return 'None'
    else:
        fmaps = os.listdir(os.path.join(path, 'fmap'))
        if not fmaps:
            return 'None'
        elif any(fmap.endswith('_phase1.nii.gz') or fmap.endswith('_phasediff.nii.gz') for fmap in fmaps):
            return 'phasediff'
        elif any(fmap.endswith('_epi.nii.gz') for fmap in fmaps):
            return 'epi'
    return 'unknown'


def key_from_s3(key):
    _subses = [x_for_session(key, x) for x in ('task', 'run', 'acq') if
               f'_{x}-' in key]
    _key = '_'.join([part for part in [*key.split('/', 2)[:2], *_subses] if
                     part is not None])
    return _key


def match_all_entities(filepath, key, minimal=False):
    entities = key.split('_')
    if minimal:
        entities = [entity for entity in entities if
                    any(entity.startswith(minkey) for minkey in ('sub-', 'ses-'))]
    if all(entity in filepath for entity in entities):
        return True
    return False


def x_for_session(filename, x):
    return f"{x}-{re.search(f'(?<=_{x}-)[0-9a-zA-Z]*(?=_)', filename).group(0)}"

In [None]:
# Create and pickle raw data DataFrame
# local raw data
runs = {}
for dataset in [dataset for dataset in os.listdir(DATADIR) if dataset != 'HBN']:
    datasetdir = os.path.join(DATADIR, dataset)
    for subject in os.listdir(datasetdir):
        subdir = os.path.join(datasetdir, subject)
        if os.path.isdir(subdir):
            for ses in os.listdir(subdir):
                sesdir = os.path.join(subdir, ses)
                sestask = None
                sesrun = None
                for typedir in os.listdir(sesdir):
                    for _file in os.listdir(os.path.join(sesdir, typedir)):
                        _key = f'{subject}_{ses}'
                        _subses = [x_for_session(_file, x) for x in ('task', 'run', 'acq') if
                                   f'_{x}-' in _file]
                        if _subses:
                            _key = '_'.join([_key, *_subses])
                            runs.update({_key: {'dataset': dataset, 'path': os.path.join(subdir, ses)}})
                if sestask is None and sesrun is None:
                    runs.update({f'{subject}_{ses}': {'dataset': dataset, 'path': os.path.join(subdir, ses)}})
for run in list(runs.keys()):
    if any([_run != run and _run.startswith(run) for _run in runs]):
        del runs[run]
    else:
        for subdir in ['anat', 'func']:
            subpath = os.path.join(runs[run]['path'], subdir)
            runs[run][subdir] = os.path.exists(subpath) and bool(os.listdir(subpath))
df = pd.DataFrame(runs).T
df[(df['anat'] == True) & (df['func'] == True)]
df['fieldmaps'] = df['path'].apply(fieldmap_type)

In [None]:
# S3 raw data
client = boto3.client('s3', config=Config(signature_version=UNSIGNED))
hbn_files = []
hbn_run_table = {}
with open(f'{DATADIR}/HBN/HBN_subs.txt', 'r', encoding='utf-8') as _f:
    hbn_subs = [sub.strip() for sub in _f.readlines()]
keep_getting = True
nct = None
while keep_getting:
    if nct is None:
        hbn_next = client.list_objects_v2(Bucket='fcp-indi', Prefix='data/Projects/HBN/BIDS_datalad/')
    else:
        hbn_next = client.list_objects_v2(Bucket='fcp-indi', Prefix='data/Projects/HBN/BIDS_datalad/',
                                          ContinuationToken=nct)
    hbn_files += [key for key in [
        re.sub('^data/Projects/HBN/BIDS_datalad/', '', key.get('Key', '')) for key in hbn_next['Contents']
    ] if any(key.startswith(sub) for sub in hbn_subs)]
    nct = hbn_next.get('NextContinuationToken')
    keep_getting = hbn_next.get('IsTruncated')
_hbn_runs = list(set(key_from_s3(key) for key in hbn_files))
hbn_runs = []
for run in _hbn_runs:
    if not any([_run != run and _run.startswith(run) for _run in _hbn_runs]):
        hbn_runs.append(run)
for key in hbn_runs:
    hbn_run_table[key] = {'path': 's3://fcp-indi/data/Projects/HBN/BIDS_datalad/' + 
                          '/'.join(key.split('_', 2)[:2]), 'dataset': 'HBN',
                          'anat': False, 'func': False, 'fieldmaps': 'None'}
for _file in hbn_files:
    for subdir in ['anat', 'func']:
        if f'/{subdir}/' in _file and not _file.endswith(f'/{subdir}/'):
            s3key = key_from_s3(_file)
            if subdir == 'anat':
                for _key in list(hbn_run_table.keys()):
                    if match_all_entities(_file, _key, minimal=True):
                        hbn_run_table[_key][subdir] = True
            else:
                hbn_run_table[s3key][subdir] = True
    if 'fmap' in _file:
        s3key = key_from_s3(_file)
        if _file.endswith('_phasediff.nii.gz') or _file.endswith('_phase1.nii.gz'):
            for _key in list(hbn_run_table.keys()):
                if match_all_entities(_file, _key, minimal=True):
                    hbn_run_table[_key]['fieldmaps'] = 'phasediff'
        if _file.endswith('_epi.nii.gz'):
            for _key in list(hbn_run_table.keys()):
                if match_all_entities(_file, _key, minimal=True):
                    hbn_run_table[_key]['fieldmaps'] = 'epi'
hbn_df = pd.DataFrame(hbn_run_table).T
# join local & s3 tables
df = pd.concat([df, hbn_df])
# drop runs missing anatomical or functional data
df = df[(df['anat'] == True) & (df['func'] == True)]
for col in ('anat', 'func'):
    df.drop(col, axis='columns', inplace=True)
# save picke and TSV
df.index.rename('subject_session', inplace=True)
with open('raw_data.🥒', 'wb') as _f:
    pickle.dump(df, _f)
df.to_csv('raw_data.tsv', sep='\t')

In [2]:
# Load pickled dataframe
with open('raw_data.🥒', 'rb') as _f:
    df = pickle.load(_f)
len(df)

4168

In [None]:
CONNECTOME_DIRECTORY = ('/ocean/projects/med220004p/shared/data_sandbox/derived/RBC-testing/'
                        'runs/generated_connectomes')
df['connectome: fMRIPrep'] = ''
df['connectome: C-PAC: fmriprep-options'] = ''
for run in list(df.index):
    for connectome in os.listdir(os.path.join(CONNECTOME_DIRECTORY, 'Fmriprep')):
        if match_all_entities(connectome, run):
            df.loc[run]['connectome: fMRIPrep'] = connectome
    for connectome in os.listdir(os.path.join(CONNECTOME_DIRECTORY, 'CpacFmriprepOptions')):
        if match_all_entities(connectome, run):
            df.loc[run]['connectome: C-PAC: fmriprep-options'] = connectome
# update pickle and TSV
with open('raw_data.🥒', 'wb') as _f:
    pickle.dump(df, _f)
df.to_csv('raw_data.tsv', sep='\t')

In [34]:
# all runs
df

Unnamed: 0_level_0,dataset,path,fieldmaps,connectome: fMRIPrep,connectome: C-PAC: fmriprep-options
subject_session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sub-A00085406_ses-BAS1_acq-VARIANTObliquity,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,,
sub-A00085406_ses-BAS1_task-rest_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-rest_acq-1400_spac...,
sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-1...,
sub-A00085406_ses-BAS1_task-BREATHHOLD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-BREATHHOLD_acq-140...,
sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-645,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-6...,
...,...,...,...,...,...
sub-NDARRW940BL6_ses-HBNsiteRU_task-rest_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
sub-NDARPY458LTR_ses-HBNsiteCBIC_task-rest_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
sub-NDARDN489EXJ_ses-HBNsiteRU_task-rest_run-2,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARDN489EXJ_ses-HBNsiteRU_task-rest_run-2...,
sub-NDARPH567WUB_ses-HBNsiteCUNY_task-peer_run-1_acq-VARIANTObliquity,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,


In [35]:
# all runs without fieldmaps
df[(df['fieldmaps'] == 'None')]

Unnamed: 0_level_0,dataset,path,fieldmaps,connectome: fMRIPrep,connectome: C-PAC: fmriprep-options
subject_session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sub-A00085406_ses-BAS1_acq-VARIANTObliquity,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,,
sub-A00085406_ses-BAS1_task-rest_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-rest_acq-1400_spac...,
sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-1...,
sub-A00085406_ses-BAS1_task-BREATHHOLD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-BREATHHOLD_acq-140...,
sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-645,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-6...,
...,...,...,...,...,...
sub-NDARLV387GP4_ses-HBNsiteRU_task-rest_run-1_acq-VARIANTNoFmap,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,,sub-NDARLV387GP4_ses-HBNsiteRU_task-rest_acq-V...,sub-NDARLV387GP4_ses-HBNsiteRU_task-rest_run-1...
sub-NDARVU683CTN_ses-HBNsiteRU_task-peer_run-1_acq-VARIANTObliquityNoFmap,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,,,
sub-NDARGX443CEU_ses-HBNsiteRU_task-rest_run-2_acq-VARIANTNoFmap,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,,sub-NDARGX443CEU_ses-HBNsiteRU_task-rest_acq-V...,
sub-NDARGF367KVL_ses-HBNsiteRU_task-rest_run-1_acq-VARIANTNoFmap,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,,sub-NDARGF367KVL_ses-HBNsiteRU_task-rest_acq-V...,


In [36]:
# all phasediff fieldmap runs
df[(df['fieldmaps'] == 'phasediff')]

Unnamed: 0_level_0,dataset,path,fieldmaps,connectome: fMRIPrep,connectome: C-PAC: fmriprep-options
subject_session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sub-2207352563_ses-PNC1_acq-refaced,PNC,/ocean/projects/med220004p/shared/data_sandbox...,phasediff,,
sub-2207352563_ses-PNC1_task-idemo_acq-VARIANTNumVolumes,PNC,/ocean/projects/med220004p/shared/data_sandbox...,phasediff,sub-2207352563_ses-PNC1_task-idemo_acq-VARIANT...,
sub-2207352563_ses-PNC1_task-frac2back,PNC,/ocean/projects/med220004p/shared/data_sandbox...,phasediff,sub-2207352563_ses-PNC1_task-frac2back_space-M...,
sub-2412716526_ses-PNC1_acq-refaced,PNC,/ocean/projects/med220004p/shared/data_sandbox...,phasediff,,
sub-2412716526_ses-PNC1_run-01,PNC,/ocean/projects/med220004p/shared/data_sandbox...,phasediff,,
...,...,...,...,...,...
sub-NDARYM695TZY_ses-HBNsiteSI_task-rest,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,phasediff,,
sub-NDARJP133YL3_ses-HBNsiteSI_task-rest_acq-VARIANTDim2Size,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,phasediff,sub-NDARJP133YL3_ses-HBNsiteSI_task-rest_acq-V...,sub-NDARJP133YL3_ses-HBNsiteSI_task-rest_acq-V...
sub-NDARTF833WXB_ses-HBNsiteSI_task-rest,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,phasediff,,
sub-NDARHB781DD8_ses-HBNsiteSI_task-rest,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,phasediff,sub-NDARHB781DD8_ses-HBNsiteSI_task-rest_space...,sub-NDARHB781DD8_ses-HBNsiteSI_task-rest_space...


In [37]:
# all epi fieldmap runs
df[(df['fieldmaps']) == 'epi']

Unnamed: 0_level_0,dataset,path,fieldmaps,connectome: fMRIPrep,connectome: C-PAC: fmriprep-options
subject_session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sub-NDARNA354MGW_ses-HBNsiteCBIC_task-peer_run-3,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
sub-NDARXF497LYF_ses-HBNsiteCBIC_task-movieTP,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
sub-NDARYX806FL1_ses-HBNsiteRU_task-rest_run-2,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
sub-NDARRL426AD5_ses-HBNsiteCBIC_task-rest_run-1_acq-VARIANTObliquity,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
sub-NDARFK610GY5_ses-HBNsiteRU_task-rest_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
...,...,...,...,...,...
sub-NDARRW940BL6_ses-HBNsiteRU_task-rest_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
sub-NDARPY458LTR_ses-HBNsiteCBIC_task-rest_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
sub-NDARDN489EXJ_ses-HBNsiteRU_task-rest_run-2,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARDN489EXJ_ses-HBNsiteRU_task-rest_run-2...,
sub-NDARPH567WUB_ses-HBNsiteCUNY_task-peer_run-1_acq-VARIANTObliquity,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,


In [38]:
# all runs with completed fMRIPrep connectome
df[(df['connectome: fMRIPrep'] != '')]

Unnamed: 0_level_0,dataset,path,fieldmaps,connectome: fMRIPrep,connectome: C-PAC: fmriprep-options
subject_session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sub-A00085406_ses-BAS1_task-rest_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-rest_acq-1400_spac...,
sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-1...,
sub-A00085406_ses-BAS1_task-BREATHHOLD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-BREATHHOLD_acq-140...,
sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-645,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-6...,
sub-A00085406_ses-BAS1_task-rest_acq-CAP,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-rest_acq-CAP_space...,
...,...,...,...,...,...
sub-NDARGH592NZ2_ses-HBNsiteCBIC_task-peer_run-3,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARGH592NZ2_ses-HBNsiteCBIC_task-peer_run...,
sub-NDAREM731BYM_ses-HBNsiteRU_task-peer_run-3,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDAREM731BYM_ses-HBNsiteRU_task-peer_run-3...,
sub-NDARHW968ABB_ses-HBNsiteCBIC_task-peer_run-3,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARHW968ABB_ses-HBNsiteCBIC_task-peer_run...,
sub-NDARHG321LLD_ses-HBNsiteRU_task-peer_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARHG321LLD_ses-HBNsiteRU_task-peer_run-1...,


In [39]:
# all runs with completed C-PAC:fmriprep-options connectome
df[(df['connectome: C-PAC: fmriprep-options'] != '')]

Unnamed: 0_level_0,dataset,path,fieldmaps,connectome: fMRIPrep,connectome: C-PAC: fmriprep-options
subject_session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sub-A00037229_ses-FLU1_task-rest_acq-CAP,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-rest_acq-CAP_space...,sub-A00037229_ses-FLU1_task-rest_acq-CAP_space...
sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-645,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-6...,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-6...
sub-A00037229_ses-FLU1_task-BREATHHOLD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-BREATHHOLD_acq-140...,sub-A00037229_ses-FLU1_task-BREATHHOLD_acq-140...
sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-1...,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-1...
sub-A00037229_ses-FLU1_task-rest_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-rest_acq-1400_spac...,sub-A00037229_ses-FLU1_task-rest_acq-1400_spac...
...,...,...,...,...,...
sub-NDARAX283MAK_ses-HBNsiteRU_task-rest_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARAX283MAK_ses-HBNsiteRU_task-rest_run-1...,sub-NDARAX283MAK_ses-HBNsiteRU_task-rest_run-1...
sub-NDAREY897LB1_ses-HBNsiteCBIC_task-peer_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDAREY897LB1_ses-HBNsiteCBIC_task-peer_run...,sub-NDAREY897LB1_ses-HBNsiteCBIC_task-peer_run...
sub-NDARFG568PXZ_ses-HBNsiteRU_task-movieTP,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARFG568PXZ_ses-HBNsiteRU_task-movieTP_sp...,sub-NDARFG568PXZ_ses-HBNsiteRU_task-movieTP_sp...
sub-NDARCG073GZ6_ses-HBNsiteRU_task-peer_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARCG073GZ6_ses-HBNsiteRU_task-peer_run-1...,sub-NDARCG073GZ6_ses-HBNsiteRU_task-peer_run-1...


In [40]:
# all runs with both completed fMRIPrep connectome and completed C-PAC:fmriprep-options connectome
df[(df['connectome: fMRIPrep'] != '') & (df['connectome: C-PAC: fmriprep-options'] != '')]

Unnamed: 0_level_0,dataset,path,fieldmaps,connectome: fMRIPrep,connectome: C-PAC: fmriprep-options
subject_session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sub-A00037229_ses-FLU1_task-rest_acq-CAP,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-rest_acq-CAP_space...,sub-A00037229_ses-FLU1_task-rest_acq-CAP_space...
sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-645,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-6...,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-6...
sub-A00037229_ses-FLU1_task-BREATHHOLD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-BREATHHOLD_acq-140...,sub-A00037229_ses-FLU1_task-BREATHHOLD_acq-140...
sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-1...,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-1...
sub-A00037229_ses-FLU1_task-rest_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-rest_acq-1400_spac...,sub-A00037229_ses-FLU1_task-rest_acq-1400_spac...
...,...,...,...,...,...
sub-NDARAX283MAK_ses-HBNsiteRU_task-rest_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARAX283MAK_ses-HBNsiteRU_task-rest_run-1...,sub-NDARAX283MAK_ses-HBNsiteRU_task-rest_run-1...
sub-NDAREY897LB1_ses-HBNsiteCBIC_task-peer_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDAREY897LB1_ses-HBNsiteCBIC_task-peer_run...,sub-NDAREY897LB1_ses-HBNsiteCBIC_task-peer_run...
sub-NDARFG568PXZ_ses-HBNsiteRU_task-movieTP,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARFG568PXZ_ses-HBNsiteRU_task-movieTP_sp...,sub-NDARFG568PXZ_ses-HBNsiteRU_task-movieTP_sp...
sub-NDARCG073GZ6_ses-HBNsiteRU_task-peer_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARCG073GZ6_ses-HBNsiteRU_task-peer_run-1...,sub-NDARCG073GZ6_ses-HBNsiteRU_task-peer_run-1...


In [28]:
cpac = {'done': {'raw': df[df['connectome: C-PAC: fmriprep-options'] != '']},
        'remaining': {'raw': df[df['connectome: C-PAC: fmriprep-options'] == '']}}
for key in list(cpac.keys()):
    cpac[key]['grouped'] = cpac[key]['raw'][['dataset', 'fieldmaps', 'connectome: C-PAC: fmriprep-options']].groupby(by=['dataset', 'fieldmaps']).count()
    cpac[key]['rest-only'] = cpac[key]['raw'][['task-rest' in index for index in cpac[key]['raw'].index]][['dataset', 'fieldmaps', 'connectome: C-PAC: fmriprep-options']].groupby(by=['dataset', 'fieldmaps']).count()

In [4]:
# completed runs
cpac['done']['raw']

Unnamed: 0_level_0,dataset,path,fieldmaps,connectome: fMRIPrep,connectome: C-PAC: fmriprep-options
subject_session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sub-A00037229_ses-FLU1_task-rest_acq-CAP,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-rest_acq-CAP_space...,sub-A00037229_ses-FLU1_task-rest_acq-CAP_space...
sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-645,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-6...,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-6...
sub-A00037229_ses-FLU1_task-BREATHHOLD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-BREATHHOLD_acq-140...,sub-A00037229_ses-FLU1_task-BREATHHOLD_acq-140...
sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-1...,sub-A00037229_ses-FLU1_task-CHECKERBOARD_acq-1...
sub-A00037229_ses-FLU1_task-rest_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00037229_ses-FLU1_task-rest_acq-1400_spac...,sub-A00037229_ses-FLU1_task-rest_acq-1400_spac...
...,...,...,...,...,...
sub-NDARAX283MAK_ses-HBNsiteRU_task-rest_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARAX283MAK_ses-HBNsiteRU_task-rest_run-1...,sub-NDARAX283MAK_ses-HBNsiteRU_task-rest_run-1...
sub-NDAREY897LB1_ses-HBNsiteCBIC_task-peer_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDAREY897LB1_ses-HBNsiteCBIC_task-peer_run...,sub-NDAREY897LB1_ses-HBNsiteCBIC_task-peer_run...
sub-NDARFG568PXZ_ses-HBNsiteRU_task-movieTP,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARFG568PXZ_ses-HBNsiteRU_task-movieTP_sp...,sub-NDARFG568PXZ_ses-HBNsiteRU_task-movieTP_sp...
sub-NDARCG073GZ6_ses-HBNsiteRU_task-peer_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARCG073GZ6_ses-HBNsiteRU_task-peer_run-1...,sub-NDARCG073GZ6_ses-HBNsiteRU_task-peer_run-1...


In [5]:
# runs to be completed
cpac['remaining']['raw']

Unnamed: 0_level_0,dataset,path,fieldmaps,connectome: fMRIPrep,connectome: C-PAC: fmriprep-options
subject_session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sub-A00085406_ses-BAS1_acq-VARIANTObliquity,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,,
sub-A00085406_ses-BAS1_task-rest_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-rest_acq-1400_spac...,
sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-1...,
sub-A00085406_ses-BAS1_task-BREATHHOLD_acq-1400,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-BREATHHOLD_acq-140...,
sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-645,NKI,/ocean/projects/med220004p/shared/data_sandbox...,,sub-A00085406_ses-BAS1_task-CHECKERBOARD_acq-6...,
...,...,...,...,...,...
sub-NDARRW940BL6_ses-HBNsiteRU_task-rest_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
sub-NDARPY458LTR_ses-HBNsiteCBIC_task-rest_run-1,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,
sub-NDARDN489EXJ_ses-HBNsiteRU_task-rest_run-2,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,sub-NDARDN489EXJ_ses-HBNsiteRU_task-rest_run-2...,
sub-NDARPH567WUB_ses-HBNsiteCUNY_task-peer_run-1_acq-VARIANTObliquity,HBN,s3://fcp-indi/data/Projects/HBN/BIDS_datalad/s...,epi,,


In [6]:
# count of completed runs by dataset & type of fieldmaps
cpac['done']['grouped']

Unnamed: 0_level_0,Unnamed: 1_level_0,connectome: C-PAC: fmriprep-options
dataset,fieldmaps,Unnamed: 2_level_1
CCNP,,19
HBN,,12
HBN,epi,144
HBN,phasediff,6
HRC,,62
NKI,,119
PNC,,24


In [7]:
# count of remaining runs by dataset & type of fieldmaps
cpac['remaining']['grouped']

Unnamed: 0_level_0,Unnamed: 1_level_0,connectome: C-PAC: fmriprep-options
dataset,fieldmaps,Unnamed: 2_level_1
CCNP,,3
HBN,,28
HBN,epi,1501
HBN,phasediff,15
HRC,,109
NKI,,1559
PNC,,52
PNC,phasediff,515


In [8]:
# percent of completed runs by dataset & type of fieldmaps
cpac['done']['grouped'] / (cpac['done']['grouped'] + cpac['remaining']['grouped'])

Unnamed: 0_level_0,Unnamed: 1_level_0,connectome: C-PAC: fmriprep-options
dataset,fieldmaps,Unnamed: 2_level_1
CCNP,,0.863636
HBN,,0.3
HBN,epi,0.087538
HBN,phasediff,0.285714
HRC,,0.362573
NKI,,0.070918
PNC,,0.315789
PNC,phasediff,


In [29]:
# count of completed task-rest runs by dataset & type of fieldmaps
cpac['done']['rest-only']

Unnamed: 0_level_0,Unnamed: 1_level_0,connectome: C-PAC: fmriprep-options
dataset,fieldmaps,Unnamed: 2_level_1
CCNP,,19
HBN,,4
HBN,epi,36
HBN,phasediff,6
HRC,,43
NKI,,53
PNC,,1


In [30]:
# count of remaining task-rest runs by dataset & type of fieldmaps
cpac['remaining']['rest-only']

Unnamed: 0_level_0,Unnamed: 1_level_0,connectome: C-PAC: fmriprep-options
dataset,fieldmaps,Unnamed: 2_level_1
CCNP,,2
HBN,,15
HBN,epi,416
HBN,phasediff,15
HRC,,34
NKI,,737
PNC,,4
PNC,phasediff,69


In [31]:
# percent of completed task-rest runs by dataset & type of fieldmaps
cpac['done']['rest-only'] / (cpac['done']['rest-only'] + cpac['remaining']['rest-only'])

Unnamed: 0_level_0,Unnamed: 1_level_0,connectome: C-PAC: fmriprep-options
dataset,fieldmaps,Unnamed: 2_level_1
CCNP,,0.904762
HBN,,0.210526
HBN,epi,0.079646
HBN,phasediff,0.285714
HRC,,0.558442
NKI,,0.067089
PNC,,0.2
PNC,phasediff,
