In [1]:
import pandas as pd
from glob import glob
import numpy as np

In [2]:
# Subject IDs are the final path component of every ../data/sub-* entry, in sorted order.
subject_paths = sorted(glob('../data/sub-*'))
subjects = [path.rsplit('/', 1)[-1] for path in subject_paths]

In [3]:
def load_confounds(subject):
    """Read every TSV file in one subject's fMRIPrep ``func`` directory.

    Parameters
    ----------
    subject : str
        Directory name of the subject under ``../outputs/fmriprep``.

    Returns
    -------
    list of pandas.DataFrame
        One tab-separated table per matching file, ordered by sorted file
        path. The list is empty when no file matches.

    Notes
    -----
    Every ``*.tsv`` file in the directory is loaded; presumably they are all
    confound tables (one per run) -- confirm against the fMRIPrep outputs.
    """
    tables = []
    for tsv_path in sorted(glob(f"../outputs/fmriprep/{subject}/func/*.tsv")):
        tables.append(pd.read_csv(tsv_path, sep='\t'))
    return tables

In [4]:
def extract_cols(df):
    """Keep only the motion-related columns of a confounds table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that contains a ``framewise_displacement`` column.

    Returns
    -------
    pandas.DataFrame
        ``framewise_displacement`` first, followed by every column whose name
        contains ``'motion_outlier'``, in their original column order.

    Raises
    ------
    KeyError
        If ``df`` has no ``framewise_displacement`` column.
    """
    outlier_cols = [name for name in df.columns if 'motion_outlier' in name]
    return df[['framewise_displacement', *outlier_cols]]

In [5]:
# n_outliers: one entry per run (count of columns whose name contains 'motion').
# framewise_displacement: the FD values of every run, pooled into one flat list.
# Both are filled subject by subject, following the order of `subjects`.
n_outliers = []
framewise_displacement = []
for subj in subjects:
    dfs = [extract_cols(run) for run in load_confounds(subj)]
    for df in dfs:
        # FD is pooled in a flat list because it is only used later to
        # compute group metrics (median, min, max)
        framewise_displacement.extend(df['framewise_displacement'].values.tolist())
        n_outliers.append(sum(1 for c in df.columns if 'motion' in c))

In [6]:
# split outliers into subjects again
# n_outliers holds one entry per run, appended subject by subject, so the
# subject boundaries are the cumulative per-subject run counts. Splitting at
# those boundaries (instead of into len(subjects) equal parts) stays correct
# when subjects differ in their number of runs; when every subject has the
# same number of runs the result is identical to an equal split.
runs_per_subject = [len(load_confounds(subj)) for subj in subjects]
# Guard against stale kernel state: the counts must describe the same runs
# that were accumulated into n_outliers.
assert sum(runs_per_subject) == len(n_outliers), (
    "n_outliers does not match the number of confound files found per subject"
)
n_outliers_subject = np.split(
    np.array(n_outliers), np.cumsum(runs_per_subject)[:-1].tolist()
)

In [7]:
# One line per subject: the outlier count summed over that subject's runs.
per_subject_counts = zip(subjects, n_outliers_subject)
for subj, out in per_subject_counts:
    print(f"{subj}: {out.sum()}")

sub-sid000005: 26
sub-sid000007: 116
sub-sid000009: 695
sub-sid000010: 139
sub-sid000013: 97
sub-sid000020: 28
sub-sid000021: 32
sub-sid000024: 35
sub-sid000025: 1
sub-sid000029: 299
sub-sid000030: 114
sub-sid000034: 178
sub-sid000050: 115
sub-sid000052: 318
sub-sid000055: 111
sub-sid000114: 56
sub-sid000120: 37
sub-sid000134: 73
sub-sid000142: 30
sub-sid000278: 118
sub-sid000416: 83
sub-sid000499: 56
sub-sid000522: 25
sub-sid000535: 11
sub-sid000560: 223


In [8]:
# Total number of TRs (table rows summed over runs), used as the denominator
# when converting outlier counts to percentages.
# NOTE(review): the total is taken from this single subject and then applied
# to every subject -- this assumes all subjects have the same number of TRs;
# confirm against the data.
dfs = load_confounds('sub-sid000009')
n_trs = list(map(len, dfs))
all_n_trs = np.sum(n_trs)

In [9]:
# One line per subject: outlier total as a percentage of all_n_trs.
per_subject_counts = list(zip(subjects, n_outliers_subject))
for subj, out in per_subject_counts:
    print(f"{subj}: {out.sum()/all_n_trs*100:.2f} %")

sub-sid000005: 0.85 %
sub-sid000007: 3.80 %
sub-sid000009: 22.77 %
sub-sid000010: 4.55 %
sub-sid000013: 3.18 %
sub-sid000020: 0.92 %
sub-sid000021: 1.05 %
sub-sid000024: 1.15 %
sub-sid000025: 0.03 %
sub-sid000029: 9.80 %
sub-sid000030: 3.74 %
sub-sid000034: 5.83 %
sub-sid000050: 3.77 %
sub-sid000052: 10.42 %
sub-sid000055: 3.64 %
sub-sid000114: 1.83 %
sub-sid000120: 1.21 %
sub-sid000134: 2.39 %
sub-sid000142: 0.98 %
sub-sid000278: 3.87 %
sub-sid000416: 2.72 %
sub-sid000499: 1.83 %
sub-sid000522: 0.82 %
sub-sid000535: 0.36 %
sub-sid000560: 7.31 %


In [10]:
# Per-subject outlier percentage: summed outlier count relative to all_n_trs.
outlier_totals = [counts.sum() for counts in n_outliers_subject]
percentage_outliers = np.array([total / all_n_trs * 100 for total in outlier_totals])

Compute the median percentage of outliers, min, and max

In [11]:
# (median, min, max) of the per-subject outlier percentages, to 2 decimals.
outlier_summary = (
    np.median(percentage_outliers),
    np.min(percentage_outliers),
    np.max(percentage_outliers),
)
np.round(outlier_summary, 2)

array([ 2.72,  0.03, 22.77])

How many subjects have fewer than 5% outliers?

In [12]:
# Count the subjects whose outlier percentage is strictly below 5.
below_threshold = percentage_outliers < 5
below_threshold.sum()

20

Now compute the median framewise displacement, max, and min

In [13]:
# framewise_displacement is one flat list with every run's FD values appended
# subject by subject, so the subject boundaries are the cumulative per-subject
# TR counts (rows summed over that subject's runs). Splitting at those
# boundaries (instead of into len(subjects) equal parts) keeps each subject's
# samples together even when subjects differ in their number of TRs; with
# equal TR counts the result is identical to an equal split.
trs_per_subject = [
    sum(len(run) for run in load_confounds(subj)) for subj in subjects
]
# Guard against stale kernel state: the counts must describe the same samples
# that were pooled into framewise_displacement.
assert sum(trs_per_subject) == len(framewise_displacement), (
    "framewise_displacement does not match the number of TRs found per subject"
)
framewise_displacement_subjects = np.split(
    np.array(framewise_displacement), np.cumsum(trs_per_subject)[:-1].tolist()
)
# nanmedian ignores NaN entries when computing each subject's median FD.
median_fd_subjects = [np.nanmedian(fd) for fd in framewise_displacement_subjects]

In [14]:
# (median, min, max) of the per-subject median FD, NaN-aware, to 2 decimals.
fd_summary = (
    np.nanmedian(median_fd_subjects),
    np.nanmin(median_fd_subjects),
    np.nanmax(median_fd_subjects),
)
np.round(fd_summary, 2)

array([0.09, 0.06, 0.19])