# Exclude participants with excessive motion
Natalia Vélez, October 2021

In [1]:
%matplotlib inline

# general
import os, sys
import pandas as pd
import numpy as np
from os.path import join as opj

# plotting
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('white')
sns.set_context('talk')

# project-specific
sys.path.append('..')
from utils import gsearch,str_extract,int_extract

Find confound files:

In [2]:
# Root of the fMRIPrep derivatives, relative to this notebook
data_dir = '../../BIDS_data/derivatives/fmriprep'

# Glob pieces handed to gsearch: any sub-folder -> func -> confounds table
confound_pattern = (data_dir, '**', 'func', '*desc-confounds_timeseries.tsv')
confound_files = gsearch(*confound_pattern)
confound_files.sort()  # plain string sort, so run-10 lists before run-1

# Report how many tables were found and preview the first ten paths
n_found = len(confound_files)
print('Found %i files' % n_found)
print('\n'.join(confound_files[:10]))

Found 358 files
../../BIDS_data/derivatives/fmriprep/sub-01/func/sub-01_task-teaching_run-10_desc-confounds_timeseries.tsv
../../BIDS_data/derivatives/fmriprep/sub-01/func/sub-01_task-teaching_run-1_desc-confounds_timeseries.tsv
../../BIDS_data/derivatives/fmriprep/sub-01/func/sub-01_task-teaching_run-2_desc-confounds_timeseries.tsv
../../BIDS_data/derivatives/fmriprep/sub-01/func/sub-01_task-teaching_run-3_desc-confounds_timeseries.tsv
../../BIDS_data/derivatives/fmriprep/sub-01/func/sub-01_task-teaching_run-4_desc-confounds_timeseries.tsv
../../BIDS_data/derivatives/fmriprep/sub-01/func/sub-01_task-teaching_run-5_desc-confounds_timeseries.tsv
../../BIDS_data/derivatives/fmriprep/sub-01/func/sub-01_task-teaching_run-6_desc-confounds_timeseries.tsv
../../BIDS_data/derivatives/fmriprep/sub-01/func/sub-01_task-teaching_run-7_desc-confounds_timeseries.tsv
../../BIDS_data/derivatives/fmriprep/sub-01/func/sub-01_task-teaching_run-8_desc-confounds_timeseries.tsv
../../BIDS_data/derivatives/f

Load motion parameters:

In [3]:
def _load_run_motion(confound_path):
    """Read one confounds table and keep only its motion columns.

    The regex keeps `framewise_displacement` plus the columns whose names end
    in `trans_<axis>` or `rot_<axis>` (axis = x, y or z). Three labels parsed
    from the file path are appended as columns: `subject`, `task` and `run`.
    """
    confounds = pd.read_csv(confound_path, sep='\t')
    motion_only = confounds.filter(regex='(framewise_displacement)|((trans|rot)_(x|y|z)$)')

    # Tag every row with the run it came from
    return motion_only.assign(
        subject=str_extract('sub-[0-9]{2}', confound_path),
        task=str_extract('(?<=task-)[a-z]+', confound_path),
        run=int_extract('(?<=run-)[0-9]+', confound_path),
    )


# One motion table per confounds file, in the same order as confound_files
motion_list = [_load_run_motion(path) for path in confound_files]

Assemble dataframe:

In [4]:
# Stack the per-run tables into one long frame. Each run keeps its own row
# index through the concat; reset_index() moves it into a column, which is
# renamed to `t` (the image number within the run).
motion_df = (
    pd.concat(motion_list)
    .reset_index()
    .rename(columns={'index': 't'})
)
motion_df.head()

Unnamed: 0,t,framewise_displacement,trans_x,trans_y,trans_z,rot_x,rot_y,rot_z,subject,task,run
0,0,,-0.006187,0.063545,0.076468,-0.000973,-9.8e-05,6.6e-05,sub-01,teaching,10
1,1,0.056094,-0.003115,0.074887,0.105599,-0.001127,-6.8e-05,0.0,sub-01,teaching,10
2,2,0.025449,-0.006229,0.074913,0.090921,-0.001265,-5.3e-05,0.0,sub-01,teaching,10
3,3,0.041779,-0.007286,0.081286,0.085657,-0.001414,-0.000359,0.000127,sub-01,teaching,10
4,4,0.023688,-0.004253,0.080952,0.082234,-0.001696,-0.000352,0.000176,sub-01,teaching,10


Make motion plots:

In [5]:
# Write one three-panel figure per run: translations, rotations, and
# framewise displacement over images.

# Create the output folder up front so savefig does not fail on a fresh checkout
os.makedirs('plots/motion', exist_ok=True)

motion_axes = ['x', 'y', 'z']

for (sub, task, run), df in motion_df.groupby(['subject', 'task', 'run']):
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 10), tight_layout=True)

    # Plot the six rigid-body parameters straight from their columns
    # (no melt / regex filtering needed, and much faster than sns.lineplot
    # when looping over hundreds of runs)
    for axis in motion_axes:
        ax1.plot(df['t'], df['trans_%s' % axis], label=axis)
        # fMRIPrep stores rotations in radians; convert so that the
        # "Rotation (deg)" axis label is actually true
        ax2.plot(df['t'], np.degrees(df['rot_%s' % axis]), label=axis)

    ax1.set(xlabel='Image', ylabel='Translation (mm)')
    ax1.legend(title='')

    ax2.set(xlabel='Image', ylabel='Rotation (deg)')
    ax2.legend(title='')

    # Framewise displacement (undefined for the first image of each run)
    ax3.plot(df['t'], df['framewise_displacement'])
    ax3.set(xlabel='Image', ylabel='FD')

    fig.suptitle('%s, task: %s, run: %i' % (sub, task, run))

    # Save to file, then release the figure so memory does not grow with the loop
    fig.savefig('plots/motion/%s_task-%s_run-%i_desc-motion.png' % (sub, task, run))
    plt.close(fig)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.


KeyboardInterrupt



Error in callback <function flush_figures at 0x2adadd502950> (for post_execute):


KeyboardInterrupt: 

Did any runs have excessive motion?

In [6]:
# Flag runs whose peak absolute motion exceeds the threshold, then count the
# flagged runs per participant and task.

# Exclusion threshold, applied to translations in mm and rotations in degrees
MOTION_THRESHOLD = 2

run_keys = ['subject', 'task', 'run']
trans_cols = ['trans_x', 'trans_y', 'trans_z']
rot_cols = ['rot_x', 'rot_y', 'rot_z']

# fMRIPrep reports rotations in radians. Convert them to degrees first;
# otherwise a threshold of 2 would mean ~115 degrees and rotation could never
# trigger an exclusion.
# NOTE(review): this can flag runs the radian-based comparison let through --
# re-check the exclusion list after re-running.
motion_abs = motion_df[run_keys + trans_cols + rot_cols].copy()
motion_abs[rot_cols] = np.degrees(motion_abs[rot_cols])
motion_abs[trans_cols + rot_cols] = motion_abs[trans_cols + rot_cols].abs()

# Get maximum translation/rotation per run
motion_summ = motion_abs.melt(id_vars=run_keys, value_vars=trans_cols + rot_cols)
motion_summ['metric'] = motion_summ['variable'].str.extract('(trans|rot)', expand=False)
motion_summ = motion_summ.groupby(run_keys + ['metric'])['value'].agg('max').reset_index()

# Find runs that cross the threshold. A run exceeding it on both translation
# and rotation has two rows here, so de-duplicate before counting runs.
excessive_runs = motion_summ.loc[motion_summ['value'] > MOTION_THRESHOLD, run_keys].drop_duplicates()
excessive_motion = excessive_runs.groupby(['subject', 'task'])['run'].agg('count').reset_index()
excessive_motion = excessive_motion.rename(columns={'run': 'dropped'})

excessive_motion

Unnamed: 0,subject,task,dropped
0,sub-03,teaching,1
1,sub-03,tomloc,1
2,sub-17,teaching,5
3,sub-17,tomloc,2


Calculate remaining runs, after exclusions:

In [7]:
# Count the runs acquired per participant and task, parsed from the file names
acquired_df = pd.DataFrame({'runs': confound_files})
acquired_df['subject'] = acquired_df['runs'].str.extract('(sub-[0-9]{2})', expand=False)
acquired_df['task'] = acquired_df['runs'].str.extract('(?<=task-)([a-z]+)', expand=False)
acquired_df = acquired_df.groupby(['subject', 'task'])['runs'].agg('count').reset_index()

# Subtract runs with excessive motion. The left merge keeps every
# participant/task pair; pairs with no flagged runs get NaN, which becomes 0.
# Only `dropped` is filled so the run counts stay integers.
remaining_df = acquired_df.merge(excessive_motion, how='left', on=['subject', 'task'])
remaining_df['dropped'] = remaining_df['dropped'].fillna(0).astype(int)
remaining_df['remaining'] = remaining_df['runs'] - remaining_df['dropped']

# Total usable runs per participant, summed across tasks
run_df = remaining_df.groupby(['subject'])['remaining'].agg('sum').reset_index()

run_df.head()

Unnamed: 0,subject,remaining
0,sub-01,12.0
1,sub-02,12.0
2,sub-03,8.0
3,sub-04,12.0
4,sub-05,12.0


Exclude participants with fewer than 75% of the 12 expected runs remaining (i.e., fewer than 9 runs):

In [8]:
# Minimum number of usable runs: 75% of 12.
# NOTE(review): 12 is hard-coded here, not each participant's acquired run
# count -- confirm that is the intended denominator.
min_remaining = 12*.75

# Participants who fall below the cutoff, and their labels as an array
too_few_runs = run_df['remaining'] < min_remaining
exclusions_df = run_df[too_few_runs]
excluded = exclusions_df['subject'].values
exclusions_df

Unnamed: 0,subject,remaining
2,sub-03,8.0
16,sub-17,5.0


Save excluded participants to file:

In [9]:
# Make sure the output folder exists so this cell also runs on a fresh checkout
os.makedirs('outputs', exist_ok=True)

# Write the excluded participant labels, one per line
np.savetxt('outputs/excluded_participants.txt', excluded, fmt='%s')

(New) Also save the valid (non-excluded) participants as integer IDs, which are easier to use from bash:

In [13]:
# Every participant in run_df who is not on the exclusion list
valid_labels = np.setdiff1d(run_df.subject, excluded).tolist()

# Reduce each label to the number after "sub-" using int_extract
valid = np.array([int_extract('(?<=sub-)[0-9]{2}', label) for label in valid_labels])
print(valid)

# One ID per line
np.savetxt('outputs/valid_participants.txt', valid, fmt='%s')

[ 1  2  4  5  6  7  8  9 10 11 12 13 14 15 16 18 19 20 21 22 23 24 25 26
 27 28 29 30]
