## Data Quality Assurance (QA)

This script identifies poor-quality data (e.g. data with motion artifacts) and returns a vector specifying which subjects can be included in the second-level analysis. Script features:
- identifies high-motion subjects
- returns a vector specifying the final sample with high-quality data

---
**Last update**: 17.02.2020 

In [None]:
import os
import sys

import numpy as np
import pandas as pd
from bids import BIDSLayout

# Project root directory, read from the DECIDENET_PATH environment variable;
# the paths used in this notebook are built relative to it.
path_root = os.environ.get('DECIDENET_PATH')
if path_root is None:
    # Fail here with a clear message instead of the opaque TypeError that
    # os.path.join(None, ...) would raise on the next statement.
    raise RuntimeError(
        "Environment variable DECIDENET_PATH is not set; "
        "point it to the project root directory."
    )

# Put the project's code directory on sys.path (once) so that the dn_utils
# import below resolves.
path_code = os.path.join(path_root, 'code')
if path_code not in sys.path:
    sys.path.append(path_code)
from dn_utils.behavioral_models import load_behavioral_data

In [None]:
# Output location for the exclusion table; created if it does not exist yet
path_out = os.path.join(
    path_root, 'data/main_fmri_study/derivatives/nistats/exclusion'
)
os.makedirs(path_out, exist_ok=True)

# Behavioral data: `beh` is unpacked as a 4-D array whose first three axes are
# taken to be subjects, conditions and trials (the last axis is ignored here)
path_beh = os.path.join(
    path_root, 'data/main_fmri_study/sourcedata/behavioral'
)
beh, meta = load_behavioral_data(path=path_beh)
n_subjects, n_conditions, n_trials, _ = beh.shape

In [None]:
# Index the BIDS dataset together with its derivatives, skipping metadata
path_bids = os.path.join(path_root, 'data/main_fmri_study')
layout = BIDSLayout(root=path_bids, derivatives=True, index_metadata=False)

# Query shared by both tasks; the "task" entry is set inside the loop below
conf_filter = dict(extension="tsv", desc="confounds", return_type="filename")

# conf_files[i] is the list returned by the layout query for the i-th task,
# in the order: prlrew, prlpun
conf_files = []
for task in ("prlrew", "prlpun"):
    conf_filter["task"] = task
    conf_files.append(layout.get(**conf_filter))

In [None]:
# Last axis of fd_stats holds, per subject and condition:
# [mean FD, max FD, number of rows with FD > 0.5]
fd_stats = np.zeros((n_subjects, n_conditions, 3))

# np.ndindex yields (sub, con) pairs in the same order as two nested
# range loops (subjects outer, conditions inner)
for sub, con in np.ndindex(n_subjects, n_conditions):
    # NOTE(review): assumes conf_files[con] is ordered like the subject axis
    # of the behavioral data -- confirm
    df = pd.read_csv(conf_files[con][sub], sep='\t')
    fd = df['framewise_displacement']

    fd_mean = fd.mean()
    fd_max = fd.max()
    fd_gt05 = (fd > 0.5).sum()

    fd_stats[sub, con, :] = [fd_mean, fd_max, fd_gt05]

### Additional excluded subjects
- `sub-m19`: flipped response grips
- `sub-m32`: failed realignment in prlpun (TODO)