# Prepare data

## imports

In [2]:
import numpy as np
import pandas as pd
import nibabel as nib
import os
import subprocess
from config import root, conda_path
from tools.helpers import (
    leading_zeros, select_files, parrec2nii, convert_to_nifti)

## parrec to nifti

In [22]:
# Path to the `parrec2nii` entry inside the conda installation's `bin` folder
# (presumably the command-line PAR/REC converter -- confirm where it is used).
parrec2nii_cmd = os.path.join(conda_path, "bin", "parrec2nii")

In [23]:
# Input folder (listed per session further down) and output folder for the
# converted files, both located under <root>/data.
raw_path = os.path.join(root, "data", "parrec")
nii_path = os.path.join(root, "data", "nii")
# exist_ok=True removes the check-then-create race and makes re-running the
# cell safe; makedirs also creates any missing parent folder.
os.makedirs(nii_path, exist_ok=True)

In [None]:
# Every entry of `raw_path` is handled as one session folder.
sessions = os.listdir(raw_path)

# Output sub-folder names, paired position-by-position with the keyword
# arguments handed to `select_files` for that sub-folder.
dir_names = ['functional', 'localizer', 'anatomy', 'B0']
kwargs = [
    {'include': ['EPI'], 'exclude': ['Loc']},
    {'include': ['Loc'], 'exclude': []},
    {'include': ['T1'], 'exclude': []},
    {'include': ['B0'], 'exclude': []},
]

for ses in sessions:
    path = os.path.join(raw_path, ses)
    # Resolve all selections for the session before converting anything.
    files_by_dir = [select_files(path, **kw) for kw in kwargs]
    for dir_name, files in zip(dir_names, files_by_dir):
        outdir = os.path.join(nii_path, ses, dir_name)
        sources = [os.path.join(path, f) for f in files]
        parrec2nii(sources, outdir=outdir, compressed=True)

# Data architecture
Copy all data used in the analysis into the appropriate folder structure. This creates a lot of redundancy, but disk space is not a concern yet.

In [24]:
# Locations used by the analysis, all under <root>/data.
fmri_data = os.path.join(root, "data", "nii")
fs_dir = os.path.join(root, "data", "fs_subjects")
output_dir = os.path.join(root, "data", "output")
working_dir = os.path.join(root, "data", "working_dir")
# Path components are passed separately instead of embedding "/" in one
# string, so the separator is always the platform's own.
mni_standard_path = os.path.join(
    root, "data", "nii", "misc", "MNI152_T1_1mm_brain.nii.gz")
subject_info = os.path.join(root, "data", "subject_info.csv")

# Create directories (fs_dir is deliberately left out, as it was before).
# exist_ok=True makes the cell safe to re-run without a separate existence
# check; makedirs also creates missing parent folders.
for directory in (working_dir, fmri_data, output_dir):
    os.makedirs(directory, exist_ok=True)

In [25]:
# Extract the subject information from a file.
# NOTE(review): together with explicit `names`, `header=1` should make pandas
# discard the first two lines of the file (line index 1 is treated as the
# header being replaced) -- confirm the CSV really has two leading non-data
# lines, otherwise `header=0` is what is wanted.
sub_info = (
    pd.read_csv(
        subject_info,
        names=["sub", "ses", "data", "date", "runs", "name", "T1", "include"],
        header=1
    )
    .sort_values(by=["sub", "ses"])
    .drop("runs", axis=1)
)

# Keep only the rows flagged for inclusion.
# NOTE(review): astype(bool) maps every non-empty string to True -- confirm
# the `include` column is numeric/boolean rather than text such as "no".
sub_info = sub_info[sub_info.include.astype(bool)]

# Renumber the remaining sessions of each subject as 0, 1, 2, ... following
# the (sub, ses) sort order above. cumcount does this in one vectorized step
# and yields an integer column, instead of filling an object column of None
# through one boolean-mask assignment per subject.
sub_info = sub_info.assign(real_ses=sub_info.groupby("sub").cumcount())

sub_info = sub_info.set_index(["sub", "real_ses"]).sort_index()

In [26]:
# Copy all the data to its new structure.
# `nii` is the root folder of that structure; exist_ok=True replaces the
# racy exists-then-mkdir pair and keeps the cell safe to re-run.
nii = os.path.join(root, "data", "in_analysis")
os.makedirs(nii, exist_ok=True)

In [None]:
# `copyfile` is not provided by the imports cell at the top of the notebook;
# without this import the copy calls below fail with a NameError.
from shutil import copyfile

# For every (subject, session) entry of `sub_info`, mirror the session's
# folders into <nii>/sub_XXX/ses_XXX/ and copy the anatomy, functional and
# localizer images under normalized file names. Existing destination files
# are never overwritten, so the cell can be re-run.
for (sub, real_ses), group in sub_info.groupby(sub_info.index):
    ses_path = os.path.join(root, "data", "nii", sub_info.loc[(sub, real_ses), "data"])
    out_dir = os.path.join(nii, "sub_%03d" % sub, "ses_%03d" % real_ses)
    os.makedirs(out_dir, exist_ok=True)

    print("=== %03d, %03d ===" % (sub, real_ses))

    for sub_dir in os.listdir(ses_path):
        data_path = os.path.join(ses_path, sub_dir)
        # Stray non-directory entries in the session folder would make the
        # listdir below raise; skip them.
        if not os.path.isdir(data_path):
            continue
        files = os.listdir(data_path)
        dst_dir = os.path.join(out_dir, sub_dir)
        os.makedirs(dst_dir, exist_ok=True)

        # Anatomy
        if sub_dir == "anatomy":
            matches = [f for f in files if "T1_wholebrain_2.2_iso" in f]
            if not matches:
                # Fail with the offending folder named instead of a bare
                # "list index out of range".
                raise FileNotFoundError(
                    "no 'T1_wholebrain_2.2_iso' file found in %s" % data_path)
            src = os.path.join(data_path, matches[0])
            dst = os.path.join(dst_dir, "inplane.nii.gz")
            print(src, "->", dst)
            if not os.path.exists(dst):
                copyfile(src, dst)

        # Functional
        elif sub_dir in ("functional", "localizer"):
            # Runs are numbered by the sorted order of the source file names.
            for i, f in enumerate(sorted(files)):
                src = os.path.join(data_path, f)
                dst = os.path.join(dst_dir, "run_%03d.nii.gz" % i)
                if not os.path.exists(dst):
                    copyfile(src, dst)
                print(src, "->", dst)