# Creating a big CSV file (fMRI data + regressors)

## Import

In [None]:
import glob
import os
from typing import Tuple

import nibabel as nib
from nideconv.utils import roi

import nilearn
from nilearn import datasets
from nilearn.input_data import NiftiLabelsMasker
import numpy as np
import pandas as pd

In [None]:
# Regressors
# One regressor table per section (A-D), read from the model-regressors
# folder. The frames are looked up by section name through
# `section2regressor` and merged with the extracted time courses on the
# 'time' column in `concat_nii_and_regressor`.
A = pd.read_csv('../data/model-regressors/regressors_A.csv')
B = pd.read_csv('../data/model-regressors/regressors_B.csv')
C = pd.read_csv('../data/model-regressors/regressors_C.csv')
D = pd.read_csv('../data/model-regressors/regressors_D.csv')

In [None]:
# Lookup table: section name ('A'..'D') -> regressor DataFrame for that section.
section2regressor = dict(A=A, B=B, C=C, D=D)

In [None]:
# Fetch the AAL atlas (version 'SPM12') through nilearn's dataset fetcher.
# The whole returned object is passed to
# `roi.extract_timecourse_from_nii` in `concat_nii_and_regressor`.
ATLAS = datasets.fetch_atlas_aal(
    version='SPM12', 
    resume=True,verbose=1
)
# `maps` attribute of the fetched atlas; used as `labels_img` for MASKER.
ATLAS_FILENAME = ATLAS.maps

In [None]:
# Label-based masker configured with the AAL atlas image.
# t_r=2.0 is the same value passed to `roi.extract_timecourse_from_nii`
# in `concat_nii_and_regressor`.
# NOTE(review): MASKER is not referenced by any other cell visible in this
# notebook; time courses are extracted via nideconv's `roi` helper instead --
# confirm whether this object is still needed.
MASKER = NiftiLabelsMasker(
    labels_img=ATLAS_FILENAME, 
    resampling_target = "data", 
    t_r=2.0,
    detrend=True,
    standardize=True
)

In [None]:
# Column names dropped from every merged table at the end of
# `concat_nii_and_regressor`. Currently empty, so nothing is dropped.
DROP_COLNAMES = [ ]

In [None]:
# Column labels applied by `load_rp` to the six numeric fields of each line
# of an rp text file.
# NOTE(review): presumably three translations (dx, dy, dz) followed by three
# rotations (rx, ry, rz) -- confirm against the tool that wrote the rp files.
RP_COLUMNS = ['dx', 'dy', 'dz', 'rx', 'ry', 'rz']

In [None]:
def niipath2names(nii_path: str) -> Tuple[str, str]:
    """Extract the section and subject directory names from a NIfTI path.

    The path is split on ``os.sep`` and the components at positions 3 and 4
    are returned. This matches paths shaped like
    ``../data/nii/<section>/<subject>/<file>``.

    Args:
        nii_path: Path to a NIfTI file, using ``os.sep`` as the separator.

    Returns:
        A ``(section, subject)`` tuple of path components.

    Raises:
        IndexError: If the path has fewer than five components.
    """
    parts = nii_path.split(os.sep)

    # Fail with a message that names the offending path instead of a bare
    # "list index out of range".
    if len(parts) < 5:
        raise IndexError(
            f'expected at least 5 path components, got {len(parts)}: {nii_path!r}'
        )

    section, subj = parts[3], parts[4]
    return section, subj

In [None]:
def niipath2rppath(nii_path: str) -> str:
    """Derive the rp text-file path that corresponds to a NIfTI path.

    The path is split on ``os.sep``; the third component is replaced by
    ``'rp'`` and, in the file name, ``'swu'`` becomes ``'rp_'`` and
    ``'.nii'`` becomes ``'.txt'``. For example
    ``../data/nii/A/sub01/swurun.nii`` maps to
    ``../data/rp/A/sub01/rp_run.txt``.

    Args:
        nii_path: Path to a NIfTI file, using ``os.sep`` as the separator.

    Returns:
        The derived rp file path, joined with ``os.sep``.

    Raises:
        IndexError: If the path has fewer than three components.
    """
    parts = nii_path.split(os.sep)

    parts[2] = 'rp'
    parts[-1] = parts[-1].replace('swu', 'rp_').replace('.nii', '.txt')

    # Join with the same separator that was used for splitting, so the result
    # is consistent on platforms where os.sep is not '/'.
    return os.sep.join(parts)

In [None]:
def load_rp(rp_path: str) -> pd.DataFrame:
    """Read a whitespace-delimited numeric text file into a DataFrame.

    Each line of the file becomes one row: the line is split on whitespace
    and every field is converted with ``float``. The columns are labelled
    with ``RP_COLUMNS``.

    Args:
        rp_path: Path of the text file to read.

    Returns:
        A DataFrame with one row per line of the file.
    """
    parsed_rows = []
    with open(rp_path) as handle:
        for raw_line in handle:
            parsed_rows.append([float(field) for field in raw_line.split()])
    return pd.DataFrame(parsed_rows, columns=RP_COLUMNS)

In [None]:
def concat_nii_and_regressor(nii_path: str, section: str) -> pd.DataFrame:
    """Build one table of ROI time courses, rp columns and regressors.

    Steps:
      1. Load the image at ``nii_path`` and extract time courses with
         ``roi.extract_timecourse_from_nii`` using ``ATLAS`` and ``t_r=2.0``.
      2. Turn the time-course index into regular columns.
      3. Append the rp columns (file located via ``niipath2rppath``) side by
         side with the time courses.
      4. Merge with the section's regressor table on the ``'time'`` column.
      5. Drop the columns listed in ``DROP_COLNAMES``.

    Args:
        nii_path: Path to the NIfTI file to process.
        section: Key into ``section2regressor``.

    Returns:
        The merged DataFrame.
    """
    timecourse = roi.extract_timecourse_from_nii(
        ATLAS,
        nib.load(nii_path),
        t_r=2.0,
    )
    # Clear the name of the column axis, then move the index into columns.
    # NOTE(review): the merge below relies on a 'time' column; presumably it
    # comes from this index -- confirm against nideconv's output.
    timecourse.columns.name = None
    timecourse = timecourse.reset_index()

    # Column-wise concat aligns on the row index of both frames.
    motion = load_rp(niipath2rppath(nii_path))
    combined = pd.concat([timecourse, motion], axis=1)

    merged = combined.merge(section2regressor[section], on='time')

    return merged.drop(DROP_COLNAMES, axis=1)

In [None]:
def make_savepath(section: str, subj: str, *, root: str = '../data/Results-ts') -> str:
    """Return the output path for one subject's table, creating its folder.

    The folder ``<root>/<section>/<subj>`` is created if it does not exist
    and the path of ``ts.csv`` inside it is returned.

    Args:
        section: Section directory name.
        subj: Subject directory name.
        root: Base output directory. Defaults to ``'../data/Results-ts'``.

    Returns:
        Path of the ``ts.csv`` file inside the created folder.
    """
    save_root = os.path.join(root, section, subj)
    os.makedirs(save_root, exist_ok=True)

    return os.path.join(save_root, 'ts.csv')

In [None]:
# Every regular file found anywhere below ../data/nii (directories are
# filtered out).
nii_paths = list(
    filter(os.path.isfile, glob.glob('../data/nii/**', recursive=True))
)

In [None]:
# DO nii2csv
# For every file in `nii_paths`: derive the section and subject from the path,
# build the merged table, and write it to that subject's `ts.csv`.
for nii_path in nii_paths:
    section, subj = niipath2names(nii_path)
    df = concat_nii_and_regressor(nii_path, section)
    # The output folder is created only after the table was built successfully.
    save_path = make_savepath(section, subj)
    # Written tab-separated even though the file is named ts.csv; the later
    # cells read it back with sep='\t'.
    df.to_csv(save_path, sep='\t', index=False)

In [None]:
def csvpath2subjid(csv_path: str) -> int:
    """Parse the subject number from the parent directory of a CSV path.

    The path is split on ``os.sep``; the second-to-last component (the
    directory containing the file) has every ``'sub'`` removed and the
    remainder is converted with ``int``. For example ``.../sub07/ts.csv``
    yields ``7``.

    Args:
        csv_path: Path to a per-subject CSV file, using ``os.sep``.

    Returns:
        The subject number as an integer.

    Raises:
        IndexError: If the path has fewer than two components.
        ValueError: If the directory name does not reduce to an integer.
    """
    parts = csv_path.split(os.sep)

    subj_str = parts[-2]
    try:
        return int(subj_str.replace('sub', ''))
    except ValueError as err:
        # Same exception type as before, but naming the offending path.
        raise ValueError(
            f'cannot parse a subject number from directory {subj_str!r} '
            f'in path {csv_path!r}'
        ) from err

In [None]:
def make_savepath_section(section: str, *, root: str = '../data/Results-ts/concat') -> str:
    """Return the output path for a section-level table, creating its folder.

    The folder ``<root>/<section>`` is created if it does not exist and the
    path of ``ts.csv`` inside it is returned.

    Args:
        section: Section directory name.
        root: Base output directory. Defaults to
            ``'../data/Results-ts/concat'``.

    Returns:
        Path of the ``ts.csv`` file inside the created folder.
    """
    save_root = os.path.join(root, section)
    os.makedirs(save_root, exist_ok=True)

    return os.path.join(save_root, 'ts.csv')

In [None]:
# DO concat section
# For each section, read every per-subject table, tag its rows with the
# subject number parsed from the path, stack them, and write one table per
# section.
SECTIONS = ['A', 'B', 'C', 'D']
for section in SECTIONS:
    # Sorted so subjects are stacked in a deterministic order.
    csv_paths = sorted(glob.glob(f'../data/Results-ts/{section}/**/*.csv'))

    subj_dfs = []
    for csv_path in csv_paths:
        subj_df = pd.read_csv(csv_path, sep='\t')
        subj_id = csvpath2subjid(csv_path)
        subj_df['subject_number'] = subj_id
        subj_dfs.append(subj_df)

    # ignore_index=True renumbers the stacked rows from 0.
    section_df = pd.concat(subj_dfs, ignore_index=True)
    save_path = make_savepath_section(section)
    section_df.to_csv(save_path, sep='\t', index=False)

In [None]:
# DO concat all
# Stack every section-level table into a single table and write it out.
section_csv_paths = sorted(glob.glob('../data/Results-ts/concat/**/*.csv'))
section_dfs = [pd.read_csv(path, sep='\t') for path in section_csv_paths]
# ignore_index=True renumbers the stacked rows from 0.
all_df = pd.concat(section_dfs, ignore_index=True)

new_dir_path = '../data/Results-ts/all'
os.makedirs(new_dir_path, exist_ok=True)
save_path = '../data/Results-ts/all/ts.csv'
all_df.to_csv(save_path, sep='\t', index=False)