In [1]:
%matplotlib inline


import numpy as np
import matplotlib.pyplot as plt
import os.path as osp
import os
import pandas as pd

from nilearn import datasets
from nilearn.input_data import NiftiMasker
from nilearn import plotting
from nilearn import image
from nilearn.connectome import ConnectivityMeasure

import nibabel as nib

import warnings
warnings.filterwarnings("ignore")

  from ._conv import register_converters as _register_converters
  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
# Load the subject table (data_info.csv) as a pandas DataFrame and add the seed-based correlation file paths for further machine learning

def create_ml_csv(output_root_dir='/data_local/deeplearning/ABIDE_ML_inputs',
                  sbc_dir='sbc'):
    """Load the subject table and attach a seed-based-correlation path per row.

    Parameters
    ----------
    output_root_dir : str
        Directory that contains ``data_info.csv``; also used as the root of
        every generated path.
    sbc_dir : str
        Folder name inserted between the subject folder and the
        ``ATLAS/BPTF/CONFOUNDS`` segment of each generated path.

    Returns
    -------
    pandas.DataFrame
        The contents of ``data_info.csv`` with an ``sbc_file`` column holding
        one path per value of the ``SUB_ID`` column.
    """
    subjects = pd.read_csv(osp.join(output_root_dir, 'data_info.csv'))

    # 'ATLAS/BPTF/CONFOUNDS' is a literal placeholder segment; create_sbc_maps
    # substitutes the concrete atlas/filter/regression folder via str.replace.
    subjects['sbc_file'] = [
        osp.join(output_root_dir, f'00{subject_id}', sbc_dir,
                 'ATLAS/BPTF/CONFOUNDS/sbc_maps.npy')
        for subject_id in subjects['SUB_ID']
    ]

    return subjects


In [3]:
def bandpass(to_bandpass=False, tr=1.0, low_freq=0.01, high_freq=0.001):
    """Build the whole-brain ``NiftiMasker``, optionally with temporal filtering.

    Parameters
    ----------
    to_bandpass : bool
        When true, the filter settings below are handed to the masker;
        otherwise they are not passed at all.
    tr : float
        Forwarded to the masker as ``t_r``.
    low_freq : float
        Forwarded to the masker as ``low_pass``.
    high_freq : float
        Forwarded to the masker as ``high_pass``.

    Returns
    -------
    NiftiMasker
        An unfitted masker with 6 mm smoothing, detrending, standardization
        and an on-disk cache in ``nilearn_cache``.
    """
    # NOTE(review): with the defaults, low_pass (0.01) is larger than
    # high_pass (0.001) - confirm this is the intended frequency band.
    masker_options = dict(smoothing_fwhm=6, detrend=True, standardize=True,
                          memory='nilearn_cache', memory_level=1, verbose=0)

    if to_bandpass:
        masker_options.update(t_r=tr, low_pass=low_freq, high_pass=high_freq)

    return NiftiMasker(**masker_options)



In [4]:
def get_sbc_maps(timecourses, brain_masker, func_filename, confound_filename):
    """Compute one Fisher-z seed-based correlation volume per seed time course.

    Parameters
    ----------
    timecourses : iterable of array-like
        One seed time series per item; each is multiplied against the
        whole-brain time series of ``func_filename``.
    brain_masker : masker object
        Must provide ``fit_transform(img, confounds=...)`` and
        ``inverse_transform(values)`` (e.g. the masker built by ``bandpass``).
    func_filename : str
        Functional image handed to ``brain_masker.fit_transform``.
    confound_filename : str
        Confound file handed to ``brain_masker.fit_transform`` (as a
        one-element list).

    Returns
    -------
    numpy.ndarray
        The squeezed volumes stacked along a new first axis, one per seed.

    Raises
    ------
    ValueError
        From ``np.stack`` when ``timecourses`` is empty.
    """
    brain_time_series = brain_masker.fit_transform(func_filename,
                                                   confounds=[confound_filename])

    sbc_maps = []
    for seed_time_series in timecourses:
        # Dot product divided by the number of time points.
        # NOTE(review): this equals a Pearson correlation only if both the
        # voxel and the seed series are standardized - confirm for the seeds.
        seed_based_correlations = (np.dot(brain_time_series.T, seed_time_series)
                                   / seed_time_series.shape[0])
        seed_based_correlations_fisher_z = np.arctanh(seed_based_correlations)

        sbc_img = brain_masker.inverse_transform(seed_based_correlations_fisher_z.T)

        # Image.get_data() was deprecated and later removed from nibabel;
        # np.asanyarray(img.dataobj) is the documented drop-in replacement
        # that keeps the array's dtype unchanged.
        sbc_maps.append(np.squeeze(np.asanyarray(sbc_img.dataobj)))

    return np.stack(sbc_maps)
        

In [17]:
def create_sbc_maps(atlas_names, bptf, confounds,
                    output_root_dir='/data_local/deeplearning/ABIDE_ML_inputs',
                    sbc_dir='sbc',
                    to_bandpass=False):
    """Compute and save seed-based correlation maps for every subject and atlas.

    For each row of the subject table and each atlas, the atlas time courses
    are read from the row's ``tc_file``, correlated with the whole-brain
    signal by ``get_sbc_maps`` and written with ``np.save`` to the row's
    ``sbc_file``. In both paths the ``ATLAS/BPTF/CONFOUNDS`` placeholder is
    replaced by ``<atlas>/<bptf|no_bptf>/<nilearn_regress|no_nilearn_regress>``.

    Parameters
    ----------
    atlas_names : iterable of str
        Atlas folder names to process.
    bptf : bool
        Selects the ``RSFMRI_bptf_file`` column (true) or the
        ``RSFMRI_nonbptf_file`` column (false) as functional image, and the
        ``bptf`` / ``no_bptf`` folder name.
    confounds : bool
        Selects the ``nilearn_regress`` / ``no_nilearn_regress`` folder name.
    output_root_dir : str
        Root directory of the subject table and of the output folders.
    sbc_dir : str
        Folder name for the correlation maps inside each subject folder.
    to_bandpass : bool
        Forwarded to ``bandpass`` when building the whole-brain masker.

    Returns
    -------
    None
        Results are written to disk. ``temp_nuisance.csv`` is also
        (re)written in the current working directory for every subject.
    """
    # Forward the caller's locations; calling create_ml_csv() without
    # arguments silently fell back to the default root and sbc folder.
    df_data_info = create_ml_csv(output_root_dir=output_root_dir, sbc_dir=sbc_dir)
    nsubjects = len(df_data_info)

    brain_masker = bandpass(to_bandpass=to_bandpass)

    # These only depend on the flags, so resolve them once.
    rs_column = 'RSFMRI_bptf_file' if bptf else 'RSFMRI_nonbptf_file'
    bptf_folder = 'bptf' if bptf else 'no_bptf'
    regress_folder = 'nilearn_regress' if confounds else 'no_nilearn_regress'
    variant = f'{bptf_folder}/{regress_folder}'
    placeholder = 'ATLAS/BPTF/CONFOUNDS'

    for counter, sub_i in enumerate(df_data_info.index):

        if counter % 100 == 0:
            print(f'{counter}/{nsubjects}')

        # The nuisance file is tab-separated without a header; it is
        # re-written as a comma-separated file for the masker.
        # NOTE(review): to_csv also writes the index column and a header row;
        # confirm the masker's confound reader does not pick the index up as
        # an extra regressor.
        nuisance = pd.read_csv(df_data_info['nuisance_file'].loc[sub_i], sep='\t', header=None)
        nuisance.to_csv('temp_nuisance.csv')

        rsfilename = df_data_info[rs_column].loc[sub_i]
        id_subject = df_data_info['SUB_ID'].loc[sub_i]

        for atlas_name in atlas_names:
            atlas_bptf_conf_name = osp.join(atlas_name, variant)

            file_dir = osp.join(output_root_dir, f'00{id_subject}',
                                sbc_dir, atlas_bptf_conf_name)
            # exist_ok avoids the check-then-create race of a separate
            # osp.exists() test.
            os.makedirs(file_dir, exist_ok=True)

            # Read the time courses; transposed to one row per column of the csv.
            tc_file = df_data_info['tc_file'].loc[sub_i].replace(placeholder, atlas_bptf_conf_name)
            timecourses = pd.read_csv(tc_file).values.transpose()

            # NOTE(review): the nuisance file is passed as confounds even when
            # `confounds` is False - that flag only changes the folder name.
            # Confirm this is intended.
            sbc_data = get_sbc_maps(timecourses, brain_masker, rsfilename, 'temp_nuisance.csv')

            # Write the seed-based correlation maps.
            sbc_file = df_data_info['sbc_file'].loc[sub_i].replace(placeholder, atlas_bptf_conf_name)
            np.save(sbc_file, sbc_data)

    return None

In [20]:
def run():
    """Generate the seed-based correlation maps for the configured atlases.

    Calls ``create_sbc_maps`` once per entry of the confound settings below,
    always on the ``bptf=True`` data.
    """
    # Other atlas names used with this pipeline, kept for reference:
    # 'JAMA_IC19', 'JAMA_IC52', 'JAMA_IC7', 'AAL',
    # 'HO_cort_maxprob_thr25-2mm', 'schaefer_400'
    selected_atlases = ['schaefer_100']

    root_dir = '/data_local/deeplearning/ABIDE_ML_inputs'
    maps_folder = 'sbc'

    confound_settings = (False,)
    for use_confounds in confound_settings:
        create_sbc_maps(selected_atlases, bptf=True, confounds=use_confounds,
                        output_root_dir=root_dir, sbc_dir=maps_folder)
    

In [21]:
run()

0/2169
100/2169
200/2169
300/2169
400/2169
500/2169
600/2169
700/2169
800/2169
900/2169
1000/2169
1100/2169
1200/2169
1300/2169
1400/2169
1500/2169
1600/2169
1700/2169
1800/2169
1900/2169
2000/2169
2100/2169


In [None]:
# Data exploration: how many time points does each subject have?
# The two histograms help decide which subjects to include.
df = create_ml_csv()

# Bin edges for the plain and the reverse-cumulative histogram
# (the cumulative one has no edge at 500).
ntimes_edges = [50, 100, 125, 150, 200, 300, 500, 1000]
ntimes_edges_cumulative = [50, 100, 125, 150, 200, 300, 1000]

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15,10))
ax_counts, ax_cumulative = axes

df.hist(column='nTimes', bins=ntimes_edges, ax=ax_counts)
# Disabled alternative view: df_input_data['nTimes'].plot.kde()
df.hist(column='nTimes', cumulative=-1, bins=ntimes_edges_cumulative,
        linewidth=5, histtype='step', ax=ax_cumulative)

In [None]:
# Work with the first subject of the ADHD data set.
# adhd_dataset.func and adhd_dataset.confounds are indexed with [0]
# to select the first (0-based) entry of each.
adhd_dataset = datasets.fetch_adhd(n_subjects=1)

func_filename, confound_filename = adhd_dataset.func[0], adhd_dataset.confounds[0]


In [None]:
from nilearn import input_data

# Seed location for the posterior cingulate cortex (PCC).
pcc_coords = [(0, -52, 18)]

# Signal-cleaning and caching options shared by both maskers.
shared_masker_options = dict(
    detrend=True, standardize=True,
    low_pass=0.1, high_pass=0.01, t_r=2.,
    memory='nilearn_cache', memory_level=1, verbose=0)

# Sphere of radius 8 around the seed coordinate.
seed_masker = input_data.NiftiSpheresMasker(pcc_coords, radius=8,
                                            **shared_masker_options)

# Whole-brain masker, additionally smoothed with a 6 mm kernel.
brain_masker = input_data.NiftiMasker(smoothing_fwhm=6,
                                      **shared_masker_options)

seed_time_series = seed_masker.fit_transform(
    func_filename, confounds=[confound_filename])

plt.plot(seed_time_series)
plt.title('Seed time series (Posterior cingulate cortex)')
plt.xlabel('Scan number')
plt.ylabel('Normalized signal')
plt.tight_layout()

In [None]:
# Extract the whole-brain signal with the same confounds as the seed signal.
brain_time_series = brain_masker.fit_transform(
    func_filename, confounds=[confound_filename])

# The title says "random", but these five column indices are fixed.
example_voxels = [10, 45, 100, 5000, 10000]
plt.plot(brain_time_series[:, example_voxels])
plt.title('Time series from 5 random voxels')
plt.xlabel('Scan number')
plt.ylabel('Normalized signal')
plt.tight_layout()

In [None]:
# Dot product of every voxel series with the seed series, divided by the
# number of scans.
n_scans = seed_time_series.shape[0]
seed_based_correlations = np.dot(brain_time_series.T, seed_time_series) / n_scans

seed_based_correlations_fisher_z = np.arctanh(seed_based_correlations)

# The image is built from the raw values, not from the Fisher-z ones.
seed_based_correlation_img = brain_masker.inverse_transform(seed_based_correlations.T)

In [None]:
# Shape of the image returned by brain_masker.inverse_transform.
seed_based_correlation_img.shape

In [None]:
# Shape of the arctanh-transformed (Fisher z) value array.
seed_based_correlations_fisher_z.shape

In [None]:
# Shape of the untransformed value array.
seed_based_correlations.shape

In [None]:
# Collect three squeezed copies of the correlation volume in a list.
# Image.get_data() was deprecated and later removed from nibabel;
# np.asanyarray(img.dataobj) is the documented equivalent.
x = [np.squeeze(np.asanyarray(seed_based_correlation_img.dataobj))
     for _ in range(3)]

In [None]:
# Join the list of volumes along a new first axis.
xx=np.stack(x)

In [None]:
# Shape of the stacked array.
xx.shape

In [22]:
# NOTE(review): ad-hoc subtraction; what the two numbers stand for is not shown in this notebook.
22311 - 22252

59