### Env

In [1]:
# Env
import nibabel as nib
from nilearn import datasets, input_data, plotting, connectome, image
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.spatial.distance import cdist
from scipy.stats import pearsonr, zscore
import pandas as pd
import os
import json
import glob
import time
import gc
import time
from joblib import Parallel, delayed

# Functions
def load_image(sub, movie,
               derivatives_dir='/home/tamires/projects/rpp-aevans-ab/tamires/data/fmri_datasets/ds002837/derivatives'):
    """Open the preprocessed BOLD image of one participant / movie pair.

    Parameters
    ----------
    sub : int or str
        Participant identifier, inserted as ``sub-{sub}`` in the folder and file name.
    movie : str
        Task (movie) label, inserted as ``task-{movie}`` in the file name.
    derivatives_dir : str, optional
        Root folder that contains the ``sub-*/func`` sub-folders. Defaults to the
        location this notebook was written against, so existing calls are unchanged.

    Returns
    -------
    The object returned by ``nib.load(path, mmap=True)``.
    """
    path = os.path.join(derivatives_dir, f'sub-{sub}', 'func',
                        f'sub-{sub}_task-{movie}_bold_blur_censor_ica.nii.gz')
    # NOTE(review): the file is gzip-compressed; memory mapping presumably only
    # helps for uncompressed images - confirm against the nibabel documentation.
    return nib.load(path, mmap=True)


def save_correlation_data(networks_data, sub, movie,
                          output_dir='/home/tamires/projects/rpp-aevans-ab/tamires/data/fmri_derived/networks_data'):
    """Write the per-window network statistics of one participant to a parquet file.

    Parameters
    ----------
    networks_data : list of list
        One row per (network, window); each row holds the 12 values named in
        ``columns`` below, in that order.
    sub : int or str
        Participant identifier; stored in a ``sub`` column and used in the file name.
    movie : str
        Task (movie) label; stored in a ``movie`` column and used in the file name.
    output_dir : str, optional
        Destination folder. Defaults to the location this notebook was written
        against. It is created when it does not exist yet.

    Returns
    -------
    str
        Path of the file that was written.

    Notes
    -----
    Existing files are never overwritten: the name ends in ``_v{n}.parquet`` and
    ``n`` is increased until an unused name is found.
    """
    columns = ['network', 'start', 'end',
               'net_mean', 'net_median', 'net_std',
               'net_mean_abs', 'net_median_abs', 'net_std_abs',
               'fc_mean_abs', 'fc_median_abs', 'fc_std_abs']
    df = pd.DataFrame(networks_data, columns=columns)
    df['sub'] = sub
    df['movie'] = movie

    # Make sure the destination exists; exist_ok keeps this safe when several
    # workers reach this point at the same time.
    os.makedirs(output_dir, exist_ok=True)
    base_filename = os.path.join(output_dir, f'sub-{sub}_task-{movie}_networks')

    # Pick the first version number that is not taken yet.
    version = 1
    filename = f"{base_filename}_v{version}.parquet"
    while os.path.exists(filename):
        version += 1
        filename = f"{base_filename}_v{version}.parquet"

    df.to_parquet(filename)
    return filename


def brain_networks_extraction(df_networks, network_name, fmri_img, radius = 6):
    """Extract one signal per seed sphere of a named network.

    Parameters
    ----------
    df_networks : pandas.DataFrame
        Table with a ``name`` column and seed coordinates in ``x``, ``y``, ``z``.
    network_name : str
        Value of ``name`` selecting the seeds to use.
    fmri_img : image
        Image handed to ``NiftiSpheresMasker.fit_transform``.
    radius : float, optional
        Sphere radius passed to the masker (default 6).

    Returns
    -------
    The value returned by ``NiftiSpheresMasker.fit_transform`` - presumably an
    array with one column per seed; confirm against the nilearn documentation.

    Raises
    ------
    ValueError
        If ``df_networks`` has no row whose ``name`` equals ``network_name``.

    Notes
    -----
    NOTE(review): the masker is built with ``standardize=True``. The saved tables
    displayed further down in this notebook show ``net_std == 1.0`` and
    ``net_mean`` close to 0 on every row, which is consistent with each window
    being standardised here - confirm that this is intended.
    """
    # Seed coordinates of the requested network
    selected = df_networks['name'] == network_name
    peak_coords = df_networks.loc[selected, ['x', 'y', 'z']].values
    if len(peak_coords) == 0:
        raise ValueError(f"No seed coordinates found for network {network_name!r}")

    # One sphere of the requested radius around every seed
    spheres_masker = input_data.NiftiSpheresMasker(seeds=peak_coords, radius=radius, standardize=True)
    # Signal of each sphere across the volumes of fmri_img
    return spheres_masker.fit_transform(fmri_img)


def network_statistic(time_series):
    """Summarise a block of signals with six scalars.

    The statistics are taken over every entry of ``time_series`` at once (no
    axis is given). The returned list is ordered as
    ``[mean, median, std]`` of the raw values followed by
    ``[mean, median, std]`` of the absolute values.
    """
    reducers = (np.mean, np.median, np.std)
    magnitudes = abs(time_series)
    return [reduce(values)
            for values in (time_series, magnitudes)
            for reduce in reducers]
    

def functional_conectivity_statistic(time_series):
    """Summarise the absolute connectivity between the signals of one network.

    A ``ConnectivityMeasure(kind='correlation')`` is fitted on ``time_series``
    alone and the mean, median and standard deviation of the absolute values of
    the resulting matrix are returned as ``[mean, median, std]``.

    NOTE(review): the three statistics run over every entry of the matrix; no
    entries are masked out, so the diagonal and both triangles contribute -
    confirm that this is the intended summary.
    """
    estimator = connectome.ConnectivityMeasure(kind='correlation')
    # fit_transform takes a list of inputs; pass the single series and take its matrix back out
    strengths = abs(estimator.fit_transform([time_series])[0])
    return [np.mean(strengths), np.median(strengths), np.std(strengths)]


def process_time_intervals(img_original, network_list, start, end, step, radius=2, networks_df=None):
    """Compute network and connectivity statistics on consecutive windows of a 4D image.

    Windows start at ``start, start + step, ...`` for every value below ``end``
    and span ``step`` volumes each. For every window and every network the
    signals are extracted with ``brain_networks_extraction`` and summarised with
    ``network_statistic`` and ``functional_conectivity_statistic``.

    Parameters
    ----------
    img_original : image
        4D image exposing ``shape``, ``dataobj``, ``affine`` and ``header``.
    network_list : iterable of str
        Network names to process in every window.
    start, end, step : int
        Arguments of the ``range`` that yields the window start indices.
        The last window may extend past ``end`` when ``end - start`` is not a
        multiple of ``step``.
    radius : float, optional
        Sphere radius forwarded to ``brain_networks_extraction`` (default 2).
        NOTE(review): ``brain_networks_extraction`` itself defaults to 6 -
        confirm which value is intended.
    networks_df : pandas.DataFrame, optional
        Seed table forwarded to ``brain_networks_extraction``. When omitted, the
        notebook-level ``df_networks`` is used, as before.

    Returns
    -------
    list of list
        One row per (window, network):
        ``[network_name, window_start, window_end] + 6 network stats + 3 connectivity stats``.
        ``window_end`` is the real end of the extracted slice, i.e. it is
        clipped to the number of volumes in the image.

    Raises
    ------
    ValueError
        If ``start`` is negative (negative indices would silently address
        volumes counted from the end of the image), or if a window would start
        at or beyond the last volume.
    """
    if start < 0:
        raise ValueError(f"start must be >= 0, got {start}")
    if networks_df is None:
        # Fall back to the table loaded at notebook level.
        networks_df = df_networks

    n_volumes = img_original.shape[3]
    networks_data = []
    for t in range(start, end, step):
        if t >= n_volumes:
            raise ValueError(f"window start {t} is beyond the {n_volumes} volumes of the image")
        # Slicing past the last volume is clipped silently, so record the true end.
        window_end = min(t + step, n_volumes)
        window = img_original.dataobj[:, :, :, t:window_end]
        img = nib.Nifti1Image(window, img_original.affine, img_original.header)
        # TODO: convert the window to MNI space here (image_to_mni_space is not implemented yet)

        for network_name in network_list:
            time_series = brain_networks_extraction(networks_df, network_name=network_name, fmri_img=img, radius=radius)
            net_stats = network_statistic(time_series)
            fc_stats = functional_conectivity_statistic(time_series)
            networks_data.append([network_name, t, window_end] + net_stats + fc_stats)

    return networks_data


def process_participant(sub, movie, network_list, end, duration, step):
    """Run the load -> windowed statistics -> save pipeline for one participant.

    The analysed span is ``[end - duration, end)``, cut into windows of ``step``
    volumes by ``process_time_intervals``. Progress is reported with ``print``;
    nothing is returned.

    Timings noted by the author: about 153 ms to load, about 2 s per window per
    network to process, about 600 ms to save.
    """
    print(f"Initializing processing participant {sub} | {movie}")

    bold_img = load_image(sub, movie)

    window_rows = process_time_intervals(
        bold_img,
        network_list,
        start=end - duration,
        end=end,
        step=step,
    )

    save_correlation_data(window_rows, sub, movie)
    print(f"File saved participant {sub} | {movie}")

In [None]:
def image_to_mni_space(fmri_img):
    """Placeholder for converting an fMRI image to MNI space.

    The transformation has not been written yet. The function fails explicitly
    instead of returning an undefined name, so an accidental call is easy to
    diagnose.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError(
        "image_to_mni_space is a placeholder: the MNI-space transformation is not implemented yet"
    )

In [None]:
# Data
df_networks = pd.read_csv("/home/tamires/projects/rpp-aevans-ab/tamires/data/fmri_derived/mni_space_of_networks.csv")
participants = pd.read_csv("/home/tamires/projects/rpp-aevans-ab/tamires/data/fmri_datasets/ds002837/participants.tsv", sep='\t')
# Number the rows 1..86 in file order (the assignment requires exactly 86 rows).
participants['sub'] = range(1,87)
# Participant 49 is excluded because its data is corrupted. The explicit .copy()
# makes the filtered frame independent, so the column assignment below does not
# target a slice of the original frame (pandas SettingWithCopyWarning).
participants = participants[participants['sub'] != 49].copy()
# Size of the 4th image dimension of each participant's run.
participants['end'] = [
    load_image(sub=sub, movie=task).shape[3]
    for sub, task in zip(participants['sub'], participants['task'])
]


# Parameters
participants_test = participants.iloc[20:22]
network_list = df_networks.name.unique()[:2]
duration = 20 #30 * 60
step = 10
# Volumes left out at the end of every run when choosing the analysed span.
# NOTE(review): presumably 30 minutes expressed in volumes - confirm.
end_offset = 30 * 60

# Set the number of jobs (parallel workers)
n_jobs = 5  # Adjust this number based on your system's capacity
os.environ["OMP_NUM_THREADS"] = "1"  # Ensure thread limiting if needed
# process_participant returns nothing, so `results` only holds one None per participant.
results = Parallel(n_jobs=n_jobs)(
    delayed(process_participant)(sub, movie, network_list, end - end_offset, duration, step)
    for sub, movie, end in participants_test[['sub', 'task','end']].values
)

In [2]:
# Display the saved per-window statistics of sub-21 / citizenfour (file version 1).
pd.read_parquet('/home/tamires/projects/rpp-aevans-ab/tamires/data/fmri_derived/networks_data/sub-21_task-citizenfour_networks_v1.parquet')

Unnamed: 0,network,start,end,net_mean,net_median,net_std,net_mean_abs,net_median_abs,net_std_abs,fc_mean_abs,fc_median_abs,fc_std_abs,sub,movie
0,Autobiographical memory,5004,5014,4.146410e-09,-0.039491,1.0,0.827963,0.746665,0.560783,0.099042,0.054861,0.195850,21,citizenfour
1,Cognitive attention control,5004,5014,-5.019339e-09,0.023035,1.0,0.800484,0.661379,0.599353,0.130278,0.078594,0.212287,21,citizenfour
2,Extended multiple demand network,5004,5014,-7.012311e-10,-0.044770,1.0,0.845635,0.741440,0.533761,0.146074,0.085131,0.222168,21,citizenfour
3,Emotional scene and face processing,5004,5014,7.947286e-09,-0.036325,1.0,0.811035,0.684829,0.584997,0.127213,0.080978,0.192049,21,citizenfour
4,Empathy,5004,5014,1.517209e-08,-0.037969,1.0,0.816780,0.675236,0.576949,0.118908,0.074932,0.199825,21,citizenfour
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2515,Reward,6794,6804,-2.670288e-08,0.024573,1.0,0.827763,0.752602,0.561078,0.082629,0.042846,0.189770,21,citizenfour
2516,Theory of mind,6794,6804,1.629194e-08,0.000121,1.0,0.825716,0.757196,0.564087,0.150342,0.097960,0.233176,21,citizenfour
2517,Vigilant attention,6794,6804,-1.192093e-08,0.003070,1.0,0.808579,0.679916,0.588388,0.127138,0.075261,0.229457,21,citizenfour
2518,Working memory,6794,6804,-1.658564e-08,-0.081248,1.0,0.839791,0.819601,0.542910,0.099668,0.055365,0.196236,21,citizenfour


In [3]:
# Display the saved per-window statistics of sub-21 / citizenfour (file version 2).
pd.read_parquet('/home/tamires/projects/rpp-aevans-ab/tamires/data/fmri_derived/networks_data/sub-21_task-citizenfour_networks_v2.parquet')

Unnamed: 0,network,start,end,net_mean,net_median,net_std,net_mean_abs,net_median_abs,net_std_abs,fc_mean_abs,fc_median_abs,fc_std_abs,sub,movie
0,Autobiographical memory,4984,4994,1.036602e-09,0.037134,1.0,0.830784,0.787713,0.556595,0.139322,0.09856,0.193728,21,citizenfour
1,Cognitive attention control,4984,4994,-5.019339e-09,0.05013,1.0,0.839045,0.782259,0.544061,0.157645,0.105647,0.211377,21,citizenfour
2,Autobiographical memory,4994,5004,8.29282e-09,0.006287,1.0,0.818686,0.686127,0.574242,0.077657,0.031184,0.198383,21,citizenfour
3,Cognitive attention control,4994,5004,2.007735e-08,0.027837,1.0,0.817934,0.771652,0.575312,0.09977,0.043571,0.215004,21,citizenfour


In [48]:
# Display the participants table from row position 20 onward.
# NOTE(review): relies on `participants` built in the Data cell; re-run the notebook
# top to bottom so this output matches the current kernel state.
participants[20:]

Unnamed: 0,participant_id,age,sex,task,sub,end
20,sub-21,27,M,citizenfour,21,6804
21,sub-22,23,M,citizenfour,22,6804
22,sub-23,24,M,citizenfour,23,6804
23,sub-24,22,M,citizenfour,24,6804
24,sub-25,20,F,citizenfour,25,6804
...,...,...,...,...,...,...
81,sub-82,50,M,12yearsaslave,82,7715
82,sub-83,18,F,12yearsaslave,83,7715
83,sub-84,22,F,12yearsaslave,84,7715
84,sub-85,23,F,12yearsaslave,85,7715
