# Notebook 2: Reconstruction accuracy

This notebook contains the scripts that compute the reconstruction accuracy of various brain eigenmodes over HCP's resting-state and task fMRI data.


## Preliminary scripts

---

These scripts load required packages and define useful functions that are used by this notebook.

##### Package imports

---


In [1]:
import os
import gc
import sys
import glob
import json
import random
import datetime
import importlib
import itertools
import numpy as np
from scipy import spatial
import scipy.sparse as sparse
import scipy.stats as stats
import scipy.io as sio
import pandas as pd
import nibabel as nib
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.gridspec import GridSpec
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import seaborn as sns
import boto3
import lapy
import h5py
from sklearn import linear_model
from tqdm.notebook import tqdm

# CSS used for computing local distances and connectome smoothing
from Connectome_Spatial_Smoothing import CSS as css

# Cerebro brain viewer used for visualization
from cerebro import cerebro_brain_utils as cbu
from cerebro import cerebro_brain_viewer as cbv


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


##### Basic functions

---


In [2]:
# Some useful functions

class MyNumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native Python types."""

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        NumPy integers become ``int``, NumPy floating-point scalars become
        ``float`` and NumPy arrays become (nested) lists. Every other object
        is handed to ``json.JSONEncoder.default``, which raises ``TypeError``
        for types it cannot serialize.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            # Zero-argument super() resolves to this class; naming a class
            # that does not exist here would raise NameError instead of the
            # TypeError that json callers expect for unsupported objects.
            return super().default(obj)


def ensure_dir(file_name):
    """Create the parent directory of ``file_name`` if needed and return ``file_name``.

    Only the directory part of the path is created (including intermediate
    directories); the file itself is never touched. The unchanged
    ``file_name`` is returned so the call can be nested inside a save call.
    """
    parent_dir = os.path.dirname(file_name)
    # A bare file name has an empty directory part, and os.makedirs('')
    # raises FileNotFoundError; there is nothing to create in that case.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    return file_name


def list_dirs(path=os.getcwd()):
    """Return the entries directly under ``path`` that are directories.

    Candidates are collected with the glob pattern ``<path>/*`` and kept
    when ``os.path.isdir`` is true for them. Note that the default value is
    the working directory at the time this function was defined.
    """
    pattern = os.path.join(path, '*')
    return list(filter(os.path.isdir, glob.glob(pattern)))


def file_exists(file_name, path_name=os.getcwd()):
    """Return True when ``file_name``, joined onto ``path_name``, is a regular file.

    The default ``path_name`` is the working directory at the time this
    function was defined. Because the two parts are combined with
    ``os.path.join``, an absolute ``file_name`` is checked as-is.
    """
    candidate = os.path.join(path_name, file_name)
    return os.path.isfile(candidate)


def write_json(json_obj, file_path):
    """Write ``json_obj`` to ``file_path`` as JSON and return ``json_obj``.

    The output is indented by four spaces with sorted keys; NumPy values are
    converted by ``MyNumpyEncoder``.
    """
    with open(file_path, 'w') as handle:
        json.dump(
            json_obj,
            handle,
            sort_keys=True,
            indent=4,
            cls=MyNumpyEncoder,
        )
    return json_obj


def load_json(file_path):
    """Read the file at ``file_path`` and return its parsed JSON content."""
    with open(file_path, 'r') as handle:
        parsed = json.load(handle)
    return parsed


def write_np(np_obj, file_path):
    """Save ``np_obj`` to ``file_path`` in NumPy's binary ``.npy`` format.

    The file is opened explicitly in binary mode, so ``file_path`` is used
    exactly as given. Nothing is returned.
    """
    with open(file_path, 'wb') as handle:
        np.save(handle, np_obj)


def load_np(file_path):
    """Load and return the NumPy object stored in the binary file ``file_path``."""
    with open(file_path, 'rb') as handle:
        loaded = np.load(handle)
    return loaded


##### Directory structure

---


In [3]:
# path setting
# Absolute path of the parent of the current working directory
main_dir = os.path.abspath('../')

# Base directory for the data files read and written by the cells below
data_dir = f"{main_dir}/data"


##### Loading Laplacian eigenmodes

---


In [4]:
# function to load pre-computed eigenmodes
def load_eigenmodes(prefix, store_path=f"{data_dir}/eigenmodes"):
    """Load the array stored at ``<store_path>/<prefix>_eigenmodes.npy``.

    The default ``store_path`` is built from ``data_dir`` when this function
    is defined, so later changes to ``data_dir`` do not affect it.
    """
    eigenmodes_file = f'{store_path}/{prefix}_eigenmodes.npy'
    return load_np(eigenmodes_file)


##### Brain atlas

---


In [5]:
# Load the brain atlas
# The CSS helper returns the atlas labels together with a characteristic
# matrix that is indexed below as (parcel, vertex).
HCPMMP1_labels, HCPMMP1_charmat = css.parcellation_characteristic_matrix()
# Keep the first 180 labels/rows and the first 29696 columns
# (presumably the left-hemisphere parcels and left cortical vertices — TODO confirm).
left_HCPMMP1_labels = HCPMMP1_labels[:180]
left_HCPMMP1_charmat = HCPMMP1_charmat[:180, :29696]
# Divide by the per-row sums so that multiplying a vertex-wise map by this
# matrix yields a per-parcel average.
# NOTE(review): this relies on `.sum(1)` returning a column (as matrix types
# do) so that the division is applied row by row — confirm the type returned by CSS.
left_HCPMMP1_mean = left_HCPMMP1_charmat / left_HCPMMP1_charmat.sum(1)


##### 255 subjects list

---


In [6]:
# 255 selected subjects
# Read as strings (dtype=str) because the IDs are interpolated into file paths below
subjects = np.genfromtxt(f"{data_dir}/pang/empirical/subject_list_HCP.txt", dtype=str)


## Loading alternative connectome eigenmodes

---

Here we first load all the eigenmodes that are to be compared.

**Note**: These eigenmodes were constructed in Notebook 1.


In [7]:
N_modes = 200

# display label -> file prefix of the stored eigenmodes
eigenmode_prefixes = {
    # those provided by Pang and colleagues:
    'Geometry (Pang et al.)': "pang_geometric",
    'EDR (Pang et al.)': "pang_edr",
    'Connectome (Pang et al.)': "pang_connectome",
    # those generated by us:
    'Our connectome': "our_connectome",
    'Density increase to 1%': "1%_density_binary_connectome",
    'Gyral bias: regression': "1%_density_binary_gyral_bias_regression_connectome",
    'Gyral bias: tractography': "1%_density_binary_gyral_bias_tractography_connectome",
}

# load every basis and keep only its first N_modes columns
eigenmodes = {
    label: load_eigenmodes(prefix=prefix)[:, :N_modes]
    for label, prefix in eigenmode_prefixes.items()
}


## Computing reconstruction coefficients and accuracies

---


In [8]:
def get_dot_product_coeffs(signal, eigenmodes):
    """Return the dot product of ``signal`` with ``eigenmodes``.

    With ``eigenmodes`` holding one mode per column, the result holds one
    coefficient per mode for every row of ``signal``.
    """
    coefficients = np.dot(signal, eigenmodes)
    return coefficients

def get_regression_coeffs(signal, eigenmodes):
    """Return the linear-regression coefficients of ``signal`` on ``eigenmodes``.

    The eigenmodes act as the design matrix and the signal as the target;
    the model is fitted without an intercept term.
    """
    # linear regression
    model = linear_model.LinearRegression(fit_intercept=False)
    return model.fit(eigenmodes, signal).coef_

def compute_reconstruction_accuracy_atlas(eigenmodes, signal, atlas):
    """Accuracy of dot-product reconstructions of ``signal``, evaluated on ``atlas``.

    The modes (columns of ``eigenmodes``) are scaled to unit norm, the
    signal is encoded by dot products with them, and it is rebuilt
    cumulatively from the first 1, 2, ... modes. The signal and every
    partial reconstruction are mapped through ``atlas`` and correlated.

    Returns a 1-D array whose k-th entry is the correlation between the
    atlas-level signal and the reconstruction that uses the first k+1 modes.
    """
    # dot product with unit-norm modes
    column_norms = np.linalg.norm(eigenmodes, axis=0)
    unit_modes = eigenmodes / column_norms
    coeffs = get_dot_product_coeffs(signal.reshape(1, -1), unit_modes)
    # column k of the cumulative sum is the reconstruction from modes 0..k
    weighted_modes = np.multiply(unit_modes, coeffs)
    partial_reconstructions = np.cumsum(weighted_modes, axis=1)
    # downsample to atlas
    atlas_signal = np.asarray(atlas.dot(signal)).ravel()
    atlas_reconstructions = np.asarray(atlas.dot(partial_reconstructions))
    # evaluate accuracy: variable 0 of the correlation matrix is the signal,
    # variables 1.. are the successive reconstructions
    correlations = np.corrcoef(x=atlas_signal, y=atlas_reconstructions.T)
    return correlations[1:, 0]

def compute_prediction_accuracy_atlas(eigenmodes, signal, atlas):
    """Accuracy of regression-based reconstructions of ``signal``, evaluated on ``atlas``.

    The signal is encoded with the regression coefficients returned by
    ``get_regression_coeffs`` (modes used as given, without normalization)
    and rebuilt cumulatively from the first 1, 2, ... modes. The signal and
    every partial reconstruction are mapped through ``atlas`` and correlated.

    Returns a 1-D array whose k-th entry is the correlation between the
    atlas-level signal and the reconstruction that uses the first k+1 modes.
    """
    # regression coefficients
    coeffs = get_regression_coeffs(signal, eigenmodes)
    # column k of the cumulative sum is the reconstruction from modes 0..k
    weighted_modes = np.multiply(eigenmodes, coeffs)
    partial_reconstructions = np.cumsum(weighted_modes, axis=1)
    # downsample to atlas
    atlas_signal = np.asarray(atlas.dot(signal)).ravel()
    atlas_reconstructions = np.asarray(atlas.dot(partial_reconstructions))
    # evaluate accuracy: variable 0 of the correlation matrix is the signal,
    # variables 1.. are the successive reconstructions
    correlations = np.corrcoef(x=atlas_signal, y=atlas_reconstructions.T)
    return correlations[1:, 0]


### Task-evoked activity

---


In [None]:
# list of the 47 task contrasts used
# Each entry maps a contrast name to a (task, cope) pair of strings. The
# export cell below unpacks the pair and substitutes both values into the
# path of the HCP statistics file (`tfMRI_{task}` and `cope{cope}.feat`).
task_contrast_cope_dict = {
    'emotion_faces': ('EMOTION', '1'),
    'emotion_faces_shapes': ('EMOTION', '3'),
    'emotion_shapes': ('EMOTION', '2'),
    'gambling_punish': ('GAMBLING', '1'),
    'gambling_punish_reward': ('GAMBLING', '3'),
    'gambling_reward': ('GAMBLING', '2'),
    'language_math': ('LANGUAGE', '1'),
    'language_math_story': ('LANGUAGE', '3'),
    'language_story': ('LANGUAGE', '2'),
    'motor_avg': ('MOTOR', '7'),
    'motor_cue': ('MOTOR', '1'),
    'motor_cue_avg': ('MOTOR', '8'),
    'motor_lf': ('MOTOR', '2'),
    'motor_lf_avg': ('MOTOR', '9'),
    'motor_lh': ('MOTOR', '3'),
    'motor_lh_avg': ('MOTOR', '10'),
    'motor_rf': ('MOTOR', '4'),
    'motor_rf_avg': ('MOTOR', '11'),
    'motor_rh': ('MOTOR', '5'),
    'motor_rh_avg': ('MOTOR', '12'),
    'motor_t': ('MOTOR', '6'),
    'motor_t_avg': ('MOTOR', '13'),
    'relational_match': ('RELATIONAL', '1'),
    'relational_match_rel': ('RELATIONAL', '3'),
    'relational_rel': ('RELATIONAL', '2'),
    'social_random': ('SOCIAL', '1'),
    'social_tom': ('SOCIAL', '2'),
    'social_tom_random': ('SOCIAL', '6'),
    'wm_0bk': ('WM', '10'),
    'wm_0bk_body': ('WM', '5'),
    'wm_0bk_face': ('WM', '6'),
    'wm_0bk_place': ('WM', '7'),
    'wm_0bk_tool': ('WM', '8'),
    'wm_2bk': ('WM', '9'),
    'wm_2bk_0bk': ('WM', '11'),
    'wm_2bk_body': ('WM', '1'),
    'wm_2bk_face': ('WM', '2'),
    'wm_2bk_place': ('WM', '3'),
    'wm_2bk_tool': ('WM', '4'),
    'wm_body': ('WM', '15'),
    'wm_body_avg': ('WM', '19'),
    'wm_face': ('WM', '16'),
    'wm_face_avg': ('WM', '20'),
    'wm_place': ('WM', '17'),
    'wm_place_avg': ('WM', '21'),
    'wm_tool': ('WM', '18'),
    'wm_tool_avg': ('WM', '22'),
}


In [None]:
# Store task names
# (iterating the dictionary yields its keys, i.e. the contrast names, in insertion order)
task_names = list(task_contrast_cope_dict)
np.savetxt(f"{data_dir}/task_names.txt", task_names, fmt="%s")


In [None]:
# Update the following address to link to your HCP data directory
# (it is the root of the task and resting-state file paths built in the export cells below)
hcp_data_directory = '/HCPmount'


In [None]:
%%time

# Save HCP data as NumPy binaries for fast access
for task_contrast_cope in tqdm(task_contrast_cope_dict):
    task, cope = task_contrast_cope_dict[task_contrast_cope]
    # Path template of the subject-level z-statistic file of this contrast
    file_name = (
        '{hcp_data_directory}/3T_tfMRI_{task}_analysis_s4/{subject}/MNINonLinear/Results'
        '/tfMRI_{task}/tfMRI_{task}_hp200_s4_level2_MSMAll.feat/GrayordinatesStats'
        '/cope{cope}.feat/zstat1.dtseries.nii'
    )
    # Resolve every subject's path once so the existence check and the load
    # use the same string.
    subject_files = [
        file_name.format(
            hcp_data_directory=hcp_data_directory,
            task=task,
            subject=subject,
            cope=cope,
        )
        for subject in subjects
    ]
    available_files = [path for path in subject_files if file_exists(path)]
    # Subjects without a file are left out of the saved array; report it,
    # because the rows then no longer line up one-to-one with `subjects`.
    n_missing = len(subject_files) - len(available_files)
    if n_missing:
        print(
            f"{task_contrast_cope}: {n_missing} of {len(subject_files)} subjects "
            "have no data file and are omitted from the saved array"
        )
    # First row of each file, restricted to the first 29696 columns
    temp = np.array([
        nib.load(path).get_fdata()[0,:29696]
        for path in available_files
    ])
    np.save(
        ensure_dir(f'{data_dir}/HCP/task/{task_contrast_cope}_S255.npy'),
        temp
    )


In [None]:
%%time

# Load the stored NumPy binaries to compute reconstruction accuracy
# Result layout: {task contrast: {basis label: array with one row per stored
# subject map, each row being the output of the accuracy function}}
subject_reconstruction_accuracies_atlas = {}
for task_contrast in tqdm(task_names, desc="task contrasts", position=0):
    # array written by the export cell above; iterated row by row below
    task_data = np.load(f'{data_dir}/HCP/task/{task_contrast}_S255.npy')
    subject_reconstruction_accuracies_atlas[task_contrast] = {}
    # dot-product reconstruction for every loaded basis
    for eigenmode in tqdm(list(eigenmodes), desc="eigenmodes", position=1, leave=False):
        subject_reconstruction_accuracies_atlas[task_contrast][eigenmode] = np.array(
            [
                compute_reconstruction_accuracy_atlas(
                    eigenmodes[eigenmode],
                    task_data[i],
                    left_HCPMMP1_mean,
                ) for i in tqdm(range(task_data.shape[0]), desc="subjects", position=2, leave=False)
            ]
        )
    # regression-based reconstruction, computed for this single basis only and
    # stored under the key '<basis label> prediction'
    eigenmode = "Geometry (Pang et al.)"
    subject_reconstruction_accuracies_atlas[task_contrast][f'{eigenmode} prediction'] = np.array(
        [
            compute_prediction_accuracy_atlas(
                eigenmodes[eigenmode],
                task_data[i],
                left_HCPMMP1_mean,
            ) for i in tqdm(range(task_data.shape[0]), desc="subjects", position=1, leave=False)
        ]
    )


In [None]:
# efficiently store values for future use
# (write_json serializes the NumPy arrays through MyNumpyEncoder; ensure_dir
# creates the output directory; the trailing semicolon hides the returned
# dictionary from the cell output)
write_json(
    subject_reconstruction_accuracies_atlas,
    ensure_dir(f'{data_dir}/reconstruction_accuracy/test/subject_reconstruction_accuracies_atlas_task_fMRI.json'),
);


In [None]:
# this cell can be used to load the stored accuracies
subject_reconstruction_accuracies_atlas = load_json(
    f'{data_dir}/reconstruction_accuracy/test/subject_reconstruction_accuracies_atlas_task_fMRI.json'
)

# JSON holds plain nested lists; convert every per-basis entry back to an array
for task, accuracies_by_basis in subject_reconstruction_accuracies_atlas.items():
    for basis, accuracies in accuracies_by_basis.items():
        accuracies_by_basis[basis] = np.array(accuracies)
        

### Resting-state FC

---


In [None]:
%%time

# Save HCP data as NumPy binaries for fast access
for subject in tqdm(subjects):
    file_name = (
        f"{hcp_data_directory}/3T_rfMRI_REST_fix/{subject}/MNINonLinear/"
        f"Results/rfMRI_REST1_LR/rfMRI_REST1_LR_Atlas_MSMAll_hp2000_clean.dtseries.nii"
    )
    # subjects without this file are skipped and get no stored binary
    if not file_exists(file_name):
        continue
    # keep all rows, restricted to the first 29696 columns
    temp = nib.load(file_name).get_fdata()[:,:29696]
    output_file = ensure_dir(f'{data_dir}/HCP/rest/{subject}_S255.npy')
    np.save(output_file, temp)


In [None]:
%%time

# Load the stored NumPy binaries to compute reconstruction coefficients
# Result layout: {subject: {basis label: coefficient array with one row per
# row of the stored rest data}}
rest_reconstruction_coeffs = {}
for subject in tqdm(subjects, desc="subjects", position=0):
    rest_reconstruction_coeffs[subject] = {}
    left_rest_data = np.load(f'{data_dir}/HCP/rest/{subject}_S255.npy')
    # Subtract the grand mean (one scalar over the whole array) a single
    # time; it does not depend on the basis or on the row being encoded.
    centered_rest_data = left_rest_data - left_rest_data.mean()

    # dot-product coefficients for every loaded basis
    # NOTE(review): unlike compute_reconstruction_accuracy_atlas, the modes are
    # not scaled to unit norm before the dot product — confirm this is intended.
    for eigenmode in tqdm(eigenmodes, desc="eigenmodes", position=1, leave=False):
        rest_reconstruction_coeffs[subject][eigenmode] = get_dot_product_coeffs(
            centered_rest_data,
            eigenmodes[eigenmode]
        )

    # regression coefficients, fitted row by row for this single basis only
    # and stored under the key '<basis label> prediction'
    eigenmode = "Geometry (Pang et al.)"
    rest_reconstruction_coeffs[subject][f'{eigenmode} prediction'] = np.array([
        get_regression_coeffs(
            centered_rest_data[rest_idx],
            eigenmodes[eigenmode]
        )
        for rest_idx in tqdm(range(left_rest_data.shape[0]), desc="indices", position=1, leave=False)
    ])


In [None]:
# efficiently store values for future use
for subject in tqdm(subjects, desc="subjects", position=0):
    # one file per (subject, basis) pair
    for eigenmode in tqdm(eigenmodes, desc="eigenmodes", position=1, leave=False):
        basis_coeffs = rest_reconstruction_coeffs[subject][eigenmode]
        write_np(
            basis_coeffs,
            ensure_dir(f'{data_dir}/reconstruction_coefficients/rest/{subject}_{eigenmode}.npy'),
        )
    # the regression-based coefficients exist for this basis only
    eigenmode = "Geometry (Pang et al.)"
    prediction_coeffs = rest_reconstruction_coeffs[subject][f'{eigenmode} prediction']
    write_np(
        prediction_coeffs,
        ensure_dir(f'{data_dir}/reconstruction_coefficients/rest/{subject}_{eigenmode} prediction.npy'),
    )


In [None]:
%%time

# Load the stored NumPy binaries to compute reconstruction accuracy
# Result layout: {subject: {basis label: array with one accuracy per number
# of modes used (1 .. N_modes)}}
subject_static_FC_reconstruction_accuracies_rest = {}
for subject in tqdm(subjects, desc="subjects", position=0):
    # compute actual FC
    left_rest_data = np.load(f'{data_dir}/HCP/rest/{subject}_S255.npy')
    left_static_FC = np.corrcoef(left_rest_data.dot(left_HCPMMP1_mean.T).T)
    # Indices of the strict upper triangle. Every reconstructed FC matrix
    # below has the same shape as the empirical one, so they are computed once.
    triu_indices = np.triu_indices_from(left_static_FC, k=1)
    left_static_FC_triu = left_static_FC[triu_indices]

    subject_static_FC_reconstruction_accuracies_rest[subject] = {}
    # dot-product reconstructions for every loaded basis
    for eigenmode in tqdm(list(eigenmodes), desc="eigenmodes", position=1, leave=False):
        subject_static_FC_reconstruction_accuracies_rest[subject][eigenmode] = np.array([
            # correlation between the empirical FC and the FC of the data
            # rebuilt from the first `length` modes
            np.corrcoef(
                left_static_FC_triu,
                np.corrcoef(
                    np.dot(
                        rest_reconstruction_coeffs[subject][eigenmode][:,:length],
                        eigenmodes[eigenmode].T[:length]
                    ).dot(left_HCPMMP1_mean.T).T
                )[triu_indices]
            )[0, 1]
            # N_modes is the number of modes kept when the bases were loaded
            for length in tqdm(range(1, N_modes + 1), desc="number of modes", position=2, leave=False)
        ])
    # regression-based reconstructions, available for this single basis only
    eigenmode = "Geometry (Pang et al.)"
    subject_static_FC_reconstruction_accuracies_rest[subject][f'{eigenmode} prediction'] = np.array([
        np.corrcoef(
            left_static_FC_triu,
            np.corrcoef(
                np.dot(
                    rest_reconstruction_coeffs[subject][f'{eigenmode} prediction'][:,:length],
                    eigenmodes[eigenmode].T[:length]
                ).dot(left_HCPMMP1_mean.T).T
            )[triu_indices]
        )[0, 1]
        for length in range(1, N_modes + 1)
    ])


In [None]:
# efficiently store values for future use
# (write_json serializes the NumPy arrays through MyNumpyEncoder; ensure_dir
# creates the output directory; the trailing semicolon hides the returned
# dictionary from the cell output)
write_json(
    subject_static_FC_reconstruction_accuracies_rest,
    ensure_dir(f'{data_dir}/reconstruction_accuracy/rest/subject_static_FC_reconstruction_accuracies_rest.json'),
);


In [None]:
# this cell can be used to load the stored accuracies
subject_static_FC_reconstruction_accuracies_rest = load_json(
    f'{data_dir}/reconstruction_accuracy/rest/subject_static_FC_reconstruction_accuracies_rest.json'
)

# JSON holds plain lists; convert every per-basis entry back to an array
for subject, accuracies_by_basis in subject_static_FC_reconstruction_accuracies_rest.items():
    for basis, accuracies in accuracies_by_basis.items():
        accuracies_by_basis[basis] = np.array(accuracies)
        

#### Systematic evaluations:

---

In addition to the cells described above, we conducted a systematic evaluation of the influence of several connectome reconstruction decisions and parameters on reconstruction accuracy. These evaluations were completed using automated scripts that were executed as parallel jobs on a high-performance computing platform (via slurm). The scripts used for this systematic evaluation can be found in the `scripts` folder of the repository.


#### Store eigenmodes for the Matlab script (Procrustes)

---

A supplementary evaluation via Procrustes transformation was performed to evaluate the similarity between various eigenmodes. Because this analysis was conducted with a Matlab script, the generated eigenmodes are exported here to a Matlab-compatible format. If you wish to reproduce this analysis, make sure to execute the Matlab script (`demo_procrustes`) included in the `scripts` directory.


In [15]:
# Export four of the loaded bases to a single .mat file; each dictionary key
# becomes the name of the corresponding variable in the file. ensure_dir
# creates the output directory if it does not exist yet.
sio.savemat(
    ensure_dir(f"{data_dir}/eigenmodes/matlab/eigenmodes.mat"),
    {
        'geometry_eigenmodes': eigenmodes['Geometry (Pang et al.)'],
        'edr_eigenmodes': eigenmodes['EDR (Pang et al.)'],
        'pang_connectome_eigenmodes': eigenmodes['Connectome (Pang et al.)'],
        'our_connectome_eigenmodes': eigenmodes['Our connectome'],
    }
)
