In [1]:
import numpy as np
import pickle
import scipy
import h5py
import scipy.io as sio
from pyriemann.utils.mean import mean_covariance
import sklearn.datasets
import sklearn.decomposition
from scipy.spatial import distance

np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [2]:
def utri2mat(utri):
    n = int(-1 + np.sqrt(1 + 8 * len(utri))) // 2
    iu1 = np.triu_indices(n+1,1)
    ret = np.empty((n+1, n+1))
    ret[iu1] = utri
    ret.T[iu1] = utri
    np.fill_diagonal(ret, 1)
    return ret

def get_data(test_idx, retest_idx, parc, twin='DZ'):
    '''
    Navigates through file tree and extracts FCs with optional reconstruction
    '''
    # Yeo ordering
    master_dir = '../data/twins'
    tasks = ['rest', 'emotion', 'gambling', 'language', 'motor', 'relational', 'social', 'wm']
    FC, test, retest = {}, {}, {}
    for task in tasks:
        temp_parc = {}
        task_dir = master_dir + f'/{task.upper()}/origmat_{twin}_schaefer{parc}_tests.mat'
        f = h5py.File(task_dir, 'r')
        for k, v in f.items():
            temp_parc[k] = np.array(v)
        test[task] = temp_parc['orig_mat'][test_idx[task]]
        temp_parc = {}
        task_dir = master_dir + f'/{task.upper()}/origmat_{twin}_schaefer{parc}_retests.mat'
        f = h5py.File(task_dir, 'r')
        for k, v in f.items():
            temp_parc[k] = np.array(v)
        retest[task] = temp_parc['orig_mat'][retest_idx[task]]
        FC[task] = np.concatenate((test[task], retest[task])) 
        FC[task] = test[task]
    return FC


def q1invm(q1, eig_thresh=0):
    U, S, V = scipy.linalg.svd(q1)
    s = np.diag(S)
    s[s < eig_thresh] = eig_thresh
    S = np.diag(s ** (-1 / 2))
    Q1_inv_sqrt = U * S * np.transpose(V)
    Q1_inv_sqrt = (Q1_inv_sqrt + np.transpose(Q1_inv_sqrt)) / 2
    return Q1_inv_sqrt


def qlog(q):
    U, S, V = scipy.linalg.svd(q)
    s = np.diag(S)
    S = np.diag(np.log(s))
    Q = U * S * np.transpose(V)
    return Q


def tangential(all_FC, ref):
    # Regularization for riemann
    if ref in ['riemann', 'kullback_sym', 'logeuclid']: 
        print("Adding regularization!")
        eye_mat = np.eye(all_FC.shape[1])
        scaling_mat = np.repeat(eye_mat[None, ...], all_FC.shape[0], axis=0)
        all_FC += scaling_mat
    u, s, vh = np.linalg.svd(all_FC[0], full_matrices=True)
    Cg = mean_covariance(all_FC, metric=ref)
    Q1_inv_sqrt = q1invm(Cg)
    Q = Q1_inv_sqrt @ all_FC @ Q1_inv_sqrt
    tangent_FC = np.array([qlog(a) for a in Q])
    return tangent_FC


def pca_recon(FC, pctComp=None):
    '''
    Reconstructs FC based on number of principle components
    '''
    if pctComp is None:
        return FC
    nRegions = FC.shape[1]
    FC = np.reshape(FC, (FC.shape[0], -1))
    nComp = int(FC.shape[0] * pctComp)
    mu = np.mean(FC, axis=0)
    pca_rest = sklearn.decomposition.PCA()
    pca_rest.fit(FC)
    SCORES = pca_rest.transform(FC)[:, :nComp]
    COEFFS = pca_rest.components_[:nComp, :]
    FC_recon = np.dot(SCORES, COEFFS)
    del SCORES, COEFFS
    FC_recon += mu
    FC_recon = np.reshape(FC_recon, (FC.shape[0], nRegions, nRegions))
    return FC_recon

def utri2mat(utri):
    n = int(-1 + np.sqrt(1 + 8 * len(utri))) // 2
    iu1 = np.tril_indices(n+1,-1)
    ret = np.empty((n+1, n+1))
    ret[iu1] = utri
    ret.T[iu1] = utri
    np.fill_diagonal(ret, 1)
    return ret

## Twin Subject ID Matching

In [3]:
twin = "MZ"
tasks = ['rest', 'emotion', 'gambling', 'language', 'motor', 'relational', 'social', 'wm']
test_subj_ids, retest_subj_ids = {}, {}
for task in tasks:
    test_subj_vec, retest_subj_vec = {}, {}
    master_dir = '../data/twins'
    test_subj_dir = master_dir + f'/{task.upper()}/subjvec_test_{twin}_schaefer300_retests.mat'
    retest_subj_dir = master_dir + f'/{task.upper()}/subjvec_retest_{twin}_schaefer300_retests.mat'
    f = h5py.File(test_subj_dir, 'r')
    for k, v in f.items():
        test_subj_vec[k] = np.array(v)
    f = h5py.File(retest_subj_dir, 'r')
    for k, v in f.items():
        retest_subj_vec[k] = np.array(v)
    test_subj_ids[task] = test_subj_vec['subj_vec'].astype(int)
    retest_subj_ids[task] = retest_subj_vec['subj_vec'].astype(int)

In [4]:
test_twin1, retest_twin1 = {}, {}
for task in tasks:
    test_twin1[task]= set(test_subj_ids[task][0])
    retest_twin1[task] = set(retest_subj_ids[task][0])

In [5]:
# How many twin pairs are common between tasks?
common_twins = set.intersection(test_twin1['rest'], test_twin1['emotion'], test_twin1['gambling'],test_twin1['language'], test_twin1['motor'],test_twin1['relational'], test_twin1['social'],test_twin1['wm'],
                                retest_twin1['rest'], retest_twin1['emotion'], retest_twin1['gambling'],retest_twin1['language'], retest_twin1['motor'],retest_twin1['relational'], retest_twin1['social'],retest_twin1['wm'])
num_common_twins = len(common_twins)
print(f'Number of common twins over all tasks, test/retest: {num_common_twins}')

Number of common twins over all tasks, test/retest: 106


In [6]:
# Get indices of each task only for the common twins
test_twin_ind, retest_twin_ind = {}, {}
for task in tasks:
    test_twin_ind[task]= [2*i for i, val in enumerate(test_twin1[task]) if val in common_twins]
    test_twin_ind[task] = test_twin_ind[task] + [x+1 for x in test_twin_ind[task]]
    test_twin_ind[task].sort()
    retest_twin_ind[task]= [2*i for i, val in enumerate(retest_twin1[task]) if val in common_twins] 
    retest_twin_ind[task] = retest_twin_ind[task] + [x+1 for x in retest_twin_ind[task]]
    retest_twin_ind[task].sort()
    

In [19]:
common_FCs = get_data(test_twin_ind, retest_twin_ind, 100, twin=twin)
labels = np.repeat(np.arange(0,common_FCs['rest'].shape[0]/2),2)
labels = labels.astype(int)
train_idx = np.arange(0,common_FCs['rest'].shape[0],2)
train_idx = train_idx.astype(int)
test_idx = np.arange(1, int(common_FCs['rest'].shape[0]), 2)
test_idx = test_idx.astype(int)
train_labels = labels[train_idx]
test_labels = labels[test_idx]

In [22]:
nFCs = common_FCs['rest'].shape[0]

### KNN Approach

In [30]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
accuracies = {}
lengths = {100:6441, 200:22791, 300:49141, 400:85491, 500:131841}
for parc in np.arange(100, 600, 100):
    common_FCs = get_data(test_twin_ind, retest_twin_ind, parc, twin=twin)
    for task in ['rest', 'emotion', 'gambling', 'language', 'motor', 'relational', 'social', 'wm']:
        print(f'Analyzing {task}...')
        task_FCs = common_FCs[task]
        # Do optional transformations
        for ref in ['Raw FC']: #, 'pca', 'euclid', 'harmonic']:
            # Start with a fresh batch of FCs
            FC = np.zeros((task_FCs.shape[0], parc+14, parc+14))
            for idx, utri in enumerate(task_FCs):
                FC[idx] = utri2mat(utri)
            # Do optional transformations
            if ref != 'Raw FC' and ref != 'pca':
                FC = tangential(FC, ref)
            elif ref == 'pca':
                print('Reconstructing with PCA')
                FC = pca_recon(FC, 0.3)
            else:
                pass
            # Convert back into flattened utriu vectors
            vec_FCs = np.zeros(
                (nFCs, lengths[parc]), dtype=np.float32)
            for idx, mat in enumerate(FC):
                vec_FCs[idx] = mat[np.triu_indices(mat.shape[0], k=1)]
            # Split into train and test sets
            train_FCs = vec_FCs[train_idx]
            test_FCs = vec_FCs[test_idx]
            # KNN Classifier
            neigh = KNeighborsClassifier(n_neighbors=1, metric='correlation')
            neigh.fit(train_FCs, train_labels)
            predicted = neigh.predict(test_FCs)
            acc = accuracy_score(test_labels, predicted)
            print(f'{acc:.5f} accuracy')
            accuracies[f"{parc}:{task}"] = acc

Analyzing rest...
0.32075 accuracy
Analyzing emotion...
0.09434 accuracy
Analyzing gambling...
0.15094 accuracy
Analyzing language...
0.29245 accuracy
Analyzing motor...
0.07547 accuracy
Analyzing relational...
0.08491 accuracy
Analyzing social...
0.20755 accuracy
Analyzing wm...
0.25472 accuracy
Analyzing rest...
0.40566 accuracy
Analyzing emotion...
0.10377 accuracy
Analyzing gambling...
0.17925 accuracy
Analyzing language...
0.39623 accuracy
Analyzing motor...
0.10377 accuracy
Analyzing relational...
0.15094 accuracy
Analyzing social...
0.31132 accuracy
Analyzing wm...
0.36792 accuracy
Analyzing rest...
0.43396 accuracy
Analyzing emotion...
0.09434 accuracy
Analyzing gambling...
0.23585 accuracy
Analyzing language...
0.40566 accuracy
Analyzing motor...
0.08491 accuracy
Analyzing relational...
0.18868 accuracy
Analyzing social...
0.33962 accuracy
Analyzing wm...
0.41509 accuracy
Analyzing rest...
0.45283 accuracy
Analyzing emotion...
0.12264 accuracy
Analyzing gambling...
0.22642 acc

In [31]:
import csv
a_file = open(f"../results/twins/{twin}_twin_parcellations_single.csv", "w")

writer = csv.writer(a_file)
for key, value in accuracies.items():
    writer.writerow([key, value])
    
a_file.close()

### Tangent Space FCs

In [25]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
accuracies = {}
twin = 'MZ'
lengths = {100:6441, 200:22791, 300:49141, 400:85491, 500:131841}
for parc in np.arange(100,500,100):
    print(f'Using {parc} region parcellation...')
    for task in ['rest', 'emotion', 'gambling', 'language', 'motor', 'relational', 'social', 'wm']:
        print(f'Analyzing {task}...')
        for ref in ['logeuclid']:
            # Start with a fresh batch of FCs
            with open(f'../data/tangent_fcs/twins/{task}/{twin}_{parc}_{ref}.pickle', 'rb') as f:
                FC = pickle.load(f)
                FC = FC[:211]
            # Convert back into flattened utriu vectors
            vec_FCs = np.zeros((nFCs, lengths[parc]), dtype=np.float32)
            for idx, mat in enumerate(FC):
                vec_FCs[idx] = mat[np.triu_indices(mat.shape[0], k=1)]
            # Split into train and test sets
            train_FCs = vec_FCs[train_idx]
            test_FCs = vec_FCs[test_idx]
            # KNN Classifier
            neigh = KNeighborsClassifier(n_neighbors=1, metric='correlation')
            neigh.fit(train_FCs, train_labels)
            predicted = neigh.predict(test_FCs)
            acc = accuracy_score(test_labels, predicted)
            print(f'{acc:.5f} accuracy')
            accuracies[f"{parc}:{task}"] = acc

Using 100 region parcellation...
Analyzing rest...
0.65094 accuracy
Analyzing emotion...
0.39623 accuracy
Analyzing gambling...
0.46226 accuracy
Analyzing language...
0.67925 accuracy
Analyzing motor...
0.41509 accuracy
Analyzing relational...
0.48113 accuracy
Analyzing social...
0.59434 accuracy
Analyzing wm...
0.66038 accuracy
Using 200 region parcellation...
Analyzing rest...
0.82075 accuracy
Analyzing emotion...
0.57547 accuracy
Analyzing gambling...
0.73585 accuracy
Analyzing language...
0.86792 accuracy
Analyzing motor...
0.71698 accuracy
Analyzing relational...
0.74528 accuracy
Analyzing social...
0.80189 accuracy
Analyzing wm...
0.83962 accuracy
Using 300 region parcellation...
Analyzing rest...


KeyboardInterrupt: 

In [64]:
import csv
a_file = open(f"../results/twins/{twin}_twin_logeuclid.csv", "w")

writer = csv.writer(a_file)
for key, value in accuracies.items():
    writer.writerow([key, value])
    
a_file.close()