In [1]:
import numpy as np
import scipy
import scipy.io as sio
from pyriemann.utils.mean import mean_covariance
import sklearn.datasets
import sklearn.decomposition
from scipy.spatial import distance

np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [2]:

def get_data():
    '''
    Navigates through file tree and extracts FCs with optional reconstruction
    '''
    # Yeo ordering
    fname = '../data/100_unrelated.csv'
    yeo = True
    if yeo:
        yeo_order = list(sio.loadmat("../data/yeo_RS7_N374.mat",
                                     squeeze_me=True,
                                     struct_as_record=False)['yeoOrder'] - 1)
    # Load subject ID and task names
    subjectids = np.loadtxt(fname, dtype=np.int)
    nSubj = len(subjectids)
    tasks = ['rfMRI_REST1_LR', 'rfMRI_REST1_RL', 'rfMRI_REST2_LR',
             'rfMRI_REST2_RL', 'tfMRI_EMOTION_LR', 'tfMRI_EMOTION_RL',
             'tfMRI_GAMBLING_LR', 'tfMRI_GAMBLING_RL', 'tfMRI_LANGUAGE_LR',
             'tfMRI_LANGUAGE_RL', 'tfMRI_MOTOR_LR', 'tfMRI_MOTOR_RL',
             'tfMRI_RELATIONAL_LR', 'tfMRI_RELATIONAL_RL', 'tfMRI_SOCIAL_LR',
             'tfMRI_SOCIAL_RL', 'tfMRI_WM_LR', 'tfMRI_WM_RL']
    M = {}
    # Walk through file tree and extract FCs
    for task in tasks:
        masterFC_dir = '../data/results_SIFT2'
        restingstatename = 'fMRI/' + task + '/FC/FC_glasser_subc_GS_bp_z.mat'
        task_matrices = []
        for subject in subjectids:
            filename = masterFC_dir + '/' + \
                str(subject) + '/' + restingstatename
            mat = sio.loadmat(filename, squeeze_me=True,
                              struct_as_record=False)
            A_orig = mat['FC']
            if yeo:
                A_orig = A_orig[np.ix_(yeo_order, yeo_order)]
            np.fill_diagonal(A_orig, 1)
            task_matrices.append(A_orig)
        M[task] = np.array(task_matrices)
    test = np.concatenate((M['rfMRI_REST1_LR'], M['tfMRI_EMOTION_LR'],
                           M['tfMRI_GAMBLING_LR'], M['tfMRI_LANGUAGE_LR'],
                           M['tfMRI_MOTOR_LR'], M['tfMRI_RELATIONAL_LR'],
                           M['tfMRI_SOCIAL_LR'], M['tfMRI_WM_LR']))
    retest = np.concatenate((M['rfMRI_REST1_RL'], M['tfMRI_EMOTION_RL'],
                             M['tfMRI_GAMBLING_RL'], M['tfMRI_LANGUAGE_RL'],
                             M['tfMRI_MOTOR_RL'], M['tfMRI_RELATIONAL_RL'],
                             M['tfMRI_SOCIAL_RL'], M['tfMRI_WM_RL']))
    del M
    all_FC = np.concatenate((test, retest))
    del test, retest
    return all_FC, nSubj


def q1invm(q1, eig_thresh=0):
    U, S, V = scipy.linalg.svd(q1)
    s = np.diag(S)
    s[s < eig_thresh] = eig_thresh
    S = np.diag(s ** (-1 / 2))
    Q1_inv_sqrt = U * S * np.transpose(V)
    Q1_inv_sqrt = (Q1_inv_sqrt + np.transpose(Q1_inv_sqrt)) / 2
    return Q1_inv_sqrt


def qlog(q):
    U, S, V = scipy.linalg.svd(q)
    s = np.diag(S)
    S = np.diag(np.log(s))
    Q = U * S * np.transpose(V)
    return Q


def tangential(all_FC, ref):
    # Regularization for riemann
    if ref in ['riemann', 'kullback_sym', 'logeuclid']:
        print("Adding regularization!")
        eye_mat = np.eye(all_FC.shape[1])
        scaling_mat = np.repeat(eye_mat[None, ...], all_FC.shape[0], axis=0)
        all_FC += scaling_mat
    Cg = mean_covariance(all_FC, metric=ref)
    Q1_inv_sqrt = q1invm(Cg)
    Q = Q1_inv_sqrt @ all_FC @ Q1_inv_sqrt
    tangent_FC = np.array([qlog(a) for a in Q])
    return tangent_FC


def pca_recon(FC, pctComp=None):
    '''
    Reconstructs FC based on number of principle components
    '''
    if pctComp is None:
        return FC
    FC = np.reshape(FC, (FC.shape[0], -1))
    nComp = int(FC.shape[0] * pctComp)
    mu = np.mean(FC, axis=0)
    pca_rest = sklearn.decomposition.PCA()
    pca_rest.fit(FC)
    SCORES = pca_rest.transform(FC)[:, :nComp]
    COEFFS = pca_rest.components_[:nComp, :]
    FC_recon = np.dot(SCORES, COEFFS)
    del SCORES, COEFFS
    FC_recon += mu
    FC_recon = np.reshape(FC_recon, (FC.shape[0], 374, 374))
    return FC_recon


   

In [4]:
# Navigate tree and get raw correlation FC matrices
print("Importing all correlation matrices...", end=" ")
all_FC, nSubj = get_data()
print("All FCs successfully loaded!\n")

Importing all correlation matrices... All FCs successfully loaded!



In [19]:
classifier = 'subject'
if classifier == 'task':
    labels = np.tile(np.repeat(np.arange(0, 8), nSubj), 2)
    indices = np.random.permutation(nSubj)
    train_idx = indices[:int(0.80 * nSubj)]
    test_idx = indices[int(0.8 * nSubj):]
    train_idx_all, test_idx_all = np.empty(0, dtype=int), np.empty(0, dtype=int)
    for fc in np.arange(0, 16):
        train_idx_all = np.concatenate((train_idx_all, (fc * 95) + train_idx)).astype(int)
        test_idx_all = np.concatenate((test_idx_all, (fc * 95) + test_idx)).astype(int)
    train_idx = train_idx_all
    test_idx = test_idx_all
elif classifier == 'subject':
    labels = np.tile(np.tile(np.arange(0,nSubj),8),2)
    indices = np.random.permutation(all_FC.shape[0])
    train_idx = indices[:int(0.80 * all_FC.shape[0])]
    test_idx = indices[int(0.80 * all_FC.shape[0]):]
else:
    pass

train_labels = labels[train_idx]
test_labels = labels[test_idx]

In [11]:
accuracies = {}
for ref in ['euclid', 'harmonic', 'logeuclid', 'kullback_sym', 'riemann', 'none']:
    all_FC, nSubj = get_data()
    if ref != 'none':
        all_FC = tangential(all_FC, ref)
    train_FCs = np.zeros((len(train_idx),70125), dtype=np.float32)
    for idx, mat in enumerate(all_FC[train_idx]):
        train_FCs[idx] = mat[np.triu_indices(mat.shape[0], k=0)]
    test_FCs = np.zeros((len(test_idx),70125), dtype=np.float32)
    for idx, mat in enumerate(all_FC[test_idx]):
        test_FCs[idx] = mat[np.triu_indices(mat.shape[0], k=0)]
        
    for distance_method in ['corr', 'cosine', 'euclidean']:
        print(f'Testing {ref} and {distance_method}...')
        num_correct = 0
        for idx1, mat1 in enumerate(test_FCs):
            min_dist = np.inf
            true_label = labels[test_idx[idx1]]
            for idx2, mat2 in enumerate(train_FCs):
                if distance_method == 'corr':
                    temp_dist = distance.correlation(mat1, mat2)
                elif distance_method == 'cosine':
                    temp_dist = distance.cosine(mat1, mat2)
                else:
                    temp_dist = distance.euclidean(mat1, mat2)
                if temp_dist < min_dist:
                    min_dist = temp_dist
                    best_idx = train_idx[idx2]
            pred_label = labels[best_idx]
            if pred_label == true_label:
                num_correct += 1
        accuracy = num_correct / len(test_idx)
        print(accuracy)
        accuracies[ref+"_"+distance_method] = accuracy

Testing euclid and corr...
0.019736842105263157
Testing euclid and cosine...
0.019736842105263157
Testing euclid and euclidean...
0.019736842105263157
Testing harmonic and corr...
0.003289473684210526
Testing harmonic and cosine...
0.003289473684210526
Testing harmonic and euclidean...
0.006578947368421052
Adding regularization!
Testing logeuclid and corr...
0.039473684210526314
Testing logeuclid and cosine...
0.039473684210526314
Testing logeuclid and euclidean...
0.03289473684210526
Adding regularization!
Testing kullback_sym and corr...
0.046052631578947366
Testing kullback_sym and cosine...
0.049342105263157895
Testing kullback_sym and euclidean...
0.05592105263157895
Adding regularization!
Testing riemann and corr...
0.05263157894736842
Testing riemann and cosine...
0.05263157894736842
Testing riemann and euclidean...
0.05592105263157895
Testing none and corr...
0.7335526315789473
Testing none and cosine...
0.7335526315789473
Testing none and euclidean...
0.6085526315789473


In [12]:
import csv

a_file = open(f"../results/distances_{classifier}.csv", "w")

writer = csv.writer(a_file)
for key, value in accuracies.items():
    writer.writerow([key, value])
    
a_file.close()

### KNN Approach

In [20]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
accuracies = {}
for ref in ['euclid', 'harmonic', 'logeuclid', 'kullback_sym', 'riemann', 'none']:
    all_FC, nSubj = get_data()
    if ref != 'none':
        all_FC = tangential(all_FC, ref)
    train_FCs = np.zeros((len(train_idx),70125), dtype=np.float32)
    for idx, mat in enumerate(all_FC[train_idx]):
        train_FCs[idx] = mat[np.triu_indices(mat.shape[0], k=0)]
    test_FCs = np.zeros((len(test_idx),70125), dtype=np.float32)
    for idx, mat in enumerate(all_FC[test_idx]):
        test_FCs[idx] = mat[np.triu_indices(mat.shape[0], k=0)]
        
    for distance_method in ['correlation', 'cosine', 'euclidean']:
        print(f'Testing {ref} reference with {distance_method} distance...')
        neigh = KNeighborsClassifier(n_neighbors=30, metric=distance_method)
        neigh.fit(train_FCs, train_labels)
        predicted = neigh.predict(test_FCs)
        acc = accuracy_score(test_labels, predicted)
        print(acc)
        accuracies[ref+"_"+distance_method] = acc

Testing euclid reference with correlation distance...
0.0
Testing euclid reference with cosine distance...
0.0
Testing euclid reference with euclidean distance...
0.0
Testing harmonic reference with correlation distance...
0.0
Testing harmonic reference with cosine distance...
0.0
Testing harmonic reference with euclidean distance...
0.006578947368421052
Adding regularization!
Testing logeuclid reference with correlation distance...
0.049342105263157895
Testing logeuclid reference with cosine distance...
0.04276315789473684
Testing logeuclid reference with euclidean distance...
0.046052631578947366
Adding regularization!
Testing kullback_sym reference with correlation distance...
0.05263157894736842
Testing kullback_sym reference with cosine distance...
0.05921052631578947
Testing kullback_sym reference with euclidean distance...
0.05921052631578947
Adding regularization!
Testing riemann reference with correlation distance...
0.03618421052631579
Testing riemann reference with cosine di

In [21]:
import csv
a_file = open(f"../results/knn_distances_{classifier}.csv", "w")

writer = csv.writer(a_file)
for key, value in accuracies.items():
    writer.writerow([key, value])
    
a_file.close()