In [None]:
import numpy as np
import pickle
import scipy
import h5py
import scipy.io as sio
from pyriemann.utils.mean import mean_covariance
import sklearn.datasets
import sklearn.decomposition
from scipy.spatial import distance

# Tell NumPy to ignore (rather than warn about) floating-point
# divide-by-zero and invalid-operation errors raised by later computations.
np.seterr(divide='ignore', invalid='ignore')

In [None]:
def utri2mat(utri):
    '''
    Rebuild a symmetric square matrix with a unit diagonal from the
    flattened values of its strict upper triangle.

    The values are placed in the order produced by np.triu_indices(size, 1)
    (row by row across the upper triangle) and mirrored below the diagonal.

    NOTE(review): a second function with this same name is defined later in
    this cell (it uses lower-triangle ordering) and replaces this one once
    the cell has run.

    Parameters
    ----------
    utri : sequence of float
        Off-diagonal values; its length must equal size * (size - 1) / 2.

    Returns
    -------
    numpy.ndarray
        A (size, size) float array.
    '''
    # Solve len(utri) = size * (size - 1) / 2 for the matrix side length.
    size = int(-1 + np.sqrt(1 + 8 * len(utri))) // 2 + 1
    rows, cols = np.triu_indices(size, 1)
    # Starting from ones leaves the diagonal at 1 once both triangles are set.
    mat = np.ones((size, size))
    mat[rows, cols] = utri
    mat[cols, rows] = utri
    return mat

def _load_last_item(path):
    '''Open an HDF5 file read-only and return its last top-level item as an ndarray.'''
    data = None
    # The context manager closes the file even if a conversion fails; the
    # array is materialised inside the block, before the handle is released.
    with h5py.File(path, 'r') as f:
        for v in f.values():
            data = np.array(v)
    if data is None:
        raise KeyError(f'no items found in {path}')
    return data


def get_data(parc, twin='DZ'):
    '''
    Navigates through the file tree and extracts test/retest FCs.

    For every task, reads
    ../data/twins/<TASK>/origmat_<twin>_schaefer<parc>_tests.mat and
    ..._retests.mat with h5py, and concatenates the test array followed by
    the retest array along the first axis. When a file holds several
    top-level items, the last one iterated is the one kept.

    Parameters
    ----------
    parc : value interpolated into the file name after "schaefer".
    twin : value interpolated into the file name after "origmat_"
        (default 'DZ').

    Returns
    -------
    dict
        Maps each task name to its concatenated test + retest array.

    Raises
    ------
    KeyError
        If a file contains no top-level items.
    '''
    master_dir = '../data/twins'
    tasks = ['rest', 'emotion', 'gambling', 'language', 'motor', 'relational', 'social', 'wm']
    FC = {}
    for task in tasks:
        prefix = master_dir + f'/{task.upper()}/origmat_{twin}_schaefer{parc}'
        test = _load_last_item(prefix + '_tests.mat')
        retest = _load_last_item(prefix + '_retests.mat')
        FC[task] = np.concatenate((test, retest))
    return FC

def pca_recon(FC, pctComp=None):
    '''
    Reconstructs FCs from a reduced number of principal components.

    Each FC is flattened to one row, a full PCA is fitted across rows, and
    the rows are rebuilt from the leading int(n_rows * pctComp) components
    plus the per-feature mean.

    Parameters
    ----------
    FC : numpy.ndarray
        Stack of matrices; reshaped back to
        (FC.shape[0], FC.shape[1], FC.shape[1]) on return.
    pctComp : float or None
        Fraction of the row count used as the number of components kept.
        None returns FC untouched.

    Returns
    -------
    numpy.ndarray
        The reconstructed stack, or FC itself when pctComp is None.
    '''
    if pctComp is None:
        return FC

    n_rows, n_regions = FC.shape[0], FC.shape[1]
    flat = np.reshape(FC, (n_rows, -1))
    n_keep = int(n_rows * pctComp)

    model = sklearn.decomposition.PCA()
    model.fit(flat)
    scores = model.transform(flat)[:, :n_keep]
    basis = model.components_[:n_keep, :]

    # transform() works on centred data, so the feature mean is added back.
    recon = np.dot(scores, basis)
    recon += np.mean(flat, axis=0)
    return np.reshape(recon, (n_rows, n_regions, n_regions))

def utri2mat(utri):
    '''
    Rebuild a symmetric square matrix with a unit diagonal from a flat
    vector of its off-diagonal values.

    The values are placed in the order produced by np.tril_indices(size, -1)
    (row by row across the strict lower triangle) and mirrored above the
    diagonal.

    NOTE(review): this redefines a same-named function from earlier in the
    cell that uses upper-triangle ordering; this definition is the one in
    effect afterwards.

    Parameters
    ----------
    utri : sequence of float
        Off-diagonal values; its length must equal size * (size - 1) / 2.

    Returns
    -------
    numpy.ndarray
        A (size, size) float array.
    '''
    # Solve len(utri) = size * (size - 1) / 2 for the matrix side length.
    size = int(-1 + np.sqrt(1 + 8 * len(utri))) // 2 + 1
    lower_r, lower_c = np.tril_indices(size, -1)
    # The identity supplies the unit diagonal; both triangles are overwritten.
    out = np.eye(size)
    out[lower_r, lower_c] = utri
    out[lower_c, lower_r] = utri
    return out

def get_schaefer(parc, ref='original'):
    '''
    Load the pickled FC array for one parcellation / reference combination.

    ref values 'original' and 'geodesic' (case-insensitive) read
    ../data/schaefer/schaefer<parc>.pickle; any other ref reads
    ../data/tangent_fcs/schaefer/schaefer<parc>_<ref>.pickle, using ref
    exactly as given.

    Returns
    -------
    tuple
        (all_FC, nSubj) where nSubj is int(all_FC.shape[0] / 16).
    '''
    if ref.lower() in ('original', 'geodesic'):
        pickle_path = f'../data/schaefer/schaefer{parc}.pickle'
    else:
        pickle_path = f'../data/tangent_fcs/schaefer/schaefer{parc}_{ref}.pickle'

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this on trusted data.
    with open(pickle_path, 'rb') as handle:
        all_FC = pickle.load(handle)

    return all_FC, int(all_FC.shape[0] / 16)

def get_task_fcs(parc, ref='original'):
    '''
    Outputs a dictionary of each group of task FCs.

    Loads the stacked FC array via get_schaefer(parc, ref) and, for each
    task, concatenates a block of nSubj rows from the first half of the
    stack with the matching block of nSubj rows from the second half.

    The block size and the offset of the second half are derived from the
    nSubj value returned by get_schaefer instead of the previously
    hard-coded 424 and 3392 (= 8 * 424), so the slicing follows the size of
    the loaded array. For a 6784-row array the result is unchanged.

    NOTE(review): presumably the first half holds the test sessions and the
    second half the retest sessions, both ordered by task - confirm against
    how the pickles were written.

    Returns
    -------
    dict
        Maps each task name to an array with 2 * nSubj rows.
    '''
    tasks = ['rest', 'emotion', 'gambling', 'language', 'motor', 'relational', 'social', 'wm']
    FCs, nSubj = get_schaefer(parc, ref)
    # Rows of the second session start after one block per task.
    second_half = len(tasks) * nSubj
    taskFCs = {}
    for i, task in enumerate(tasks):
        start = i * nSubj
        stop = start + nSubj
        taskFCs[task] = np.concatenate(
            (FCs[start:stop], FCs[second_half + start:second_half + stop])
        )
    return taskFCs

In [None]:
# Load the 100-parcel FCs once to get the subject count, then split them by
# task; nFCs is the number of FCs per task and is reused by later cells.
FCs, nSubj = get_schaefer(parc=100)
taskFCs = get_task_fcs(parc=100)
nFCs = len(taskFCs['rest'])

In [None]:
# Label each subject: ids 0..nSubj-1 repeated twice, once per half of the
# per-task stack.
labels = np.tile(np.arange(nSubj), 2).astype(int)

# Split the per-task stack into a first half and a second half. Integer
# arithmetic keeps the index arrays integral from the start instead of
# building float ranges and casting them afterwards.
n_half = taskFCs['rest'].shape[0] // 2
train_idx = np.arange(n_half)
test_idx = np.arange(n_half, taskFCs['rest'].shape[0])
train_labels = labels[train_idx]
test_labels = labels[test_idx]

### Subject Identification

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Maps "parc:task:ref" to the mean of the two identification accuracies
# (train -> test and test -> train).
accuracies = {}
# Number of strict-upper-triangle entries of one FC matrix per parcellation
# (e.g. 6441 = 114 * 113 / 2).
# NOTE(review): the matrices therefore look to have parc + 14 regions - confirm.
lengths = {100:6441, 200:22791, 300:49141, 400:85491, 500:131841}
# NOTE(review): np.arange(100, 500, 100) stops at 400, so the 500 entry of
# `lengths` is never used - confirm that this is intended.
for parc in np.arange(100, 500, 100):
    for ref in ['original', 'euclid', 'harmonic', 'kullback_sym', 'logeuclid', 'riemann']:
        print(f'Analyzing {parc} with {ref}...')
        FCs = get_task_fcs(parc, ref)
        for task in ['rest', 'emotion', 'gambling', 'language', 'motor', 'relational', 'social', 'wm']:
            task_FCs = FCs[task]
            # Convert back into flattened upper-triangular vectors. The index
            # pair is the same for every matrix of a task, so build it once.
            triu_idx = np.triu_indices(task_FCs.shape[1], k=1)
            vec_FCs = np.zeros((nFCs, lengths[parc]), dtype=np.float32)
            for idx, mat in enumerate(task_FCs):
                vec_FCs[idx] = mat[triu_idx]
            # Split into train and test sets
            train_FCs = vec_FCs[train_idx]
            test_FCs = vec_FCs[test_idx]
            # 1-nearest-neighbour classifier with correlation distance
            neigh = KNeighborsClassifier(n_neighbors=1, metric='correlation')
            neigh.fit(train_FCs, train_labels)
            predicted = neigh.predict(test_FCs)
            acc1 = accuracy_score(test_labels, predicted)
            # Swap the roles of the two halves.
            neigh.fit(test_FCs, test_labels)
            predicted = neigh.predict(train_FCs)
            # These predictions are for train_FCs, so they are scored against
            # train_labels (previously test_labels).
            acc2 = accuracy_score(train_labels, predicted)
            print(f'{task}: {acc1:.5f} and {acc2:.5f}')
            accuracies[f"{parc}:{task}:{ref}"] = (acc1 + acc2) / 2

In [None]:
import csv

# Write one "key,accuracy" row per entry of `accuracies`.
# The `with` block closes the file even if a write fails, and newline=""
# lets the csv module control line endings itself, as its documentation
# requires for files passed to csv.writer.
with open("../results/subject/subject_identification.csv", "w", newline="") as a_file:
    writer = csv.writer(a_file)
    for key, value in accuracies.items():
        writer.writerow([key, value])