In [1]:
import os
import sys
import math
import numpy as np
import pandas as pd
import scipy
from scipy.io import loadmat
import bct.algorithms
import random
import pickle
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from ripser import ripser
from persim import plot_diagrams
from sklearn.utils import resample
from scipy import stats
import pickle
%matplotlib inline

In [2]:
hcp_data = loadmat("../HCP_networkdata_culled.mat")
print(hcp_data.keys())

dict_keys(['__header__', '__version__', '__globals__', 'X_measure', 'culled_id', 'culled_network'])


In [3]:
demo_measures = hcp_data['X_measure'] # Add in the documentation for these measures later
subj_ids = hcp_data['culled_id'][0]

subj_nets = hcp_data['culled_network']
print(subj_nets.shape)#3rd dimention accounts for individual subjects

(87, 87, 399)


In [2]:
np.set_printoptions(precision=10)
measures = ['Age', 'Gender', 'PMAT24 Correct', 'PMAT24 Skipped', 'PMAT24 Med. Time', 
           'Oral Comp.', 'Oral Comp. Age-Adj.', 'Pic.Vocab.Test', 'Pic.Vocab.Test Age-Adj.',
           'CPW Correct', 'CPW Med. Time', 'Pattern Comp. Spd.']

demo_measures_df = pd.DataFrame(demo_measures, index=subj_ids, columns=measures)
#print(demo_measures_df)
demo_measures_df.to_csv("Cognitive_Scores.csv")
#print(np.array(demo_measures).shape)

# Local nets

In [3]:
#display(demo_measures_df)

In [14]:
subjects_gender = demo_measures_df[['Age','Gender']].groupby(['Gender'])
print(subjects_gender.count())

        Age
Gender     
1.0     214
2.0     185


In [11]:
demo_measures_df['Gender'].value_counts()

1.0    214
2.0    185
Name: Gender, dtype: int64

In [None]:
demo_age = filtered_demo[['PTID', 'DX_bl', 'AGE']]

In [19]:
demo_measures_df.min()


Age                          22.00000
Gender                        1.00000
PMAT24 Correct                6.00000
PMAT24 Skipped                0.00000
PMAT24 Med. Time           2560.00000
Oral Comp.                   86.20000
Oral Comp. Age-Adj.          65.02000
Pic.Vocab.Test               92.84103
Pic.Vocab.Test Age-Adj.      69.45302
CPW Correct                  26.00000
CPW Med. Time              1040.00000
Pattern Comp. Spd.           60.09000
dtype: float64

In [21]:
demo_measures_df.max()

Age                           36.0000
Gender                         2.0000
PMAT24 Correct                24.0000
PMAT24 Skipped                13.0000
PMAT24 Med. Time           61641.0000
Oral Comp.                   150.7050
Oral Comp. Age-Adj.          138.0873
Pic.Vocab.Test               153.0889
Pic.Vocab.Test Age-Adj.      153.0889
CPW Correct                   40.0000
CPW Med. Time               3192.5000
Pattern Comp. Spd.           154.6900
dtype: float64

In [29]:
output = demo_measures_df.agg({'Pattern Comp. Spd.':['mean','std']})
display(output)

Unnamed: 0,Pattern Comp. Spd.
mean,115.063108
std,15.966502


In [259]:
dk_atlas = loadmat('DK.mat')
print(dk_atlas)
frontal_nets = subj_nets[0:20,0:20,:]
print(frontal_nets.shape)


{'__header__': b'MATLAB 5.0 MAT-file Platform: posix, Created on: Tue Dec 13 14:49:16 2022', '__version__': '1.0', '__globals__': [], 'frontal': array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20]]), 'limbic': array([[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]]), 'temporal': array([[33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46]]), 'parietal': array([[47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58]]), 'occipital': array([[59, 60, 61, 62, 63, 64, 65, 66, 67, 68]])}
(20, 20, 399)


In [None]:
limbic_nets = subj_nets[20:32,20:32,:]
print(limbic_nets.shape)
temporal_nets = subj_nets[32:46,32:46,:]
print(temporal_nets.shape)
parietal_nets = subj_nets[46:58,46:58,:]
print(parietal_nets.shape)
occipital_nets = subj_nets[58:68,58:68,:]
print(occipital_nets.shape)

In [260]:
# Efficiency requires a distance matrix of weights between 0 and 1
def inverse_RBF(W, with_inf=True, norm=False):
    W_new = np.zeros(W.shape)
    # Build a mask
    valid_edges = np.nonzero(W)
    mask = np.zeros(np.shape(W)).astype(bool)
    mask[valid_edges] = True
    zero_edges = np.argwhere(W==0)
    
    # Reverse RBF kernel
    # Denominator has a small margin to make sure that ln does not reach 1
    W_new[mask] = np.sqrt(-1 * np.log( W[mask]/(np.max(W[mask])+1e-5) ))
    
    # If we need to normalize between 0 and 1
    if norm == True:
        W_new = W_new/np.max(W_new[W_new<np.inf])
    
    # Fill in the non-edges as infinite distance
    if with_inf == True:
        W_new[~mask] = np.inf
    
    # Ensure that connections to self are 0 distance
    np.fill_diagonal(W_new, 0)
    
    return W_new

In [261]:
def drawLineColored(X, C):
    for i in range(X.shape[0]-1):
        plt.plot(X[i:i+2, 0], X[i:i+2, 1], c=C[i, :], lineWidth = 3)

def plotCocycle2D(D, X, cocycle, thresh):
    """
    Given a 2D point cloud X, display a cocycle projected
    onto edges under a given threshold "thresh"
    """
    #Plot all edges under the threshold
    N = X.shape[0]
    t = np.linspace(0, 1, 10)
    c = plt.get_cmap('Greys')
    C = c(np.array(np.round(np.linspace(0, 255, len(t))), dtype=np.int32))
    C = C[:, 0:3]

    for i in range(N):
        for j in range(N):
            if D[i, j] <= thresh:
                Y = np.zeros((len(t), 2))
                Y[:, 0] = X[i, 0] + t*(X[j, 0] - X[i, 0])
                Y[:, 1] = X[i, 1] + t*(X[j, 1] - X[i, 1])
                drawLineColored(Y, C)
    #Plot cocycle projected to edges under the chosen threshold
    for k in range(cocycle.shape[0]):
        [i, j, val] = cocycle[k, :]
        if D[i, j] <= thresh:
            [i, j] = [min(i, j), max(i, j)]
            a = 0.5*(X[i, :] + X[j, :])
            plt.text(a[0], a[1], '%g'%val, color='b')
    #Plot vertex labels
    for i in range(N):
        plt.text(X[i, 0], X[i, 1], '%i'%i, color='r')
    plt.axis('equal')

# Extracting Global Topological Features

## The 0D Persistence Homology
    - (Birth Value, Death Value, Persistence life)
    - Longest Persistrence
    - Mean Persistence
    - Persistence Entropy

## The 1D Persistence Homology
    - (Birth Value, Death Value, Persistence life)
    - Longest Persistrence
    - Mean Persistence
    - Persistence Entropy

## Co-cycle computations

In [262]:
#for computing cocycles on N = 87 nodes


# Inputs: 
#     nets: ((M x M) X N) vector of N networks sized (M x M)
# Outputs:
#     feats: (N x 3) vector of features
def compute_cocycles(subj_nets):
    
    #warnings.filterwarnings('ignore')
    np.set_printoptions(precision=12)
    
    Vertex_Importance =list(range(0, subj_nets.shape[0]))
    Vertex_Importance_NumCoCyles =list(range(0, subj_nets.shape[0]))
    Vertex_Importance_NumCoCyles.insert(subj_nets.shape[0], -1) # Last elemnt on the list holds the total number of cocycles for each subject 

    for i in tqdm(range(subj_nets.shape[2])):
    #for i  in tqdm(range(2)):
        
        net = subj_nets[:,:,i]
        net = net/np.max(net)

        sub = inverse_RBF(net, True, False)
        sub_Vertex_Importance = [0]*(subj_nets.shape[0]) #initialize with zeros for each subjects
        #print("Initialized Vertex Importance Profile =", sub_Vertex_Importance)
        vip_0 = [0]*(subj_nets.shape[0])
        vip_1 = [0]*(subj_nets.shape[0])
        
        #For each permutation of nodes compute the VIP
        #print("Number of nodes of the network=", subj_nets.shape[0])
        #for j in range(subj_nets.shape[0]):
        
        Avg_sub_Vertex_Importance = np.array([0]*(subj_nets.shape[0]), float)
        
        for j in range(0,(subj_nets.shape[0])):
            #print("j=", j)
            sub_temp=sub
            sub_temp[[1, j],:] = sub_temp[[j, 1],:]
            sub_temp[:,[1, j]] = sub_temp[:,[j, 1]]
            np.fill_diagonal(sub_temp, 0)
            
            result = ripser(sub_temp, distance_matrix=True, do_cocycles=True)
            diagrams = result['dgms']
            D = result['dperm2all']
            cocycles = result['cocycles']
           
        
            #print(diagrams)
            #print(cocycles)
        
            #Visualization
            
            #dgm1 = diagrams[1]
            #idx = np.argmax(dgm1[:, 1] - dgm1[:, 0])
            #plot_diagrams(diagrams, show = False)
            #plt.scatter(dgm1[idx, 0], dgm1[idx, 1], 20, 'k', 'x')
            #plt.title("Max 1D birth = %.3g, death = %.3g"%(dgm1[idx, 0], dgm1[idx, 1]))
            #plt.show()
        

            
        
            #print("Number of 1-dim cocyles :", len(cocycles[1]))
            #print("1-dim cocyles :", cocycles[1])
            
            
            sub_Vertex_Importance = [0]*(subj_nets.shape[0])
            for k,ar in enumerate(cocycles[1]):
            
                size_ar = len(ar)
                
                for rep_cyc in ar: 
                    for vertex in rep_cyc:
                    
                        sub_Vertex_Importance[vertex] = min(round(sub_Vertex_Importance[vertex]+round((1/size_ar), 5), 5), k+1)
            
            temp_vertex = sub_Vertex_Importance[j]
            sub_Vertex_Importance[j] = sub_Vertex_Importance[1]
            sub_Vertex_Importance[1] = temp_vertex
            
            #print(sub_Vertex_Importance)
            Avg_sub_Vertex_Importance += np.array(sub_Vertex_Importance)
            norm_arr = [len(cocycles[1])]*(subj_nets.shape[0]) # normalizing array
            
        Avg_sub_Vertex_Importance = np.subtract(Avg_sub_Vertex_Importance, norm_arr)
        Avg_sub_Vertex_Importance = Avg_sub_Vertex_Importance/np.max(Avg_sub_Vertex_Importance)
        Avg_sub_Vertex_Importance = np.round_(Avg_sub_Vertex_Importance, decimals = 5)
        Avg_sub_Vertex_Importance_NumCoCycles = np.concatenate((Avg_sub_Vertex_Importance, np.array([len(cocycles[1])])), axis=0)
        
        #print("VIP=",Avg_sub_Vertex_Importance)
                 
        Vertex_Importance = np.vstack((Vertex_Importance, Avg_sub_Vertex_Importance ))
        Vertex_Importance_NumCoCyles = np.vstack((Vertex_Importance_NumCoCyles, Avg_sub_Vertex_Importance_NumCoCycles))
        
    
    return(Vertex_Importance_NumCoCyles)

In [263]:
np.get_printoptions()['linewidth']
np.set_printoptions(precision=12)

Frontal_Vertex_Importance_NumCoCyles = compute_cocycles(frontal_nets)
print(Frontal_Vertex_Importance_NumCoCyles[0])
print(Frontal_Vertex_Importance_NumCoCyles[1])
print(Frontal_Vertex_Importance_NumCoCyles.shape)

100%|█████████████████████████████████████████| 399/399 [00:04<00:00, 85.58it/s]

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19. -1.]
[ 0.13798  0.46508  0.45916  0.32611  0.62769  0.65455  0.16078  0.28915
  0.27437  0.41394  0.67599  1.       0.85758  0.59468  0.36553  0.50598
  0.80411  0.07638  0.23396  0.57792 17.     ]
(400, 21)





In [264]:
print(Frontal_Vertex_Importance_NumCoCyles[13])

[ 0.48224  0.44013  0.44674  0.30966  1.       0.98183  0.30966  0.06936
  0.23782  0.55244  0.53097  0.57886  0.64657  0.70355  0.47812  0.35012
  0.0578   0.33691  0.16515  0.32948 24.     ]


In [265]:
file = open("HCP_Sturct_Frontal_Vertex_Importance_NumCoCyles_07172023.npy", 'wb')
np.save(file, np.array(Frontal_Vertex_Importance_NumCoCyles))

In [6]:
Frontal_Vertex_Importance = compute_cocycles(frontal_nets)

In [4]:
print(Frontal_Vertex_Importance[1])

In [5]:
file = open("HCP_Sturct_Frontal_Vertex_Importance.npy", 'wb')
np.save(file, np.array(Frontal_Vertex_Importance))