In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import glob

import networkx as nx
from nxviz import CircosPlot
import community

import gudhi as gd
import gudhi.representations
import gudhi.representations.vector_methods


import nilearn
from nilearn import datasets

from pathlib import Path

from nilearn.connectome import ConnectivityMeasure
from nilearn import plotting

In [None]:
from sklearn.preprocessing   import MinMaxScaler
from sklearn.pipeline        import Pipeline
from sklearn.svm             import SVC
from sklearn.ensemble        import RandomForestClassifier
from sklearn.ensemble        import GradientBoostingClassifier

from sklearn.neighbors       import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
# Region labels of the AAL atlas; used below as the 'roi' index of every time-series table.
aal_labels = nilearn.datasets.fetch_atlas_aal().labels
#ho_labels = nilearn.datasets.fetch_atlas_harvard_oxford('cort-maxprob-thr25-2mm').labels

# ADNI

In [None]:
counter=0
# Every '*_aal.csv' table found below ADNI_tables, as forward-slash paths.
p = Path("ADNI_tables")
paths = [str(x).replace('\\', '/') for x in p.rglob('*_aal.csv')]

# One DataFrame per file: the CSV is transposed and re-indexed by AAL label.
timeseries_array = []
for path in paths:
    table = pd.read_csv(path, index_col=0).T
    table['roi'] = aal_labels
    timeseries_array.append(table.set_index('roi'))
print(len(timeseries_array))
timeseries_array[0]


### Phenotypes

In [None]:
# Same file discovery as the loading cell, so `subjects` lines up with `timeseries_array`.
p = Path("ADNI_tables")
paths = [str(x).replace('\\', '/') for x in p.rglob('*_aal.csv')]

# Subject id = second path component with everything from '_aal' onwards removed.
subjects = []
for path in paths:
    second_component = path.split('/')[1]
    subjects.append(second_component.split('_aal')[0])
subjects

In [None]:
# Phenotype table; the unique subject ids of each diagnostic group are kept as lists.
phenotypic_df = pd.read_csv('CN_AD_fMRI_3_21_2023.csv')
group_column = phenotypic_df['Group']
subjects_control = list(set(phenotypic_df[group_column == 'CN']['Subject']))
subjects_AD = list(set(phenotypic_df[group_column == 'AD']['Subject']))

In [None]:
# Count how many loaded subjects are controls, AD patients, or in neither list.
# A subject present in both lists is counted as a control.
control = sum(1 for subject in subjects if subject in subjects_control)
ad = sum(1 for subject in subjects
         if subject not in subjects_control and subject in subjects_AD)
no_category = len(subjects) - control - ad

print(control, ad, no_category)
print(len(subjects_control), len(subjects_AD))

In [None]:
# One label per loaded subject, in the order of `subjects`.
# NOTE(review): every subject that is not a control is labelled 'AD', including
# subjects found in neither list (the `no_category` count of the previous cell).
phenotypes_array = ['CN' if subject in subjects_control else 'AD' for subject in subjects]
phenotypes_array

### Data view

In [None]:
class DataFMRI:
    """Connectivity matrices and Rips persistence for a collection of fMRI time series.

    For every entry of ``timeseries_array`` a connectivity matrix is computed with
    nilearn's ``ConnectivityMeasure``; ``1 - matrix`` is then used as the distance
    matrix of a Rips complex whose persistence is computed with gudhi.

    Parameters
    ----------
    timeseries_array : list
        One table per subject; ``.values.T`` of each entry is passed to
        ``ConnectivityMeasure.fit_transform``.
    labels : sequence
        Region labels, used only as axis labels in ``plot_matrix``.
    phenotypes_array : sequence
        One label per subject; stored as-is.
    connectivity_measure_kind : str
        ``kind`` argument of ``ConnectivityMeasure``.
    rips_complex_max_dimension : int
        ``max_dimension`` of the simplex tree built from each Rips complex.
    """

    # Class-level placeholders, kept for backward compatibility only. Every instance
    # rebinds its own values in __init__ / create_matrices / create_persistence_view,
    # so these objects are never shared or mutated.
    timeseries_array = []
    labels = None
    phenotypes_array = []
    connectivity_measure_kind = None
    rips_complex_max_dimension = None

    matrices = []
    diagrams = []
    simplex_trees = []

    def __init__(self, timeseries_array, labels, phenotypes_array,
                 connectivity_measure_kind='correlation',
                 rips_complex_max_dimension=2):
        self.timeseries_array = timeseries_array
        self.labels = labels
        self.phenotypes_array = phenotypes_array
        self.connectivity_measure_kind = connectivity_measure_kind
        self.rips_complex_max_dimension = rips_complex_max_dimension

        # create matrix for each time_series
        self.create_matrices()
        # Rips complex and persistent diagrams
        self.create_persistence_view()

    def create_matrices(self):
        """Compute one connectivity matrix per time-series table into ``self.matrices``."""
        self.matrices = []
        measure = ConnectivityMeasure(kind=self.connectivity_measure_kind, discard_diagonal=True)
        for timeseries in self.timeseries_array:
            matrix = measure.fit_transform([timeseries.values.T])[0]
            self.matrices.append(matrix)

    def create_persistence_view(self):
        """Build a Rips simplex tree and its persistence for every matrix.

        The result lists are rebound on the instance first. Previously the method
        appended to the class-level lists, so every DataFMRI object shared (and kept
        growing) the same ``diagrams`` / ``simplex_trees``.
        """
        self.diagrams = []
        self.simplex_trees = []
        for matrix in self.matrices:
            rips_complex = gudhi.RipsComplex(distance_matrix=1-matrix, max_edge_length=2)
            simplex_tree = rips_complex.create_simplex_tree(max_dimension=self.rips_complex_max_dimension)
            diag = simplex_tree.persistence()
            self.diagrams.append(diag)
            self.simplex_trees.append(simplex_tree)

    def get_persistence_intervals(self, i, dim):
        """Return the persistence intervals of subject ``i`` in dimension ``dim``."""
        return self.simplex_trees[i].persistence_intervals_in_dimension(dim)

    def get_persistence_intervals_array(self, dim):
        """Return, per subject, the finite persistence intervals of dimension ``dim``.

        Each element is an ``(n_intervals, 2)`` float array; rows containing ``inf``
        are dropped. A subject without intervals yields an empty ``(0, 2)`` array.
        """
        intervals_array = []
        for i in range(len(self.timeseries_array)):
            # reshape(-1, 2) keeps an empty result two-dimensional so the axis=1
            # reduction below cannot fail on a shape-(0,) array.
            intervals = np.asarray(self.get_persistence_intervals(i, dim), dtype=float).reshape(-1, 2)
            intervals_array.append(intervals[np.all(np.isfinite(intervals), axis=1)])
        return intervals_array

    # visualize

    def _merged_diagrams(self, arr_i):
        """Concatenate the persistence lists of the subjects indexed by ``arr_i``."""
        return [point for i in arr_i for point in self.diagrams[i]]

    def plot_matrix(self, i):
        """Plot the connectivity matrix of subject ``i`` with its diagonal zeroed."""
        matrix = self.matrices[i].copy()
        np.fill_diagonal(matrix, 0)
        plotting.plot_matrix(matrix, figure=(10, 8), labels=self.labels,
                             vmax=1, vmin=matrix.min(), reorder=True)

    def plot_persistence_diagram(self, arr_i):
        """Plot one persistence diagram pooling the subjects indexed by ``arr_i``."""
        gudhi.plot_persistence_diagram(self._merged_diagrams(arr_i), legend=True)

    def plot_persistence_barcode(self, arr_i):
        """Overlay the persistence barcodes of the subjects in ``arr_i`` on one axes."""
        axis = gudhi.plot_persistence_barcode(self.diagrams[arr_i[0]], max_intervals=0, legend=True, alpha=0.3)
        for i in range(1, len(arr_i)):
            gudhi.plot_persistence_barcode(self.diagrams[arr_i[i]], max_intervals=0, legend=True, alpha=0.3, axes=axis)

    def plot_persistence_density(self, arr_i):
        """Plot the dimension-1 persistence density pooling the subjects in ``arr_i``.

        The original body called ``plot_persistence_barcode`` with a ``dimension``
        keyword, which that function does not accept.
        """
        gudhi.plot_persistence_density(self._merged_diagrams(arr_i), dimension=1, legend=True)
        

In [None]:
# Build matrices and persistence for the ADNI subjects (default 'correlation' kind, max dimension 2).
data_fMRI = DataFMRI(timeseries_array, aal_labels, phenotypes_array)

In [None]:
# Connectivity matrix of the first subject.
data_fMRI.plot_matrix(0)

### Learning

##### dim 0

In [None]:
# Finite dimension-0 persistence intervals, one array per ADNI subject.
intervals_array=data_fMRI.get_persistence_intervals_array(dim=0)

In [None]:
# Classification pipeline on persistence diagrams:
# keep finite points -> optional min-max scaling -> vectorisation ("TDA") -> classifier.
pipe = Pipeline([("Separator", gd.representations.DiagramSelector(limit=np.inf, point_type="finite")),
                 ("Scaler",    gd.representations.DiagramScaler(scalers=[([0,1], MinMaxScaler())])),
                 ("TDA",       gd.representations.PersistenceImage()),
                 ("Estimator", GradientBoostingClassifier())])


# Grid searched by GridSearchCV: scaler on/off x vectorisation x estimator.
param =    [{"Scaler__use":         [False, True],
             "TDA":                 [
                                     #gd.representations.PersistenceImage(),
                                     gd.representations.Landscape(),
                                     gd.representations.Silhouette(),
                                     gd.representations.TopologicalVector(),
                                     #gd.representations.vector_methods.BettiCurve()
                                     ], 
             
             "Estimator":           [GradientBoostingClassifier(),
                                     RandomForestClassifier(),
                                     SVC()]},]

# Macro-averaged F1 as the model-selection and reporting metric.
from sklearn.metrics import f1_score, make_scorer
f1 = make_scorer(f1_score , average='macro')

In [None]:
# Preview: persistence landscape of the first subject.
# The landscape is fitted on `intervals_array` (defined above). The original used
# `train_intervals`, which is only created in the next cell, so this cell raised
# NameError on a fresh Restart & Run All.
landscape = gd.representations.Landscape(resolution=200)
plt.plot(landscape.fit_transform(intervals_array)[0])

In [None]:
#for j in range(20):

# Encode labels as 1 = AD, 0 = anything else. Already-encoded values (1) are accepted
# as well, so re-running this cell no longer turns every label into 0.
phenotypes_array = [1 if el in ('AD', 1) else 0 for el in phenotypes_array]

# Random hold-out split: the first 30% of a permutation is the test set.
test_size            = 0.3
perm                 = np.random.permutation(len(phenotypes_array))
limit                = int(test_size * len(phenotypes_array))
test_sub, train_sub  = perm[:limit], perm[limit:]
train_phenotypes     = np.array(phenotypes_array)[train_sub]
test_phenotypes      = np.array(phenotypes_array)[test_sub]
train_intervals      = [intervals_array[i] for i in train_sub]
test_intervals       = [intervals_array[i] for i in test_sub]

# 3-fold grid search over `param`, selected by macro F1.
model = GridSearchCV(pipe, param, cv=3, scoring=f1)

model = model.fit(train_intervals, train_phenotypes)

print(model.best_params_)
print("Train f1 = " + str(model.score(train_intervals, train_phenotypes)))
print("Test f1  = " + str(model.score(test_intervals,  test_phenotypes)))

##### dim 1

In [None]:
# Finite dimension-1 persistence intervals, one array per ADNI subject.
intervals_array=data_fMRI.get_persistence_intervals_array(dim=1)

In [None]:
# Twenty independent random 70/30 splits, each followed by a full grid search.
for i in range(20):
    test_size = 0.3
    perm = np.random.permutation(len(phenotypes_array))
    limit = int(test_size * len(phenotypes_array))
    test_sub = perm[:limit]
    train_sub = perm[limit:]
    train_phenotypes, test_phenotypes = (np.array(phenotypes_array)[subset]
                                         for subset in (train_sub, test_sub))
    train_intervals = [intervals_array[k] for k in train_sub]
    test_intervals = [intervals_array[k] for k in test_sub]

    model = GridSearchCV(pipe, param, cv=3, scoring=f1).fit(train_intervals, train_phenotypes)

    print(model.best_params_)
    for split_name, split_x, split_y in (("Train f1 = ", train_intervals, train_phenotypes),
                                         ("Test f1  = ", test_intervals, test_phenotypes)):
        print(split_name + str(model.score(split_x, split_y)))

### diagrams

In [None]:
# Positions of AD and control subjects, taken from the labels stored on the view.
adni_group_labels = np.array(data_fMRI.phenotypes_array)
AD_indices, = np.where(adni_group_labels == 'AD')
control_indices, = np.where(adni_group_labels == 'CN')

In [None]:
# 50-bin histograms of all connectivity values per group (both are drawn on the current axes).
# plt.hist returns (counts, bin_edges, patches); elements 0 and 1 are saved in the next cell.
hist_AD = plt.hist(np.array(data_fMRI.matrices)[AD_indices].flatten(),bins=50)
hist_control = plt.hist(np.array(data_fMRI.matrices)[control_indices].flatten(),bins=50)

In [None]:
# Persist the histogram counts (*_0) and bin edges (*_1) under fMRI_results/<dataset>/.
# NOTE(review): np.save does not create directories — the target folder must already exist.
dataset = 'ADNI'
np.save(f'fMRI_results/{dataset}/hist_control_0.npy', hist_control[0])
np.save(f'fMRI_results/{dataset}/hist_AD_0.npy', hist_AD[0])
np.save(f'fMRI_results/{dataset}/hist_control_1.npy', hist_control[1])
np.save(f'fMRI_results/{dataset}/hist_AD_1.npy', hist_AD[1])

In [None]:
# Pooled persistence diagram of all AD subjects.
data_fMRI.plot_persistence_diagram(AD_indices)

In [None]:
# FIXME: `ae_name`, `data_genes_control` and `data_genes_AD` are not defined anywhere in
# this notebook, so this cell raised NameError on a fresh Restart & Run All.
# The export now runs only when all three names exist and reports the skip otherwise.
_missing_names = [name for name in ('ae_name', 'data_genes_control', 'data_genes_AD')
                  if name not in globals()]
if _missing_names:
    print(f"Skipping diagram export; undefined names: {_missing_names}")
else:
    np.save(f'{dataset}/diagrams/{ae_name}_control_diagram_0.npy', data_genes_control.get_persistence_intervals(0))
    np.save(f'{dataset}/diagrams/{ae_name}_control_diagram_1.npy', data_genes_control.get_persistence_intervals(1))
    np.save(f'{dataset}/diagrams/{ae_name}_AD_diagram_0.npy', data_genes_AD.get_persistence_intervals(0))
    np.save(f'{dataset}/diagrams/{ae_name}_AD_diagram_1.npy', data_genes_AD.get_persistence_intervals(1))

In [None]:
# Pooled persistence diagram of all control subjects.
data_fMRI.plot_persistence_diagram(control_indices)

In [None]:
# Overlaid barcodes of all AD subjects.
data_fMRI.plot_persistence_barcode(AD_indices)

In [None]:
# Overlaid barcodes of all control subjects.
data_fMRI.plot_persistence_barcode(control_indices)

In [None]:
import matplotlib.patches as mpatches

In [None]:
# Pool the per-subject persistence lists of each group into one list
# (the first subject's list is the start value, the others are concatenated onto it).
diagrams_AD = sum((data_fMRI.diagrams[i] for i in AD_indices[1:]),
                  data_fMRI.diagrams[AD_indices[0]])

diagrams_control = sum((data_fMRI.diagrams[i] for i in control_indices[1:]),
                       data_fMRI.diagrams[control_indices[0]])

In [None]:
# Side-by-side pooled persistence diagrams: AD on the left, controls on the right.
fig, ax = plt.subplots(1, 2, figsize=(10,4))


gudhi.plot_persistence_diagram(diagrams_AD, legend=True,
                               max_intervals=0,
                               #colormap='red',
                               axes=ax[0], fontsize=10)
ax[0].set_title('AD')


gudhi.plot_persistence_diagram(diagrams_control, legend=True, max_intervals=0, 
                               #colormap='blue',
                                     axes=ax[1], fontsize=10)
ax[1].set_title('Control')


In [None]:
# Separate the pooled (dimension, interval) entries into dimension 0 and dimension 1.
diagrams_AD_0, diagrams_AD_1 = (
    [entry[1] for entry in diagrams_AD if entry[0] == wanted_dim] for wanted_dim in (0, 1)
)
diagrams_control_0, diagrams_control_1 = (
    [entry[1] for entry in diagrams_control if entry[0] == wanted_dim] for wanted_dim in (0, 1)
)

In [None]:
# Save the pooled intervals per group and dimension under fMRI_results/<dataset>/.
np.save(f'fMRI_results/{dataset}/control_diagram_0.npy', diagrams_control_0)
np.save(f'fMRI_results/{dataset}/control_diagram_1.npy', diagrams_control_1)
np.save(f'fMRI_results/{dataset}/AD_diagram_0.npy', diagrams_AD_0)
np.save(f'fMRI_results/{dataset}/AD_diagram_1.npy', diagrams_AD_1)

In [None]:
# Order AD dimension-0 intervals by birth - death, i.e. longest-lived first.
diagrams_AD_0 = sorted(diagrams_AD_0, key=lambda x: x[0]-x[1])

In [None]:
# All barcodes on one axes: controls in red first, then AD subjects in blue.
axis = None
for group_indices, colour in ((control_indices, 'red'), (AD_indices, 'blue')):
    for subject_idx in group_indices:
        drawn_axes = gudhi.plot_persistence_barcode(data_fMRI.diagrams[subject_idx],
                                                    max_intervals=0, legend=True, alpha=0.5,
                                                    axes=axis, colormap=[colour, colour])
        if axis is None:
            # the very first call creates the axes every later call draws on
            axis = drawn_axes

axis.set_title('AD patients and controls')
patch1, patch2 = (mpatches.Patch(color=colour, label=name)
                  for colour, name in (('red', 'Control'), ('blue', 'AD')))
axis.legend(handles=[patch1, patch2])

In [None]:
# Same four files as the earlier save cell; by now `diagrams_AD_0` has been re-sorted,
# so AD_diagram_0.npy is overwritten with the sorted order.
np.save(f'fMRI_results/{dataset}/control_diagram_0.npy', diagrams_control_0)
np.save(f'fMRI_results/{dataset}/control_diagram_1.npy', diagrams_control_1)
np.save(f'fMRI_results/{dataset}/AD_diagram_0.npy', diagrams_AD_0)
np.save(f'fMRI_results/{dataset}/AD_diagram_1.npy', diagrams_AD_1)

### Clusters

In [None]:
# Back to dimension-0 intervals for the clustering views.
intervals_array=data_fMRI.get_persistence_intervals_array(dim=0)

In [None]:
# Silhouette vectorisation (1000 samples), each interval weighted by its length (death - birth).
SH = gd.representations.Silhouette(resolution=1000, weight=lambda x: np.power(x[1]-x[0],1))
sh = SH.fit_transform(intervals_array)

In [None]:
# Silhouette curves of the AD subjects.
for ind in AD_indices:
    plt.plot(sh[ind])
plt.title("Silhouette, AD")

In [None]:
# Silhouette curves of the control subjects.
for ind in control_indices:
    plt.plot(sh[ind])
plt.title("Silhouette, controls")

In [None]:
# Both groups on one figure: controls red, AD blue.
for ind in control_indices:
    plt.plot(sh[ind], color='red')
for ind in AD_indices:
    plt.plot(sh[ind], color='blue')
plt.title("Silhouette")

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [None]:
print(sh.shape)
print(np.array(phenotypes_array).shape)
# Colour code: 1 = AD, 0 = control. Built from the string labels stored on the view:
# the global `phenotypes_array` was already re-encoded to 0/1 in the learning section,
# so comparing its items with 'AD' gave 0 for every subject.
colormap_phenotypes = [1 if item == 'AD' else 0 for item in data_fMRI.phenotypes_array]
print(np.array(colormap_phenotypes).shape)

In [None]:
# 2-D PCA of the silhouette vectors, coloured by diagnosis.
pca = PCA(n_components=2)
pca_data = pca.fit_transform(sh)
plot = plt.scatter(pca_data[:,0], pca_data[:,1], c=np.array(colormap_phenotypes))
plt.legend(handles=plot.legend_elements()[0], labels=['CN', 'AD'])
plt.title('PCA')
#plt.show()

In [None]:
# 2-D t-SNE of the same vectors (no random_state is set, so the layout varies between runs).
tsne = TSNE(n_components=2)
tsne_data = tsne.fit_transform(sh)
plot = plt.scatter(tsne_data[:,0], tsne_data[:,1], c=np.array(colormap_phenotypes))
plt.legend(handles=plot.legend_elements()[0], labels=['CN', 'AD'])
plt.title('TSNE')
#plt.show()

In [None]:
# Dimension-0 intervals again, this time for Betti curves.
intervals_array=data_fMRI.get_persistence_intervals_array(dim=0)

In [None]:
# One Betti curve per subject; AD drawn in blue, controls in red.
betti_curves_array = gudhi.representations.vector_methods.BettiCurve(resolution=None).fit_transform(intervals_array)
    
for ind in AD_indices:
    plt.plot(
        betti_curves_array[ind], 'bo-', markersize=0.5
    )
for ind in control_indices:
    plt.plot(
        betti_curves_array[ind], 'ro-', markersize=0.5
    )

In [None]:
# Displays the full list of per-subject interval arrays (large output).
intervals_array

# CNI

In [None]:
#! git clone https://github.com/mdschirmer/2019_CNI_TrainingRelease
#! git clone https://github.com/mdschirmer/2019_CNI_ValidationRelease

In [None]:
# CNI training release: one 'timeseries_aal.csv' per subject folder.
# NOTE(review): files arrive in rglob order while labels are later read in CSV row
# order — confirm that both orders agree.
p = Path("CNI/2019_CNI_TrainingRelease/Training")
paths_aal = [str(x) for x in p.rglob("timeseries_aal.csv")]

timeseries_array = []
for path in paths_aal:
    table = pd.read_csv(path, header=None)
    table['roi'] = aal_labels
    timeseries_array.append(table.set_index('roi'))
len(timeseries_array)

In [None]:
# First training subject's table.
timeseries_array[0]

In [None]:
# Diagnosis column ('DX') of the training phenotype table, kept as a pandas Series.
phenotypic_training = pd.read_csv('CNI/2019_CNI_TrainingRelease/SupportingInfo/phenotypic_training.csv')
phenotypes_array = phenotypic_training['DX']

In [None]:
# CNI validation release, appended to the training tables already in `timeseries_array`.
# The list is deliberately not reset here, so this cell must run exactly once after the
# training cell — a second run appends the validation subjects again.
paths_aal = []
p = Path("CNI/2019_CNI_ValidationRelease/Validation")
for x in p.rglob("timeseries_aal.csv"):
    paths_aal.append(str(x))

#timeseries_array = []
for path in paths_aal:
    df = pd.read_csv(path, header=None)
    df['roi'] = aal_labels
    df = df.set_index('roi')
    timeseries_array.append(df)
len(timeseries_array)

In [None]:
# Training labels followed by validation labels, as one plain list.
# `phenotypes_array` must still be the Series from the training cell (it needs `.values`).
phenotypic_training = pd.read_csv('CNI/2019_CNI_ValidationRelease/SupportingInfo/phenotypic_validation.csv')
phenotypes_array = list(phenotypes_array.values) + list(phenotypic_training['DX'].values)

In [None]:
# Should equal len(timeseries_array).
len(phenotypes_array)

In [None]:
# Build matrices and persistence for the combined CNI subjects.
data_fMRI_CNI = DataFMRI(timeseries_array, aal_labels, phenotypes_array)

In [None]:
# Persist the CNI connectivity matrices.
np.save(f'fMRI_results/CNI/matrices.npy', data_fMRI_CNI.matrices)

### Learning

In [None]:
# Classification pipeline on persistence diagrams:
# keep finite points -> optional min-max scaling -> vectorisation ("TDA") -> classifier.
# The "TDA" step is restored: with it commented out the estimator received the raw lists
# of (n_i, 2) interval arrays, and the grid searches in the dim-1 section fit this
# pipeline on exactly such raw intervals.
pipe = Pipeline([("Separator", gd.representations.DiagramSelector(limit=np.inf, point_type="finite")),
                 ("Scaler",    gd.representations.DiagramScaler(scalers=[([0,1], MinMaxScaler())])),
                 ("TDA",       gd.representations.PersistenceImage()),
                 ("Estimator", GradientBoostingClassifier())])


# Grid searched by GridSearchCV: scaler on/off x vectorisation x estimator.
param =    [{"Scaler__use":         [False, True],
             "TDA":                 [
                                     #gd.representations.PersistenceImage(),
                                     gd.representations.Landscape(),
                                     gd.representations.Silhouette(),
                                     gd.representations.TopologicalVector(),
                                     #gd.representations.vector_methods.BettiCurve()
                                     ],

             "Estimator":           [GradientBoostingClassifier(),
                                     RandomForestClassifier(),
                                     SVC()]},]

# Macro-averaged F1 as the model-selection and reporting metric.
from sklearn.metrics import f1_score, make_scorer
f1 = make_scorer(f1_score , average='macro')

In [None]:
# Number of CNI labels (training + validation).
len(phenotypes_array)

##### dim 0

In [None]:
# Finite dimension-0 persistence intervals, one array per CNI subject.
intervals_array=data_fMRI_CNI.get_persistence_intervals_array(dim=0)

In [None]:
# Encode labels as 1 = ADHD, 0 = anything else. Already-encoded values (1) are accepted
# too, so re-running this cell no longer turns every label into 0.
phenotypes_array = [1 if el in ('ADHD', 1) else 0 for el in phenotypes_array]

In [None]:
#for i in range(20):

# Random hold-out split: the first 30% of a permutation becomes the test set.
test_size = 0.3
perm = np.random.permutation(len(phenotypes_array))
limit = int(test_size * len(phenotypes_array))
test_sub = perm[:limit]
train_sub = perm[limit:]
train_phenotypes, test_phenotypes = (np.array(phenotypes_array)[subset]
                                     for subset in (train_sub, test_sub))
train_intervals = [intervals_array[k] for k in train_sub]
test_intervals = [intervals_array[k] for k in test_sub]


In [None]:
len(test_intervals_transformed), len(train_intervals_transformed)

In [None]:
landscape = gd.representations.Landscape(num_landscapes=len(test_intervals))
len((landscape.fit_transform(test_intervals)))

In [None]:
landscape = gd.representations.Landscape(resolution=200)
train_intervals_transformed = [landscape.fit_transform([train_intervals[i]])[0] for i in range(len(train_intervals))]
test_intervals_transformed = [landscape.transform([test_intervals[i]])[0] for i in range(len(test_intervals))]
# test_intervals_transformed =  landscape.transform(test_intervals)
#plt.plot(landscape.fit_transform(train_intervals)[0])

In [None]:
# Gradient-boosting grid: only tree depth and minimum leaf size vary (4 x 4 combinations).
param_grid = {
    'learning_rate': [0.01],
    'n_estimators': [300],
    'max_depth': [2, 3, 4, 5],
    'subsample': [1.0],
    'min_samples_leaf' : [1, 2, 3, 4],
    
}
model = GradientBoostingClassifier()
# Note: this search is scored by accuracy, unlike the macro-F1 searches elsewhere.
grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')

In [None]:
# Fit the grid on landscape features; `best_model_score` is the estimator's own
# `.score` (mean accuracy) on the held-out test set.
grid_search.fit(train_intervals_transformed, train_phenotypes)
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_
best_model_score = best_model.score(test_intervals_transformed, test_phenotypes)

In [None]:
best_params, best_model, best_model_score

In [None]:
# Training-set accuracy of the selected model, for comparison with the test score.
best_model.score(train_intervals_transformed, train_phenotypes)

In [None]:
# Ten random-forest fits on the landscape features; collects (train, test) macro-F1 pairs.
# Fixes: RandomForestClassifier has no `scoring` argument (the original call raised
# TypeError), and `model.score` returns accuracy, so the printed "f1" values are now
# computed with the `f1` scorer, which is called as scorer(estimator, X, y).
res = []
for i in range(10):
    model = RandomForestClassifier()
    model = model.fit(train_intervals_transformed, train_phenotypes)
    train_score = f1(model, train_intervals_transformed, train_phenotypes)
    test_score = f1(model, test_intervals_transformed, test_phenotypes)
    print("Train f1 = " + str(train_score))
    print("Test f1  = " + str(test_score))
    res.append((train_score, test_score))

##### dim 1

In [None]:
# Finite dimension-1 persistence intervals, one array per CNI subject.
intervals_array=data_fMRI_CNI.get_persistence_intervals_array(dim=1)

In [None]:
# Twenty independent random 70/30 splits, each followed by a full grid search.
for i in range(20):
    test_size = 0.3
    perm = np.random.permutation(len(phenotypes_array))
    limit = int(test_size * len(phenotypes_array))
    test_sub = perm[:limit]
    train_sub = perm[limit:]
    train_phenotypes, test_phenotypes = (np.array(phenotypes_array)[subset]
                                         for subset in (train_sub, test_sub))
    train_intervals = [intervals_array[k] for k in train_sub]
    test_intervals = [intervals_array[k] for k in test_sub]

    model = GridSearchCV(pipe, param, cv=3, scoring=f1).fit(train_intervals, train_phenotypes)

    print(model.best_params_)
    for split_name, split_x, split_y in (("Train f1 = ", train_intervals, train_phenotypes),
                                         ("Test f1  = ", test_intervals, test_phenotypes)):
        print(split_name + str(model.score(split_x, split_y)))

In [None]:
# One more random 70/30 split, used by the step-by-step cells that follow.
test_size = 0.3
perm = np.random.permutation(len(phenotypes_array))
limit = int(test_size * len(phenotypes_array))
test_sub = perm[:limit]
train_sub = perm[limit:]
train_phenotypes, test_phenotypes = (np.array(phenotypes_array)[subset]
                                     for subset in (train_sub, test_sub))
train_intervals = [intervals_array[k] for k in train_sub]
test_intervals = [intervals_array[k] for k in test_sub]

In [None]:
# Same grid search as in the loop above, run once cell by cell.
model = GridSearchCV(pipe, param, cv=3, scoring=f1)
#model = GridSearchCV(pipe, param, cv=3)

In [None]:
model = model.fit(train_intervals, train_phenotypes)

In [None]:
print(model.best_params_)

In [None]:
# GridSearchCV.score uses the `scoring` it was built with, i.e. macro F1.
print("Train f1 = " + str(model.score(train_intervals, train_phenotypes)))
print("Test f1  = " + str(model.score(test_intervals,  test_phenotypes)))

In [None]:
# Labels as stored on the view at construction time.
data_fMRI_CNI.phenotypes_array

### diagrams

In [None]:
# Positions of ADHD and control subjects, taken from the labels stored on the view.
cni_group_labels = np.array(data_fMRI_CNI.phenotypes_array)
ADHD_indices, = np.where(cni_group_labels == 'ADHD')
control_indices, = np.where(cni_group_labels == 'Control')

In [None]:
# 50-bin histograms of all connectivity values per group (both drawn on the current axes).
hist_ADHD = plt.hist(np.array(data_fMRI_CNI.matrices)[ADHD_indices].flatten(),bins=50)
hist_control = plt.hist(np.array(data_fMRI_CNI.matrices)[control_indices].flatten(),bins=50)

In [None]:
# Counts (*_0) and bin edges (*_1). The ADHD group is written to the 'hist_AD_*' file
# names, i.e. the same file names as in the ADNI section.
dataset = 'CNI'
np.save(f'fMRI_results/{dataset}/hist_control_0.npy', hist_control[0])
np.save(f'fMRI_results/{dataset}/hist_AD_0.npy', hist_ADHD[0])
np.save(f'fMRI_results/{dataset}/hist_control_1.npy', hist_control[1])
np.save(f'fMRI_results/{dataset}/hist_AD_1.npy', hist_ADHD[1])

In [None]:
# Group sizes.
print(len(ADHD_indices), len(control_indices))

In [None]:
# Overlaid barcodes of all ADHD subjects.
data_fMRI_CNI.plot_persistence_barcode(ADHD_indices)

In [None]:
# Overlaid barcodes of all control subjects.
data_fMRI_CNI.plot_persistence_barcode(control_indices)

In [None]:
import matplotlib.patches as mpatches

In [None]:
# Pool the per-subject persistence lists of each group into one list
# (the first subject's list is the start value, the others are concatenated onto it).
diagrams_ADHD = sum((data_fMRI_CNI.diagrams[i] for i in ADHD_indices[1:]),
                    data_fMRI_CNI.diagrams[ADHD_indices[0]])

diagrams_control = sum((data_fMRI_CNI.diagrams[i] for i in control_indices[1:]),
                       data_fMRI_CNI.diagrams[control_indices[0]])

In [None]:
# Side-by-side pooled persistence diagrams: ADHD on the left, controls on the right.
fig, ax = plt.subplots(1, 2, figsize=(10,4))


gudhi.plot_persistence_diagram(diagrams_ADHD, legend=True,
                               max_intervals=0,
                               #colormap='red',
                               axes=ax[0], fontsize=10)
ax[0].set_title('ADHD')


gudhi.plot_persistence_diagram(diagrams_control, legend=True, max_intervals=0, 
                               #colormap='blue',
                                     axes=ax[1], fontsize=10)
ax[1].set_title('Control')


In [None]:
# Separate the pooled (dimension, interval) entries into dimension 0 and dimension 1.
diagrams_ADHD_0, diagrams_ADHD_1 = (
    [entry[1] for entry in diagrams_ADHD if entry[0] == wanted_dim] for wanted_dim in (0, 1)
)

diagrams_control_0, diagrams_control_1 = (
    [entry[1] for entry in diagrams_control if entry[0] == wanted_dim] for wanted_dim in (0, 1)
)

In [None]:
# Save the pooled intervals per group and dimension; the ADHD group is written to the
# 'AD_diagram_*' file names.
np.save(f'fMRI_results/{dataset}/control_diagram_0.npy', diagrams_control_0)
np.save(f'fMRI_results/{dataset}/control_diagram_1.npy', diagrams_control_1)
np.save(f'fMRI_results/{dataset}/AD_diagram_0.npy', diagrams_ADHD_0)
np.save(f'fMRI_results/{dataset}/AD_diagram_1.npy', diagrams_ADHD_1)

In [None]:
# Pooled persistence diagram of all control subjects.
data_fMRI_CNI.plot_persistence_diagram(control_indices)

In [None]:
# Order ADHD dimension-0 intervals by birth - death, i.e. longest-lived first.
diagrams_ADHD_0 = sorted(diagrams_ADHD_0, key=lambda x: x[0]-x[1])

In [None]:
# All CNI barcodes on one axes: controls in red first, then ADHD subjects in blue.
axis = gudhi.plot_persistence_barcode(data_fMRI_CNI.diagrams[control_indices[0]], max_intervals=0, legend=True, alpha=0.5,
                               colormap=['red', 'red'])
for i in range(1, len(control_indices)):
    gudhi.plot_persistence_barcode(data_fMRI_CNI.diagrams[control_indices[i]], max_intervals=0, legend=True, alpha=0.5,
                                   axes=axis, colormap=['red', 'red'])

for i in range(len(ADHD_indices)):
    gudhi.plot_persistence_barcode(data_fMRI_CNI.diagrams[ADHD_indices[i]], max_intervals=0, legend=True, alpha=0.5, axes=axis,
                                   colormap=['blue', 'blue'])

axis.set_title('ADHD patients and controls')
patch1 = mpatches.Patch(color='red', label='Control')
# The blue group is ADHD in this dataset; the legend entry previously said 'AD'
# (left over from the ADNI section).
patch2 = mpatches.Patch(color='blue', label='ADHD')
axis.legend(handles=[patch1, patch2])

### Clusters CNI

In [None]:
# Dimension-0 intervals for the clustering views.
intervals_array=data_fMRI_CNI.get_persistence_intervals_array(dim=0)

In [None]:
# Silhouette vectorisation (1000 samples), each interval weighted by its length (death - birth).
SH = gd.representations.Silhouette(resolution=1000, weight=lambda x: np.power(x[1]-x[0],1))
sh = SH.fit_transform(intervals_array)

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [None]:
print(sh.shape)
print(np.array(phenotypes_array).shape)
# Colour code: 1 = ADHD, 0 = control. Built from the string labels stored on the view:
# the global `phenotypes_array` was already re-encoded to 0/1 in the learning section,
# so comparing its items with 'ADHD' gave 0 for every subject.
colormap_phenotypes = [1 if item == 'ADHD' else 0 for item in data_fMRI_CNI.phenotypes_array]
print(np.array(colormap_phenotypes).shape)

In [None]:
# 2-D PCA of the silhouette vectors, coloured by diagnosis.
pca = PCA(n_components=2)
pca_data = pca.fit_transform(sh)
plot = plt.scatter(pca_data[:,0], pca_data[:,1], c=np.array(colormap_phenotypes))
plt.legend(handles=plot.legend_elements()[0], labels=['CN', 'ADHD'])
plt.title('PCA')
#plt.show()

In [None]:
# 2-D t-SNE of the same vectors (no random_state is set, so the layout varies between runs).
tsne = TSNE(n_components=2)
tsne_data = tsne.fit_transform(sh)
plot = plt.scatter(tsne_data[:,0], tsne_data[:,1], c=np.array(colormap_phenotypes))
plt.legend(handles=plot.legend_elements()[0], labels=['CN', 'ADHD'])
plt.title('TSNE')
#plt.show()

# OASIS

In [None]:
# OASIS controls: at most 200 files from ts_extracted/controls/AAL.
paths_aal = []
count = 0
p = Path("ts_extracted/controls/AAL")
for x in p.rglob("*"):
    if count > 199:
        break
    # rglob("*") also yields directories; passing one to read_csv would fail,
    # so only regular files are collected (and counted towards the limit).
    if not x.is_file():
        continue
    paths_aal.append(str(x))
    count = count + 1

# Each table: drop the first column and transpose before storing.
timeseries_array_control = []
for path in paths_aal:
    df = pd.read_csv(path, header=0)

    df = df.drop(df.columns[0], axis=1)
    timeseries_array_control.append(df.T)
len(timeseries_array_control)

In [None]:
# One 'Control' label per loaded control table.
phenotypes_array = np.repeat('Control', len(timeseries_array_control))

In [None]:
# OASIS patients: at most 200 files from ts_extracted/patients/AAL.
paths_aal = []
p = Path("ts_extracted/patients/AAL")
count = 0
for x in p.rglob("*"):
    if count > 199:
        break
    # rglob("*") also yields directories; passing one to read_csv would fail,
    # so only regular files are collected (and counted towards the limit).
    if not x.is_file():
        continue
    paths_aal.append(str(x))
    count = count + 1

# Each table: drop the first column and transpose before storing.
timeseries_array_patient = []
for path in paths_aal:
    df = pd.read_csv(path, header=0)
    df = df.drop(df.columns[0], axis=1)
    timeseries_array_patient.append(df.T)
len(timeseries_array_patient)

In [None]:
# Append one 'Patient' label per patient table. Not idempotent: re-running this cell
# appends the patient labels again.
phenotypes_array = np.concatenate((phenotypes_array, np.repeat('Patient', len(timeseries_array_patient))))

In [None]:
# Controls first, then patients — the same order as the labels.
timeseries_array = timeseries_array_control + timeseries_array_patient
print(len(timeseries_array))
print(len(phenotypes_array))

In [None]:
timeseries_array[0]

In [None]:
# Build matrices and persistence for the OASIS subjects.
data_fMRI_OASIS = DataFMRI(timeseries_array, aal_labels, phenotypes_array)

In [None]:
# Persist the OASIS connectivity matrices.
np.save(f'fMRI_results/OASIS/matrices.npy', data_fMRI_OASIS.matrices)

### Learning

##### dim 0

In [None]:
# Finite dimension-0 persistence intervals, one array per OASIS subject.
intervals_array=data_fMRI_OASIS.get_persistence_intervals_array(dim=0)

In [None]:
# Random hold-out split: the first 30% of a permutation becomes the test set.
test_size = 0.3
perm = np.random.permutation(len(phenotypes_array))
limit = int(test_size * len(phenotypes_array))
test_sub = perm[:limit]
train_sub = perm[limit:]
train_phenotypes, test_phenotypes = (np.array(phenotypes_array)[subset]
                                     for subset in (train_sub, test_sub))
train_intervals = [intervals_array[k] for k in train_sub]
test_intervals = [intervals_array[k] for k in test_sub]

In [None]:
# Definition of pipeline
# keep finite points -> optional min-max scaling -> vectorisation ("TDA") -> classifier.
pipe = Pipeline([("Separator", gd.representations.DiagramSelector(limit=np.inf, point_type="finite")),
                 ("Scaler",    gd.representations.DiagramScaler(scalers=[([0,1], MinMaxScaler())])),
                 ("TDA",       gd.representations.PersistenceImage()),
                 ("Estimator", GradientBoostingClassifier())])


# Grid searched by GridSearchCV: scaler on/off x vectorisation x estimator.
param =    [{"Scaler__use":         [False, True],
             "TDA":                 [
                                     #gd.representations.PersistenceImage(),
                                     gd.representations.Landscape(),
                                     gd.representations.Silhouette(),
                                     gd.representations.TopologicalVector()
                                     ], 
             
             "Estimator":           [GradientBoostingClassifier(),
                                     RandomForestClassifier(),
                                     SVC()]},]
            

In [None]:
# Macro-averaged F1 as the model-selection and reporting metric.
from sklearn.metrics import f1_score, make_scorer
f1 = make_scorer(f1_score , average='macro')

In [None]:
# 3-fold grid search over `param`.
model = GridSearchCV(pipe, param, cv=3, scoring=f1)
#model = GridSearchCV(pipe, param, cv=3)

In [None]:
# Labels are still the strings 'Control' / 'Patient' here.
model = model.fit(train_intervals, train_phenotypes)

In [None]:
print(model.best_params_)

In [None]:
# GridSearchCV.score uses the `scoring` it was built with, i.e. macro F1.
print("Train f1 = " + str(model.score(train_intervals, train_phenotypes)))
print("Test f1  = " + str(model.score(test_intervals,  test_phenotypes)))

##### dim 1

In [None]:
# Finite dimension-1 persistence intervals, one array per OASIS subject.
intervals_array=data_fMRI_OASIS.get_persistence_intervals_array(dim=1)

In [None]:
# Fresh random 70/30 split for the dimension-1 features.
test_size = 0.3
perm = np.random.permutation(len(phenotypes_array))
limit = int(test_size * len(phenotypes_array))
test_sub = perm[:limit]
train_sub = perm[limit:]
train_phenotypes, test_phenotypes = (np.array(phenotypes_array)[subset]
                                     for subset in (train_sub, test_sub))
train_intervals = [intervals_array[k] for k in train_sub]
test_intervals = [intervals_array[k] for k in test_sub]

In [None]:
# Same grid search as for dimension 0, now on the dimension-1 split.
model = GridSearchCV(pipe, param, cv=3, scoring=f1)
#model = GridSearchCV(pipe, param, cv=3)

In [None]:
model = model.fit(train_intervals, train_phenotypes)

In [None]:
print(model.best_params_)

In [None]:
# GridSearchCV.score uses the `scoring` it was built with, i.e. macro F1.
print("Train f1 = " + str(model.score(train_intervals, train_phenotypes)))
print("Test f1  = " + str(model.score(test_intervals,  test_phenotypes)))

In [None]:
# Labels as stored on the view at construction time.
data_fMRI_OASIS.phenotypes_array

### diagrams

In [None]:
# Positions of patients and controls. The patient indices keep the name `ADHD_indices`
# from the CNI section; here they refer to the 'Patient' label.
oasis_group_labels = np.array(phenotypes_array)
ADHD_indices, = np.where(oasis_group_labels == 'Patient')
control_indices, = np.where(oasis_group_labels == 'Control')

In [None]:
# 50-bin histograms of all connectivity values per group (both drawn on the current axes).
# `hist_ADHD` holds the 'Patient' group here.
hist_ADHD = plt.hist(np.array(data_fMRI_OASIS.matrices)[ADHD_indices].flatten(),bins=50)
hist_control = plt.hist(np.array(data_fMRI_OASIS.matrices)[control_indices].flatten(),bins=50)

In [None]:
# Counts (*_0) and bin edges (*_1) under fMRI_results/OASIS/.
dataset = 'OASIS'
np.save(f'fMRI_results/{dataset}/hist_control_0.npy', hist_control[0])
np.save(f'fMRI_results/{dataset}/hist_AD_0.npy', hist_ADHD[0])
np.save(f'fMRI_results/{dataset}/hist_control_1.npy', hist_control[1])
np.save(f'fMRI_results/{dataset}/hist_AD_1.npy', hist_ADHD[1])

In [None]:
# Pooled persistence diagram of all patients.
data_fMRI_OASIS.plot_persistence_diagram(ADHD_indices)

In [None]:
# Pooled persistence diagram of all controls.
data_fMRI_OASIS.plot_persistence_diagram(control_indices)

In [None]:
# Overlaid barcodes of all patients.
data_fMRI_OASIS.plot_persistence_barcode(ADHD_indices)

In [None]:
# Overlaid barcodes of all controls.
data_fMRI_OASIS.plot_persistence_barcode(control_indices)

In [None]:
import matplotlib.patches as mpatches

In [None]:
# Pool the per-subject persistence lists of each group into one list
# (the first subject's list is the start value, the others are concatenated onto it).
diagrams_ADHD = sum((data_fMRI_OASIS.diagrams[i] for i in ADHD_indices[1:]),
                    data_fMRI_OASIS.diagrams[ADHD_indices[0]])

diagrams_control = sum((data_fMRI_OASIS.diagrams[i] for i in control_indices[1:]),
                       data_fMRI_OASIS.diagrams[control_indices[0]])

In [None]:
# Side-by-side pooled persistence diagrams: patients (titled 'AD') left, controls right.
fig, ax = plt.subplots(1, 2, figsize=(10,4))


gudhi.plot_persistence_diagram(diagrams_ADHD, legend=True,
                               max_intervals=0,
                               #colormap='red',
                               axes=ax[0], fontsize=10)
ax[0].set_title('AD')


gudhi.plot_persistence_diagram(diagrams_control, legend=True, max_intervals=0, 
                               #colormap='blue',
                                     axes=ax[1], fontsize=10)
ax[1].set_title('Control')


In [None]:
# Separate the pooled (dimension, interval) entries into dimension 0 and dimension 1.
diagrams_ADHD_0, diagrams_ADHD_1 = (
    [entry[1] for entry in diagrams_ADHD if entry[0] == wanted_dim] for wanted_dim in (0, 1)
)

diagrams_control_0, diagrams_control_1 = (
    [entry[1] for entry in diagrams_control if entry[0] == wanted_dim] for wanted_dim in (0, 1)
)

In [None]:
# Save the pooled intervals per group and dimension; the patient group is written to
# the 'AD_diagram_*' file names.
np.save(f'fMRI_results/{dataset}/control_diagram_0.npy', diagrams_control_0)
np.save(f'fMRI_results/{dataset}/control_diagram_1.npy', diagrams_control_1)
np.save(f'fMRI_results/{dataset}/AD_diagram_0.npy', diagrams_ADHD_0)
np.save(f'fMRI_results/{dataset}/AD_diagram_1.npy', diagrams_ADHD_1)

In [None]:
# All OASIS barcodes on one axes: controls in red first, then patients in blue.
axis = None
for group_indices, colour in ((control_indices, 'red'), (ADHD_indices, 'blue')):
    for subject_idx in group_indices:
        drawn_axes = gudhi.plot_persistence_barcode(data_fMRI_OASIS.diagrams[subject_idx],
                                                    max_intervals=0, legend=True, alpha=0.5,
                                                    axes=axis, colormap=[colour, colour])
        if axis is None:
            # the very first call creates the axes every later call draws on
            axis = drawn_axes

axis.set_title('AD patients and controls')
patch1, patch2 = (mpatches.Patch(color=colour, label=name)
                  for colour, name in (('red', 'Control'), ('blue', 'AD')))
axis.legend(handles=[patch1, patch2])

### Clusters OASIS

In [None]:
# Dimension-0 intervals for the clustering views.
intervals_array=data_fMRI_OASIS.get_persistence_intervals_array(dim=0)

In [None]:
# Silhouette vectorisation (1000 samples), each interval weighted by its length (death - birth).
SH = gd.representations.Silhouette(resolution=1000, weight=lambda x: np.power(x[1]-x[0],1))
sh = SH.fit_transform(intervals_array)

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [None]:
# Shapes of the feature matrix and label vector, then a 0/1 colour code (1 = 'Patient').
print(sh.shape)
print(np.array(phenotypes_array).shape)
colormap_phenotypes = []
for item in phenotypes_array:
    colormap_phenotypes.append(1 if item == 'Patient' else 0)
print(np.array(colormap_phenotypes).shape)

In [None]:
pca = PCA(n_components=2)
pca_data = pca.fit_transform(sh)
plot = plt.scatter(pca_data[:,0], pca_data[:,1], c=np.array(colormap_phenotypes))
plt.legend(handles=plot.legend_elements()[0], labels=['CN', 'ADHD'])
plt.title('PCA')
#plt.show()

In [None]:
tsne = TSNE(n_components=2)
tsne_data = tsne.fit_transform(sh)
plot = plt.scatter(tsne_data[:,0], tsne_data[:,1], c=np.array(colormap_phenotypes))
plt.legend(handles=plot.legend_elements()[0], labels=['CN', 'ADHD'])
plt.title('TSNE')
#plt.show()