In [2]:
import os
import numpy as np
# import torch
# import networkx as nx
# from torch_geometric.data import InMemoryDataset
# from torch_geometric.utils import from_numpy_array, from_networkx

In [18]:
# Build, for each correlation method, the list of correlation-matrix files found in
# that method's directory. Keys are the method names; values are lists of *file names*
# (not full paths) -- join them with the method directory before loading.
methods = ['pearson', 'spearman', 'kendall', 'partial']
full_corr_path_lists = {}
for method in methods:
    method_dir = f'ADNI_full/corr_matrices/corr_matrix_{method}/'
    # os.listdir() returns entries in arbitrary order; sort so the listing is
    # reproducible across runs/machines and consistent between methods.
    full_corr_path_lists[method] = sorted(os.listdir(method_dir))

In [20]:
# Load the diagnostic labels as strings from the comma-separated label file.
labels_full = np.loadtxt(
    'ADNI_full/diagnostic_label.csv',
    delimiter=',',
    dtype=str,
)

**TODO:** continue from here.

In [None]:
# Remove the "SMC" entries: keep only the matrices/labels at the positions returned by
# f.filter_SMC_patient_info() (presumably the indices of non-SMC patients -- verify
# against the `functions` module).
idx = f.filter_SMC_patient_info()
corr_matrices = [corr_matrices_full[i] for i in idx]
labels = [labels_full[i] for i in idx]

# Encode the diagnosis strings in place as integer classes:
#   CN -> 0, EMCI/MCI/LMCI -> 1, AD -> 2.
# Any other value is reported and left unchanged in the list.
for i, diagnosis in enumerate(labels):
    if diagnosis == 'CN':
        labels[i] = 0
    elif diagnosis in ('EMCI', 'MCI', 'LMCI'):
        labels[i] = 1
    elif diagnosis == 'AD':
        labels[i] = 2
    else:
        print('Error: incorrect label')

In [None]:
# Sanity check: there must be exactly one label per correlation matrix.
assert len(corr_matrices) == len(labels)

In [None]:
class ADNI_dataset(InMemoryDataset):
    """Graph dataset built from the notebook-level correlation matrices.

    Every matrix in the notebook-level ``corr_matrices`` list is thresholded into a
    weighted graph. Each node gets five standardized features (degree, betweenness
    centrality, neighbourhood efficiency, clustering coefficient, and the ratio of
    neighbourhood efficiency to global efficiency). The matching entry of the
    notebook-level ``labels`` list is stored as the graph label ``y``.

    Args:
        root: Dataset root directory, forwarded to ``InMemoryDataset``.
        transform: Forwarded to ``InMemoryDataset``.
        pre_transform: Forwarded to ``InMemoryDataset``.
        threshold: Entries whose absolute value is strictly below this value are
            set to zero (no edge); all other entries keep their original value.
    """

    def __init__(self, root, transform=None, pre_transform=None, threshold=0.4):
        # Set before calling the base initializer because process() reads it; the
        # base class is expected to call process() when the processed file is
        # missing (PyG behaviour -- confirm for the installed version).
        self.threshold = threshold
        super().__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def processed_file_names(self):
        """Name of the single file that holds the collated dataset."""
        return ['data.pt']

    @staticmethod
    def _threshold_matrix(corr_matrix, threshold):
        """Return a float copy of ``corr_matrix`` with entries where |value| < threshold zeroed."""
        corr_matrix = np.asarray(corr_matrix, dtype=float)
        return np.where(np.abs(corr_matrix) < threshold, 0.0, corr_matrix)

    @staticmethod
    def _standardize(values):
        """Z-score ``values``; a constant array maps to zeros instead of NaN (0/0)."""
        values = np.asarray(values, dtype=float)
        std = values.std()
        if std == 0:
            return np.zeros_like(values)
        return (values - values.mean()) / std

    @staticmethod
    def _neighbourhood_efficiencies(graph):
        """Return, per node, the global efficiency of the subgraph induced by its neighbours."""
        efficiencies = []
        for node in graph.nodes():
            neighbourhood = graph.subgraph(graph.neighbors(node))
            # A subgraph with fewer than two nodes has no node pairs: efficiency is 0.
            if neighbourhood.number_of_nodes() > 1:
                efficiencies.append(nx.global_efficiency(neighbourhood))
            else:
                efficiencies.append(0.0)
        return np.array(efficiencies)

    def process(self):
        """Build one graph per correlation matrix and save the collated dataset.

        Reads the notebook-level ``corr_matrices`` and ``labels`` lists and writes
        the collated ``(data, slices)`` tuple to ``self.processed_paths[0]``.
        """
        graphs = []
        for i, corr_matrix in enumerate(corr_matrices):
            edge_matrix = self._threshold_matrix(corr_matrix, self.threshold)

            # NOTE(review): diagonal entries are thresholded like any other entry, so a
            # unit diagonal (presumably the case for correlation matrices) produces a
            # self-loop on every node -- confirm this is intended.
            corr_matrix_nx = nx.from_numpy_array(edge_matrix)

            # Per-node topological measures; all are listed in graph node order.
            deg_array = np.array([degree for _, degree in corr_matrix_nx.degree()])
            bc_array = np.array(list(nx.betweenness_centrality(corr_matrix_nx).values()))
            cc_array = np.array(list(nx.clustering(corr_matrix_nx).values()))
            le_array = self._neighbourhood_efficiencies(corr_matrix_nx)

            # Ratio of neighbourhood efficiency to the global efficiency of the graph.
            # When the global efficiency is 0, use zeros rather than dividing by zero.
            ge = nx.global_efficiency(corr_matrix_nx)
            if ge != 0:
                ratio_le_ge_array = le_array / ge
            else:
                ratio_le_ge_array = np.zeros_like(le_array)

            # Standardize each feature and stack them as columns: one row per node,
            # columns ordered as degree, betweenness, neighbourhood efficiency,
            # clustering coefficient, efficiency ratio.
            feature_columns = [
                self._standardize(deg_array),
                self._standardize(bc_array),
                self._standardize(le_array),
                self._standardize(cc_array),
                self._standardize(ratio_le_ge_array),
            ]
            x = torch.tensor(np.column_stack(feature_columns), dtype=torch.float)

            corr_matrix_data = from_networkx(corr_matrix_nx)
            corr_matrix_data.x = x
            corr_matrix_data.y = labels[i]

            # Add to running list of all dataset items
            graphs.append(corr_matrix_data)

        data, slices = self.collate(graphs)
        torch.save((data, slices), self.processed_paths[0])

In [None]:
# Build (or load from the processed cache under this root) the graph dataset.
# NOTE(review): the root is named 'ADNI_0.5' but no threshold is passed, so the class
# default (threshold=0.4) is used -- confirm which of the two is intended.
dataset = ADNI_dataset('ADNI_0.5')

# Dataset-level summary.
print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # First graph of the dataset, used for the per-graph summary below.

print()
print(data)
print('=============================================================')

# Gather some statistics about the first graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

In [None]:
# I may need to import some of the things below, but for now I don't know which ones
from nilearn import datasets, plotting, image
import nibabel as nib
from nilearn.maskers import NiftiLabelsMasker
from nilearn.connectome import ConnectivityMeasure
from nilearn.interfaces.fmriprep import load_confounds
import pandas as pd
import torch.nn.functional as F
from torch.nn import Sequential, Linear, ReLU, GRU, BatchNorm1d
from torch_geometric.nn import EdgeConv, GCNConv, GraphConv
from torch_geometric.nn import global_mean_pool
from torch_geometric.data import Data, DataLoader
import networkx as nx
from sklearn.model_selection import StratifiedKFold, train_test_split
import wandb
import random
import functions as f