# Constructing the full set of graphs

Import libraries

In [None]:
import networkx as nx
import numpy as np
import pandas as pd
import torch

In [None]:
torch_version = str(torch.__version__)
scatter_src = f"https://pytorch-geometric.com/whl/torch-{torch_version}.html"
sparse_src = f"https://pytorch-geometric.com/whl/torch-{torch_version}.html"
!pip install torch-scatter -f $scatter_src
!pip install torch-sparse -f $sparse_src
!pip install torch-geometric

Looking in links: https://pytorch-geometric.com/whl/torch-2.5.1+cu121.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.5.0%2Bcu121/torch_scatter-2.1.2%2Bpt25cu121-cp310-cp310-linux_x86_64.whl (10.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt25cu121
Looking in links: https://pytorch-geometric.com/whl/torch-2.5.1+cu121.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.5.0%2Bcu121/torch_sparse-0.6.18%2Bpt25cu121-cp310-cp310-linux_x86_64.whl (5.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m51.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.18+pt25cu121
Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise, then
# make that device (with float32) the default for newly created tensors.
# torch.set_default_tensor_type(torch.cuda.FloatTensor) is deprecated and
# requires CUDA unconditionally, which defeated the CPU fallback below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_default_dtype(torch.float32)
torch.set_default_device(device)

  _C._set_default_tensor_type(t)


In [None]:
def torch_pearson_correlation_matrix(X):
  """Compute the Pearson correlation matrix between the columns of X.

  Args:
    X: 2-D floating-point tensor of shape (n_samples, n_variables); each
      column is one variable observed over the rows.

  Returns:
    Tensor of shape (n_variables, n_variables) whose (i, j) entry is the
    correlation between columns i and j. A column with zero variance yields
    NaN in its row and column.
  """
  centered = X - torch.mean(X, dim=0)
  # Un-normalised covariance: the 1/(n-1) factor cancels in the correlation.
  covariance = torch.matmul(centered.t(), centered)
  inv_std = torch.rsqrt(torch.diagonal(covariance))
  # Scale row i by 1/std_i and column j by 1/std_j. Multiplying by the 1-D
  # vector on both sides would broadcast over columns only (t() is a no-op on
  # a 1-D tensor) and produce cov_ij / var_j instead of a symmetric matrix.
  correlation = inv_std.unsqueeze(1) * covariance * inv_std.unsqueeze(0)
  return correlation

def construct_graph(roi_time_series, threshold=0.7, return_functional_connectivity=True, adjacency_matrix_type='torch', adjacency_representation = 'edge_list'):
  """Build a thresholded functional-connectivity graph from ROI time series.

  Args:
    roi_time_series: 2-D NumPy array (a torch tensor is accepted too) whose
      columns are correlated with each other; presumably one row per time
      point and one column per ROI.
    threshold: two distinct columns are connected when their correlation is
      strictly greater than this value.
    return_functional_connectivity: when true, the correlation matrix is
      returned alongside the adjacency.
    adjacency_matrix_type: 'numpy' converts the adjacency to a NumPy array;
      any other value leaves it as a torch tensor.
    adjacency_representation: 'edge_list' returns a (2, n_edges) tensor of
      (row, column) indices; any other value returns the dense 0/1 matrix.

  Returns:
    adjacency, or the tuple (adjacency, functional_connectivity).
  """
  roi_time_series = torch.as_tensor(roi_time_series).to(device)
  functional_connectivity = torch_pearson_correlation_matrix(roi_time_series)

  # Clear the diagonal on a boolean mask. Subtracting an identity matrix
  # instead turns every diagonal entry that fails the threshold (a NaN
  # self-correlation, or threshold >= 1) into -1, which nonzero() would then
  # report as a self-loop.
  n_nodes = len(functional_connectivity)
  off_diagonal = ~torch.eye(n_nodes, dtype=torch.bool, device=functional_connectivity.device)
  connected = (functional_connectivity > threshold) & off_diagonal

  if adjacency_representation == 'edge_list':
    adjacency = connected.nonzero().t()
  else:
    # Dense form keeps the floating-point 0/1 matrix the function returned before.
    adjacency = connected.to(torch.get_default_dtype())

  if adjacency_matrix_type == 'numpy':
    adjacency = adjacency.cpu().numpy()

  if return_functional_connectivity:
    return adjacency, functional_connectivity
  return adjacency

The data come from the ABIDE I release of the [Preprocessed Connectomes Project](http://preprocessed-connectomes-project.org/abide/download.html). We use the time series extracted with the AAL atlas, which divides the brain into 116 regions of interest.

In [None]:
import os
import subprocess
from torch_geometric.data import Data

def download_sample(sample_name, destination_folder='/content/data/fmri'):
  """Download the AAL ROI time-series file of one sample with wget.

  Args:
    sample_name: identifier inserted into the remote file name
      (`<sample_name>_rois_aal.1D`).
    destination_folder: directory the file is saved into.

  The wget exit status is not checked, so a sample that is missing on the
  server is skipped silently; get_dataset ignores samples whose file is
  absent.
  """
  base_url = 'https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative/Outputs/cpac/filt_global/rois_aal'
  url = f'{base_url}/{sample_name}_rois_aal.1D'
  subprocess.run([
    "wget",
    "-P", destination_folder,
    "--continue",  # resume instead of re-downloading an existing file
    "--quiet",
    # NOTE(review): this disables TLS certificate verification; drop the flag
    # if the host's certificate validates in your environment.
    "--no-check-certificate",
    url,
  ])

def get_dataset(threshold=0.7, download=True):
  """Build one graph per sample whose ROI time-series file is on disk.

  Args:
    threshold: correlation threshold forwarded to construct_graph.
    download: when true, first fetch the phenotypic table and then every
      sample listed in its FILE_ID column into /content/data.

  Returns:
    List of torch_geometric Data objects where x is the functional
    connectivity matrix, edge_index the thresholded edge list and y the
    label DX_GROUP - 1.
  """
  data_dir = '/content/data'
  fmri_dir = os.path.join(data_dir, 'fmri')
  phenotypic_csv = os.path.join(data_dir, 'Phenotypic_V1_0b_preprocessed1.csv')

  if download:
    os.makedirs(fmri_dir, exist_ok=True)
    # subprocess keeps the function plain Python; --no-clobber avoids piling
    # up ".1", ".2", ... copies of the table on every re-run.
    subprocess.run([
      "wget",
      "-P", data_dir,
      "--no-clobber",
      "https://raw.githubusercontent.com/preprocessed-connectomes-project/abide/master/Phenotypic_V1_0b_preprocessed1.csv",
    ])

  # Read the table once, whether or not it was just downloaded.
  phenotypic_data = pd.read_csv(phenotypic_csv)

  if download:
    phenotypic_data['FILE_ID'].apply(download_sample)

  data_list = []
  for file_id, dx_group in zip(phenotypic_data['FILE_ID'], phenotypic_data['DX_GROUP']):
    filepath = os.path.join(fmri_dir, file_id + "_rois_aal.1D")
    if not os.path.exists(filepath):
      # Not every row of the phenotypic table has a file in the PCP dataset.
      continue
    roi_time_series = np.loadtxt(filepath)
    edge_index, x = construct_graph(roi_time_series=roi_time_series, threshold=threshold)
    # Graph label: 0 is for autistic patients and 1 is for typical controls.
    y = torch.tensor([int(dx_group) - 1], dtype=torch.long)
    data_list.append(Data(x=x, edge_index=edge_index, y=y))

  return data_list

In [None]:
dataset = get_dataset()
# Preview only: the list holds one Data object per sample, so echoing all of
# it floods the notebook output.
print(f"{len(dataset)} graphs")
dataset[:5]

[Data(x=[116, 116], edge_index=[2, 744], y=[1]),
 Data(x=[116, 116], edge_index=[2, 388], y=[1]),
 Data(x=[116, 116], edge_index=[2, 555], y=[1]),
 Data(x=[116, 116], edge_index=[2, 443], y=[1]),
 Data(x=[116, 116], edge_index=[2, 752], y=[1]),
 Data(x=[116, 116], edge_index=[2, 720], y=[1]),
 Data(x=[116, 116], edge_index=[2, 671], y=[1]),
 Data(x=[116, 116], edge_index=[2, 403], y=[1]),
 Data(x=[116, 116], edge_index=[2, 593], y=[1]),
 Data(x=[116, 116], edge_index=[2, 504], y=[1]),
 Data(x=[116, 116], edge_index=[2, 413], y=[1]),
 Data(x=[116, 116], edge_index=[2, 587], y=[1]),
 Data(x=[116, 116], edge_index=[2, 610], y=[1]),
 Data(x=[116, 116], edge_index=[2, 660], y=[1]),
 Data(x=[116, 116], edge_index=[2, 559], y=[1]),
 Data(x=[116, 116], edge_index=[2, 709], y=[1]),
 Data(x=[116, 116], edge_index=[2, 541], y=[1]),
 Data(x=[116, 116], edge_index=[2, 607], y=[1]),
 Data(x=[116, 116], edge_index=[2, 674], y=[1]),
 Data(x=[116, 116], edge_index=[2, 498], y=[1]),
 Data(x=[116, 116], 

In [None]:
from torch.utils.data import random_split
from torch import Generator

# 80% of the dataset goes into the training set, the remainder into the test set.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# random_split draws its permutation on the default tensor device, so the
# generator has to live on that device. Querying it instead of hard-coding
# 'cuda' keeps this cell working on CPU-only machines; the explicit seed pins
# the split.
SPLIT_SEED = 42
generator = Generator(device=torch.empty(0).device)
generator.manual_seed(SPLIT_SEED)
train_dataset, test_dataset = random_split(dataset, [train_size, test_size], generator=generator)

# Training the GNN

In [None]:
# The dataset items are torch_geometric Data graphs. PyG's DataLoader collates
# them into mini-batches, whereas the default collate function of
# torch.utils.data.DataLoader does not accept Data objects.
from torch_geometric.loader import DataLoader

# With shuffle=True the sampler draws a permutation on the default tensor
# device, so it is given a generator that lives on that same device.
shuffle_generator = torch.Generator(device=torch.empty(0).device)

train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True, generator=shuffle_generator)
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)