In [1]:
import numpy as np
import pandas as pd
import os

from nilearn.connectome import ConnectivityMeasure

import torch
from torch_geometric.data import Data

# Data sources. All paths are relative strings.
# Directory holding one '<subject_id>_ts_raw.txt' file per subject.
data_timeseries = 'data/raw_ts'
# Not read anywhere in this section; presumably a cortical-thickness table — TODO confirm.
data_ct = 'data/CT.csv'
# Not read anywhere in this section; presumably per-subject Euler numbers — TODO confirm.
data_euler = 'data/Euler.csv'
# Not read anywhere in this section; presumably the root for generated graph data — TODO confirm.
graph_root = 'data/graph'

def get_ts_filenames(num_subjects=None):
    """
    Lists the raw timeseries filenames found in the timeseries directory.

    Only entries ending in '_ts_raw.txt' are returned. Callers derive subject
    IDs by stripping that suffix, so any other directory entry (hidden files,
    notes, sub-directories) would otherwise turn into a bogus subject ID.

    Args:
        num_subjects: Maximum number of filenames to return. If None, all
            matching filenames are returned.

    Returns:
        Sorted list of timeseries filenames (names only, without directory).
    """

    ts_filenames = sorted(f for f in os.listdir(data_timeseries)
                          if f.endswith('_ts_raw.txt'))

    if num_subjects is not None:
        ts_filenames = ts_filenames[:num_subjects]

    return ts_filenames


# TODO: make selection random.
def get_subject_ids(num_subjects=None):
    """
    Returns subject IDs derived from the timeseries filenames.

    Each ID is the filename with its trailing "_ts_raw.txt" removed. When the
    number of subjects is not specified, IDs for all files are returned.

    Args:
        num_subjects: The number of subjects to return IDs for.

    Returns:
        List of subject IDs, in the order the filenames are listed.
    """

    suffix_length = len("_ts_raw.txt")

    subject_ids = []
    for filename in get_ts_filenames(num_subjects):
        subject_ids.append(filename[:-suffix_length])

    return subject_ids


def get_raw_timeseries(subject_ids):
    """
    Loads the raw timeseries array of every subject in the given list.

    Each subject's data is read from '<subject_id>_ts_raw.txt' inside the
    timeseries directory as comma-separated values. The path of every file is
    printed before it is read.

    Args:
        subject_ids: List of subject IDs.

    Returns:
        List of timeseries arrays, one per subject and in the same order.
        Rows correspond to brain regions, columns to timeseries values.
    """

    def _read_subject(subject_id):
        # Build the path, announce it, then parse the comma-separated file.
        path = os.path.join(data_timeseries, subject_id + '_ts_raw.txt')
        print("Reading timeseries file %s" % path)
        return np.loadtxt(path, delimiter=',')

    return [_read_subject(subject_id) for subject_id in subject_ids]


# TODO: include the argument for the kind of connectivity matrix (partial
# correlation, correlation, lasso,...)
def get_functional_connectivity(subject_id, timeseries, save=True, save_path='data/processed_ts'):
    """
    Derives the correlation matrix for the parcellated timeseries data.

    Args:
        subject_id: ID of subject. Used as the output filename stem when saving.
        timeseries: Parcellated timeseries of shape [number ROI, timepoints].
        save: Indicates whether to save the connectivity matrix to a file.
        save_path: Directory where the connectivity matrix is stored as
            '<subject_id>.csv'. It is created if it does not exist yet.

    Returns:
        The flattened lower triangle of the correlation matrix for the
        parcellated timeseries data, with the diagonal discarded.
    """

    conn_measure = ConnectivityMeasure(
        kind='correlation',
        vectorize=True,
        discard_diagonal=True)
    # The input is stored as [ROI, timepoints]; it is transposed because
    # ConnectivityMeasure takes one (timepoints, ROI) array per subject.
    connectivity = conn_measure.fit_transform([np.transpose(timeseries)])[0]

    if save:
        # Saving is on by default, so make sure the target directory exists
        # instead of failing on a fresh checkout. An empty save_path means
        # "current directory" and needs no creation.
        if save_path:
            os.makedirs(save_path, exist_ok=True)
        np.savetxt(os.path.join(save_path, "{}.csv".format(subject_id)),
                   connectivity,
                   delimiter=',')

    return connectivity




In [7]:
# Run the pipeline on a single subject: resolve its ID, load its raw
# timeseries, and derive the vectorized correlation matrix.
subject_ids = get_subject_ids(1)
print(subject_ids)
ts = get_raw_timeseries(subject_ids)
# `save` is left at its default (True), so this call also writes
# data/processed_ts/<subject_id>.csv.
conn = get_functional_connectivity(subject_ids[0], ts[0])




['UKB1000028']
Reading timeseries file data/raw_ts/UKB1000028_ts_raw.txt


In [4]:
# Number of entries in the vectorized connectivity array for this subject.
print(len(conn))

70500


In [6]:
# Store the connectivity vector with np.save under the subject's ID, in the
# same directory that get_functional_connectivity writes its CSV to by default.
np.save(os.path.join('data/processed_ts', subject_ids[0]), conn)