In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import os, glob, re

In [2]:
# Root of the local OpenBHB dataset; every other location is derived from it.
# Directory paths keep their trailing slash.
openbhb_dir = "/Users/mansoor/Documents/GSU/Projects/Neuroimaging-Project/Dataset/OpenBHB/"
roi_dir = os.path.join(openbhb_dir, "roi/")
roi_npy_dir = os.path.join(roi_dir, "npy_files/")
labels_dir = os.path.join(openbhb_dir, "labels/")
resource_dir = os.path.join(openbhb_dir, "resource/")

# Participant tables: official train/validation splits and the merged file
# written by this notebook. Components are passed to os.path.join separately
# (not pre-concatenated with "+") so the result does not depend on the
# directory string ending in a separator.
f_train_participants = os.path.join(labels_dir, "train_participants.tsv")
f_val_participants = os.path.join(labels_dir, "val_participants.tsv")
f_participants = os.path.join(labels_dir, "participants.csv")

# Site-label tables: official train/validation splits and the merged file.
f_train_sites = os.path.join(labels_dir, "train_official_site_class_labels.tsv")
f_val_sites = os.path.join(labels_dir, "val_official_site_class_labels.tsv")
f_sites = os.path.join(labels_dir, "site_labels.csv")

In [None]:
# Root of the local OpenBHB dataset.
openbhb_dir = "/Users/mansoor/Documents/GSU/Projects/Neuroimaging-Project/Dataset/OpenBHB/"

# Sub-directories below the root (each keeps its trailing slash).
roi_dir, debiased_roi_dir, labels_dir = (
    os.path.join(openbhb_dir, subdir)
    for subdir in ("roi/", "debiased_roi/", "labels/")
)

# Merged participant and site tables inside the labels directory.
f_participants, f_sites = (
    os.path.join(labels_dir, fname)
    for fname in ("participants.csv", "site_labels.csv")
)

In [3]:
# Read the train/validation participant and site tables (tab-separated).
train_participants, val_participants, train_sites, val_sites = (
    pd.read_csv(tsv_path, sep="\t")
    for tsv_path in (f_train_participants, f_val_participants, f_train_sites, f_val_sites)
)

# Stack train on top of validation. Each frame keeps its own row index, so
# index labels repeat in the combined table, and that index is written out as
# the first (unnamed) CSV column.
participants = pd.concat([train_participants, val_participants])
# The saved participants file omits the last column of the combined table.
participants.iloc[:, :-1].to_csv(f_participants)

sites = pd.concat([train_sites, val_sites])
sites.to_csv(f_sites)

print(participants.shape, sites.shape)

(3984, 13) (3984, 2)


In [5]:
# Collect the per-subject ROI array paths for each atlas.
# glob.glob returns matches in arbitrary (filesystem-dependent) order, so the
# lists are sorted to keep the row order of every table built from them
# reproducible across machines and runs.
fpaths_vbm_roi = sorted(glob.glob(os.path.join(roi_npy_dir, "*cat12vbm_desc-gm_ROI.npy")))
fpaths_deskn_roi = sorted(glob.glob(os.path.join(roi_npy_dir, "*desikan_ROI.npy")))
fpaths_destrx_roi = sorted(glob.glob(os.path.join(roi_npy_dir, "*destrieux_ROI.npy")))

# Load the ROI label lists and the FreeSurfer channel names as string arrays.
# np.loadtxt splits on whitespace, so each entry is expected to be one token.
vbm_labels = np.loadtxt(os.path.join(resource_dir, 'cat12vbm_labels.txt'), dtype=str)
desikan_labels = np.loadtxt(os.path.join(resource_dir, 'freesurfer_atlas-desikan_labels.txt'), dtype=str)
destrieux_labels = np.loadtxt(os.path.join(resource_dir, 'freesurfer_atlas-destrieux_labels.txt'), dtype=str)
channels = np.loadtxt(os.path.join(resource_dir, 'freesurfer_channels.txt'), dtype=str)


In [6]:
# Function to create column names
def create_column_names(labels, channels=""):
    """Build "<channel>_<label>" column names for a flattened ROI array.

    Names are emitted channel by channel: every label of the first channel,
    then every label of the second channel, and so on. This is the order in
    which a C-order flatten (``reshape(-1)``) visits an array whose last two
    axes are (channel, label), so the names line up with the flattened values.

    NOTE(review): assumes each ROI file is shaped (..., n_channels, n_labels);
    confirm with ``np.load(path).shape`` before relying on the headers.

    Args:
        labels: Iterable of ROI label strings.
        channels: Iterable of channel-name strings. The default (an empty
            string) contains no channels and therefore yields an empty list.

    Returns:
        list[str]: ``len(channels) * len(labels)`` names, channel-major.
    """
    # Materialise labels so a one-shot iterator can be reused for every channel.
    labels = list(labels)
    return [f"{channel}_{label}" for channel in channels for label in labels]

# Column headers for each atlas table.
# The VBM table uses the VBM label array unchanged (same object, not a copy).
vbm_column_names = vbm_labels
# The FreeSurfer atlases get one header per (label, channel) combination.
desikan_column_names = create_column_names(labels=desikan_labels, channels=channels)
destrieux_column_names = create_column_names(labels=destrieux_labels, channels=channels)


In [None]:
# Build the raw and TIV-normalised VBM ROI tables (one row per ROI file) and
# write both to <roi_dir>/vbm/.
vbm_out_dir = os.path.join(roi_dir, "vbm")
os.makedirs(vbm_out_dir, exist_ok=True)

# Header as a plain list: np.insert would cast 'participant_id' to the string
# dtype of the label array and silently truncate it when the labels are shorter.
vbm_header = ['participant_id', *vbm_column_names]

# First TIV value per participant, indexed once instead of scanning the whole
# participants table for every file.
tiv_by_participant = (
    participants.drop_duplicates('participant_id')
    .set_index('participant_id')['tiv']
)

# Initialize data matrices
vbm_data_matrix = []
norm_vbm_data_matrix = []

# Load data and construct matrices
for fpath in fpaths_vbm_roi:
    # Parse the id from the file name only, so a "sub-<digits>" fragment in a
    # parent directory can never be picked up instead.
    participant_id = int(re.search(r'sub-(\d+)', os.path.basename(fpath)).group(1))
    # Load each array once and reuse it for both tables.
    vbm_data = np.load(fpath).flatten()
    # Raises KeyError if the subject is absent from the participants table.
    tiv = tiv_by_participant.loc[participant_id]
    normalized_vbm_data = vbm_data / tiv
    vbm_data_matrix.append([participant_id] + vbm_data.tolist())
    norm_vbm_data_matrix.append([participant_id] + normalized_vbm_data.tolist())

vbm_df = pd.DataFrame(vbm_data_matrix, columns=vbm_header)
vbm_df.to_csv(os.path.join(vbm_out_dir, 'vbm_roi.csv'), index=False)
norm_vbm_df = pd.DataFrame(norm_vbm_data_matrix, columns=vbm_header)
norm_vbm_df.to_csv(os.path.join(vbm_out_dir, 'normalized_vbm_roi.csv'), index=False)


In [None]:
# Build the Desikan ROI table (one row per ROI file) and write it to
# <roi_dir>/desikan/.
desikan_out_dir = os.path.join(roi_dir, "desikan")
os.makedirs(desikan_out_dir, exist_ok=True)

desikan_data_matrix = []

for fpath in fpaths_deskn_roi:
    # Parse the id from the file name only, so a "sub-<digits>" fragment in a
    # parent directory can never be picked up instead.
    participant_id = int(re.search(r'sub-(\d+)', os.path.basename(fpath)).group(1))
    # reshape(-1) flattens in C order (last axis varies fastest).
    desikan_data_matrix.append([participant_id] + np.load(fpath).reshape(-1).tolist())

# NOTE: normalisation is not enabled for this atlas. Two variants were sketched
# earlier: dividing the second channel (data[:, 1, :], assumed to be GM volume)
# by the participant's TIV, and standardising the matrix with a StandardScaler,
# written to 'normalized_desikan_roi.csv'.

# Header as a plain list: np.insert casts the inserted value to the string
# dtype of the existing names and can silently truncate 'participant_id'.
desikan_df = pd.DataFrame(desikan_data_matrix, columns=['participant_id', *desikan_column_names])
desikan_df.to_csv(os.path.join(desikan_out_dir, 'desikan_roi.csv'), index=False)


In [None]:
# Build the Destrieux ROI table (one row per ROI file) and write it to
# <roi_dir>/destrieux/.
destrieux_out_dir = os.path.join(roi_dir, "destrieux")
os.makedirs(destrieux_out_dir, exist_ok=True)

destrieux_data_matrix = []
# Placeholder for the TIV-normalised rows; nothing fills it yet (see NOTE below).
norm_destrieux_data_matrix = []

for fpath in fpaths_destrx_roi:
    # Parse the id from the file name only, so a "sub-<digits>" fragment in a
    # parent directory can never be picked up instead.
    participant_id = int(re.search(r'sub-(\d+)', os.path.basename(fpath)).group(1))
    # reshape(-1) flattens in C order (last axis varies fastest).
    destrieux_data_matrix.append([participant_id] + np.load(fpath).reshape(-1).tolist())

# NOTE: normalisation is not enabled for this atlas. The sketched variant
# divided the second channel (data[:, 1, :], assumed to be GM volume) by the
# participant's TIV and wrote 'normalized_destrieux_roi.csv'.

# Header as a plain list: np.insert casts the inserted value to the string
# dtype of the existing names and can silently truncate 'participant_id'.
destrieux_df = pd.DataFrame(destrieux_data_matrix, columns=['participant_id', *destrieux_column_names])
destrieux_df.to_csv(os.path.join(destrieux_out_dir, 'destrieux_roi.csv'), index=False)


In [None]:
# Create separate data matrices for each channel in Desikan and Destrieux and
# write one CSV per (atlas, channel) pair.
desikan_out_dir = os.path.join(roi_dir, "desikan")
destrieux_out_dir = os.path.join(roi_dir, "destrieux")

for channel in channels:
    # Select columns by their exact "<channel>_<label>" name. A bare
    # startswith(channel) test would also pull in the columns of any other
    # channel whose name happens to begin with this channel's name.
    desikan_wanted = {f"{channel}_{label}" for label in desikan_labels}
    destrieux_wanted = {f"{channel}_{label}" for label in destrieux_labels}

    # Filter the full header lists so the original column order is preserved.
    desikan_channel_columns = [col for col in desikan_column_names if col in desikan_wanted]
    destrieux_channel_columns = [col for col in destrieux_column_names if col in destrieux_wanted]

    desikan_channel_df = desikan_df[['participant_id'] + desikan_channel_columns]
    destrieux_channel_df = destrieux_df[['participant_id'] + destrieux_channel_columns]

    desikan_channel_df.to_csv(os.path.join(desikan_out_dir, f'desikan_{channel}_roi.csv'), index=False)
    destrieux_channel_df.to_csv(os.path.join(destrieux_out_dir, f'destrieux_{channel}_roi.csv'), index=False)
