In [None]:
import glob
import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


### PATH

#path = ""

deepCeres = path + "deepCeres/"

acapulco = path + "acapulco/"
#acapulco_source = "HNU/derivatives/acapulco/"

suit = path + "vbm_suit/"

ceres = path + "ceres/"

In [2]:
# Get subjects list and information
subjects = pd.read_csv(path+'subjects.csv')
subjects_list = subjects['ID'].tolist()

print(subjects_list)
subjects

['sub-0025434', 'sub-0025435', 'sub-0025440', 'sub-0025444', 'sub-0025446', 'sub-0025447', 'sub-0025449', 'sub-0025454', 'sub-0025455']


Unnamed: 0,ID,age,sex
0,sub-0025434,24,M
1,sub-0025435,30,F
2,sub-0025440,24,M
3,sub-0025444,27,F
4,sub-0025446,30,M
5,sub-0025447,26,M
6,sub-0025449,22,M
7,sub-0025454,26,M
8,sub-0025455,25,M


In [3]:
# Columns order
finalColumns = ['ID', 'ses', 'age', 'sex', 'ICV', 'Left.I.V', 'Right.I.V',
       'Left.VI', 'Right.VI',
       'Left.Crus.I', 'Right.Crus.I', 'Left.Crus.II', 'Right.Crus.II',
       'Left.VIIB', 'Right.VIIB', 'Left.VIIIA', 'Right.VIIIA', 'Left.VIIIB',
       'Right.VIIIB', 'Left.IX', 'Right.IX', 'Left.X', 'Right.X']

# SUIT

In [None]:
# Columns to keep and convert
col = {
    '1' : 'Left.I.IV',
    '2' : 'Right.I.IV',
    '3' : 'Left.V',
    '4' : 'Right.V',
    '5' : 'Left.VI',
    '6' : 'Vermis.VI',
    '7' : 'Right.VI',
    '8' : 'Left.Crus.I',
    '9' : 'Vermis.Crus.I',
    '10' : 'Right.Crus.I',
    '11' : 'Left.Crus.II',
    '12' : 'Vermis.Crus.II',
    '13' : 'Right.Crus.II',
    '14' : 'Left.VIIB',
    '15' : 'Vermis.VIIB',
    '16' : 'Right.VIIB',
    '17' : 'Left.VIIIA',
    '18' : 'Vermis.VIIIA',
    '19' : 'Right.VIIIA',
    '20' : 'Left.VIIIB',
    '21' : 'Vermis.VIIIB',
    '22' : 'Right.VIIIB',
    '23' : 'Left.IX',
    '24' : 'Vermis.IX',
    '25' : 'Right.IX',
    '26' : 'Left.X',
    '27' : 'Vermis.X',
    '28' : 'Right.X'
}

In [None]:
# List to store DataFrames
all_data = []

# Loop through subjects and sessions
for sub in subjects_list:
    for ses in range(1, 11):
            ses_str = f"{ses:02d}"

            # Get the matching file (for given subject and given session)
            dataFile = os.path.join(suit, 'output', sub, f"ses-{ses_str}", f"{sub}_ses-{ses_str}_volumes.csv")

            # Read CSV
            data = pd.read_csv(dataFile)
            
            # Keep only the columns in col and rename them
            data = data[list(col.keys())].rename(columns=col)

            # Group I.III, IV and V into I.V
            data.insert(4, "Left.I.V", data["Left.I.IV"] + data["Left.V"])
            data.insert(4, "Right.I.V", data["Right.I.IV"] + data["Right.V"])
            data = data.drop(["Left.I.IV", "Left.V", "Right.I.IV", "Right.V"], axis = 1)

            # Add Subject ID and Session columns at the beginning
            data.insert(0, 'ses', ses_str)
            data.insert(0, 'ID', sub)

            # Append to the list
            all_data.append(data)

# Concatenate all DataFrames into one
suit_df = pd.concat(all_data, ignore_index=True)

# Add ICV from acapulco
suit_df.insert(4, 'ICV', acapulco_df['ICV'])

# Sort given id and session
suit_df = suit_df.sort_values(by=["ID", "ses"], ascending=[True, True]).reset_index(drop=True)

# Merge with age and sex
suit_df= suit_df.merge(subjects, on="ID", how="left")

# Sort columns
suit_df = suit_df[finalColumns]

# Save it
#suit_df.to_csv(suit+'all_volumes_cm3_suit.csv', index=False)

suit_df

Unnamed: 0,ID,ses,age,sex,ICV,Left.I.V,Right.I.V,Left.VI,Right.VI,Left.Crus.I,...,Left.VIIB,Right.VIIB,Left.VIIIA,Right.VIIIA,Left.VIIIB,Right.VIIIB,Left.IX,Right.IX,Left.X,Right.X
0,sub-0025434,01,24,M,1.343446e+06,0.862681,1.002819,0.417066,0.597826,0.393821,...,0.535253,0.696555,0.668899,0.651620,0.590486,0.509083,0.568836,0.477569,0.361954,0.014215
1,sub-0025434,02,24,M,1.343372e+06,0.853955,1.014351,0.408112,0.598702,0.386122,...,0.540073,0.690953,0.673191,0.644646,0.595652,0.509788,0.564359,0.464320,0.353843,0.016188
2,sub-0025434,03,24,M,1.332420e+06,0.841538,1.018619,0.413622,0.585782,0.385711,...,0.541757,0.686634,0.685087,0.657475,0.602996,0.509134,0.561709,0.498715,0.353603,0.014643
3,sub-0025434,04,24,M,1.349433e+06,0.847238,1.004125,0.408945,0.587977,0.392067,...,0.540239,0.674168,0.679486,0.638758,0.588832,0.499491,0.560928,0.446026,0.356292,0.020995
4,sub-0025434,05,24,M,1.339030e+06,0.855414,1.021954,0.405161,0.585696,0.397264,...,0.533606,0.680285,0.667408,0.652617,0.597888,0.505500,0.563029,0.486722,0.374291,0.014974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,sub-0025455,06,25,M,1.546649e+06,0.840829,1.072369,0.414127,0.603038,0.404564,...,0.551476,0.713603,0.660546,0.664862,0.594979,0.585125,0.626844,0.390269,0.368223,0.020601
86,sub-0025455,07,25,M,1.552172e+06,0.841449,1.068354,0.413700,0.614258,0.409943,...,0.549178,0.707143,0.655430,0.662806,0.582397,0.565542,0.627374,0.399066,0.351849,0.018049
87,sub-0025455,08,25,M,1.550504e+06,0.850791,1.085654,0.421811,0.619353,0.428034,...,0.551624,0.739004,0.671196,0.685510,0.609017,0.575221,0.631934,0.388032,0.359183,0.015951
88,sub-0025455,09,25,M,1.561325e+06,0.848014,1.067561,0.411961,0.603794,0.416272,...,0.558507,0.731791,0.644508,0.660577,0.580398,0.581645,0.641512,0.400539,0.369326,0.017902


In [None]:
# Scale data
scaler = MinMaxScaler()

# Divide by ICV and scale
suit_scaled = suit_df.copy()
suit_scaled.iloc[:,4:] = scaler.fit_transform(suit_scaled.iloc[:,4:].div(suit_scaled.ICV, axis=0))

# Save the dataframe
#suit_scaled.to_csv(suit+'all_volumes_cm3_suit_scaled.csv', index=False)

suit_scaled

Unnamed: 0,ID,ses,age,sex,ICV,Left.I.V,Right.I.V,Left.VI,Right.VI,Left.Crus.I,...,Left.VIIB,Right.VIIB,Left.VIIIA,Right.VIIIA,Left.VIIIB,Right.VIIIB,Left.IX,Right.IX,Left.X,Right.X
0,sub-0025434,01,24,M,0.0,0.833613,0.682306,0.642484,0.500622,0.568363,...,0.778786,0.824773,0.832211,0.922462,0.663737,0.448056,0.628883,0.503151,0.620832,0.426090
1,sub-0025434,02,24,M,0.0,0.780191,0.737209,0.554792,0.506430,0.465464,...,0.817874,0.795015,0.854795,0.881570,0.689215,0.451693,0.600992,0.456079,0.560226,0.595524
2,sub-0025434,03,24,M,0.0,0.746332,0.797099,0.642274,0.454601,0.502438,...,0.867337,0.802098,0.946236,0.989029,0.749502,0.469202,0.613331,0.593107,0.580198,0.473225
3,sub-0025434,04,24,M,0.0,0.715418,0.667385,0.544928,0.420958,0.521540,...,0.799655,0.689055,0.871711,0.829959,0.642827,0.389499,0.563657,0.383830,0.566578,1.000000
4,sub-0025434,05,24,M,0.0,0.806249,0.788932,0.538715,0.435407,0.632061,...,0.779695,0.749760,0.835914,0.941019,0.709676,0.438557,0.604098,0.541443,0.722489,0.495485
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,sub-0025455,06,25,M,0.0,0.019125,0.344303,0.079423,0.027326,0.000000,...,0.325594,0.414559,0.335879,0.485706,0.302656,0.443922,0.476132,0.009897,0.305414,0.741878
86,sub-0025455,07,25,M,0.0,0.006435,0.312137,0.063227,0.077504,0.045636,...,0.295835,0.372865,0.302127,0.463111,0.240273,0.350900,0.466856,0.032707,0.190737,0.546830
87,sub-0025455,08,25,M,0.0,0.061079,0.387873,0.136011,0.109392,0.260820,...,0.317035,0.524017,0.376577,0.582681,0.355970,0.395101,0.495309,0.000000,0.240787,0.392250
88,sub-0025455,09,25,M,0.0,0.014921,0.283221,0.027923,0.000000,0.090743,...,0.338067,0.467093,0.235749,0.431986,0.217360,0.405380,0.523204,0.030011,0.290005,0.528107


# Ceres

In [None]:
# Columns to keep and convert
col = {
    'ICV cm3':'ICV',
    'I-II right cm3':'Right.I.II',
    'I-II left cm3':'Left.I.II',
    'III right cm3':'Right.III',
    'III left cm3':'Left.III',
    'IV right cm3':'Right.IV',
    'IV left cm3':'Left.IV',
    'V right cm3':'Right.V',
    'V left cm3':'Left.V',
    'VI right cm3':'Right.VI',
    'VI left cm3':'Left.VI',
    'Crus I right cm3':'Right.Crus.I',
    'Crus I left cm3':'Left.Crus.I',
    'Crus II right cm3':'Right.Crus.II',
    'Crus II left cm3':'Left.Crus.II',
    'VIIB right cm3':'Right.VIIB',
    'VIIB left cm3':'Left.VIIB',
    'VIIIA right cm3':'Right.VIIIA',
    'VIIIA left cm3':'Left.VIIIA',
    'VIIIB right cm3':'Right.VIIIB',
    'VIIIB left cm3':'Left.VIIIB',
    'IX right cm3':'Right.IX',
    'IX left cm3':'Left.IX',
    'X right cm3':'Right.X',
    'X left cm3':'Left.X',
}

In [None]:
# List to store DataFrames
all_data = []

# Loop through subjects and sessions
for sub in subjects_list:
    for ses in range(1,11):
        ses_str = f"{ses:02d}"
        
        # Get the matching file (for given subject and given session)
        dataFile = glob.glob(os.path.join(ceres, 'output', f"{sub}_ses-{ses_str}_T1w_job*_archive", "report_job*.csv"))[0]

        # Read CSV
        data = pd.read_csv(dataFile, sep=';')

        # Keep only the columns in col and rename them
        data = data[list(col.keys())].rename(columns=col)

        # Group I.II, III, IV and V into I.V
        data.insert(4, "Left.I.V", data["Left.I.II"] + data["Left.III"] + data["Left.IV"] + data["Left.V"])
        data.insert(4, "Right.I.V", data["Right.I.II"] + data["Right.III"] + data["Right.IV"] + data["Right.V"])
        data = data.drop(["Left.I.II", "Left.III", "Left.IV", "Left.V", "Right.I.II", "Right.III", "Right.IV", "Right.V"], axis = 1)

        # Add Subject ID and Session columns at the beginning
        data.insert(0, 'ses', ses_str)
        data.insert(0, 'ID', sub)

        # Append to the list
        all_data.append(data)

# Concatenate all DataFrames into one
ceres_df = pd.concat(all_data, ignore_index=True)

# Merge with age and sex
ceres_df= ceres_df.merge(subjects, on="ID", how="left")

# Sort columns
ceres_df = ceres_df[finalColumns]

# Save it
#ceres_df.to_csv(ceres+'all_volumes_cm3_ceres.csv', index=False)

ceres_df

Unnamed: 0,ID,ses,age,sex,ICV,Left.I.V,Right.I.V,Left.VI,Right.VI,Left.Crus.I,...,Left.VIIB,Right.VIIB,Left.VIIIA,Right.VIIIA,Left.VIIIB,Right.VIIIB,Left.IX,Right.IX,Left.X,Right.X
0,sub-0025434,01,24,M,1418.7222,7.132228,6.759350,7.7821,8.0772,11.2271,...,4.1741,4.0692,6.2838,5.8551,4.1771,4.3809,2.7709,3.0298,0.50270,0.53592
1,sub-0025434,02,24,M,1418.1212,7.296152,6.946668,7.5907,7.6660,11.0415,...,4.0624,4.0164,6.2243,5.8062,4.1181,4.3306,2.7751,3.0379,0.52051,0.52202
2,sub-0025434,03,24,M,1402.7339,7.240692,6.653297,7.6716,7.9788,10.8914,...,4.3348,4.1617,6.3355,5.6604,4.2029,4.3873,2.8159,3.0160,0.54251,0.52078
3,sub-0025434,04,24,M,1407.3909,7.366328,7.025498,7.2819,7.6181,10.9443,...,3.9195,3.8675,6.6064,5.8314,4.0801,4.4156,2.7321,2.9892,0.50511,0.57372
4,sub-0025434,05,24,M,1410.6375,7.276687,6.727409,7.1922,7.5574,10.6189,...,3.9187,3.9828,6.3191,5.5275,4.1745,4.1013,2.7890,2.9883,0.51012,0.50710
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,sub-0025455,06,25,M,1485.0321,7.313601,5.681421,8.3756,9.4175,9.7702,...,4.0195,4.8932,6.1187,5.1283,4.5453,3.4591,3.4511,3.8996,0.57169,0.62402
86,sub-0025455,07,25,M,1482.4569,7.027703,5.701556,8.4550,9.2323,9.9022,...,3.9602,5.0549,6.4989,5.7240,4.7824,3.4666,3.5620,3.7719,0.58097,0.58899
87,sub-0025455,08,25,M,1494.1008,6.972269,5.806073,8.7405,9.2614,10.0760,...,3.9842,4.4417,6.6473,5.9169,4.6407,3.8710,3.5869,3.7193,0.55220,0.60597
88,sub-0025455,09,25,M,1490.8639,7.096977,5.708222,8.6374,9.7394,9.7828,...,3.7958,5.0930,6.5299,5.6255,4.7549,3.4505,3.5236,3.8979,0.57910,0.65138


In [None]:
# Scale data
scaler = MinMaxScaler()

# Divide by ICV and scale
ceres_scaled = ceres_df.copy()
ceres_scaled.iloc[:,4:] = scaler.fit_transform(ceres_scaled.iloc[:,4:].div(ceres_scaled.ICV, axis=0))

# Save the dataframe
#ceres_scaled.to_csv(ceres+'all_volumes_cm3_ceres_scaled.csv', index=False)

ceres_scaled

Unnamed: 0,ID,ses,age,sex,ICV,Left.I.V,Right.I.V,Left.VI,Right.VI,Left.Crus.I,...,Left.VIIB,Right.VIIB,Left.VIIIA,Right.VIIIA,Left.VIIIB,Right.VIIIB,Left.IX,Right.IX,Left.X,Right.X
0,sub-0025434,01,24,M,0.0,0.585333,0.453955,0.657375,0.539514,0.641987,...,0.466811,0.435980,0.519799,0.503558,0.540026,0.455366,0.130130,0.181440,0.001654,0.230756
1,sub-0025434,02,24,M,0.0,0.663168,0.518816,0.598873,0.381773,0.581420,...,0.403584,0.409337,0.501332,0.486497,0.509301,0.440384,0.133601,0.187223,0.096970,0.161804
2,sub-0025434,03,24,M,0.0,0.674342,0.443653,0.650206,0.536630,0.571163,...,0.587604,0.508945,0.560041,0.455733,0.579578,0.472803,0.179908,0.194116,0.244793,0.184359
3,sub-0025434,04,24,M,0.0,0.722071,0.563969,0.520101,0.385712,0.576812,...,0.338582,0.347046,0.641842,0.512099,0.505601,0.477083,0.119281,0.171255,0.035892,0.444584
4,sub-0025434,05,24,M,0.0,0.672091,0.456210,0.486803,0.355273,0.458753,...,0.332891,0.402832,0.543025,0.394848,0.551469,0.376199,0.152137,0.166425,0.056334,0.100130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,sub-0025455,06,25,M,0.0,0.517667,0.000000,0.725616,0.895164,0.008209,...,0.274746,0.751705,0.377451,0.152437,0.628699,0.122662,0.469815,0.610003,0.231415,0.534396
86,sub-0025455,07,25,M,0.0,0.395814,0.009783,0.753559,0.832675,0.055927,...,0.245954,0.836629,0.498915,0.364993,0.754511,0.126655,0.542039,0.538704,0.283379,0.370628
87,sub-0025455,08,25,M,0.0,0.346856,0.029120,0.818233,0.816735,0.086451,...,0.242075,0.513366,0.528938,0.416598,0.663167,0.237368,0.540152,0.490605,0.116057,0.429760
88,sub-0025455,09,25,M,0.0,0.408861,0.001457,0.793335,1.000000,0.000000,...,0.143726,0.841310,0.497106,0.319211,0.726618,0.116136,0.506034,0.600029,0.257398,0.653885


# Acapulco

In [7]:
# Columns to keep and convert
col = {
    'eTIV':'ICV',
    'Total_Cerebel_Vol':'Total_Cereb',
    'Rigt.IV' : 'Right.IV' # Error in ACAPULCO's outputs
}

# Column to drop
col_drop = ['Total_Cereb', 'Background', 'Corpus.Medullare', 'Vermis.IX', 'Vermis.VI', 'Vermis.VII', 'Vermis.VIII', 'Vermis.X']

In [None]:
# List to store DataFrames
all_data = []

# Loop through sessions
for ses in range(1, 11):
    ses_str = f"{ses:02d}"

    # Read CSV for given session
    data = pd.read_csv(acapulco_source + 'ses-' + ses_str + '/QC+vols/Cerebel_vols.csv', index_col=0)

    # Keep chosen subjects
    data = data[data["ID"].isin(subjects_list)].reset_index(drop=True)

    # Rename the columns in col and drop col_drop
    data = data.rename(columns=col)
    data = data.drop(col_drop, axis=1)

    # Group I.III, IV and V into I.V
    data.insert(4, "Left.I.V", data["Left.I.III"] + data["Left.IV"] + data["Left.V"])
    data.insert(4, "Right.I.V", data["Right.I.III"] + data["Right.IV"] + data["Right.V"])
    data = data.drop(["Left.I.III", "Left.IV", "Left.V", "Right.I.III", "Right.IV", "Right.V"], axis = 1)

    # Add Session column at the beginning
    data.insert(1, 'ses', ses_str)

    # Append to the list
    all_data.append(data)

# Concatenate all DataFrames into one
acapulco_df = pd.concat(all_data, ignore_index=True)

# Sort given id and session
acapulco_df = acapulco_df.sort_values(by=["ID", "ses"], ascending=[True, True]).reset_index(drop=True)

# Merge with age and sex
acapulco_df= acapulco_df.merge(subjects, on="ID", how="left")

# Sort columns
acapulco_df = acapulco_df[finalColumns]

for i in range(4,acapulco_df.shape[1]):
    acapulco_df.isetitem(i, acapulco_df.iloc[:,i].astype(float))

# Save it
#acapulco_df.to_csv(acapulco+'all_volumes_cm3_acapulco.csv', index=False)

acapulco_df

Unnamed: 0,ID,ses,age,sex,ICV,Left.I.V,Right.I.V,Left.VI,Right.VI,Left.Crus.I,...,Left.VIIB,Right.VIIB,Left.VIIIA,Right.VIIIA,Left.VIIIB,Right.VIIIB,Left.IX,Right.IX,Left.X,Right.X
0,sub-0025434,01,24,M,1.343446e+06,7937.0,7227.0,7128.0,7968.0,10977.0,...,5346.0,5079.0,4545.0,4665.0,3739.0,3217.0,2365.0,2350.0,354.0,358.0
1,sub-0025434,02,24,M,1.343372e+06,7859.0,7349.0,7462.0,7932.0,11118.0,...,5346.0,5003.0,5130.0,4963.0,3676.0,3064.0,2429.0,2405.0,352.0,371.0
2,sub-0025434,03,24,M,1.332420e+06,7706.0,7588.0,7558.0,7593.0,10475.0,...,5222.0,5108.0,5080.0,4812.0,4027.0,3267.0,2389.0,2432.0,365.0,402.0
3,sub-0025434,04,24,M,1.349433e+06,7855.0,7310.0,7548.0,8131.0,11026.0,...,5647.0,5012.0,4602.0,4877.0,3634.0,3433.0,2360.0,2437.0,365.0,382.0
4,sub-0025434,05,24,M,1.339030e+06,7635.0,7277.0,7378.0,7783.0,10772.0,...,4899.0,4616.0,5317.0,4942.0,3820.0,3135.0,2352.0,2332.0,356.0,370.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,sub-0025455,06,25,M,1.546649e+06,6925.0,6418.0,9392.0,9149.0,10762.0,...,5541.0,6173.0,6025.0,4413.0,3116.0,3456.0,3125.0,3162.0,407.0,416.0
86,sub-0025455,07,25,M,1.552172e+06,7219.0,6765.0,9179.0,9210.0,11306.0,...,6126.0,5779.0,5566.0,4373.0,2873.0,3343.0,3067.0,3119.0,469.0,415.0
87,sub-0025455,08,25,M,1.550504e+06,7061.0,6686.0,9065.0,8875.0,11458.0,...,6109.0,5975.0,6041.0,4687.0,2893.0,3275.0,3276.0,3078.0,377.0,428.0
88,sub-0025455,09,25,M,1.561325e+06,7253.0,6911.0,9176.0,9118.0,11103.0,...,6301.0,6220.0,5851.0,5080.0,2800.0,3254.0,3094.0,3244.0,436.0,447.0


In [9]:
# Scale data
scaler = MinMaxScaler()

# Divide by ICV and scale
acapulco_scaled = acapulco_df.copy()
acapulco_scaled.iloc[:, 4:] = scaler.fit_transform(acapulco_scaled.iloc[:, 4:].div(acapulco_scaled.ICV, axis=0))

# Save the dataframe
#acapulco_scaled.to_csv(acapulco+'all_volumes_cm3_acapulco_scaled.csv', index=False)

acapulco_scaled

Unnamed: 0,ID,ses,age,sex,ICV,Left.I.V,Right.I.V,Left.VI,Right.VI,Left.Crus.I,...,Left.VIIB,Right.VIIB,Left.VIIIA,Right.VIIIA,Left.VIIIB,Right.VIIIB,Left.IX,Right.IX,Left.X,Right.X
0,sub-0025434,01,24,M,0.0,0.865763,0.598178,0.397863,0.790578,0.600640,...,0.420060,0.286929,0.404210,0.529233,0.894147,0.617016,0.275993,0.361725,0.334403,0.366802
1,sub-0025434,02,24,M,0.0,0.842978,0.635613,0.544393,0.769896,0.644820,...,0.420172,0.252974,0.625073,0.666298,0.873459,0.542686,0.315047,0.388265,0.327504,0.415832
2,sub-0025434,03,24,M,0.0,0.816641,0.727785,0.613683,0.609056,0.471066,...,0.389166,0.318884,0.621961,0.615081,1.000000,0.654481,0.302646,0.410886,0.383235,0.545018
3,sub-0025434,04,24,M,0.0,0.831439,0.613642,0.567222,0.864380,0.600664,...,0.525580,0.246914,0.418014,0.616709,0.854245,0.714660,0.266572,0.398400,0.367075,0.450795
4,sub-0025434,05,24,M,0.0,0.784452,0.620809,0.518069,0.697928,0.547766,...,0.255306,0.086149,0.702151,0.664012,0.924981,0.582164,0.272800,0.356762,0.345458,0.416585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,sub-0025455,06,25,M,0.0,0.301266,0.092880,0.849208,0.778352,0.092436,...,0.216244,0.413811,0.663992,0.146998,0.554048,0.512421,0.488777,0.552627,0.332756,0.379396
86,sub-0025455,07,25,M,0.0,0.369724,0.178666,0.755708,0.792611,0.228956,...,0.403316,0.252347,0.507049,0.124821,0.481589,0.459659,0.452326,0.530006,0.515280,0.371296
87,sub-0025455,08,25,M,0.0,0.331487,0.159659,0.716172,0.628816,0.273329,...,0.399865,0.330909,0.664315,0.251716,0.488179,0.432518,0.564340,0.514304,0.239160,0.415167
88,sub-0025455,09,25,M,0.0,0.367557,0.206588,0.734168,0.719405,0.156563,...,0.449010,0.409345,0.588931,0.394175,0.456094,0.414164,0.456997,0.574164,0.408093,0.467052


# DeepCeres

In [None]:
# Columns to keep and convert
col = {
    'ICV total volume cm3':'ICV',
    'Lobules I-II right volume cm3':'Right.I.II',
    'Lobules I-II left volume cm3':'Left.I.II',
    'Lobule III right volume cm3':'Right.III',
    'Lobule III left volume cm3':'Left.III',
    'Lobule IV right volume cm3':'Right.IV',
    'Lobule IV left volume cm3':'Left.IV',
    'Lobule V right volume cm3':'Right.V',
    'Lobule V left volume cm3':'Left.V',
    'Lobule VI right volume cm3':'Right.VI',
    'Lobule VI left volume cm3':'Left.VI',
    'Crus I right volume cm3':'Right.Crus.I',
    'Crus I left volume cm3':'Left.Crus.I',
    'Crus II right volume cm3':'Right.Crus.II',
    'Crus II left volume cm3':'Left.Crus.II',
    'Lobule VIIB right volume cm3':'Right.VIIB',
    'Lobule VIIB left volume cm3':'Left.VIIB',
    'Lobule VIIIA right volume cm3':'Right.VIIIA',
    'Lobule VIIIA left volume cm3':'Left.VIIIA',
    'Lobule VIIIB right volume cm3':'Right.VIIIB',
    'Lobule VIIIB left volume cm3':'Left.VIIIB',
    'Lobule IX right volume cm3':'Right.IX',
    'Lobule IX left volume cm3':'Left.IX',
    'Lobule X right volume cm3':'Right.X',
    'Lobule X left volume cm3':'Left.X',
}

In [None]:
# List to store DataFrames
all_data = []

# Loop through subjects and sessions
for sub in subjects_list:
    for ses in range(1,11):
        ses_str = f"{ses:02d}"
        
        # Get the matching file (for given subject and given session)
        dataFile = glob.glob(os.path.join(deepCeres, 'output', f"{sub}_ses-{ses_str}_T1w_job*_archive", "report_job*.csv"))[0]

        # Read CSV
        data = pd.read_csv(dataFile, sep=';')

        # Keep only the columns in col and rename them
        data = data[list(col.keys())].rename(columns=col)

        # Group I.II, III, IV and V into I.V
        data.insert(4, "Left.I.V", data["Left.I.II"] + data["Left.III"] + data["Left.IV"] + data["Left.V"])
        data.insert(4, "Right.I.V", data["Right.I.II"] + data["Right.III"] + data["Right.IV"] + data["Right.V"])
        data = data.drop(["Left.I.II", "Left.III", "Left.IV", "Left.V", "Right.I.II", "Right.III", "Right.IV", "Right.V"], axis = 1)

        # Add Subject ID and Session columns at the beginning
        data.insert(0, 'ses', ses_str)
        data.insert(0, 'ID', sub)

        # Append to the list
        all_data.append(data)

# Concatenate all DataFrames into one
deepCeres_df = pd.concat(all_data, ignore_index=True)

# Merge with age and sex
deepCeres_df= deepCeres_df.merge(subjects, on="ID", how="left")

# Sort columns
deepCeres_df = deepCeres_df[finalColumns]

# Save it
#deepCeres_df.to_csv(deepCeres+'all_volumes_cm3_deepCeres.csv', index=False)

deepCeres_df

Unnamed: 0,ID,ses,age,sex,ICV,Left.I.V,Right.I.V,Left.VI,Right.VI,Left.Crus.I,...,Left.VIIB,Right.VIIB,Left.VIIIA,Right.VIIIA,Left.VIIIB,Right.VIIIB,Left.IX,Right.IX,Left.X,Right.X
0,sub-0025434,01,24,M,1408.201707,4.246582,4.752831,6.454264,6.269872,7.857019,...,3.293878,3.547996,4.009021,4.119523,2.901065,3.155561,2.335308,1.985258,0.491773,0.469257
1,sub-0025434,02,24,M,1408.383019,4.352160,4.910541,6.463093,6.261966,7.830394,...,3.194440,3.462921,3.998851,4.388891,2.870346,3.101909,2.336107,1.986555,0.511975,0.441707
2,sub-0025434,03,24,M,1402.347482,4.305733,4.846783,6.362117,6.291957,7.897685,...,3.266066,3.540338,3.850674,4.424790,2.885757,3.152629,2.324942,2.075681,0.493933,0.477728
3,sub-0025434,04,24,M,1405.817267,4.335323,4.827556,6.397526,6.392161,8.074036,...,3.276598,3.588691,3.993677,4.234806,2.886858,3.061069,2.294390,1.996227,0.508515,0.437457
4,sub-0025434,05,24,M,1405.291993,4.321827,4.813056,6.362219,6.212109,7.757220,...,3.204526,3.460364,3.837572,4.217985,2.741096,3.105206,2.319976,1.906866,0.470874,0.476811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,sub-0025455,06,25,M,1479.323756,4.086042,4.891135,6.998606,7.028995,8.019136,...,3.900391,3.577991,4.045591,4.693309,2.701555,3.138566,2.685456,2.555550,0.530591,0.507447
86,sub-0025455,07,25,M,1479.673477,4.331893,4.667056,6.876086,6.922523,7.736560,...,3.801978,3.484630,4.174870,4.450086,2.319917,2.745250,2.758961,2.382967,0.541324,0.538021
87,sub-0025455,08,25,M,1483.479303,4.158589,4.635578,7.028928,7.079569,7.918853,...,3.913689,3.575013,4.227145,4.421203,2.674379,2.914375,2.862232,2.344410,0.537438,0.468282
88,sub-0025455,09,25,M,1485.778507,4.105124,4.819584,7.059520,7.040491,7.994983,...,3.956714,3.488227,4.081563,4.585995,2.601596,2.935468,2.668753,2.533030,0.566252,0.479361


In [None]:
# Scale data
scaler = MinMaxScaler()

# Divide by ICV and scale
deepCeres_scaled = deepCeres_df.copy()
deepCeres_scaled.iloc[:, 4:] = scaler.fit_transform(deepCeres_scaled.iloc[:, 4:].div(deepCeres_scaled.ICV, axis=0))

# Save the dataframe
#deepCeres_scaled.to_csv(deepCeres+'all_volumes_cm3_deepCeres_scaled.csv', index=False)

deepCeres_scaled

Unnamed: 0,ID,ses,age,sex,ICV,Left.I.V,Right.I.V,Left.VI,Right.VI,Left.Crus.I,...,Left.VIIB,Right.VIIB,Left.VIIIA,Right.VIIIA,Left.VIIIB,Right.VIIIB,Left.IX,Right.IX,Left.X,Right.X
0,sub-0025434,01,24,M,0.0,0.345361,0.822083,0.846639,0.672430,0.509840,...,0.074021,0.416224,0.500798,0.546727,0.746792,0.786289,0.282527,0.180655,0.248214,0.411485
1,sub-0025434,02,24,M,0.0,0.423839,0.948674,0.851768,0.667368,0.498007,...,0.000000,0.359312,0.494475,0.661405,0.726923,0.734141,0.282941,0.181400,0.352004,0.314114
2,sub-0025434,03,24,M,0.0,0.402995,0.914107,0.804578,0.700520,0.541373,...,0.063511,0.420964,0.416607,0.684843,0.744708,0.796158,0.281980,0.251493,0.269976,0.448399
3,sub-0025434,04,24,M,0.0,0.417170,0.888904,0.817212,0.749773,0.608638,...,0.065331,0.447357,0.495727,0.598974,0.740842,0.700135,0.251814,0.190917,0.338959,0.301940
4,sub-0025434,05,24,M,0.0,0.408277,0.878651,0.796059,0.646344,0.473982,...,0.012701,0.362676,0.404043,0.592459,0.648183,0.743911,0.273794,0.127436,0.145512,0.441612
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,sub-0025455,06,25,M,0.0,0.078590,0.744033,0.979946,0.917123,0.414165,...,0.384631,0.321710,0.407340,0.695254,0.536275,0.624314,0.465877,0.500519,0.316812,0.460138
86,sub-0025455,07,25,M,0.0,0.252750,0.571281,0.904159,0.857336,0.298230,...,0.314548,0.262045,0.479609,0.596051,0.303761,0.262501,0.523403,0.382689,0.368850,0.562366
87,sub-0025455,08,25,M,0.0,0.121910,0.538016,0.986437,0.934125,0.364243,...,0.386279,0.313479,0.502941,0.579718,0.515184,0.410922,0.599131,0.352364,0.343022,0.324232
88,sub-0025455,09,25,M,0.0,0.079478,0.673078,0.998410,0.906565,0.390160,...,0.412247,0.255246,0.417615,0.643581,0.468586,0.426078,0.443534,0.477705,0.479722,0.358843
