In [1]:
from pathlib import Path

# Make the path object:
cache_path = Path('/tmp/cache')

# Just because we have made a cache path object doesn't mean that the directory
# we made exists; here we check if it exists and make the directory if not.
if not cache_path.exists():
    cache_path.mkdir()

In [2]:
from cloudpathlib import S3Path, S3Client

client = S3Client(
    local_cache_dir=cache_path,
    no_sign_request=True)

hbn_base_path = S3Path(
    "s3://fcp-indi/",
    client=client)

In [3]:
import pandas as pd
from freesurfer_stats import CorticalParcellationStats

In [6]:
hbn_pod2_path = hbn_base_path / "data" / "Projects" / "HBN" / "derivatives" / "Freesurfer_version6.0.0"

In [7]:
import numpy as np
import pandas as pd
import os
import glob
import argparse
from freesurfer_stats import CorticalParcellationStats

# HELPTEXT = """
# Script to parse and collate FreeSurfer stats files across subjects
# Author: nikhil153
# Date: May-5-2022
# """

# Sample cmd:
#  python collate_freesurfer_stats.py --stat_file aparc.DKTatlas.stats \
#                                     --stat_measure average_thickness_mm \
#                                     --fs_output_dir /home/nikhil/projects/brain_changes/data/adni/derivatives/freesurfer-6.0.1/ \
#                                     --ukbb_dkt_ct_fields ../metadata/UKBB_DKT_CT_Fields.csv \
#                                     --ukbb_aseg_vol_fields ../metadata/UKBB_ASEG_vol_Fields.csv \
#                                     --aseg \
#                                     --save_dir ./

# parser = argparse.ArgumentParser(description=HELPTEXT)

# data
# TODO: Not sure how to handle multiple visits..
# parser.add_argument('--participants_list', dest='participants_list',                      
#                     help='path to participants list (csv or tsv')

# parser.add_argument('--fs_output_dir', help='path to fs_output_dir with all the subjects')
# parser.add_argument('--stat_file', default='aparc.DKTatlas.stats', help='name of a standard FS stat file')
# parser.add_argument('--stat_measure', default='average_thickness_mm', help='path to bids_dir')                    
# parser.add_argument('--ukbb_dkt_ct_fields', help='UKBB lookup table with fields ID and DKT ROI names')
# parser.add_argument('--ukbb_aseg_vol_fields', default="", help='UKBB lookup table with fields ID and ASEG ROI names')
# parser.add_argument('--aseg', action='store_true', help='Parse aseg.stats to collate subcortical volumes')
# parser.add_argument('--save_dir', default='./', help='path to save_dir')
# args = parser.parse_args()

def parse_aseg(aseg_file, stat_measure):
    """Function to parse aseg.stats file from freesurfer"""

    aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")

    aseg_df = pd.DataFrame(data=aseg_data)
    aseg_df = aseg_df[["f4","f3"]].rename(columns={"f3":stat_measure, "f4":"hemi_ROI"})
    aseg_df["hemi_ROI"] = aseg_df["hemi_ROI"].str.decode('utf-8') 

    # print(f"number of ROIs in aseg file: {len(aseg_df)}")

    # Get global volumes from the "measure" lines
    file_data = open(aseg_file, 'r')
    lines = file_data.readlines()
    measure_lines = []
    for line in lines:
        if "Measure" in line:
            measure_lines.append(line)

    global_df = pd.DataFrame(measure_lines)
    global_df = global_df.replace('\n','', regex=True)
    global_df = global_df[0].str.split(",", expand=True)
    global_df[0] = global_df[0].str.split(" ", expand=True)[2]
    global_df[0] = global_df[0].replace({"EstimatedTotalIntraCranialVol":"EstimatedTotalIntraCranial"}) #To match UKB field names
    global_df = global_df[[0,3]]

    global_df = global_df.rename(columns = {0:"hemi_ROI",3:stat_measure})

    aseg_df = pd.concat([aseg_df,global_df],axis=0)

    return aseg_df



# if __name__ == "__main__":
    # Read from csv
    # fs_output_dir = args.fs_output_dir
    # stat_file = args.stat_file
    # stat_measure = args.stat_measure
    # save_dir = args.save_dir
    # ukbb_dkt_ct_fields = args.ukbb_dkt_ct_fields
    # ukbb_aseg_vol_fields = args.ukbb_aseg_vol_fields

    # aseg = args.aseg

    # ukbb_dkt_ct_fields_df = pd.read_csv(ukbb_dkt_ct_fields)

# fs_output_dir = hbn_pod2_path
# # save_dir = args.save_dir
# ukbb_dkt_ct_fields = args.ukbb_dkt_ct_fields
# ukbb_aseg_vol_fields = args.ukbb_aseg_vol_fields

def get_freesurfer_stats(fs_output_dir, subject_id, stat_file='aparc.DKTatlas.stats', stat_measure='average_thickness_mm', aseg=True):

    # print(f"Starting to collate {stat_measure} in {fs_output_dir}\n")
    subject_dir_list = glob.glob(f"{fs_output_dir}sub*")
    subject_id_list = [subject_id]#[os.path.basename(x) for x in subject_dir_list]
    
    # print(f"Found {len(subject_id_list)} subjects\n")
    
    ### cortical surface measures 
    # print(f"***Parsing ASEG subcortical volumes***")
    hemispheres = ["lh", "rh"]
    
    hemi_stat_measures_dict = {}
    for hemi in hemispheres:
        stat_measure_df = pd.DataFrame()
        for subject_id in subject_id_list:
            # try:
            fs_stats_dir = fs_output_dir / subject_id / 'stats'
            stats = CorticalParcellationStats.read(fs_stats_dir / f"{hemi}.{stat_file}").structural_measurements
            
            cols = ["subject_id"] + list(stats["structure_name"].values)
            vals = [subject_id] + list(stats[stat_measure].values)
            
            df = pd.DataFrame(columns=cols)
            df.loc[0] = vals
            stat_measure_df = pd.concat([stat_measure_df, df], axis=0)
            # except:
                # print(f"Error parsing cortical data for {subject_id} ({hemi})")
    
        # # replace columns names with ukbb field IDs
        # field_df = ukbb_dkt_ct_fields_df[ukbb_dkt_ct_fields_df["hemi"]==hemi][["Field ID","roi"]]
        # roi_field_id_dict = dict(zip(field_df["roi"], field_df["Field ID"]))
        # stat_measure_df = stat_measure_df.rename(columns=roi_field_id_dict)
        
        hemi_stat_measures_dict[hemi] = stat_measure_df
    
    # merge left and right dfs
    stat_measure_LR_df = pd.merge(hemi_stat_measures_dict["lh"],hemi_stat_measures_dict["rh"], on="subject_id")
    
    # Drop columns omitted by DKT atlas
    if stat_file == "aparc.DKTatlas.stats":
        drop_ROIs = ["temporalpole","frontalpole","banks of the superior temporal sulcus"]
        for d_roi in drop_ROIs:
            if d_roi in stat_measure_LR_df.columns:
                stat_measure_LR_df = stat_measure_LR_df.drop(columns=[d_roi])
    
    # save_file = f"{stat_file.split('.')[1]}_{stat_measure.rsplit('_',1)[0]}.csv"
    
    # print(f"Saving cortical stat measures here: {save_dir}/{save_file}\n")
    # stat_measure_LR_df.to_csv(f"{save_dir}/{save_file}")
    
    # ASEG subcortical volumes
    if aseg:
        # print(f"***Parsing ASEG subcortical volumes***")
        stat_file = "aseg.stats"
        stat_measure = "Volume_mm3"
    
        # # Grab UKBB field ids lookup table
        # ukbb_aseg_vol_fields_df = pd.read_csv(ukbb_aseg_vol_fields)
        
        stat_measure_df = pd.DataFrame()
        for subject_id in subject_id_list:
            # try: 
            fs_stats_dir = fs_output_dir / subject_id / 'stats'
            aseg_file = fs_stats_dir / f"{stat_file}"
            stats = parse_aseg(aseg_file,stat_measure)
            
            cols = ["subject_id"] + list(stats["hemi_ROI"].values)
            vals = [subject_id] + list(stats[stat_measure].values)
            
            df = pd.DataFrame(columns=cols)
            df.loc[0] = vals
            stat_measure_df = pd.concat([stat_measure_df, df], axis=0)
    
            # except:
            #     print(f"Error parsing subcortical volumes for {subject_id}")
    
        
        # field_df = ukbb_aseg_vol_fields_df[ukbb_aseg_vol_fields_df["hemi_ROI"].isin(stat_measure_df.columns)]
        # common_rois = list(field_df["hemi_ROI"].values)
        # roi_field_id_dict = dict(zip(field_df["hemi_ROI"], field_df["Field ID"]))
    
        # print(f"Number of aseg vol ROIs after UKBB merge: {len(roi_field_id_dict)}")
    
        # # Rename ROIs with ukbb ids (remove the ROIs which don't have ukbb ids)
        # stat_measure_df = stat_measure_df[["subject_id"] + common_rois].copy()
        # stat_measure_df = stat_measure_df.rename(columns=roi_field_id_dict)
    
        # save_file = f"aseg_subcortical_volumes.csv"
        
        # print(f"Saving subcortical stat measures here: {save_dir}/{save_file}")
        # stat_measure_df.to_csv(f"{save_dir}/{save_file}")

    return pd.merge(stat_measure_LR_df, stat_measure_df, on='subject_id')

In [8]:
# Test with one subject
fs_subject_stats = get_freesurfer_stats(fs_output_dir=hbn_pod2_path, subject_id='NDARAA536PTU')
fs_subject_stats

    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")


Unnamed: 0,subject_id,caudalanteriorcingulate_x,caudalmiddlefrontal_x,cuneus_x,entorhinal_x,fusiform_x,inferiorparietal_x,inferiortemporal_x,isthmuscingulate_x,lateraloccipital_x,...,SupraTentorial,SupraTentorialNotVent,SupraTentorialNotVentVox,Mask,BrainSegVol-to-eTIV,MaskVol-to-eTIV,lhSurfaceHoles,rhSurfaceHoles,SurfaceHoles,EstimatedTotalIntraCranial
0,NDARAA536PTU,2.863,2.636,2.099,3.777,2.762,2.681,3.081,2.432,2.513,...,1085570.112871,1076191.112871,1073932.0,1614592.0,0.77762,1.0311,30,35,65,1565892.870337


In [9]:
hbn_part_path = hbn_base_path / "data" / "Projects" / "HBN" / "BIDS_curated" / "derivatives"

In [10]:
participants_table = pd.read_csv(hbn_part_path / "qsiprep" / "participants.tsv", sep="\t")
participants_table['subject_id'] = participants_table['subject_id'].apply(lambda x: x.removeprefix('sub-'))

In [45]:
fs_subject_stats_all = []
for index, row in participants_table.iterrows():

    if index % 100 == 0:
        print(index)

    # if len(fs_subject_stats_all) == 10:
    #     break
    
    # Assuming the subject ID is in a column named 'subject_id'
    subject_id = row['subject_id']

    try:
        fs_subject_stats = get_freesurfer_stats(fs_output_dir=hbn_pod2_path, subject_id=subject_id)
    except Exception as exception:
        # print(f'Error for subject {subject_id}: {exception}')
        continue

    fs_subject_stats_all.append(fs_subject_stats)

df_fs_stats = pd.concat(fs_subject_stats_all)
df_fs_stats
    
    # Output the subject ID
   # print(subject_id)

0


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

100


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

200


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

300


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

400


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

500


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

600


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

700


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

800


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

900


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

1000


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

1100


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

1200


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

1300


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

1400


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

1500


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

1600


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

1700


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

1800


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

1900


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

2000


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

2100


    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters=float` will normally work.
    * Use `np.loadtxt(...).astype(np.int64)` parsing the file as
      floating point and then convert it.  (On all NumPy versions.)
  (Deprecated NumPy 1.23)
  aseg_data = np.loadtxt(aseg_file, dtype="i1,i1,i4,f4,S32,f4,f4,f4,f4,f4")
    * make sure the original data is stored as integers.
    * use the `converters=` keyword argument.  If you only use
      NumPy 1.23 or later, `converters

Unnamed: 0,subject_id,caudalanteriorcingulate_x,caudalmiddlefrontal_x,cuneus_x,entorhinal_x,fusiform_x,inferiorparietal_x,inferiortemporal_x,isthmuscingulate_x,lateraloccipital_x,...,SupraTentorial,SupraTentorialNotVent,SupraTentorialNotVentVox,Mask,BrainSegVol-to-eTIV,MaskVol-to-eTIV,lhSurfaceHoles,rhSurfaceHoles,SurfaceHoles,EstimatedTotalIntraCranial
0,NDARAA536PTU,2.863,2.636,2.099,3.777,2.762,2.681,3.081,2.432,2.513,...,1085570.112871,1076191.112871,1073932.000000,1614592.000000,0.777620,1.031100,30,35,65,1565892.870337
0,NDARAD481FXF,2.643,2.744,2.054,3.436,2.697,2.631,2.918,2.327,2.260,...,1106764.251561,1080593.251561,1078408.000000,1748584.000000,0.733374,1.015394,93,73,166,1722074.268048
0,NDARAE199TDD,2.569,2.735,2.129,3.284,2.714,2.758,2.867,2.559,2.191,...,1059086.069629,1047306.069629,1044418.000000,1531484.000000,0.797519,1.026864,20,24,44,1491418.637725
0,NDARAJ366ZFA,2.741,2.528,2.049,3.458,2.911,2.607,2.989,2.589,2.157,...,879105.328591,870834.328591,868724.000000,1380141.000000,0.761726,1.036302,51,55,106,1331794.713947
0,NDARAM277WZT,2.512,2.506,2.107,3.106,2.593,2.604,2.840,2.120,2.153,...,955427.438677,947929.438677,945654.000000,1447521.000000,0.763228,1.015706,25,30,55,1425137.232144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,NDARZJ414CAA,2.535,2.696,2.128,3.594,2.812,2.596,2.976,2.077,2.310,...,1127588.486250,1115567.486250,1114165.000000,1739464.000000,0.749495,1.006227,18,18,36,1728698.938714
0,NDARZP564MHU,2.953,2.354,2.170,2.787,2.460,2.479,2.541,2.738,2.302,...,866456.642989,852677.642989,851503.000000,1600045.000000,0.623762,1.006788,241,272,513,1589257.166552
0,NDARZT772PU4,2.894,2.666,1.857,3.291,2.615,2.505,2.821,2.409,2.033,...,994281.069400,977976.069400,976215.000000,1504325.000000,0.784426,1.044549,27,27,54,1440166.621893
0,NDARZV766YXP,2.649,2.697,2.139,3.702,2.789,2.731,2.872,2.408,2.351,...,881372.598901,872676.598901,870726.000000,1308976.000000,0.779089,1.035070,26,30,56,1264625.081928


In [46]:
df_final = pd.merge(participants_table, df_fs_stats, on='subject_id')
df_final

Unnamed: 0,subject_id,scan_site_id,sex,age,ehq_total,commercial_use,full_pheno,expert_qc_score,xgb_qc_score,xgb_qsiprep_qc_score,...,SupraTentorial,SupraTentorialNotVent,SupraTentorialNotVentVox,Mask,BrainSegVol-to-eTIV,MaskVol-to-eTIV,lhSurfaceHoles,rhSurfaceHoles,SurfaceHoles,EstimatedTotalIntraCranial
0,NDARAA536PTU,SI,M,11.998402,-86.67,No,Yes,,,0.962119,...,1085570.112871,1076191.112871,1073932.000000,1614592.000000,0.777620,1.031100,30,35,65,1565892.870337
1,NDARAD481FXF,SI,M,16.445242,60.00,No,Yes,,,0.968277,...,1106764.251561,1080593.251561,1078408.000000,1748584.000000,0.733374,1.015394,93,73,166,1722074.268048
2,NDARAE199TDD,SI,F,8.984029,63.34,No,Yes,,,0.809728,...,1059086.069629,1047306.069629,1044418.000000,1531484.000000,0.797519,1.026864,20,24,44,1491418.637725
3,NDARAJ366ZFA,SI,M,10.211156,33.35,No,Yes,,,0.944396,...,879105.328591,870834.328591,868724.000000,1380141.000000,0.761726,1.036302,51,55,106,1331794.713947
4,NDARAM277WZT,SI,M,14.320670,,No,Yes,,,0.956153,...,955427.438677,947929.438677,945654.000000,1447521.000000,0.763228,1.015706,25,30,55,1425137.232144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218,NDARZJ414CAA,SI,M,18.396760,96.67,No,Yes,,,0.921387,...,1127588.486250,1115567.486250,1114165.000000,1739464.000000,0.749495,1.006227,18,18,36,1728698.938714
219,NDARZP564MHU,SI,M,20.910107,100.00,No,Yes,,,0.972881,...,866456.642989,852677.642989,851503.000000,1600045.000000,0.623762,1.006788,241,272,513,1589257.166552
220,NDARZT772PU4,SI,F,17.707278,16.68,No,Yes,,,0.931097,...,994281.069400,977976.069400,976215.000000,1504325.000000,0.784426,1.044549,27,27,54,1440166.621893
221,NDARZV766YXP,SI,M,10.788272,84.47,No,Yes,,,0.958586,...,881372.598901,872676.598901,870726.000000,1308976.000000,0.779089,1.035070,26,30,56,1264625.081928


In [47]:
df_final.to_csv('hbn_fs_dataset.csv', index=False)