In [None]:
from pathlib import Path
import pandas as pd
import re
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s: %(message)s')

# Root of the FreeSurfer derivatives tree that is scanned for ``sub-*`` folders.
subDir = Path('/brain/babri_in/sangf/Projects/D_desc-babri/derivatives/freesurfer')
# Stats table to extract; the driver loop handles 'aseg', 'aparc' and 'aparc.a2009s'.
atlas = 'aseg'

def func_GetAseg(file):
    """Parse an ``aseg.stats`` file into a single-row DataFrame.

    Two kinds of lines contribute columns:

    * Header lines of the form ``# Measure <name>, ...`` for a fixed set of
      measure names. The second-to-last comma-separated field is stored as a
      float under the column ``<name>``.
    * Every line that does not start with ``#``. It is split on whitespace;
      the 5th field is used as the region name and the 4th field is stored
      (as the original string, not converted to a number) under
      ``<region>_Volume_mm3``.

    Blank or whitespace-only lines are ignored.

    Parameters
    ----------
    file : str or path-like
        Path of the stats file to read.

    Returns
    -------
    pandas.DataFrame
        One row; columns appear in the order the values occur in the file.

    Raises
    ------
    IndexError
        If a non-comment, non-blank line has fewer than five
        whitespace-separated fields.
    ValueError
        If a matched measure line's value field is not a valid float.
    """
    measureNames = (
        'lhCortex',
        'rhCortex',
        'Cortex',
        'lhCerebralWhiteMatter',
        'rhCerebralWhiteMatter',
        'CerebralWhiteMatter',
        'EstimatedTotalIntraCranialVol',
    )

    resRow = {}
    with open(file, 'r') as f:
        for line in f:
            # readlines()/iteration keeps the trailing newline, so an "empty"
            # line is never zero-length; test the stripped text instead.
            if not line.strip():
                continue

            if line.startswith('#'):
                for name in measureNames:
                    # The trailing comma keeps e.g. 'Cortex' from matching
                    # a longer measure name that merely starts with it.
                    if line.startswith(f'# Measure {name},'):
                        resRow[name] = [float(line.split(',')[-2])]
                        break
                continue

            # Data row: split once and reuse the fields.
            fields = line.split()
            tmpRegion = fields[4]
            resRow[f'{tmpRegion}_Volume_mm3'] = [fields[3]]

    return pd.DataFrame(resRow)

def func_GetAparc(file, hemi='lh'):
    """Parse one hemisphere's ``*.aparc*.stats`` file into a single-row DataFrame.

    Two kinds of lines contribute columns, all prefixed with ``hemi``:

    * Header lines starting with ``# Measure Cortex, WhiteSurfArea,``,
      ``# Measure Cortex, MeanThickness,`` or
      ``# Measure EstimatedTotalIntraCranialVol,``. The second-to-last
      comma-separated field is stored as a float under
      ``<hemi>_WhiteSurfArea``, ``<hemi>_MeanThickness`` or
      ``<hemi>_EstimatedTotalIntraCranialVol``.
    * Every line that does not start with ``#``. It is split on whitespace;
      the first field is the region name and fields 2-10 are stored (as the
      original strings, not converted to numbers) under
      ``<hemi>_<region>_<suffix>`` for the suffixes NumVert, SurfArea,
      GrayVol, ThickAvg, ThickStd, MeanCurv, GausCurv, FoldInd, CurvInd.

    Blank or whitespace-only lines are ignored.

    Parameters
    ----------
    file : str or path-like
        Path of the stats file to read.
    hemi : str, default 'lh'
        Prefix added to every output column name.

    Returns
    -------
    pandas.DataFrame
        One row; columns appear in the order the values occur in the file.

    Raises
    ------
    IndexError
        If a non-comment, non-blank line has fewer than ten
        whitespace-separated fields.
    ValueError
        If a matched measure line's value field is not a valid float.
    """
    # Header prefix -> output column suffix.
    measurePrefixes = (
        ('# Measure Cortex, WhiteSurfArea,', 'WhiteSurfArea'),
        ('# Measure Cortex, MeanThickness,', 'MeanThickness'),
        ('# Measure EstimatedTotalIntraCranialVol,', 'EstimatedTotalIntraCranialVol'),
    )
    # Column suffixes for data-row fields 1..9 (field 0 is the region name).
    regionSuffixes = (
        'NumVert',
        'SurfArea',
        'GrayVol',
        'ThickAvg',
        'ThickStd',
        'MeanCurv',
        'GausCurv',
        'FoldInd',
        'CurvInd',
    )

    resRow = {}
    with open(file, 'r') as f:
        for line in f:
            # Lines read from a file keep their newline, so check the
            # stripped text to recognise blank lines.
            if not line.strip():
                continue

            if line.startswith('#'):
                for prefix, suffix in measurePrefixes:
                    if line.startswith(prefix):
                        resRow[f'{hemi}_{suffix}'] = [float(line.split(',')[-2])]
                        break
                continue

            # Data row: split once and fan the fields out to their columns.
            fields = line.split()
            tmpRegion = fields[0]
            for idx, suffix in enumerate(regionSuffixes, start=1):
                resRow[f'{hemi}_{tmpRegion}_{suffix}'] = [fields[idx]]

    return pd.DataFrame(resRow)

# Build one single-row frame per subject and concatenate them once at the end;
# growing a DataFrame with pd.concat inside the loop re-copies it every time.
supportedAtlases = ('aseg', 'aparc', 'aparc.a2009s')
if atlas not in supportedAtlases:
    # Fail early with a clear message instead of hitting an undefined
    # (or stale) ``subDf`` inside the loop.
    raise ValueError(f'Unsupported atlas {atlas!r}; expected one of {supportedAtlases}')

subDfList = []
# sorted() makes the row order of the output CSV reproducible across runs.
for i in sorted(subDir.glob('sub-*')):
    subId = i.name
    logging.info(subId)
    statsDir = i.joinpath('stats')

    if atlas == 'aseg':
        subPath = statsDir.joinpath('aseg.stats')
        if not subPath.exists():
            continue
        subDf = func_GetAseg(subPath.resolve())
    else:
        # 'aparc' and 'aparc.a2009s' share the same layout: one file per
        # hemisphere. Each file name is built explicitly; deriving the rh path
        # with str.replace('lh', 'rh') would also rewrite any "lh" that
        # happens to occur in a parent directory name.
        subPath = statsDir.joinpath(f'lh.{atlas}.stats')
        rhPath = statsDir.joinpath(f'rh.{atlas}.stats')
        if not subPath.exists():
            continue
        if not rhPath.exists():
            logging.warning(f'{subId}: missing {rhPath.name}, subject skipped')
            continue
        subDf_lh = func_GetAparc(subPath.resolve(), hemi='lh')
        subDf_rh = func_GetAparc(rhPath.resolve(), hemi='rh')
        subDf = pd.concat([subDf_lh, subDf_rh], axis=1)

    subDf['participant_id'] = [subId]
    subDfList.append(subDf)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# subject had the requested stats file(s).
resDf = pd.concat(subDfList, ignore_index=True) if subDfList else pd.DataFrame()

resDf.to_csv(f'Res_atlas-{atlas}.csv', index=False)

2024-12-04 14:59:31,270: sub-31157
2024-12-04 14:59:31,273: {'lh_WhiteSurfArea,': [74372.9], 'lh_MeanThickness': [2.48902], 'lh_EstimatedTotalIntraCranialVol': [1296541.55381], 'lh_bankssts_NumVert': ['1577'], 'lh_bankssts_SurfArea': ['906'], 'lh_bankssts_GrayVol': ['2059'], 'lh_bankssts_ThickAvg': ['2.409'], 'lh_bankssts_ThickStd': ['0.465'], 'lh_bankssts_MeanCurv': ['0.093'], 'lh_bankssts_GausCurv': ['0.028'], 'lh_bankssts_FoldInd': ['13'], 'lh_bankssts_CurvInd': ['2.3'], 'lh_caudalanteriorcingulate_NumVert': ['783'], 'lh_caudalanteriorcingulate_SurfArea': ['426'], 'lh_caudalanteriorcingulate_GrayVol': ['1370'], 'lh_caudalanteriorcingulate_ThickAvg': ['2.808'], 'lh_caudalanteriorcingulate_ThickStd': ['0.732'], 'lh_caudalanteriorcingulate_MeanCurv': ['0.116'], 'lh_caudalanteriorcingulate_GausCurv': ['0.026'], 'lh_caudalanteriorcingulate_FoldInd': ['9'], 'lh_caudalanteriorcingulate_CurvInd': ['1.0'], 'lh_caudalmiddlefrontal_NumVert': ['3432'], 'lh_caudalmiddlefrontal_SurfArea': ['1809'

In [49]:
import pandas as pd

# Participant list: a spreadsheet with a ``participant_id`` column.
subDf = pd.read_excel('核磁编号_tosangf.xlsx', header=0)

# Load the aparc results, index them by participant and keep only the listed
# participants, in the order they appear in the spreadsheet.
datDf = (
    pd.read_csv('Res_atlas-aparc.csv', header=0)
    .set_index(['participant_id'])
    .loc[subDf['participant_id'].values, :]
)

# The index (participant_id) is written out as the first CSV column.
datDf.to_csv('./Res_atlas-aparc_tolong.csv')