In [1]:
import os
import glob
import pandas as pd
import numpy as np
import re
import datetime

In [2]:
# Root folder of the ADNI data. Set the ADNI_DATA_DIR environment variable to
# run on another machine; the original location is kept as the default.
DATA_DIR = os.environ.get("ADNI_DATA_DIR", "D:/data/ADNI")

In [3]:
# Identifier / label columns followed by the demographic and clinical-score
# columns; `columns` is the full selection taken from the label CSV.
id_columns = ['RID', 'DX', 'Conv', 'Filename']
demo_columns = [
    'PTGENDER (1=male, 2=female)',
    'Age',
    'PTEDUCAT',
    'APOE Status',
    'MMSCORE',
    'CDGLOBAL',
    'SUM BOXES',
]
columns = [*id_columns, *demo_columns]

In [4]:
# Pre-defined labels - only MCI-related patients are included.
# Load the label table and keep just the selected columns, in the order
# given by `columns`.
mri_labels_path = os.path.join(DATA_DIR, "labels/MRI_features_all.csv")
mri_labels = pd.read_csv(mri_labels_path).loc[:, columns]

In [5]:
# Work on a copy so that mri_labels itself is left unmodified.
summary = mri_labels.copy()
# Split 'Filename' on '/' into exactly two parts: the PET type and the MRI ID.
# (The assignment requires the split to yield two columns.)
summary[['PET Type', 'MRI']] = summary['Filename'].str.split('/', expand=True)
# Drop rows with no MRI part (missing 'Filename' or no '/' in it) and renumber.
summary = summary.loc[summary['MRI'].notna()].reset_index(drop=True)

In [6]:
# add amyloid PET (FBP)
# For every .param file under PUP_FBP, pair the FBP scan ID (taken from the
# file name) with the MRI ID (taken from the file's `fsdir=` line).
# `recursive=True` was dropped: it only affects patterns containing `**`.
fbp_proc_params = glob.glob(os.path.join(DATA_DIR, "PUP_FBP/*/*/*.param"))
fbp_logs = []
for fbp_proc_param in fbp_proc_params:
    # The file is read in binary mode; only the matched line is decoded.
    with open(fbp_proc_param, 'rb') as f:
        param_lines = f.readlines()
    fsdir_lines = [
        line for line in param_lines
        if line.startswith(b'fsdir=/datadrive/ADNI_STTR/proc/FS7')
    ]
    if len(fsdir_lines) != 1:
        raise ValueError(
            f"expected exactly one FS7 'fsdir=' line in {fbp_proc_param}, "
            f"found {len(fsdir_lines)}"
        )
    # os.path.basename works with the platform's path separators instead of
    # assuming Windows-style backslashes.
    fbp_id = os.path.basename(fbp_proc_param).replace('_pet.param', '')
    # The second-to-last '/'-separated component of the fsdir line is the MRI ID.
    mri_id = fsdir_lines[0].decode('utf-8').split('/')[-2]
    fbp_logs.append([fbp_id, mri_id])
# Passing columns= keeps the frame well-formed even if no .param file matched
# (assigning .columns on an empty frame raises a length-mismatch error).
fbp_logs = pd.DataFrame(fbp_logs, columns=['FBP', 'MRI'])

In [7]:
# Sanity check: (rows, columns) of the FBP-to-MRI mapping table.
fbp_logs.shape

(1463, 2)

In [8]:
# add FDG PET (FDG)
# For every .param file under FDG, pair the FDG scan ID (taken from the file
# name) with the MRI ID (taken from the file's `fsdir=` line).
# `recursive=True` was dropped: it only affects patterns containing `**`.
fdg_proc_params = glob.glob(os.path.join(DATA_DIR, "FDG/*/*/*.param"))
fdg_logs = []
for fdg_proc_param in fdg_proc_params:
    # The file is read in binary mode; only the matched line is decoded.
    with open(fdg_proc_param, 'rb') as f:
        param_lines = f.readlines()
    fsdir_lines = [
        line for line in param_lines
        if line.startswith(b'fsdir=/datadrive/ADNI_STTR/proc/FS7')
    ]
    if len(fsdir_lines) != 1:
        raise ValueError(
            f"expected exactly one FS7 'fsdir=' line in {fdg_proc_param}, "
            f"found {len(fsdir_lines)}"
        )
    # os.path.basename works with the platform's path separators instead of
    # assuming Windows-style backslashes.
    fdg_id = os.path.basename(fdg_proc_param).replace('_pet.param', '')
    # The second-to-last '/'-separated component of the fsdir line is the MRI ID.
    mri_id = fsdir_lines[0].decode('utf-8').split('/')[-2]
    fdg_logs.append([fdg_id, mri_id])
# Passing columns= keeps the frame well-formed even if no .param file matched
# (assigning .columns on an empty frame raises a length-mismatch error).
fdg_logs = pd.DataFrame(fdg_logs, columns=['FDG', 'MRI'])

In [9]:
# Sanity check: (rows, columns) of the FDG-to-MRI mapping table.
fdg_logs.shape

(2044, 2)

In [10]:
# Attach the FBP scan ID to each row by MRI ID; the left join keeps every
# summary row, including those with no matching FBP entry.
summary = summary.merge(fbp_logs, how='left', on='MRI')
summary.shape

(3342, 14)

In [11]:
# Attach the FDG scan ID to each row by MRI ID; the left join keeps every
# summary row, including those with no matching FDG entry.
# NOTE(review): the recorded outputs show the row count growing from 3342 to
# 3352 across this left merge, which suggests some MRI IDs occur more than
# once in fdg_logs - confirm the duplicated rows are intended.
summary = summary.merge(fdg_logs, how='left', on='MRI')
summary.shape

(3352, 15)

In [12]:
# Missing conversion / diagnosis values are encoded as -1.
summary['Conv'] = summary['Conv'].fillna(-1)
summary['DX'] = summary['DX'].fillna(-1)
# Binary flag: 1 where the DX code is 2, 4 or 8, otherwise 0.
# (presumably these are the MCI diagnosis codes - verify against the label source)
summary['MCI'] = summary['DX'].isin([2, 4, 8]).astype('int64')

In [39]:
# For each row, record whether the cat12/mri directory exists for its MRI ID.
# Despite the name, this checks for a directory (os.path.isdir), not a file.
cat12_mri_dirs = (
    os.path.join(DATA_DIR, f"FS7/{mri}/cat12/mri/") for mri in summary['MRI']
)
IS_FILE = list(map(os.path.isdir, cat12_mri_dirs))
summary['IS_FILE'] = IS_FILE

In [40]:
# Store Conv as integers, then write the assembled table without the index.
summary['Conv'] = summary['Conv'].astype(int)
data_info_path = os.path.join(DATA_DIR, "labels/data_info.csv")
summary.to_csv(data_info_path, index=False)