# Prepare a subset of ADNI for 
`mild to moderate AD without depression`

> Specifically this code is to prepare ADNI subset that matches recruitment criteria of EXPEDITION3 clinical trial.

> No filtering (MMSE < 26, GDS < 6, etc.) is applied here. The goal of this notebook is to provide the entire ADNI subset, restricted to specific columns, on which one can further apply these filters.

> This notebook mainly gathers only the important fields, resolves duplicates, and joins all the specific datasets with the identifier information from ADNIMERGE.

In [18]:
# Root folder of the local data tree. Keep the trailing slash: the file paths
# in this notebook are built by plain string concatenation (DATA_DIR + '...').
DATA_DIR = '/Users/cervere/sandbox/AD/data/'

## Load datasets

> Note the folder structure and please update accordingly

In [19]:
# pandas is imported here because this is the first cell that uses `pd`;
# relying on an import placed in a later cell breaks a top-to-bottom run.
import pandas as pd

# Identifier / demographic table (ADNIMERGE) and the raw GDS export.
adni_demog_df = pd.read_csv(DATA_DIR+'ADNI/clean/ADNIMERGE.csv', sep=',')
adni_gds_df = pd.read_csv(DATA_DIR+'ADNI/raw/NEUROPSYCH/GDS/ALL/GDSCALE.csv', sep=',')

## This is an already compiled file from all neuropsych scores; it essentially
## needs to have ADAS11, FAQ, MMSE, CDR etc.
adni_cog_df = pd.read_csv(DATA_DIR+'ADNI/clean/ADNIMERGE_NP.csv', sep=',')

### The following files are the new harmonized ADSP PHC files (T1 MRI, CSF, amyloid PET)
adni_mri_df = pd.read_csv(DATA_DIR+'ADNI/raw/ADSP/ADSP_ADNI_T1-Scalar_Dec2023/ADSP_PHC_T1-FS_Dec2023.csv', sep=',')
adni_csf_df = pd.read_csv(DATA_DIR+'ADNI/raw/ADSP/ADSP_ADNI_Fluid_Biomarker_Dec2023/ADSP_PHC_CSF_Dec2023.csv', sep=',')
adni_pet_df = pd.read_csv(DATA_DIR+'ADNI/raw/ADSP/ADSP_ADNI_PET-Scalar_Dec2023/ADSP_PHC_PET_Amyloid_Simple_Dec2023.csv', sep=',')


  adni_demog_df = pd.read_csv(DATA_DIR+'ADNI/clean/ADNIMERGE.csv', sep=',')


In [20]:
import pandas as pd

### ADNI Fields of interest

In [21]:
# Column groups of interest, one list per data modality.
adni_demographics = ['PTGENDER', 'AGE', 'PTEDUCAT', 'APOE4']
adni_cognitive = ['ADAS11', 'MMSE', 'FAQ']
adni_clinical = ['GDTOTAL', 'CDRSB']
adni_mri = [
    'VOL_ENTORHINAL',
    'VOL_HIPPOCAMPUS',
    'VOL_INFERIORPARIETAL',
    'VOL_SUPERIORTEMPORAL',
    'VOL_MIDDLETEMPORAL',
]

# Names of the individual ADAS and FAQ score columns.
adni_adas_field = 'ADAS11'
adni_faq_field = 'FAQ'

# Full column list: visit identifiers first, then every group above in order.
adni_columns = [
    'RID', 'VISCODE', 'VS_MONTH',
    *adni_demographics,
    *adni_cognitive,
    *adni_clinical,
    *adni_mri,
]
                

In [22]:
# Key every table on (RID, VISCODE) so that the tables can be joined on index.
adni_demog_df.set_index(['RID', 'VISCODE'], inplace=True)

# GDS: VISCODE2 becomes the visit code, with 'sc' visits relabelled 'bl'
# (presumably screening -> baseline; verify against the ADNI visit codes).
# The result is assigned back rather than calling replace(inplace=True) on the
# selected column: that chained in-place form triggers a warning on recent
# pandas and silently leaves the frame unchanged under Copy-on-Write.
adni_gds_df['VISCODE2'] = adni_gds_df['VISCODE2'].replace('sc', 'bl')
adni_gds_df.set_index(['RID', 'VISCODE2'], inplace=True)
adni_gds_df.drop(columns='VISCODE', inplace=True)
adni_gds_df.index.names = ['RID', 'VISCODE']

# CSF: same re-keying, using VISCODE2 as the visit code.
adni_csf_df.set_index(['RID', 'VISCODE2'], inplace=True)
adni_csf_df.drop(columns='VISCODE', inplace=True)
adni_csf_df.index.names = ['RID', 'VISCODE']

def getStatus(row):
    """Return 1 when the second character of an AT_class label is '+', else 0.

    Args:
        row: A single AT_class label as a string; '' stands for a missing
            label. (Labels presumably look like 'A+T-' -- confirm against the
            CSF file.)

    Returns:
        1 if the character at position 1 is '+', otherwise 0. Empty or
        one-character labels yield 0 instead of raising IndexError.
    """
    # Slicing (rather than indexing) is safe for strings shorter than two chars.
    return 1 if row[1:2] == '+' else 0

# Binary CSF amyloid flag derived from AT_class. Missing labels are turned into
# '' first, so they end up as 0 rather than NaN.
adni_csf_df['PHC_AMYLOID_STATUS_CSF'] = (
    adni_csf_df['AT_class']
    .fillna('')
    .map(getStatus)
)

# Amyloid PET: key on (RID, VISCODE2) and expose the visit level as VISCODE.
adni_pet_df.set_index(['RID', 'VISCODE2'], inplace=True)
adni_pet_df.drop('VISCODE', axis=1, inplace=True)
adni_pet_df.index = adni_pet_df.index.set_names(['RID', 'VISCODE'])

# Neuropsych scores are keyed directly on VISCODE.
adni_cog_df.set_index(['RID', 'VISCODE'], inplace=True)


In [23]:
# T1 MRI: key on (RID, VISCODE2) and expose the visit level as VISCODE.
adni_mri_df.set_index(['RID', 'VISCODE2'], inplace=True)
adni_mri_df.drop('VISCODE', axis=1, inplace=True)
adni_mri_df.index = adni_mri_df.index.set_names(['RID', 'VISCODE'])

# Source columns (all carrying the `_combat` suffix) used for the sums below.
mri_fields = [
    'lh_entorhinal_volume_combat', 'rh_entorhinal_volume_combat',
    'Left.Hippocampus_combat', 'Right.Hippocampus_combat',
    'lh_inferiorparietal_volume_combat', 'rh_inferiorparietal_volume_combat',
    'lh_superiortemporal_volume_combat', 'rh_superiortemporal_volume_combat',
    'lh_middletemporal_volume_combat', 'rh_middletemporal_volume_combat',
    'EstimatedTotalIntraCranialVol_combat',
]

# Each regional volume is the left column plus the right column.
adni_mri_df['VOL_ENTORHINAL'] = adni_mri_df['lh_entorhinal_volume_combat'].add(
    adni_mri_df['rh_entorhinal_volume_combat'])
adni_mri_df['VOL_HIPPOCAMPUS'] = adni_mri_df['Left.Hippocampus_combat'].add(
    adni_mri_df['Right.Hippocampus_combat'])
adni_mri_df['VOL_INFERIORPARIETAL'] = adni_mri_df['lh_inferiorparietal_volume_combat'].add(
    adni_mri_df['rh_inferiorparietal_volume_combat'])
adni_mri_df['VOL_SUPERIORTEMPORAL'] = adni_mri_df['lh_superiortemporal_volume_combat'].add(
    adni_mri_df['rh_superiortemporal_volume_combat'])
adni_mri_df['VOL_MIDDLETEMPORAL'] = adni_mri_df['lh_middletemporal_volume_combat'].add(
    adni_mri_df['rh_middletemporal_volume_combat'])

# Short alias for the intracranial-volume column used by the normalisation step.
adni_mri_df['ICV'] = adni_mri_df['EstimatedTotalIntraCranialVol_combat']

## ICV Normalize MRI data

In [23]:
def getNormalisedMRIData(MRI_DATA, mri_columns) :
    """Return a copy of MRI_DATA with the given columns ICV-normalised.

    Each listed column is replaced by ``mean(ICV) * (column / ICV)``, where
    ``mean(ICV)`` is taken over all rows of MRI_DATA.

    Args:
        MRI_DATA: DataFrame containing an 'ICV' column and every column named
            in ``mri_columns``.
        mri_columns: Names of the columns to normalise. 'ICV' may be included
            (it then becomes the constant population mean wherever ICV is
            defined) and may appear at any position.

    Returns:
        A new DataFrame; MRI_DATA itself is left untouched.

    Raises:
        KeyError: If 'ICV' or one of ``mri_columns`` is missing from MRI_DATA.
    """
    # Read ICV and its mean from the untouched input, so that normalising the
    # ICV column itself cannot affect fields processed after it.
    icv = MRI_DATA['ICV']
    icv_population_mean = icv.mean()
    df = MRI_DATA.copy()
    for mri_field in mri_columns:
        df[mri_field] = icv_population_mean * (MRI_DATA[mri_field] / icv)
    return df

# Normalise the derived regional volumes; ICV is passed along as a column and
# is also listed among the fields to normalise.
adni_mri_icv_norm = getNormalisedMRIData(
    adni_mri_df[[*adni_mri, 'ICV']],
    [*adni_mri, 'ICV'],
)

## Duplicates:
 - demog: None
 - neuropsych: None
 - GDS: a couple, with the same GDTOTAL
 - CSF: None
 - PET Amyloid: some, but with the same status
 - MRI: many; averaged
 
 To understand the data a bit more, verify the following cells.

In [24]:
def findDuplicates(df):
    """Return every row of ``df`` whose index value occurs more than once.

    All occurrences are returned (not only the repeats), in their original
    order; the result is empty when the index is unique.
    """
    repeated_key = df.index.duplicated(keep=False)
    return df.loc[repeated_key]

### Demographics

In [25]:
# Display rows sharing a (RID, VISCODE) key; the recorded output is empty.
findDuplicates(adni_demog_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,COLPROT,ORIGPROT,PTID,SITE,EXAMDATE,DX_bl,AGE,PTGENDER,PTEDUCAT,PTETHCAT,...,PIB_bl,AV45_bl,FBB_bl,Years_bl,Month_bl,Month,M,update_stamp,ETHNICRACE,VS_MONTH
RID,VISCODE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1


### NeuroPsych

In [26]:
# Display rows sharing a (RID, VISCODE) key; the recorded output is empty.
findDuplicates(adni_cog_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,CDRSB,ADAS11,ADAS13,ADASQ4,MMSE,RAVLT_immediate,RAVLT_learning,RAVLT_forgetting,RAVLT_perc_forgetting,LDELTOTAL,DIGITSCOR,TRABSCOR,FAQ
RID,VISCODE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1


### GDS

In [27]:
# Display rows sharing a (RID, VISCODE) key. In the recorded output the repeated
# rows carry either the same GDTOTAL or a missing one.
findDuplicates(adni_gds_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Phase,ID,SITEID,USERDATE,USERDATE2,EXAMDATE,GDSOURCE,GDUNABL,GDUNABSP,GDSATIS,...,GDHOME,GDMEMORY,GDALIVE,GDWORTH,GDENERGY,GDHOPE,GDBETTER,GDTOTAL,GDDATE,update_stamp
RID,VISCODE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
830,m60,ADNIGO,1262,21,2011-09-23,,,1.0,-4.0,-4.0,1.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,,2012-11-16 09:40:34.0
830,m60,ADNI2,662,21,2011-09-26,,,1.0,-4.0,-4.0,1.0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,,2013-04-10 19:17:09.0
4952,m78,ADNI3,85455,12,2019-06-09,2019-06-09,,1.0,0.0,,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,,2021-07-29 04:18:30.0
4952,m78,ADNI3,126289,12,2021-01-18,2021-01-18,,1.0,,,,...,,,,,,,,,2019-05-23,2022-04-28 04:18:58.0


### CSF

In [28]:
# Display rows sharing a (RID, VISCODE) key; the recorded output is empty.
findDuplicates(adni_csf_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,SUBJID,PHASE,DRAWDATE,PHC_Visit,PHC_Sex,PHC_Education,PHC_Ethnicity,PHC_Race,PHC_Age_Biomarker,PHC_Diagnosis,Platform,AB42_RAW,PHC_AB42,Tau_RAW,PHC_Tau,pTau_RAW,PHC_pTau,AT_class,PHC_AMYLOID_STATUS_CSF
RID,VISCODE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1


### PET

In [29]:
# Display rows sharing a (RID, VISCODE) key. In the recorded output the repeated
# scans agree on PHC_AMYLOID_STATUS, except one failed-QC scan with no status.
findDuplicates(adni_pet_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,SUBJID,PHASE,SCANDATE,PHC_Visit,PHC_Sex,PHC_Education,PHC_Ethnicity,PHC_Race,PHC_Age_PET,PHC_Diagnosis,...,PHC_QC_IMAGE,PHC_QC_TIMING,PHC_QC_NOTES,PHC_COMPLIANT_ACQUISITION_START,PHC_COMPLIANT_ACQUISITION_END,PHC_INJECTED_DOSE,PHC_DYNAMIC,PHC_CENTILOIDS,PHC_AMYLOID_STATUS,PHC_AMYLOID_STATUS_GMM
RID,VISCODE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
6644,bl,,ADNI3,20190130,1.0,1,20,2.0,5,86.245038,1.0,...,PASS,PASS,,50.0,70.0,10.0,1,57.0,1.0,1.0
6644,bl,,ADNI3,20210329,,1,20,2.0,5,88.405202,1.0,...,PASS,PASS,,50.0,70.0,10.0,1,75.0,1.0,1.0
6677,bl,,ADNI3,20190411,1.0,1,16,2.0,5,78.94319,2.0,...,PASS,PASS,,90.0,110.0,8.1,1,2.0,0.0,0.0
6677,bl,,ADNI3,20211209,,1,16,2.0,5,81.607118,2.0,...,PASS,PASS,,90.0,110.0,8.1,1,8.0,0.0,0.0
4349,m126,ADNI_018_S_4349,ADNI3,20201202,,2,16,2.0,5,80.503765,1.0,...,PASS,PASS,,50.0,70.0,10.0,1,53.0,1.0,1.0
4349,m126,ADNI_018_S_4349,ADNI3,20220512,7.0,2,16,2.0,5,81.943874,1.0,...,PASS,PASS,,50.0,70.0,10.0,1,71.0,1.0,1.0
6306,m48,,ADNI3,20200903,,1,14,2.0,5,75.671458,1.0,...,PASS,PASS,,50.0,70.0,10.0,1,39.0,1.0,1.0
6306,m48,,ADNI3,20220523,2.0,1,14,2.0,5,77.38809,1.0,...,PASS,PASS,,50.0,70.0,10.0,1,58.0,1.0,1.0
4143,m24,ADNI_041_S_4143,ADNI2,20130826,4.0,2,19,2.0,5,65.817933,2.0,...,FAIL,FAIL,,,,10.0,1,,,
4143,m24,ADNI_041_S_4143,ADNI2,20130923,4.0,2,19,2.0,5,65.894593,2.0,...,PASS,PASS,,50.0,70.0,10.0,1,-4.0,0.0,0.0


### MRI

In [30]:
# Display rows sharing a (RID, VISCODE) key. The recorded output shows many
# repeated keys whose volumes differ between rows.
findDuplicates(adni_mri_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,SUBJID,PHASE,SCANDATE,PHC_Visit,PHC_Sex,PHC_Education,PHC_Ethnicity,PHC_Race,PHC_Age_T1,PHC_Diagnosis,...,rh_frontalpole_volume_combat,rh_temporalpole_volume_combat,rh_transversetemporal_volume_combat,rh_insula_volume_combat,VOL_ENTORHINAL,VOL_HIPPOCAMPUS,VOL_INFERIORPARIETAL,VOL_SUPERIORTEMPORAL,VOL_MIDDLETEMPORAL,ICV
RID,VISCODE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
413,m108,ADNI_002_S_0413,ADNI2,6/9/2015,11.0,2,16,2.0,5,85.519507,1.0,...,1100.916418,2368.631025,971.349908,5926.408155,4866.134224,6469.976774,23408.464780,21088.039290,20034.393232,1549385.221
413,m108,ADNI_002_S_0413,ADNI2,6/9/2015,11.0,2,16,2.0,5,85.519507,1.0,...,1051.486662,2240.624766,1078.782053,5994.030472,4312.521604,6998.145806,23314.089670,20095.119733,19619.143774,1614352.736
2010,m06,ADNI_002_S_2010,ADNIGO,1/22/2011,2.0,2,20,2.0,6,63.561944,1.0,...,917.551961,2371.134014,969.602969,7394.034556,3186.913294,7306.488234,23841.769610,26060.679130,20024.473396,1385677.750
2010,m06,ADNI_002_S_2010,ADNIGO,10/22/2010,2.0,2,20,2.0,6,63.310062,1.0,...,1028.690617,2088.429903,872.198598,7742.211012,3554.928020,7605.606281,24194.463860,26285.229270,19978.948014,1377298.022
2073,bl,ADNI_002_S_2073,ADNIGO,12/6/2010,1.0,2,20,2.0,5,63.682409,2.0,...,1016.596598,2598.675256,849.575116,5989.924978,5689.148948,6756.848860,20302.691807,20997.072624,18689.525290,1419173.486
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4420,bl,ADNI_941_S_4420,ADNI2,6/27/2012,1.0,1,18,2.0,5,81.738535,2.0,...,972.341819,2570.286044,896.162670,6588.246156,4516.516197,7356.232913,23230.658480,22662.195780,19191.346142,1660673.828
5124,bl,G-ADNI-AN000233,ADNI2,7/11/2013,1.0,2,20,2.0,5,77.026694,1.0,...,1299.428627,2395.361945,762.899925,5930.595527,3347.221580,4283.421830,20014.031040,17561.034076,19630.252580,1472483.446
5124,bl,G-ADNI-AN000233,ADNI2,4/9/2013,1.0,2,20,2.0,5,76.772074,1.0,...,971.886301,2359.820532,717.433942,6382.439344,2819.176373,6000.819711,20970.423970,18530.445763,18535.712168,1485953.233
5193,bl,G-ADNI-AN000253,ADNI2,8/19/2013,1.0,2,16,2.0,5,72.796715,1.0,...,781.376233,1830.013581,428.540759,6754.795545,4661.537881,7168.318769,24752.068350,18095.664380,17365.975349,1469884.649


## Data required for Amyloid Positivity and Mild-to-Moderate Dementia checks (AD)

### Resolving duplicates

In [31]:
'''
From the verification of duplicates above, all the duplicates entries of [RID, VISCODE] have same GDTOTAL, so we can keep any of them
'''
# Drop rows without a GDTOTAL first, so the row kept per key always has a score.
adni_gds_df.dropna(subset=['GDTOTAL'], inplace=True)

# Keep the first row of each (RID, VISCODE) key.
adni_gds_df_AD = adni_gds_df[~adni_gds_df.index.duplicated(keep='first')]

# NOTE(review): this is an inner join, so neuropsych visits without a GDTOTAL
# are dropped from adni_neuropsych_df -- confirm that this is intended.
adni_neuropsych_df = adni_cog_df.merge(
    adni_gds_df_AD['GDTOTAL'],
    how='inner',
    left_index=True,
    right_index=True,
)

In [32]:
'''
From the verification of duplicates above, all the duplicates entries of [RID, VISCODE] have same PHC_AMYLOID_STATUS, so we can keep any of them
'''
# Scans without an amyloid status are removed before de-duplicating, so the
# row kept per key always has a status.
adni_pet_df.dropna(subset=['PHC_AMYLOID_STATUS'], inplace=True)

# Keep the first row of each (RID, VISCODE) key.
adni_pet_df_AD = adni_pet_df[~adni_pet_df.index.duplicated(keep='first')]

In [33]:

# Collapse repeated scans of the same visit into one row by averaging every
# column per (RID, VISCODE) key.
adni_mri_icv_norm_AD = (
    adni_mri_icv_norm
    .groupby(level=['RID', 'VISCODE'])
    .mean()
)

In [34]:
# Start from the ADNIMERGE rows and left-join every other table on the
# (RID, VISCODE) index, so that each ADNIMERGE visit is kept.
# The MRI table joined here is the per-visit average (adni_mri_icv_norm_AD):
# joining the un-averaged adni_mri_icv_norm would repeat an output row for every
# repeated scan of the same visit.
adni_demog_NP_MRI_for_AD = (
    adni_demog_df[['AGE', 'PTGENDER', 'PTEDUCAT', 'APOE4', 'VS_MONTH']]
    .merge(adni_neuropsych_df[adni_cognitive + adni_clinical],
           left_index=True, right_index=True, how='left')
    .merge(adni_mri_icv_norm_AD[adni_mri],
           left_index=True, right_index=True, how='left')
    .merge(adni_pet_df_AD['PHC_AMYLOID_STATUS'],
           left_index=True, right_index=True, how='left')
    .merge(adni_csf_df['PHC_AMYLOID_STATUS_CSF'],
           left_index=True, right_index=True, how='left')
)


In [35]:
# Write the joined table to disk, with RID and VISCODE as ordinary columns.
(
    adni_demog_NP_MRI_for_AD
    .reset_index()
    .to_csv(
        DATA_DIR+'ADNI/clean/FOR_EXPEDITION3/adni_demog_NP_MRI_for_AD.csv',
        index=False,
    )
)