In [None]:
import os
import pandas as pd

In [None]:
# ------------------------------------------------------------------
# File locations -- replace the placeholders before running
# ------------------------------------------------------------------

# Inputs (CSV tables read further down in this notebook)
adnimerge_path = '/<path to>/ADNIMERGE.csv'          # ADNIMERGE.csv
dx_path = '/<path to>/DXSUM_PDXCONV_ADNIALL.csv'     # DXSUM_PDXCONV_ADNIALL.csv
roster_path = '/<path to>/ROSTER.csv'                # ROSTER.csv

# Output: directory that the resulting 'roster_CN.csv' is written into
output_path = '/<path>'

In [None]:
# Read the two input tables from the configured CSV locations.
adnimerge = pd.read_csv(filepath_or_buffer=adnimerge_path)
dx = pd.read_csv(filepath_or_buffer=dx_path)

In [None]:
# One sub-table of `dx` per study phase, selected on the 'Phase' column.
dx_adni1, dx_adni2, dx_adnigo, dx_adni3 = (
    dx[dx['Phase'] == phase_label]
    for phase_label in ('ADNI1', 'ADNI2', 'ADNIGO', 'ADNI3')
)

In [None]:
# Row counts: full diagnosis table, then ADNI1, ADNI2, ADNIGO, ADNI3.
print(*map(len, (dx, dx_adni1, dx_adni2, dx_adnigo, dx_adni3)))

## ADNI1


    current diagnosis DXCURREN: 1=NL, 2=MCI, 3=AD
    did diagnosis change DXCONV: 0=No, 1=Yes-Conversion, 2=Yes-Reversion
    if patient reverted, DXREV: 1=MCI to Normal, 2=AD to MCI, 3=AD to Normal
    if patient converted, DXCONTYP: 1=Normal to MCI, 2=Normal to AD, 3=MCI to AD


In [None]:
# RIDs of ADNI1 rows that are the baseline visit (VISCODE2 == 'bl') and
# carry the current diagnosis CN (DXCURREN == 1).
adni1_cn = dx_adni1.loc[
    (dx_adni1['VISCODE2'] == 'bl') & (dx_adni1['DXCURREN'] == 1),
    'RID',
]

# CN -> MCI: unique RIDs with DXCONTYP == 1 at any ADNI1 visit.
# Baseline diagnosis is NOT checked at this step.
adni1_converters = dx_adni1.loc[dx_adni1['DXCONTYP'] == 1, 'RID'].unique()

# CN -> AD: unique RIDs with DXCONTYP == 2 at any ADNI1 visit.
# Baseline diagnosis is NOT checked at this step.
adni1_converters_2 = dx_adni1.loc[dx_adni1['DXCONTYP'] == 2, 'RID'].unique()


In [None]:
# Summary of the ADNI1 selections: baseline-CN rows and unique converter RIDs.
print(
    "ADNI1 CN: {} new subjects, {} converters to MCI, {} converters to AD".format(
        *(len(rids) for rids in (adni1_cn, adni1_converters, adni1_converters_2))
    )
)

## ADNI2/ADNIGO

Diagnosis and diagnostic change are combined in a single variable, DXCHANGE:

    1=Stable NL, 2=Stable MCI, 3=Stable AD
    4=Conversion from NL to MCI, 5=Conversion from MCI to AD, 6=Conversion from NL to AD
    7=Reversion from MCI to NL, 8=Reversion from AD to MCI, 9=Reversion from AD to NL


In [None]:
# RIDs of ADNI2 rows that are the baseline visit (VISCODE2 == 'bl') and
# are coded as stable NL (DXCHANGE == 1).
adni2_cn = dx_adni2.loc[
    (dx_adni2['VISCODE2'] == 'bl') & (dx_adni2['DXCHANGE'] == 1),
    'RID',
]

# NOTE(review): only ADNI2 rows are inspected below; `dx_adnigo` is not
# consulted for conversions -- confirm whether ADNIGO visits should count.

# CN -> MCI: unique RIDs with DXCHANGE == 4 at any ADNI2 visit.
# Baseline diagnosis is NOT checked at this step.
adni2_converters = dx_adni2.loc[dx_adni2['DXCHANGE'] == 4, 'RID'].unique()

# CN -> AD: unique RIDs with DXCHANGE == 6 at any ADNI2 visit.
# Baseline diagnosis is NOT checked at this step.
adni2_converters_2 = dx_adni2.loc[dx_adni2['DXCHANGE'] == 6, 'RID'].unique()


In [None]:
# Summary of the ADNI2 selections: baseline-CN rows and unique converter RIDs.
print(
    "ADNI2 CN: {} new subjects, {} converters to MCI, {} converters to AD".format(
        *(len(rids) for rids in (adni2_cn, adni2_converters, adni2_converters_2))
    )
)

## ADNI3

current diagnosis DIAGNOSIS: 1=NL, 2=MCI, 3=AD

In [None]:
# RIDs of ADNI3 rows that are the baseline visit (VISCODE2 == 'bl') and
# carry the current diagnosis CN (DIAGNOSIS == 1).
adni3_cn = dx_adni3.loc[
    (dx_adni3['VISCODE2'] == 'bl') & (dx_adni3['DIAGNOSIS'] == 1),
    'RID',
]

# Unique RIDs with an MCI diagnosis (DIAGNOSIS == 2) at any ADNI3 visit.
# This selects on the current diagnosis, not on a conversion code, and is
# NOT restricted to baseline-CN subjects at this step.
adni3_converters = dx_adni3.loc[dx_adni3['DIAGNOSIS'] == 2, 'RID'].unique()

# Unique RIDs with an AD diagnosis (DIAGNOSIS == 3) at any ADNI3 visit.
# Same caveat: current diagnosis only, no baseline filter here.
adni3_converters_2 = dx_adni3.loc[dx_adni3['DIAGNOSIS'] == 3, 'RID'].unique()

In [None]:
# Number of ADNI3 baseline rows diagnosed CN.
n_adni3_cn = len(adni3_cn)
print("ADNI3 CN: {} new subjects".format(n_adni3_cn))

## create RID lists

In [None]:
# Union of the baseline-CN RIDs found in ADNI1, ADNI2 and ADNI3.
cn_subjects_all = set().union(adni1_cn, adni2_cn, adni3_cn)

# Baseline-CN subjects that never appear in any of the per-phase
# converter lists (ADNI1, ADNI2, ADNI3; both MCI and AD lists).
cn_subjects_noncon = cn_subjects_all.difference(
    adni1_converters, adni1_converters_2,
    adni2_converters, adni2_converters_2,
    adni3_converters, adni3_converters_2,
)

# The remainder: baseline-CN subjects found in at least one converter list.
cn_subjects_con = cn_subjects_all.difference(cn_subjects_noncon)

print(
    "total: {} CN subjects, {} non-converters, {} converters".format(
        *map(len, (cn_subjects_all, cn_subjects_noncon, cn_subjects_con))
    )
)

## create table with patient id and conversion type

In [None]:
# Read the roster table from the configured CSV location.
roster = pd.read_csv(filepath_or_buffer=roster_path)

In [None]:
# Keep only roster rows whose RID belongs to a baseline-CN subject.
# `isin` is a vectorised membership test: it avoids a Python-level call per
# row and always yields a boolean Series, even when the roster is empty
# (row-wise `apply` returns a DataFrame in that case).
roster = roster[roster['RID'].isin(cn_subjects_all)]

# Subjects have a row in the roster table for each study phase they
# participated in; keep only the first entry per RID.
roster = roster.drop_duplicates(subset=['RID'], keep='first')

In [None]:
# Add a boolean CONVERSION column (True when the RID is in the converter set)
# and drop the bookkeeping columns.
# `assign` builds a new frame instead of writing into `roster`, which is the
# result of boolean indexing / drop_duplicates; in-place column assignment on
# such a frame can raise SettingWithCopyWarning. `isin` replaces the row-wise
# `apply` with a vectorised membership test.
roster = (
    roster
    .assign(CONVERSION=roster['RID'].isin(cn_subjects_con))
    .drop(columns=["USERDATE", "USERDATE2", "update_stamp"])
)

In [None]:
# Show the assembled table (a notebook renders the last expression of a cell).
roster

In [None]:
# Write the table as 'roster_CN.csv' inside the output directory,
# without the DataFrame index column.
roster.to_csv(
    path_or_buf=os.path.join(output_path, 'roster_CN.csv'),
    index=False,
)