In [47]:
import os
import pandas as pd
import numpy as np

In [48]:
# Load the DXSUM CSV export into a DataFrame, then preview its first 100 rows.
DXSUM = pd.read_csv("data/All_Subjects_DXSUM_24Oct2024.csv")
DXSUM.head(100)

Unnamed: 0,PHASE,PTID,RID,VISCODE,VISCODE2,EXAMDATE,DIAGNOSIS,DXNORM,DXNODEP,DXMCI,...,DXODES,DXCONFID,ID,SITEID,USERDATE,USERDATE2,DD_CRF_VERSION_LABEL,LANGUAGE_CODE,HAS_QC_ERROR,update_stamp
0,ADNI1,011_S_0002,2,bl,bl,2005-09-29,1.0,1.0,-4.0,-4.0,...,-4.0,4.0,2,107,2005-10-01,,,,,2005-10-01 00:00:00.0
1,ADNI1,011_S_0003,3,bl,bl,2005-09-30,3.0,-4.0,-4.0,-4.0,...,-4.0,3.0,4,107,2005-10-01,,,,,2005-10-01 00:00:00.0
2,ADNI1,011_S_0005,5,bl,bl,2005-09-30,1.0,1.0,-4.0,-4.0,...,-4.0,4.0,6,107,2005-10-01,,,,,2005-10-01 00:00:00.0
3,ADNI1,011_S_0008,8,bl,bl,2005-09-30,1.0,1.0,-4.0,-4.0,...,-4.0,3.0,8,107,2005-10-01,,,,,2005-10-01 00:00:00.0
4,ADNI1,022_S_0007,7,bl,bl,2005-10-06,3.0,-4.0,-4.0,-4.0,...,-4.0,4.0,10,10,2005-10-06,,,,,2005-10-06 00:00:00.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,ADNI1,068_S_0210,210,bl,bl,2006-03-02,1.0,1.0,-4.0,-4.0,...,-4.0,3.0,192,33,2006-03-02,,,,,2006-03-02 00:00:00.0
96,ADNI1,068_S_0127,127,bl,bl,2006-03-02,1.0,1.0,-4.0,-4.0,...,-4.0,3.0,194,33,2006-03-02,,,,,2006-03-02 00:00:00.0
97,ADNI1,032_S_0187,187,bl,bl,2006-03-02,2.0,-4.0,-4.0,1.0,...,-4.0,4.0,196,16,2006-03-02,,,,,2006-03-02 00:00:00.0
98,ADNI1,021_S_0159,159,bl,bl,2006-03-03,1.0,1.0,-4.0,-4.0,...,-4.0,4.0,198,9,2006-03-03,,,,,2006-03-03 00:00:00.0


**Number of patients**

In [49]:
# Number of distinct patient IDs, displayed as a 1-tuple (the shape of the
# de-duplicated PTID column).
DXSUM['PTID'].drop_duplicates().shape

(3220,)

**Creating a dictionary where the keys are patient IDs, and the values are relevant information about visits.**

In [50]:
# Map each patient ID (PTID) to the list of that patient's visits, kept in the
# order the rows appear in DXSUM. Each visit is [VISCODE, EXAMDATE, DIAGNOSIS].
# NOTE(review): the cohort cells further down walk each list as if it were
# chronological; that relies on the CSV row order -- confirm rows are sorted
# by EXAMDATE within a patient.
patient_dict = {}

visit_columns = DXSUM[['PTID', 'VISCODE', 'EXAMDATE', 'DIAGNOSIS']]

# itertuples yields plain tuples for just the needed columns, avoiding the
# per-row Series that iterrows constructs; the resulting dict is the same.
for ptid, viscode, examdate, diagnosis in visit_columns.itertuples(index=False, name=None):
    patient_dict.setdefault(ptid, []).append([viscode, examdate, diagnosis])

# np.save stores a dict by pickling it inside a 0-d object array; read it back
# with np.load("data/patient_dict.npy", allow_pickle=True).item().
np.save("data/patient_dict.npy", patient_dict)

In [51]:
# Spot-check: display the recorded visits ([VISCODE, EXAMDATE, DIAGNOSIS]) of one patient.
patient_dict['011_S_0002']

[['bl', '2005-09-29', 1.0],
 ['m06', '2006-03-06', 1.0],
 ['m36', '2008-08-27', 1.0],
 ['m60', '2010-09-27', 1.0],
 ['v06', '2011-09-19', 1.0],
 ['v11', '2012-10-04', 2.0],
 ['v21', '2013-09-23', 1.0],
 ['v41', '2015-10-20', 1.0],
 ['v51', '2016-10-03', 2.0],
 ['init', '2017-11-01', 2.0]]

**Creating a dictionary of patients who only ever got CN diagnosis.**

Iterates through every visit of every patient, adding the patient to the CN dictionary only if every diagnosis in their records is CN and they have at least 5 recorded visits. The dictionary also contains all the visits, which are all relevant for the dataset.

In [61]:
cn_dict = {}

# Keep a patient only when no visit carries a diagnosis other than 1.0 (CN)
# and at least five visits are on record; all of their visits are stored.
for patient_id, visits in patient_dict.items():
    has_non_cn_visit = any(visit[2] != 1.0 for visit in visits)

    if not has_non_cn_visit and len(visits) >= 5:
        cn_dict[patient_id] = visits

# The dict is pickled by np.save; loading it needs allow_pickle=True and .item().
np.save("data/cn_dict.npy", cn_dict)

In [62]:
# Summarise the CN cohort: number of patients and total visits across them.
print('Number of patients:', len(cn_dict))

num_relevant_visits = sum(len(visits) for visits in cn_dict.values())

print('Number of relevant visits:', num_relevant_visits)


Number of patients: 354
Number of relevant visits: 2540


**Creating a dictionary of patients who only ever got AD diagnosis.**

Iterates through every visit of every patient, adding the patient to the AD dictionary only if they exclusively have AD diagnoses in their records. The dictionary also contains all the visits, which are all relevant for the dataset.

In [72]:
ad_dict = {}

# Keep a patient only when no visit carries a diagnosis other than 3.0 (AD);
# unlike the CN cohort there is no minimum number of visits.
for patient_id, visits in patient_dict.items():
    has_non_ad_visit = any(visit[2] != 3.0 for visit in visits)

    if not has_non_ad_visit:
        ad_dict[patient_id] = visits

# The dict is pickled by np.save; loading it needs allow_pickle=True and .item().
np.save("data/ad_dict.npy", ad_dict)

In [73]:
# Summarise the AD cohort: number of patients and total visits across them.
print('Number of patients:', len(ad_dict))

num_relevant_visits = sum(len(visits) for visits in ad_dict.values())

print('Number of relevant visits:', num_relevant_visits)

# To inspect a single patient, evaluate e.g. ad_dict['011_S_0003'] as the last line.

Number of patients: 510
Number of relevant visits: 1574


**Creating a dictionary for patients with sMCI diagnosis**

Iterates through every visit of every patient, adding the patient to the sMCI dictionary if they never receive an AD diagnosis and have an unbroken streak of at least 5 visits with an MCI diagnosis, including the last recorded visit. The dictionary only contains visits with an MCI diagnosis.

In [43]:
smci_dict = {}

# A patient is stored when they never receive diagnosis 3.0 (AD) and end with
# at least five MCI (2.0) visits recorded after their last CN (1.0) visit.
for patient_id, visits in patient_dict.items():
    ad_seen = False
    mci_streak = []  # MCI visits collected since the most recent CN visit

    for visit in visits:
        diagnosis = visit[2]

        if diagnosis == 3.0:
            # A single AD visit rules the patient out, so stop scanning.
            ad_seen = True
            break

        if diagnosis == 1.0:
            # Reverting to CN discards the streak built so far.
            mci_streak = []
        elif diagnosis == 2.0:
            mci_streak.append(visit)
        # Any other value (e.g. a missing diagnosis) leaves the streak as is.
        # NOTE(review): confirm such visits should not break the streak.

    if not ad_seen and len(mci_streak) >= 5:
        smci_dict[patient_id] = mci_streak

# The dict is pickled by np.save; loading it needs allow_pickle=True and .item().
np.save("data/smci_dict.npy", smci_dict)

In [44]:
# Summarise the sMCI cohort: number of patients and total stored MCI visits.
print('Number of patients:', len(smci_dict))

num_relevant_visits = sum(len(visits) for visits in smci_dict.values())

print('Number of relevant visits:', num_relevant_visits)

# Spot-check the stored MCI visits of one patient.
smci_dict['022_S_0004']

Number of patients: 356
Number of relevant visits: 2575


[['bl', '2005-11-08', 2.0],
 ['m06', '2006-05-02', 2.0],
 ['m12', '2006-11-14', 2.0],
 ['m18', '2007-05-14', 2.0],
 ['m36', '2008-11-18', 2.0]]

**Creating a dictionary for patients with pMCI diagnosis**

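Iterates through every visit of every patient, adding the patient to the pMCI dictionary if they convert to AD within 36 months of being diagnosed with MCI, without regressing back to CN. In the code, the 36-month window is approximated by a visit count: the patient must have between 1 and 6 MCI visits since their last CN diagnosis (if any) before the first AD diagnosis. The dictionary only contains visits with an MCI diagnosis.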

In [45]:
pmci_dict = {}

# A patient is stored when their first AD (3.0) visit is preceded by one to six
# MCI (2.0) visits recorded since their last CN (1.0) visit.
# NOTE(review): the visit count stands in for a time window; EXAMDATE is never
# compared, so six MCI visits may span more than 36 months -- confirm this
# proxy is acceptable.
for patient_id, visits in patient_dict.items():
    converted_to_ad = False
    mci_streak = []  # MCI visits collected since the most recent CN visit

    for visit in visits:
        diagnosis = visit[2]

        if diagnosis == 3.0:
            # Only the visits leading up to the first AD diagnosis matter.
            converted_to_ad = True
            break

        if diagnosis == 1.0:
            # Reverting to CN discards the MCI visits gathered so far.
            mci_streak = []
        elif diagnosis == 2.0:
            mci_streak.append(visit)
        # Any other value (e.g. a missing diagnosis) is ignored.

    if converted_to_ad and 1 <= len(mci_streak) <= 6:
        pmci_dict[patient_id] = mci_streak

# The dict is pickled by np.save; loading it needs allow_pickle=True and .item().
np.save("data/pmci_dict.npy", pmci_dict)

In [46]:
# Summarise the pMCI cohort: number of patients and total stored MCI visits.
print('Number of patients:', len(pmci_dict))

num_relevant_visits = sum(len(visits) for visits in pmci_dict.values())

print('Number of relevant visits:', num_relevant_visits)

# Spot-check the stored MCI visits of one patient.
pmci_dict['099_S_0051']

Number of patients: 385
Number of relevant visits: 1267


[['bl', '2005-12-29', 2.0],
 ['m06', '2006-09-01', 2.0],
 ['m12', '2007-02-01', 2.0],
 ['m18', '2007-07-09', 2.0],
 ['m24', '2007-12-13', 2.0],
 ['m36', '2008-12-12', 2.0]]