In [8]:
import numpy as np
import csv
import textwrap
from dateutil.parser import parse
from operator import add
from datetime import datetime

import os,sys,inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir) 

## Look for Reversion In Both Files

Reversion is defined as follows:

In the ADNIMERGE file: Reversion occurs if a patient is diagnosed with Dementia on one visit and then the next visit temporally (with present data) they are diagnosed with MCI or CN. 

In the Diagnosis file: Reversion occurs if either the DXCHANGE explicitly shows reversion from AD (to CN or MCI) or the DXCURREN from one visit shows AD and shows MCI or CN for the next visit temporally. 

In [14]:
patients_nonDementiaAD = set()

DX_CURREN = {'1':'NL', "2": 'MCI', "3": 'AD', "":""}
DX_CHANGE = {'1':"Stable:NL to NL",'2':"Stable: MCI to MCI",'3':"Stable: AD to AD",'4':"Conv:NL to MCI",'5':"Conv:MCI to AD",'6':"Conv:NL to AD", '7':"Rev:MCI to NL",'8':"Rev:AD to MCI",'9':"Conv:AD to NL","":""}

patient_diagnosis_dict = {}
with open('../../Assessments/DXSUM_PDXCONV_ADNIALL.csv') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    next(reader) #skip header
    for row in reader:
        RID = int(row[2])
        EXAMDATE = datetime.strptime(row[8], '%m/%d/%Y')
        Dx_curren = row[10]
        Dx_change = row[9]
        DXOTHDEM = row[47]
        
        #add to dictionary
        ##################
        if RID not in patient_diagnosis_dict:
            patient_diagnosis_dict[RID] = []
            
        patient_diagnosis_dict[RID].append([EXAMDATE, Dx_change, Dx_curren])
        
        #Check for the Non-AD dementia cases
        ##################
        #if Dx_change indicates AD but non-AD dementia by DXOTHDEM
        if DXOTHDEM == "1":
            if Dx_change in ['3','5','6'] or Dx_curren == '3':
                patients_nonDementiaAD.add(RID)
    

reverted_patients = set()    
for patient in patient_diagnosis_dict:
    exams = sorted(patient_diagnosis_dict[patient])

    #Check for reversions in Diagnosis File
    ##################
    for i in range(len(exams)):
        dx_change = exams[i][1]
        dx_current = exams[i][2]
        
        if dx_change == '8' or dx_change == '9':
            reverted_patients.add(patient)
            
        
        for j in range(i+1,len(exams)):
            dx_change_new = exams[j][1]
            dx_current_new = exams[j][2]
            if dx_current == '3' or dx_change in ['3', '5', '6']: #if diagnosed as AD
                if dx_current_new in ['2','3'] or dx_change_new in ['1','2','4','7']:
                    reverted_patients.add(patient)
            
          
        #if currently have AD, make sure the coding isnt messed up
        #if i+1 < len(exams) and dx_change in ['3','5','6']:
        #    dx_change_new = exams[i+1][1]
        #    if dx_change_new in ['1','2','4','7']:
        #        reverted_patients.add(patient)
                
        #if currently have AD, make sure the next time they dont have NL or MCI
        #if i+1 < len(exams) and dx_current == '3':
        #    dx_current_new = exams[i+1][2]
        #    if dx_current_new == "1" or dx_current_new == "2":
        #        reverted_patients.add(patient)

print "Patients we need to correct label for (they are NOT AD patients):",list(patients_nonDementiaAD)

Patients we need to correct label for (they are NOT AD patients): [769, 93]


### Check in Main File Now

In [16]:
patient_dict = {}
labels = []

RID = 0
COLPROT = 4
EXAMDATE = 6
DX_bl = 7
DX = 51

with open('../../Data___Database/ADNIMERGE.csv') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    for row in reader:
        if row[0] == "RID":
            labels = row
        else:
            if int(row[0]) not in patient_dict:
                patient_dict[int(row[0])] = []
            
            date = datetime.strptime(row[EXAMDATE], '%Y-%m-%d')
            patient_dict[int(row[0])].append([date, row[DX]])

            
reverted_patients_diag = set()    
for patient in patient_dict:
    exams = sorted(patient_dict[patient])

    #Check for reversions in Main File
    ##################
    for i in range(len(exams)):
        diag = exams[i][1]
        if diag == "Dementia to MCI":
            reverted_patients_diag.add(patient)
  
        elif i+1 < len(exams) and (diag == 'Dementia' or diag.split(' ')[-1] == 'Dementia') and patient not in [769,93]:
            next_index = i+1
            while next_index < len(exams)-1 and exams[next_index][1] != "":
                next_index += 1
            
            #I allow MCI to Dementia because this looks like a coding error- if the last visit DX was Dementia and the next is MCI to Dementia, I assume that they had Dementia the whole time
            if exams[next_index][1] not in ['Dementia', 'MCI to Dementia'] and exams[next_index][1] != "":
                reverted_patients_diag.add(patient)
  


In [15]:
print "Reverted patients in Diagnosis File are:"
print sorted(list(reverted_patients))

print "\n Reverted patients in Main File are:"
print sorted(list(reverted_patients_diag))

union = list(reverted_patients | reverted_patients_diag)
print "\n All",  len(union), "potential reversion patients:"
print sorted(union)

Reverted patients in Diagnosis File are:
[135, 162, 166, 167, 429, 507, 555, 566, 702, 1226, 2210, 2274, 2367, 4005, 4114, 4293, 4426, 4430, 4434, 4641, 4706, 4741, 4746, 4845, 4899, 4947]

 Reverted patients in Main File are:
[162, 167, 429, 555, 702, 739, 1226, 2367, 4005, 4114, 4426, 4434, 4641, 4706, 4746]

 All 27 potential reversion patients:
[135, 162, 166, 167, 429, 507, 555, 566, 702, 739, 1226, 2210, 2274, 2367, 4005, 4114, 4293, 4426, 4430, 4434, 4641, 4706, 4741, 4746, 4845, 4899, 4947]
