In [44]:
from MimicIII import MimicIII

from config import Settings; settings = Settings()

In [63]:
# Instantiate the project's MimicIII reader with the loaded settings.
mimic = MimicIII(settings)

# Work on a copy so that the column added in a later cell does not touch
# the object returned by read_diagnoses().
diagnoses = mimic.read_diagnoses().copy()
diagnoses.head(10)

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,SEQ_NUM,ICD9_CODE
0,1297,109,172335,1.0,40301
111750,105624,9413,176956,1.0,86501
111742,105616,9412,148755,1.0,99674
519073,523522,71349,102891,1.0,389
335323,328441,28882,166021,1.0,44100
519102,523551,71353,178296,1.0,41401
274264,274442,24588,176009,1.0,42840
111727,105601,9412,114483,1.0,486
519113,523562,71361,191859,1.0,41011
519121,523570,71364,175502,1.0,42833


In [64]:
# Flag every diagnosis row via the CCI lookup on its ICD9_CODE value.
# NOTE: `cci` is created by the `cci = CCI(...)` cell further down the
# notebook; that cell (and the class cell before it) must run first.
diagnoses['Chronic'] = cci.lookup(diagnoses['ICD9_CODE'])

In [66]:
# Preview the first rows again, now including the added 'Chronic' column.
diagnoses.head(10)

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,SEQ_NUM,ICD9_CODE,Chronic
0,1297,109,172335,1.0,40301,True
111750,105624,9413,176956,1.0,86501,False
111742,105616,9412,148755,1.0,99674,False
519073,523522,71349,102891,1.0,389,False
335323,328441,28882,166021,1.0,44100,True
519102,523551,71353,178296,1.0,41401,True
274264,274442,24588,176009,1.0,42840,True
111727,105601,9412,114483,1.0,486,False
519113,523562,71361,191859,1.0,41011,True
519121,523570,71364,175502,1.0,42833,True


In [2]:
import pandas as pd
import numpy as np

In [61]:
class CCI:
    """
    Chronic Condition Indicator (CCI) lookup for ICD-9-CM codes.

    Reads a CCI CSV file once, keeps the cleaned table in `self.data`, and
    exposes `lookup`, which maps a code (or a Series of codes) to
    True (chronic) / False (not chronic).
    """

    def __init__(self, cci_path):
        """
        Parameters
        ----------
        cci_path : str
            Path of the CCI CSV file to load.
        """
        self.cci_path = cci_path

        self.data = self._read_and_process()
        # Plain dict {code: bool} for constant-time lookups.
        self._lookup_table = self.data.set_index('ICD-9-CM CODE')['CHRONIC'].to_dict()

    def lookup(self, code):
        """
        Given an icd9 code, returns the corresponding Chronic value
        (True for chronic, and False for not-chronic).

        Parameters
        ----------
        code : str | pd.Series
            A single icd9 code, or a Series of icd9 codes.

        Returns
        -------
        bool | float | pd.Series
            For a str: True when the code is chronic, False when it is not,
            and NaN when the code is not in the table.
            For a pd.Series: a Series with the same index holding those
            values element-wise.

        Raises
        ------
        ValueError
            If `code` is neither a str nor a pd.Series.
        """
        if isinstance(code, pd.Series):
            return code.apply(self._lookup_single)
        if isinstance(code, str):
            return self._lookup_single(code)
        raise ValueError(f'Wrong input type. Expecting str or pd.Series. Got {type(code)}')

    def _lookup_single(self, code):
        """Return the chronic flag for one code, or NaN when it is unknown."""
        try:
            return self._lookup_table[code]
        except (KeyError, TypeError):
            # KeyError: code absent from the table.
            # TypeError: unhashable value inside a Series; treated as unknown.
            return np.nan

    def _read_and_process(self):
        """
        Load the CSV at `self.cci_path` and return a two-column DataFrame
        with columns 'ICD-9-CM CODE' (str) and 'CHRONIC' (bool).
        """
        # Columns 0 and 2 are the code and the chronic flag. Reading as str
        # keeps the `.str` accessors below valid even if a column looks numeric.
        df = pd.read_csv(self.cci_path, usecols=[0, 2], dtype=str)

        # Remove the single-quote characters from headers and values.
        df.columns = [col.replace("'", "") for col in df.columns]
        for column in ('ICD-9-CM CODE', 'CATEGORY DESCRIPTION'):
            df[column] = df[column].str.replace("'", "", regex=False).str.strip()

        df = df.rename(columns={'CATEGORY DESCRIPTION': 'CHRONIC'})
        # Flags other than '0'/'1' become NaN.
        df['CHRONIC'] = df['CHRONIC'].map({'0': False, '1': True})

        return df

In [62]:
# Create the CCI lookup object used by the `diagnoses['Chronic']` cell.
cci = CCI('grouper_data/cci2015.csv')

In [None]:
# Bare attribute access: displays the bound method itself; `lookup` is not
# called here.
cci.lookup

In [25]:
# Load only columns 0 and 2 of the CCI file.
df = pd.read_csv('grouper_data/cci2015.csv', usecols=[0, 2])

# Strip the single-quote characters from the headers and from both columns.
df = df.rename(columns=lambda header: header.replace("'", ""))
df = df.assign(**{
    field: df[field].str.replace("'", "").str.strip()
    for field in ('ICD-9-CM CODE', 'CATEGORY DESCRIPTION')
})

# Rename the flag column and convert its '0'/'1' strings to booleans.
df = df.rename(columns={'CATEGORY DESCRIPTION': 'CHRONIC'})
df = df.assign(CHRONIC=df['CHRONIC'].map({'0': False, '1': True}))
df

Unnamed: 0,ICD-9-CM CODE,CHRONIC
0,0010,False
1,0011,False
2,0019,False
3,0020,False
4,0021,False
...,...,...
13764,V9129,False
13765,V9190,False
13766,V9191,False
13767,V9192,False


In [None]:
# Scratch check: what `to_dict()` returns for an empty DataFrame.
pd.DataFrame().to_dict()

In [30]:
# Preview `df` indexed by ICD-9-CM code. The result is only displayed;
# `df` itself is not modified.
df.set_index('ICD-9-CM CODE')

Unnamed: 0_level_0,CHRONIC
ICD-9-CM CODE,Unnamed: 1_level_1
0010,False
0011,False
0019,False
0020,False
0021,False
...,...
V9129,False
V9190,False
V9191,False
V9192,False


In [41]:
# Build the {code: CHRONIC} dictionary (same expression as in CCI.__init__).
# NOTE(review): this displays the entire dictionary; consider showing only a
# small slice to keep the notebook output readable.
df.set_index('ICD-9-CM CODE')['CHRONIC'].to_dict()

{'0010': False,
 '0011': False,
 '0019': False,
 '0020': False,
 '0021': False,
 '0022': False,
 '0023': False,
 '0029': False,
 '0030': False,
 '0031': False,
 '00320': False,
 '00321': False,
 '00322': False,
 '00323': False,
 '00324': False,
 '00329': False,
 '0038': False,
 '0039': False,
 '0040': False,
 '0041': False,
 '0042': False,
 '0043': False,
 '0048': False,
 '0049': False,
 '0050': False,
 '0051': False,
 '0052': False,
 '0053': False,
 '0054': False,
 '0058': False,
 '00581': False,
 '00589': False,
 '0059': False,
 '0060': False,
 '0061': False,
 '0062': False,
 '0063': False,
 '0064': False,
 '0065': False,
 '0066': False,
 '0068': False,
 '0069': False,
 '0070': False,
 '0071': False,
 '0072': False,
 '0073': False,
 '0074': False,
 '0075': False,
 '0078': False,
 '0079': False,
 '0080': False,
 '00800': False,
 '00801': False,
 '00802': False,
 '00803': False,
 '00804': False,
 '00809': False,
 '0081': False,
 '0082': False,
 '0083': False,
 '00841': False,
 '00842':

In [17]:
# Peek at the first two rows of `df`.
df.head(2)

Unnamed: 0,ICD-9-CM CODE,ICD-9-CM CODE DESCRIPTION,CATEGORY DESCRIPTION,ICD9 CHAPTERS
0,10,CHOLERA D/T VIB CHOLERAE,0,1
1,11,CHOLERA D/T VIB EL TOR,0,1


In [15]:
# Count how often each 'BODY SYSTEM' value occurs.
# NOTE(review): the cell that builds `df` with usecols=[0,2] keeps no
# 'BODY SYSTEM' column, so this presumably ran against an earlier, wider
# `df` — confirm before re-running top to bottom.
df['BODY SYSTEM'].value_counts()

17    2618
6     1441
1     1304
18    1205
11    1115
2     1033
13     902
9      635
7      510
5      485
14     433
10     411
16     410
3      349
15     299
8      274
12     212
4      133
Name: BODY SYSTEM, dtype: int64

In [12]:
# Count how often each 'CATEGORY DESCRIPTION' value occurs.
# NOTE(review): the loading cell renames this column to 'CHRONIC', so this
# presumably ran against an earlier version of `df` — confirm before
# re-running top to bottom.
df['CATEGORY DESCRIPTION'].value_counts()

0    9185
1    4584
Name: CATEGORY DESCRIPTION, dtype: int64

In [None]:
class CCI:
    """
    Chronic condition indicator. 
    
    from: https://www.hcup-us.ahrq.gov/toolssoftware/chronic/chronic.jsp

    Parses a plain-text grouping file into `self._lookup_table`, a dict that
    maps each code token to the integer that heads its group.

    NOTE(review): this cell redefines the name `CCI` that the CSV-based class
    earlier in the notebook also uses; whichever cell runs last wins.
    """
    def __init__(self, file):
        """
        Parameters
        ----------
        file : str
            Path of the text file to parse. A group is a line starting with
            a number followed by whitespace and an upper-case letter, plus
            every following non-empty line.
        """
        # Local import keeps this cell self-contained.
        import re

        # Context manager closes the handle even if reading fails.
        with open(file, "r") as handle:
            content = handle.read()

        lookup = {}
        # Raw string avoids invalid-escape warnings for \d and \s.
        for group in re.findall(r'(\d+\s+[A-Z].*(\n.+)+)', content):
            parsed = group[0].split()
            category = int(parsed[0])
            # parsed[1] is the first word after the number and is skipped;
            # every remaining token is stored as a code of this group.
            for code in parsed[2:]:
                lookup[code] = category
        self._lookup_table = lookup