### Import packages

In [1]:
import pandas as pd
import numpy as np

### Import utilities

In [2]:
import sys
# Add the sibling ../src/ directory to the import path; the `utils` module
# imported next is presumably resolved from there.
sys.path.append('../src/')
import utils

# Mapping exposed by utils; it is handed to DataFrame.replace later in this
# notebook to rename the ICD-10-CM chapter labels.
rename_map = utils.icd_category_map

### Import CCSR data

In [3]:
# Load the DXCCSR v2021-1 file that maps each ICD-10-CM code to its CCSR categories.
icd_map = pd.read_csv('../data/ccsr/DXCCSR_v2021-1.CSV')

In [4]:
# Peek at the mapping exactly as loaded, before any cleaning.
icd_map.head()

Unnamed: 0,'ICD-10-CM CODE','ICD-10-CM CODE DESCRIPTION','Default CCSR CATEGORY IP,'Default CCSR CATEGORY DESCRIPTION IP','Default CCSR CATEGORY OP,'Default CCSR CATEGORY DESCRIPTION OP','CCSR CATEGORY 1','CCSR CATEGORY 1 DESCRIPTION','CCSR CATEGORY 2','CCSR CATEGORY 2 DESCRIPTION','CCSR CATEGORY 3','CCSR CATEGORY 3 DESCRIPTION','CCSR CATEGORY 4','CCSR CATEGORY 4 DESCRIPTION','CCSR CATEGORY 5','CCSR CATEGORY 5 DESCRIPTION','CCSR CATEGORY 6','CCSR CATEGORY 6 DESCRIPTION'
0,'A000',"Cholera due to Vibrio cholerae 01, biovar chol...",'DIG001',Intestinal infection,'DIG001',Intestinal infection,'DIG001',Intestinal infection,'INF003',Bacterial infections,' ',,' ',,' ',,' ',
1,'A001',"Cholera due to Vibrio cholerae 01, biovar eltor",'DIG001',Intestinal infection,'DIG001',Intestinal infection,'DIG001',Intestinal infection,'INF003',Bacterial infections,' ',,' ',,' ',,' ',
2,'A009',"Cholera, unspecified",'DIG001',Intestinal infection,'DIG001',Intestinal infection,'DIG001',Intestinal infection,'INF003',Bacterial infections,' ',,' ',,' ',,' ',
3,'A0100',"Typhoid fever, unspecified",'DIG001',Intestinal infection,'DIG001',Intestinal infection,'DIG001',Intestinal infection,'INF003',Bacterial infections,' ',,' ',,' ',,' ',
4,'A0101',Typhoid meningitis,'NVS001',Meningitis,'NVS001',Meningitis,'INF003',Bacterial infections,'NVS001',Meningitis,' ',,' ',,' ',,' ',


### Clean CCSR data

In [5]:
# The header row of the raw file wraps column names in single quotes; strip them.
icd_map = icd_map.rename(columns=lambda label: label.replace("'", ''))

In [6]:
# Keep only the first six columns: the ICD-10-CM code, its description, and the
# default inpatient / outpatient CCSR category with their descriptions.
# .copy() makes the result an independent frame, so later in-place assignments
# into it cannot raise SettingWithCopyWarning or write into a temporary.
icd_map = icd_map.iloc[:, :6].copy()

In [7]:
# Columns 0, 2 and 4 (ICD-10-CM code, default IP category, default OP category)
# hold values wrapped in single quotes. Strip them with the vectorised string
# accessor: regex=False treats the quote as a literal character, and missing
# values pass through instead of raising as a per-element str.replace would.
for n in [0, 2, 4]:
    icd_map.iloc[:, n] = icd_map.iloc[:, n].str.replace("'", '', regex=False)

In [8]:
# Re-inspect the mapping after the header/value quote stripping and column trimming.
icd_map.head()

Unnamed: 0,ICD-10-CM CODE,ICD-10-CM CODE DESCRIPTION,Default CCSR CATEGORY IP,Default CCSR CATEGORY DESCRIPTION IP,Default CCSR CATEGORY OP,Default CCSR CATEGORY DESCRIPTION OP
0,A000,"Cholera due to Vibrio cholerae 01, biovar chol...",DIG001,Intestinal infection,DIG001,Intestinal infection
1,A001,"Cholera due to Vibrio cholerae 01, biovar eltor",DIG001,Intestinal infection,DIG001,Intestinal infection
2,A009,"Cholera, unspecified",DIG001,Intestinal infection,DIG001,Intestinal infection
3,A0100,"Typhoid fever, unspecified",DIG001,Intestinal infection,DIG001,Intestinal infection
4,A0101,Typhoid meningitis,NVS001,Meningitis,NVS001,Meningitis


### Import CCSR categories and naming conventions

In [9]:
# Load the 'CCSR_Categories' sheet of the v2021-1 reference workbook,
# skipping the first row of the sheet.
cat = pd.read_excel(
    '../data/ccsr/DXCCSR-Reference-File-v2021-1.xlsx',
    sheet_name='CCSR_Categories',
    skiprows=1,
)

In [10]:
# Load the 'Naming_Conventions' sheet of the same reference workbook,
# skipping the first row of the sheet.
nam = pd.read_excel(
    '../data/ccsr/DXCCSR-Reference-File-v2021-1.xlsx',
    sheet_name='Naming_Conventions',
    skiprows=1,
)

### Clean CCSR categories and naming conventions

In [11]:
# Extract the broad category: the first three characters of each CCSR category
# code (e.g. 'BLD001' -> 'BLD'). The vectorised .str slice replaces the Python
# loop and leaves missing values as NaN instead of raising.
cat['abb'] = cat['CCSR Category'].str[:3]

In [12]:
# Attach the naming-convention row to every CCSR category by matching the
# three-character prefix; a left join keeps categories that have no match.
cats = cat.merge(
    nam,
    how='left',
    left_on='abb',
    right_on='3-Character Abbreviation',
)

In [13]:
# Check that each CCSR category picked up its chapter name from the naming-conventions sheet.
cats.head()

Unnamed: 0,CCSR Category,CCSR Category Description,abb,ICD-10-CM Diagnosis Chapter,3-Character Abbreviation
0,BLD001,Nutritional anemia,BLD,Diseases of the Blood and Blood Forming Organs...,BLD
1,BLD002,Hemolytic anemia,BLD,Diseases of the Blood and Blood Forming Organs...,BLD
2,BLD003,Aplastic anemia,BLD,Diseases of the Blood and Blood Forming Organs...,BLD
3,BLD004,Acute posthemorrhagic anemia,BLD,Diseases of the Blood and Blood Forming Organs...,BLD
4,BLD005,Sickle cell trait/anemia,BLD,Diseases of the Blood and Blood Forming Organs...,BLD


In [14]:
# Join every ICD-10-CM code to its category metadata through the default
# inpatient (IP) CCSR category; a left join keeps codes without a match.
merge = icd_map.merge(
    cats,
    how='left',
    left_on='Default CCSR CATEGORY IP',
    right_on='CCSR Category',
)

In [15]:
# Spot-check the ICD-10-CM -> CCSR category -> chapter join.
merge.head()

Unnamed: 0,ICD-10-CM CODE,ICD-10-CM CODE DESCRIPTION,Default CCSR CATEGORY IP,Default CCSR CATEGORY DESCRIPTION IP,Default CCSR CATEGORY OP,Default CCSR CATEGORY DESCRIPTION OP,CCSR Category,CCSR Category Description,abb,ICD-10-CM Diagnosis Chapter,3-Character Abbreviation
0,A000,"Cholera due to Vibrio cholerae 01, biovar chol...",DIG001,Intestinal infection,DIG001,Intestinal infection,DIG001,Intestinal infection,DIG,Diseases of the Digestive System,DIG
1,A001,"Cholera due to Vibrio cholerae 01, biovar eltor",DIG001,Intestinal infection,DIG001,Intestinal infection,DIG001,Intestinal infection,DIG,Diseases of the Digestive System,DIG
2,A009,"Cholera, unspecified",DIG001,Intestinal infection,DIG001,Intestinal infection,DIG001,Intestinal infection,DIG,Diseases of the Digestive System,DIG
3,A0100,"Typhoid fever, unspecified",DIG001,Intestinal infection,DIG001,Intestinal infection,DIG001,Intestinal infection,DIG,Diseases of the Digestive System,DIG
4,A0101,Typhoid meningitis,NVS001,Meningitis,NVS001,Meningitis,NVS001,Meningitis,NVS,Diseases of the Nervous System,NVS


### Rename and recategorize disease categories

In [17]:
# Row counts per original chapter label, before rename_map is applied.
merge['ICD-10-CM Diagnosis Chapter'].value_counts()

Injury, Poisoning and Certain Other Consequences of External Causes                                    39193
Unacceptable principal diagnosis (inpatient data) or first-listed diagnosis (outpatient data)           9906
Diseases of the Musculoskeletal System and Connective Tissue                                            6230
Diseases of the Eye and Adnexa                                                                          2633
Pregnancy, Childbirth and the Puerperium                                                                2233
Mental, Behavioral and Neurodevelopmental Disorders                                                     1761
Neoplasms                                                                                               1701
Diseases of the Circulatory System                                                                      1389
Diseases of the Digestive System                                                                         888
Endocrine, Nutritio

In [18]:
# Apply the label substitutions from utils. No column is named here, so the
# replacement is evaluated against the whole frame rather than a single column.
merge = merge.replace(rename_map)

In [19]:
# Row counts per chapter label after the rename_map replacement.
merge['ICD-10-CM Diagnosis Chapter'].value_counts()

Injuries and external causes          39193
Exclude                               18663
Musculoskeletal diseases               6230
Mental and substance use disorders     1761
Neoplasms                              1701
Cardiovascular diseases                1389
Gastrointestinal diseases               888
Endocrine diseases                      879
Genitourinary diseases                  719
Nervous system diseases                 716
Infections                              690
Respiratory diseases                    376
Name: ICD-10-CM Diagnosis Chapter, dtype: int64

### Clean data

Select default inpatient category

In [20]:
# Switch the columns kept downstream to snake_case names. Note that
# 'ccsr_category' is the default inpatient category *description*, and
# 'disease_category' is the (already relabelled) diagnosis chapter.
merge = merge.rename(
    columns={
        'ICD-10-CM CODE': 'icd_10_cm_code',
        'ICD-10-CM CODE DESCRIPTION': 'icd_10_cm_code_desc',
        'Default CCSR CATEGORY DESCRIPTION IP': 'ccsr_category',
        'ICD-10-CM Diagnosis Chapter': 'disease_category',
    }
)

In [21]:
# Keep only the four output columns. .copy() makes this an independent frame,
# so the .loc assignments performed on it later in the notebook cannot raise
# SettingWithCopyWarning or write into a temporary.
merge = merge[['icd_10_cm_code','icd_10_cm_code_desc','ccsr_category','disease_category']].copy()

In [22]:
# Preview the trimmed four-column table.
merge.head()

Unnamed: 0,icd_10_cm_code,icd_10_cm_code_desc,ccsr_category,disease_category
0,A000,"Cholera due to Vibrio cholerae 01, biovar chol...",Intestinal infection,Gastrointestinal diseases
1,A001,"Cholera due to Vibrio cholerae 01, biovar eltor",Intestinal infection,Gastrointestinal diseases
2,A009,"Cholera, unspecified",Intestinal infection,Gastrointestinal diseases
3,A0100,"Typhoid fever, unspecified",Intestinal infection,Gastrointestinal diseases
4,A0101,Typhoid meningitis,Meningitis,Nervous system diseases


In [31]:
# Compare the number of distinct codes with the number of rows...
print(merge['icd_10_cm_code'].nunique())
print(merge.shape)
# ...and fail loudly if a code appears more than once, instead of relying on a
# visual comparison of the two printed numbers.
assert merge['icd_10_cm_code'].is_unique, 'duplicate ICD-10-CM codes in merge'

73205
(73205, 4)


There are no duplicate ICD-10-CM codes: the number of unique codes (73,205) equals the number of rows, so each code maps to exactly one category.

#### Remove dental disorders from GI

In [37]:
# Rows whose CCSR category is the dental one are moved to the 'Exclude' bucket.
is_dental = merge['ccsr_category'] == 'Disorders of teeth and gingiva'
merge.loc[is_dental, 'disease_category'] = 'Exclude'

#### Recategorize certain CCSR categories

In [38]:
# CCSR category -> disease category overrides. Written grouped by target
# disease category and then inverted, so it is easy to see what moves where.
ccsr_reorganize = {
    ccsr_name: target
    for target, ccsr_names in {
        'Cardiovascular diseases': [
            'Cardiac and circulatory congenital anomalies',
            'Transient cerebral ischemia',
        ],
        'Gastrointestinal diseases': [
            'Hepatitis',
            'Digestive congenital anomalies',
            'Neonatal digestive and feeding disorders',
        ],
        'Genitourinary diseases': [
            'Genitourinary congenital anomalies',
        ],
        'Infections': [
            'Perinatal infections',
        ],
        'Musculoskeletal diseases': [
            'Musculoskeletal congenital conditions',
        ],
        'Nervous system diseases': [
            'Nervous system congenital anomalies',
            'Neonatal cerebral disorders',
        ],
        'Respiratory diseases': [
            'Respiratory congenital malformations',
            'Respiratory distress syndrome',
            'Respiratory perinatal condition',
        ],
    }.items()
    for ccsr_name in ccsr_names
}

In [39]:
def recode_ccsr_categories(df, dic):
    """Return a copy of ``df`` with ``disease_category`` overridden per CCSR category.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``ccsr_category`` and ``disease_category`` columns.
    dic : dict
        Maps a ``ccsr_category`` value to the ``disease_category`` that every
        row with that CCSR category should receive.

    Returns
    -------
    pandas.DataFrame
        A new frame with the overrides applied; ``df`` itself is not modified.
    """
    # DataFrame.copy() is deep by default. The previous copy.deepcopy(df) call
    # depended on the `copy` module, which this notebook never imports, so the
    # function raised NameError on a fresh kernel.
    recoded = df.copy()

    for ccsr_name, new_category in dic.items():
        recoded.loc[recoded['ccsr_category'] == ccsr_name, 'disease_category'] = new_category

    return recoded

In [40]:
# Override disease_category for the CCSR categories listed in ccsr_reorganize.
merge = recode_ccsr_categories(merge, ccsr_reorganize)

In [44]:
# Final row counts per disease category, after the dental exclusion and the CCSR recoding.
merge['disease_category'].value_counts()

Injuries and external causes          39193
Exclude                               18022
Musculoskeletal diseases               6481
Mental and substance use disorders     1761
Neoplasms                              1701
Cardiovascular diseases                1488
Endocrine diseases                      879
Genitourinary diseases                  878
Gastrointestinal diseases               841
Nervous system diseases                 778
Infections                              738
Respiratory diseases                    445
Name: disease_category, dtype: int64

### Export data

In [45]:
# Write the final four-column table (code, code description, CCSR category, disease category) without the row index.
merge.to_csv('../data/ccsr/disease_hierarchy.csv', index=False)