# Parse ICD10 codes from CMS

In [17]:
# download ICD codes from CMS as an .xml file
# https://www.cms.gov/medicare/coding-billing/icd-10-codes/2024-icd-10-cm
import pandas as pd
import xml.dom.minidom
df = [] # results container
doc = xml.dom.minidom.parse('icd10cm_tabular_2024.xml')
# the .xml file has nested structure (chapter, section, diag, subdiag)
chapters = doc.documentElement.getElementsByTagName("chapter")
for chapter in chapters:
    sections = chapter.getElementsByTagName("section")
    for section in sections:
        diags = section.getElementsByTagName("diag")
        for diag in diags:
            diagNameElement = diag.getElementsByTagName("name")[0]
            diagDescElement = diag.getElementsByTagName("desc")[0]
            df.append([diagNameElement.childNodes[0].data, diagDescElement.childNodes[0].data])
            subdiags = diag.getElementsByTagName("diag")
            for subdiag in subdiags:
                subdiagNameElement = subdiag.getElementsByTagName("name")[0]
                subdiagDescElement = subdiag.getElementsByTagName("desc")[0]
                df.append([subdiagNameElement.childNodes[0].data, subdiagDescElement.childNodes[0].data])
df = pd.DataFrame(df, columns = ['code', 'name']) # convert to pandas df
df = df.drop_duplicates() # drop any duplicate rows
df['load_ts'] = pd.Timestamp.utcnow() # add timestamp
df.head(10) # quick check result
df.to_csv('icd10_codes_2024.csv', index = False) # save to .csv

Unnamed: 0,code,name,load_ts
0,A00,Cholera,2024-02-05 18:04:48.377417+00:00
1,A000,"Cholera due to Vibrio cholerae 01, biovar chol...",2024-02-05 18:04:48.377417+00:00
2,A001,"Cholera due to Vibrio cholerae 01, biovar eltor",2024-02-05 18:04:48.377417+00:00
3,A009,"Cholera, unspecified",2024-02-05 18:04:48.377417+00:00
7,A01,Typhoid and paratyphoid fevers,2024-02-05 18:04:48.377417+00:00
8,A010,Typhoid fever,2024-02-05 18:04:48.377417+00:00
9,A0100,"Typhoid fever, unspecified",2024-02-05 18:04:48.377417+00:00
10,A0101,Typhoid meningitis,2024-02-05 18:04:48.377417+00:00
11,A0102,Typhoid fever with heart involvement,2024-02-05 18:04:48.377417+00:00
12,A0103,Typhoid pneumonia,2024-02-05 18:04:48.377417+00:00
