In [1]:
import xml.etree.ElementTree as ET
from pprint import pprint

In [2]:
class Parser:
    """Thin wrapper around an XML document parsed with ElementTree.

    Attributes:
        root: Root element of the parsed document.
    """

    def __init__(self, file_path):
        """Parse the XML file at ``file_path`` and keep its root element."""
        self.root = ET.parse(file_path).getroot() # https://docs.python.org/3/library/xml.etree.elementtree.html
        
    def extract_axis(self, elem) -> dict:
        """Flatten one axis-like element into a plain dictionary.

        Args:
            elem: Element carrying a position attribute and a value-count
                attribute, with optional ``title``, ``definition`` and
                ``label`` children.

        Returns:
            A dict with the keys:
              * ``"code_elements"``: list of ``{"code", "code_desc"}`` dicts,
                one per ``label`` child, in document order;
              * ``"pos"`` / ``"value"``: the element's two attributes;
              * ``"category"``: text of the ``title`` child (only if present);
              * ``"definition"``: text of the ``definition`` child (only if
                present).

        Raises:
            KeyError: if a ``label`` child has no ``code`` attribute.
            ValueError: if the attributes cannot be resolved by name and the
                element does not carry exactly two attributes.
        """
        data = {"code_elements":[]}
        attrs = elem.attrib
        # Resolve attributes by name so the result depends neither on the
        # order they were written in the XML nor on extra attributes.
        value_key = "values" if "values" in attrs else "value"
        if "pos" in attrs and value_key in attrs:
            data["pos"] = attrs["pos"]
            data["value"] = attrs[value_key]
        else:
            # Fallback for elements with other attribute names: exactly two
            # attributes, taken in document order.
            data["pos"], data["value"] = attrs.values()
        for child in elem:
            # A child has a single tag, so the branches are mutually exclusive.
            if child.tag == "title":
                data["category"] = child.text
            elif child.tag == "definition":
                data["definition"] = child.text
            elif child.tag == "label":
                data["code_elements"].append({
                    "code":child.attrib["code"],
                    "code_desc":child.text,
                })
        return data

In [3]:
# Extracting PCS Tables from the XML document
# Every child of the root contributes one 'block' to pcsTable_blocks:
#   "constants" - the axis elements sitting directly under that child
#   "variables" - one entry per pcsRow, holding the row's code count and its axes
parser = Parser("Data/XML/icd10pcs_tables_2021.xml")
pcsTable_blocks = []

for pcsTable in parser.root:
    constants, variables = [], []
    for elem in pcsTable:
        if elem.tag == "axis":
            constants.append(parser.extract_axis(elem))
        elif elem.tag == "pcsRow":
            variables.append({
                "num_codes": elem.attrib["codes"],
                "codes": [parser.extract_axis(axis) for axis in elem],
            })
    pcsTable_blocks.append({"constants": constants, "variables": variables})

# Two blocks are subtracted from the total for the non-table root children
print(f"Total Number of pcsTables: {len(pcsTable_blocks)-2}") #'version', 'title' tags

Total Number of pcsTables: 890


In [4]:
# Display the data extracted from the XML document for a single block.
# NOTE(review): index 2 is presumably the first real pcsTable, since the
# root's 'version' and 'title' children also produce (empty) blocks — confirm.
pprint(pcsTable_blocks[2])

{'constants': [{'category': 'Section',
                'code_elements': [{'code': '0',
                                   'code_desc': 'Medical and Surgical'}],
                'pos': '1',
                'value': '1'},
               {'category': 'Body System',
                'code_elements': [{'code': '0',
                                   'code_desc': 'Central Nervous System and '
                                                'Cranial Nerves'}],
                'pos': '2',
                'value': '1'},
               {'category': 'Operation',
                'code_elements': [{'code': '1', 'code_desc': 'Bypass'}],
                'definition': 'Altering the route of passage of the contents '
                              'of a tubular body part',
                'pos': '3',
                'value': '1'}],
 'variables': [{'codes': [{'category': 'Body Part',
                           'code_elements': [{'code': '6',
                                              'code_desc': 'Cere

In [5]:
# Enumerating every full code, one list of codes per pcsRow
# A full code = the block's fixed prefix (codes of its constant axes)
#             + one code from each of the row's four axes
master_code_list = []
for table_block in pcsTable_blocks:
    # Prefix shared by every code generated from this block
    const = "".join(
        label["code"]
        for axis in table_block["constants"]
        for label in axis["code_elements"]
    )
    for row in table_block["variables"]:
        row_axes = row["codes"]
        num_codes = row["num_codes"]
        # Cartesian product of the four row axes; the last axis varies fastest
        code_list = [
            const + c4["code"] + c5["code"] + c6["code"] + c7["code"]
            for c4 in row_axes[0]["code_elements"]
            for c5 in row_axes[1]["code_elements"]
            for c6 in row_axes[2]["code_elements"]
            for c7 in row_axes[3]["code_elements"]
        ]
        # Sanity check against the code count declared on the pcsRow itself
        assert len(code_list) == int(num_codes), f"Number of codes found, {len(code_list)}, does not match given code length, {num_codes}."
        master_code_list.append(code_list)

#### Check whether the total number of calculated code combinations matches the total number of possible PCS codes
- Source of total possible code combinations: https://www.google.com/search?q=how+many+total+pcs+codes+are+there
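- Assumes FY 2020 PCS codes; note that the tables parsed above come from `icd10pcs_tables_2021.xml`, so the reference total may not match the calculated count exactly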

In [6]:
# Total number of calculated code combinations, summed over every per-row list
count = sum(map(len, master_code_list))
print(count)

78136


In [7]:
# Calculated code combinations for the first entry of master_code_list.
# Each entry holds the codes of one pcsRow, so this is the first pcsRow
# that produced codes, not a whole pcsTable.
# Use to visually compare with the XML document.
print(master_code_list[0])

['0016070', '0016071', '0016072', '0016073', '0016074', '0016075', '0016076', '0016077', '0016078', '001607A', '001607B', '00160J0', '00160J1', '00160J2', '00160J3', '00160J4', '00160J5', '00160J6', '00160J7', '00160J8', '00160JA', '00160JB', '00160K0', '00160K1', '00160K2', '00160K3', '00160K4', '00160K5', '00160K6', '00160K7', '00160K8', '00160KA', '00160KB', '0016370', '0016371', '0016372', '0016373', '0016374', '0016375', '0016376', '0016377', '0016378', '001637A', '001637B', '00163J0', '00163J1', '00163J2', '00163J3', '00163J4', '00163J5', '00163J6', '00163J7', '00163J8', '00163JA', '00163JB', '00163K0', '00163K1', '00163K2', '00163K3', '00163K4', '00163K5', '00163K6', '00163K7', '00163K8', '00163KA', '00163KB', '0016470', '0016471', '0016472', '0016473', '0016474', '0016475', '0016476', '0016477', '0016478', '001647A', '001647B', '00164J0', '00164J1', '00164J2', '00164J3', '00164J4', '00164J5', '00164J6', '00164J7', '00164J8', '00164JA', '00164JB', '00164K0', '00164K1', '00164K2'