In [1]:
import warnings
warnings.filterwarnings('ignore')

In [9]:
%matplotlib inline
import heartpy as hp
import xml.etree.cElementTree as ET
import pathlib
from datetime import datetime
import pprint
pp = pprint.PrettyPrinter(indent=4)

In [7]:
namespaces = {'': 'urn:hl7-org:v3'}

datetime_format = '%Y%m%d%H%M%S.%f'

In [8]:
def get_data_from_xml(file, out_path, visualize=False):
    ecg = {}
    
    file_name_split = file.name.split('-')
    ecg['id'] = file_name_split[0]
    ecg['type'] = file_name_split[-2]
    
    tree = ET.parse(file)
    root = tree.getroot()
    
    effectiveTimeLow = root.find('effectiveTime/low', namespaces).get('value')
    date_time_obj = datetime.strptime(effectiveTimeLow, datetime_format)
    ecg['effectiveTimeLow'] = date_time_obj
    
    effectiveTimeHigh = root.find('effectiveTime/high', namespaces).get('value')
    date_time_obj = datetime.strptime(effectiveTimeHigh, datetime_format)
    ecg['effectiveTimeHigh'] = date_time_obj
    
    # ECG data
    series = root.findall('component/series', namespaces)
    for s in series:
        effectiveTimeLow = s.find('effectiveTime/low', namespaces).get('value')
        effectiveTimeHigh = s.find('effectiveTime/high', namespaces).get('value')
        sequenceSet = s.find('component/sequenceSet', namespaces)
        # TODO: components in sequenceSet: leads
        digits = sequenceSet.find('component/sequence/value/digits', namespaces)
        
        with open(f'{out_path}/{file.name}_{effectiveTimeLow}_{effectiveTimeHigh}.txt', 'w') as f:
            f.write(digits.text)
            
        if visualize == True:
            visualize_ecg(f'{out_path}/{file.name}_{effectiveTimeLow}_{effectiveTimeHigh}.txt', title=file.name)
            
    return ecg

### Post Covid Patients

In [139]:
xml_folder="ExportPostCovid"
files = list(pathlib.Path(xml_folder).glob('*.xml'))
ecg_files_path = 'ecgs_post_covid/'
ecgs = []


for file in files:
    ecg = get_data_from_xml(file, ecg_files_path)
    ecgs.append(ecg)

In [130]:
for i in range(5):
    pp.pprint(ecgs[i])

{   'effectiveTimeHigh': datetime.datetime(2015, 11, 26, 8, 56, 21),
    'effectiveTimeLow': datetime.datetime(2015, 11, 26, 8, 56, 10, 903000),
    'id': '7010',
    'type': 'Ruhe'}
{   'effectiveTimeHigh': datetime.datetime(2021, 10, 26, 9, 29, 50),
    'effectiveTimeLow': datetime.datetime(2021, 10, 26, 9, 13, 45, 937000),
    'id': '17849',
    'type': 'Belastungs'}
{   'effectiveTimeHigh': datetime.datetime(2021, 11, 19, 8, 23, 51),
    'effectiveTimeLow': datetime.datetime(2021, 11, 19, 8, 13, 55, 150000),
    'id': '32243',
    'type': 'Belastungs'}
{   'effectiveTimeHigh': datetime.datetime(2021, 12, 1, 14, 38, 25),
    'effectiveTimeLow': datetime.datetime(2021, 12, 1, 14, 38, 14, 803000),
    'id': '32293',
    'type': 'Ruhe'}
{   'effectiveTimeHigh': datetime.datetime(2017, 10, 25, 10, 40, 14),
    'effectiveTimeLow': datetime.datetime(2017, 10, 25, 10, 24, 3, 247000),
    'id': '17031',
    'type': 'Belastungs'}


### Covid Patients

In [136]:
xml_folder="ExportCovid"
files = list(pathlib.Path(xml_folder).glob('*.xml'))
ecg_files_path = 'ecgs_covid/'
ecgs = []

for file in files:
    ecg = get_data_from_xml(file, ecg_files_path)
    ecgs.append(ecg)