In [1]:
import xml.etree.ElementTree as ET


def parse_xml(fn):
    
    xml_file = ET.parse(fn)
    root = xml_file.getroot()
    
    data_dict = {
        'root': {
            'tag': root.tag,
            'attributes': root.attrib
        },
        'children': []
    }
    
    for child in root:
        child_data = {
            'tag': child.tag,
            'attributes': child.attrib
        }
        data_dict['children'].append(child_data)
    
    return data_dict


In [2]:

fn = 'US v Caliper analyzed/MeasurementInfo.vxml'
measurement_info = parse_xml(fn)
fn = 'US v Caliper analyzed/Study.vxml'
study = parse_xml(fn)

In [3]:
measurement_info, study

({'root': {'tag': 'studyInformation', 'attributes': {'version': '1'}},
  'children': [{'tag': 'genericNumbers',
    'attributes': {'VSI_E_GMT_Unknown': '0',
     'VSI_E_GMT_Annotation': '0',
     'VSI_E_GMT_Linear': '0',
     'VSI_E_GMT_TracedDistance': '0',
     'VSI_E_GMT_Area': '0',
     'VSI_E_GMT_BModePoint': '0',
     'VSI_E_GMT_MModeVelocity': '0',
     'VSI_E_GMT_Time': '0',
     'VSI_E_GMT_Depth': '0',
     'VSI_E_GMT_MModePoint': '0',
     'VSI_E_GMT_HeartRate': '0',
     'VSI_E_GMT_MModeLVArea': '0',
     'VSI_E_GMT_Acceleration': '0',
     'VSI_E_GMT_DopplerVelocity': '0',
     'VSI_E_GMT_VTI': '0',
     'VSI_E_GMT_DopplerPoint': '0',
     'VSI_E_GMT_ContrastRegion': '0',
     'VSI_E_GMT_CardiacRegion': '0',
     'VSI_E_GMT_Radius': '0',
     'VSI_E_GMT_Angle': '0',
     'VSI_E_GMT_Ellipse': '0',
     'VSI_E_GMT_BModeLVAreaLong': '0',
     'VSI_E_GMT_BModeLVAreaShort': '0',
     'VSI_E_GMT_3dPoint': '0',
     'VSI_E_GMT_3dLinear': '0',
     'VSI_E_GMT_3dArea': '0',
     'VS

In [8]:
import glob
import os
import tqdm


def build_meta_dict(fp):
    meta_dict = {}
    for _fn in tqdm.tqdm(glob.glob(os.path.join(fp, '*.*xml*'))):
        _, _ext = os.path.splitext(_fn)
        if _ext not in meta_dict.keys():
            meta_dict[_ext] = {}
        meta_dict[_ext][_fn] = parse_xml(_fn)
    return meta_dict


def summarize_meta_dict(meta_dict):
    for _ext, _entries in meta_dict.items():
        print(len(_entries), _ext)



In [12]:
!ls US\ v\ Caliper\ analyzed

[0m[01;34m20211110180827403[0m/     [01;32mStudy.bak[0m*   [01;32mStudy.vxml.bak[0m*
[01;32mMeasurementInfo.vxml[0m*  [01;32mStudy.vxml[0m*


In [9]:
meta_dict = build_meta_dict('US v Caliper analyzed/')
meta_dict

100%|███████████████████████████████████████████| 3/3 [00:00<00:00, 4638.01it/s]


In [10]:

scanned = []
for _ext, _files in meta_dict.items():
    for _f in _files:
        scanned.append(_f)
    
for _fn in tqdm.tqdm(glob.glob(os.path.join(fp, '*.*'))):
    if _fn not in scanned:
        print(_fn)

NameError: name 'fp' is not defined

In [76]:
meta_dict['.vxml'].keys()

dict_keys(['US v Caliper analyzed/20211110180827403/20211110185407753.vxml', 'US v Caliper analyzed/20211110180827403/20211110184556092.vxml', 'US v Caliper analyzed/20211110180827403/20211116012305311.vxml', 'US v Caliper analyzed/20211110180827403/20211110190629516.vxml', 'US v Caliper analyzed/20211110180827403/20211116001227897.vxml', 'US v Caliper analyzed/20211110180827403/20211116011355823.vxml', 'US v Caliper analyzed/20211110180827403/20211110185852579.vxml', 'US v Caliper analyzed/20211110180827403/20211110184739814.vxml', 'US v Caliper analyzed/20211110180827403/20211110184435186.vxml', 'US v Caliper analyzed/20211110180827403/20211110184219101.vxml', 'US v Caliper analyzed/20211110180827403/20211110183729030.vxml', 'US v Caliper analyzed/20211110180827403/20211116010058000.vxml', 'US v Caliper analyzed/20211110180827403/20211110185204180.vxml', 'US v Caliper analyzed/20211110180827403/20211116012051826.vxml', 'US v Caliper analyzed/20211110180827403/20211116010838247.vxml',

In [77]:
meta_dict['.vxml']['US v Caliper analyzed/20211110180827403/Series.vxml']

{'root': {'tag': 'series',
  'attributes': {'id': '21419197145373966324195418861158523',
   'idStudy': '28342518462174775533543117915754361797',
   'name': 'Nudes',
   'createdDate': '2021-11-10T18:08:27.0Z',
   'acquiredBy': 'Nicole Henning',
   'autoSyncStatus': '1',
   'versionRequired': '3.2.5.15042',
   'versionCreated': '3.2.7.15251',
   'versionModified': '3.2.7.15251',
   'notes': '',
   'animalId': '',
   'animalColor': '',
   'strain': '',
   'weight': '',
   'type': '',
   'c4': '',
   'lastModifiedBy': 'Nicole Henning',
   'lastModifiedDate': '2021-11-10T18:34:38.0Z'}},
 'children': [{'tag': 'fields', 'attributes': {}}]}