In [1]:
from dcc import DCC
from collections import defaultdict

In [2]:
# Build the DCC object from the example XML file.
# The path is relative -- presumably resolved against the notebook's working directory (TODO confirm).
dcco = DCC('data/dcc/siliziumkugel_2_4_0.xml')

In [3]:
# Baseline: let the DCC object report the item identification itself,
# for comparison with the manual extraction attempts further down.
dcco.item_id()

{'issuer': 'manufacturer',
 'value': 'Si28kg_03_a',
 'content (lang: de)': 'Kennnummer',
 'content (lang: en)': 'Serial No.'}

In [5]:
# Manual attempt at turning the item's <dcc:identifications> subtree into a dict.
item_list = dcco.root.find("dcc:administrativeData/dcc:items", dcco.name_space)
elem_dict = {}
# iterate through individual items and subelements and return identification type with value
for elem in item_list.iter(tag='{' + dcco.name_space['dcc'] + '}' + 'identifications'):
    for subelem in elem.iter():
        # Element.text is None for elements that carry no text at all (e.g. <tag/>);
        # fall back to '' so the .strip() check below cannot raise AttributeError.
        textpart = subelem.text or ''
        # '{https://ptb.de/dcc}content' -> 'dcc:content'.
        # NOTE: this only yields '<prefix>:<name>' because the namespace URI happens to
        # end in '/<prefix>'; it is not a general URI-to-prefix mapping.
        key = subelem.tag.rpartition('/')[2].replace('}', ':')
        if textpart.strip():
            # checks if additional attributes like language are available and adds it to result list
            if subelem.attrib:
                # only the first attribute of the element is recorded
                attr_name, attr_value = subelem.items()[0]
                elem_dict.setdefault(key, []).append({'#text': textpart, attr_name: attr_value})
            else:
                elem_dict[key] = textpart
        else:
            # element without own text (a container): wrap everything collected so far under its key
            elem_dict = {key: elem_dict}

In [37]:
# Inspect how the qualified tag splits at its last '/': the tail keeps the final
# segment of the namespace URI followed by '}localname'.
# NOTE: relies on `subelem` still being bound from a previously executed loop cell.
subelem.tag.rpartition('/')

('{https://ptb.de', '/', 'dcc}content')

In [17]:
# Show the prefix -> namespace-URI mapping that is passed to the find() calls in this notebook.
dcco.name_space

{'dcc': 'https://ptb.de/dcc',
 'si': 'https://ptb.de/si',
 'ds': 'http://www.w3.org/2000/09/xmldsig#'}

In [33]:
# Display elem_dict as left by whichever cell assigned it last (depends on execution order).
# NOTE(review): in the recorded output the nesting is inverted relative to the XML
# ('dcc:description' wraps 'dcc:identification'), so the manual loop does not mirror the tree.
elem_dict

{'dcc:description': {'dcc:identification': {'dcc:identifications': {}},
  'dcc:issuer': 'manufacturer',
  'dcc:value': 'Si28kg_03_a'},
 'dcc:content': [{'#text': 'Kennnummer', 'lang': 'de'},
  {'#text': 'Serial No.', 'lang': 'en'}]}

In [11]:
# Debug aid: print the raw .text of every element below each <dcc:identifications> node.
# Element.iter() yields the starting element itself first, then its descendants in document order.
# Requires `item_list` from an earlier cell.
for elem in item_list.iter(tag='{' + dcco.name_space['dcc'] + '}' + 'identifications'):
    for subelem in elem.iter():
        print(subelem.text)


                    

                        
manufacturer
Si28kg_03_a

                            
Kennnummer
Serial No.


In [18]:
def etree_to_dict(t):
    """Recursively convert an ElementTree element into a nested dict.

    The result is a one-entry dict ``{local_name: value}`` where ``local_name``
    is the element tag with any ``{namespace}`` prefix removed.

    ``value`` is:
      * ``None`` for an element with no children, no attributes and no text;
      * the stripped text for an element with text but no children/attributes;
      * otherwise a dict holding the converted children (children sharing a
        name are collected in a list, a single child is stored directly),
        attributes under ``'@' + attribute_name`` and, if non-empty after
        stripping, the element text under ``'#text'``.

    Tail text is not considered.
    """
    name = t.tag.rpartition('}')[2]
    kids = list(t)
    has_attrs = bool(t.attrib)

    if kids:
        # Group converted children by name, keeping first-seen order.
        grouped = defaultdict(list)
        for kid in kids:
            for kid_name, kid_value in etree_to_dict(kid).items():
                grouped[kid_name].append(kid_value)
        value = {}
        for kid_name, kid_values in grouped.items():
            value[kid_name] = kid_values if len(kid_values) != 1 else kid_values[0]
    elif has_attrs:
        value = {}
    else:
        value = None

    if has_attrs:
        for attr_name, attr_value in t.attrib.items():
            value['@' + attr_name] = attr_value

    raw_text = t.text
    if raw_text:
        stripped = raw_text.strip()
        if not (kids or has_attrs):
            # Plain leaf: the (possibly empty) stripped text is the value itself.
            value = stripped
        elif stripped:
            value['#text'] = stripped

    return {name: value}

In [4]:
import xml.etree.ElementTree as ET

In [15]:
# Small inline XML sample with a single dcc:identification entry, parsed directly
# with ElementTree so it can be inspected without the DCC object.
idxml = ET.XML('''<?xml version="1.0" encoding="UTF-8"?>
<dcc:digitalCalibrationCertificate xmlns:dcc="https://ptb.de/dcc">
                <dcc:identifications>
                    <dcc:identification>
                        <dcc:issuer>manufacturer</dcc:issuer>
                        <dcc:value>Si28kg_03_a</dcc:value>
                        <dcc:description>
                            <dcc:content lang="de">Kennnummer</dcc:content>
                            <dcc:content lang="en">Serial No.</dcc:content>
                        </dcc:description>
                    </dcc:identification>
                </dcc:identifications>
</dcc:digitalCalibrationCertificate>''')

In [23]:
# Drill down two levels: list the children of the root's first child
# (here the <dcc:identifications> element of the inline sample).
list(list(idxml)[0])

[<Element '{https://ptb.de/dcc}identification' at 0x0000024E16E79220>]

In [21]:
# Convert the inline sample: namespace prefixes are dropped from the keys, attributes appear
# under '@name', and the text of elements that carry attributes goes under '#text'.
etree_to_dict(idxml)

{'digitalCalibrationCertificate': {'identifications': {'identification': {'issuer': 'manufacturer',
    'value': 'Si28kg_03_a',
    'description': {'content': [{'@lang': 'de', '#text': 'Kennnummer'},
      {'@lang': 'en', '#text': 'Serial No.'}]}}}}}

In [34]:
# Same lookup as the manual attempt, but the conversion is delegated to etree_to_dict.
item_list = dcco.root.find("dcc:administrativeData/dcc:items", dcco.name_space)
elem_dict = {}
# convert every <dcc:identifications> element found below the items node
for elem in item_list.iter(tag='{' + dcco.name_space['dcc'] + '}' + 'identifications'):
    print(elem)  # debug: show which element matched
    elem_dict=etree_to_dict(elem)  # reassigned on each iteration, so only the last match is kept

<Element '{https://ptb.de/dcc}identifications' at 0x0000024E1635BF90>


In [33]:
# Look up the <dcc:items> container element on its own; find() returns the first match or None.
dcco.root.find("dcc:administrativeData/dcc:items", dcco.name_space)

[<Element '{https://ptb.de/dcc}name' at 0x0000024E16358900>,
 <Element '{https://ptb.de/dcc}item' at 0x0000024E16358AE0>]

<Element '{https://ptb.de/dcc}items' at 0x0000024E163588B0>

In [35]:
# Display elem_dict; the recorded output uses plain local names as keys,
# i.e. it was produced by the etree_to_dict-based cell.
elem_dict

{'identifications': {'identification': {'issuer': 'manufacturer',
   'value': 'Si28kg_03_a',
   'description': {'content': [{'@lang': 'de', '#text': 'Kennnummer'},
     {'@lang': 'en', '#text': 'Serial No.'}]}}}}

In [45]:
# Address the <dcc:identifications> element directly via its full path instead of iterating;
# find() returns only the first matching element.
id_list = dcco.root.find("dcc:administrativeData/dcc:items/dcc:item/dcc:identifications", dcco.name_space)

In [47]:
# Convert the directly located element; the recorded output is identical to the loop-based result.
etree_to_dict(id_list)

{'identifications': {'identification': {'issuer': 'manufacturer',
   'value': 'Si28kg_03_a',
   'description': {'content': [{'@lang': 'de', '#text': 'Kennnummer'},
     {'@lang': 'en', '#text': 'Serial No.'}]}}}}