# Wrangling XML
An example showing how to extract author data from an XML file and collect it into a list of dictionaries.

In [1]:
import xml.etree.ElementTree as ET

# Path of the XML article file that is parsed in this example.
article_file = "exampleResearchArticle.xml"

# Parse an xml source and hand back the root element of its tree.
def get_root(fname):
    """Return the root element of the XML document at *fname*.

    *fname* is passed straight to ``ET.parse``, so it may be a file name
    or an open file object.
    """
    return ET.parse(fname).getroot()

# Helper: read the text of a direct child element without crashing when it is absent.
def _child_text(parent, tag):
    """Return the text of the first child *tag* of *parent*, or None.

    ``parent.find(tag)`` returns None when no such child exists; accessing
    ``.text`` on that would raise AttributeError, so the missing case is
    mapped to None explicitly.
    """
    child = parent.find(tag)
    return None if child is None else child.text


# Extract authors data from the xml tree and return them as a list of dicts.
def get_authors(root):
    """Collect one record per ``<au>`` element found under ``./fm/bibl/aug``.

    Args:
        root: Root element of the parsed article.

    Returns:
        A list of dicts, one per author, with the keys:
        ``'fnm'``, ``'snm'``, ``'email'`` - text of the child element of the
        same name, or None when that child is missing;
        ``'insr'`` - list of the ``iid`` attribute values of every ``<insr>``
        child, in document order (empty when there are none).

    Raises:
        KeyError: If an ``<insr>`` child has no ``iid`` attribute.
    """
    authors = []
    for author in root.findall('./fm/bibl/aug/au'):
        authors.append({
            # Any of these children may be absent for a given author
            # (e.g. no email listed), so missing fields become None.
            'fnm': _child_text(author, 'fnm'),
            'snm': _child_text(author, 'snm'),
            'email': _child_text(author, 'email'),
            'insr': [ref.attrib['iid'] for ref in author.findall('insr')],
        })

    return authors

# Parse the example article, extract its author records and display them.
root = get_root(article_file)
data = get_authors(root)
# Prints the list of author dicts built by get_authors.
print(data)

[{'fnm': 'Omer', 'snm': 'Mei-Dan', 'email': 'omer@extremegate.com', 'insr': ['I1']}, {'fnm': 'Mike', 'snm': 'Carmont', 'email': 'mcarmont@hotmail.com', 'insr': ['I2']}, {'fnm': 'Lior', 'snm': 'Laver', 'email': 'laver17@gmail.com', 'insr': ['I3', 'I4']}, {'fnm': 'Meir', 'snm': 'Nyska', 'email': 'nyska@internet-zahav.net', 'insr': ['I3']}, {'fnm': 'Hagay', 'snm': 'Kammar', 'email': 'kammarh@gmail.com', 'insr': ['I8']}, {'fnm': 'Gideon', 'snm': 'Mann', 'email': 'gideon.mann.md@gmail.com', 'insr': ['I3', 'I5']}, {'fnm': 'Barnaby', 'snm': 'Clarck', 'email': 'barns.nz@gmail.com', 'insr': ['I6']}, {'fnm': 'Eugene', 'snm': 'Kots', 'email': 'eukots@gmail.com', 'insr': ['I7']}]
