# XML Tutorial Walkthrough

In [42]:
import xml.etree.ElementTree as ET
import pandas as pd

In [22]:
# Parse the XML document into an ElementTree object.
# "data.xml" is a relative path, so it is resolved against the kernel's current working directory.
tree = ET.parse("data.xml")

In [23]:
# Check what kind of object ET.parse() handed back.
print(type(tree))

<class 'xml.etree.ElementTree.ElementTree'>


In [25]:
# Grab the top-level element of the parsed tree; all later cells navigate from it.
root = tree.getroot()
# A bare expression on the last line of a cell displays its repr.
root

<Element 'data' at 0x7f7f09da1950>

In [26]:
# Summarise the root element: its tag, its attribute dict, and how many
# direct children it has -- one value per output line.
print(root.tag, root.attrib, len(root), sep="\n")

data
{}
3


In [28]:
# Elements support positional indexing over their direct children:
# root[0] is the first child of the root, country1[0] is that child's first sub-element.
country1 = root[0]
rank = country1[0]
# Show the tag name of that first sub-element.
print(rank.tag)

rank


In [29]:
# .text holds the text content of the element (always a string, even for numbers).
print(rank.text)

1


In [30]:
# .attrib is the element's attribute dictionary; index 4 picks the fifth child of country1.
print(country1[4].attrib)

{'name': 'Switzerland', 'direction': 'W'}


In [31]:
# Repeat the positional walk for the third child of the root.
# A dedicated name is used for this element so the notebook-level `rank`
# (the first country's rank element, printed by an earlier cell) is not
# rebound -- re-running that earlier cell keeps giving the same answer.
country3 = root[2]
country3_rank = country3[0]
print(country3_rank.tag)
print(country3_rank.text)
# Index 4 is the fifth child of country3; show its attribute dictionary.
print(country3[4].attrib)

rank
68
{'name': 'Colombia', 'direction': 'E'}


In [47]:
# Print "<name> <rank>" for every <country> that is a direct child of the root.
for country in root.findall('country'):
    # get() reads an attribute and yields None when it is absent.
    country_name = country.get('name')
    # findtext() returns the text of the first matching child, or None when
    # there is no such child, so a country without a <rank> no longer raises
    # AttributeError. Distinct variable names also keep this loop from
    # rebinding the notebook-level `rank` element to a plain string.
    rank_text = country.findtext('rank')
    print(country_name, rank_text)

Liechtenstein 1
Singapore 4
Panama 68


In [46]:
# Walk the whole subtree under root (any depth) and print the attribute
# dictionary of each <neighbor> element, one per line.
neighbor_attribs = (node.attrib for node in root.iter('neighbor'))
for attribs in neighbor_attribs:
    print(attribs)

{'name': 'Austria', 'direction': 'E'}
{'name': 'Switzerland', 'direction': 'W'}
{'name': 'Malaysia', 'direction': 'N'}
{'name': 'Costa Rica', 'direction': 'W'}
{'name': 'Colombia', 'direction': 'E'}


In [45]:
# XPath ".": selects the context node itself, so the result is a one-element list holding root.
root.findall(".")

[<Element 'data' at 0x7f7f09da1950>]

In [35]:
# XPath "./country/neighbor": <neighbor> elements that are children of <country> children of root.
root.findall("./country/neighbor")

[<Element 'neighbor' at 0x7f7f09da1ae0>,
 <Element 'neighbor' at 0x7f7f09da1b30>,
 <Element 'neighbor' at 0x7f7f09da1cc0>,
 <Element 'neighbor' at 0x7f7f09da1e50>,
 <Element 'neighbor' at 0x7f7f09da1ea0>]

In [36]:
# XPath ".//year/..[@name='Singapore']": find <year> at any depth, step up to its parent (".."),
# and keep the parent only if its name attribute equals 'Singapore'.
root.findall(".//year/..[@name='Singapore']")

[<Element 'country' at 0x7f7f09da1b80>]

In [37]:
# XPath ".//*[@name='Singapore']/year": <year> children of any element whose name attribute is 'Singapore'.
root.findall(".//*[@name='Singapore']/year")

[<Element 'year' at 0x7f7f09da1c20>]

In [38]:
# XPath ".//neighbor[2]": at any depth, the <neighbor> in position 2 (1-based) among its parent's
# <neighbor> children; parents with fewer than two neighbors contribute nothing.
root.findall(".//neighbor[2]")

[<Element 'neighbor' at 0x7f7f09da1b30>,
 <Element 'neighbor' at 0x7f7f09da1ea0>]

In [40]:
# Build a column-oriented dict (one list per column) from the <country> elements,
# ready to be handed to pd.DataFrame.
#
# Child values are looked up by tag name rather than by position: positional
# access (country[0], country[1], ...) silently puts the wrong element in a
# column if the children appear in a different order or one is absent.
child_fields = ('rank', 'year', 'gdppc')
my_dict = {'name': [], **{field: [] for field in child_fields}}

for country in root.findall('country'):
    # The country name is an attribute on the element itself.
    my_dict['name'].append(country.attrib['name'])
    for field in child_fields:
        # findtext() gives the child's text as a string, or None when the
        # country has no such child, keeping every column the same length.
        my_dict[field].append(country.findtext(field))

my_dict

{'name': ['Liechtenstein', 'Singapore', 'Panama'],
 'rank': ['1', '4', '68'],
 'year': ['2008', '2011', '2011'],
 'gdppc': ['141100', '59900', '13600']}

In [43]:
# Turn the column dict into a DataFrame.
# Everything extracted from the XML is text, so without conversion the numeric
# columns would be stored as strings: comparisons would be lexicographic
# ('141100' < '59900') and sums would concatenate. Convert them once here.
numeric_columns = ['rank', 'year', 'gdppc']
df = pd.DataFrame(my_dict)
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric)
df

Unnamed: 0,name,rank,year,gdppc
0,Liechtenstein,1,2008,141100
1,Singapore,4,2011,59900
2,Panama,68,2011,13600
