# Python vs XML: An Easy Example
---
xzhai March 16, 2017

In [63]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import xml
import xml.etree.ElementTree as ET

from enum import Enum

import itertools

## Example XML

In [71]:
# Example document: three <country> records, each carrying <rank>, <year>,
# <gdppc> and one or more <neighbor> children.
#
# An XML declaration must be the very first thing in the document. The literal
# below starts with a newline and indentation for readability, so it is
# stripped; without that, ET.fromstring() raises
# "ParseError: XML or text declaration not at start of entity".
#
# NOTE: this name shadows the `xml` package imported at the top of the notebook.
xml = r'''
        <?xml version="1.0"?>
        <data>
            <country name="Liechtenstein">
                <rank>1</rank>
                <year>2008</year>
                <gdppc>141100</gdppc>
                <neighbor name="Austria" direction="E"/>
                <neighbor name="Switzerland" direction="W"/>
            </country>
            <country name="Singapore">
                <rank>4</rank>
                <year>2011</year>
                <gdppc>59900</gdppc>
                <neighbor name="Malaysia" direction="N"/>
            </country>
            <country name="Panama">
                <rank>68</rank>
                <year>2011</year>
                <gdppc>13600</gdppc>
                <neighbor name="Costa Rica" direction="W"/>
                <neighbor name="Colombia" direction="E"/>
            </country>
        </data>
    '''.strip()

## Parse

In [69]:
# Parse the XML text into an element tree; `root` is the top-level <data> element.
root = ET.fromstring(xml)

# Show the root's tag and attribute dict so the cell reproduces its recorded output.
print(root.tag, root.attrib)

data {}


### Traverse a node with a loop

In [5]:
# Iterating over an Element yields its direct children.
# Each <country> element contains only whitespace before its first child tag,
# so its `text` prints as a blank, indented line.
for child in root:
    fields = (child.tag, child.attrib, child.text)
    print(*fields)

country {'name': 'Liechtenstein'} 
        
country {'name': 'Singapore'} 
        
country {'name': 'Panama'} 
        


### access data via index & Enum

In [74]:
class country(Enum):
    """Position of each <country> element among the children of the root."""
    Liechtenstein = 0
    Singapore = 1
    Panama = 2
    # Former misspelling, kept as an alias (same value) so that existing
    # references to `country.Lienchtenstein` still resolve.
    Lienchtenstein = 0

# The Enum member's value is the positional index of the element.
print(country.Liechtenstein.value)

# root[i] selects the i-th <country>; its child at index 1 is <year>.
print(root[country.Liechtenstein.value][1].text)

0
2008


### functions

```python
findall()
```

In [30]:
# findall('country') returns the <country> elements that are direct children of root.
# The loop variable deliberately does not reuse the name `country`: that name is
# the Enum class defined in an earlier cell, and rebinding it here would break
# re-running that cell's expressions.
for country_node in root.findall('country'):
    # findtext() returns the text of the first matching child, or None if
    # there is no <rank>, instead of raising AttributeError.
    print(country_node.get('name'), country_node.findtext('rank'))

Liechtenstein 1
Singapore 4
Panama 68


In [47]:
# Look the <country> elements up once and derive both columns from the same list.
country_nodes = root.findall('country')

countries = [node.get('name') for node in country_nodes]
# Element text is always a string; convert to int so the column is numeric
# and sorts/compares by value rather than lexicographically.
ranks = [int(node.findtext('rank')) for node in country_nodes]

df = pd.DataFrame({'country': countries, 'rank': ranks})
df.head()

Unnamed: 0,country,rank
0,Liechtenstein,1
1,Singapore,4
2,Panama,68


### XPath

In [59]:
# './/neighbor' selects every <neighbor> element at any depth below root,
# not just direct children.
neighbor_path = './/neighbor'
root.findall(neighbor_path)

[<Element 'neighbor' at 0x000000000A0B82C8>,
 <Element 'neighbor' at 0x000000000A0B8318>,
 <Element 'neighbor' at 0x000000000A0B8598>,
 <Element 'neighbor' at 0x000000000A0B8A48>,
 <Element 'neighbor' at 0x000000000A0B8B38>]