# XML exercise

Using data from the [**mondial database**](https://drive.google.com/file/d/14lFT4nWHgwN36ij4XZh6OUuup-K9qLgR/view?usp=sharing), find the answers to the following questions:

1. 10 countries with the lowest infant mortality rates
2. 10 cities with the largest population
3. name and country of a) longest river, b) largest lake and c) airport at highest elevation

In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np

In [2]:
# Parse the Mondial XML file into an ElementTree.
# The path is relative, so 'mondial.xml' must be in the working directory.
tree = ET.parse('mondial.xml')

In [3]:
# Grab the document root and show its tag, attributes and number of
# direct children as a quick sanity check of the parsed file.
root = tree.getroot()
print(root.tag, root.attrib, len(root), sep="\n")

mondial
{}
3403


### Question 1

In [4]:
# Collect the name and infant mortality rate of every country that reports one.
dict_1 = {'Name': [], 'Infant Mortality': []}

for country in root.findall("./country"):
    inf_mor_node = country.find('infant_mortality')
    # Skip countries with no <infant_mortality> value. Swallowing the
    # AttributeError instead would leave the previous country's rate in place
    # and record it under this country's name.
    if inf_mor_node is None or inf_mor_node.text is None:
        continue
    dict_1['Name'].append(country.find('name').text)
    dict_1['Infant Mortality'].append(float(inf_mor_node.text))

In [5]:
# Rank countries by infant mortality (ascending) and keep the ten lowest.
df1 = pd.DataFrame(dict_1)
df1.sort_values('Infant Mortality').head(10)

Unnamed: 0,Name,Infant Mortality
38,Monaco,1.81
98,Japan,2.13
36,Norway,2.48
117,Bermuda,2.48
106,Singapore,2.53
37,Sweden,2.6
10,Czech Republic,2.63
8,Spain,2.7
78,Hong Kong,2.73
79,Macao,3.13


### Question 2

In [6]:
# Cities are stored at two depths: directly under <country> and under
# <country>/<province>. Print how many there are at each level.
print(
    len(root.findall("./country/city")),
    len(root.findall("./country/province/city")),
    sep="\n",
)

401
3003


In [7]:
# Collect the name and population of every city that reports a population.
dict_2 = {'Name': [], 'Population': []}

# Cities appear both directly under <country> and nested under <province>;
# walk both paths with the same logic (and without rebinding the builtin
# `list`).
for city_path in ('./country/city', './country/province/city'):
    for city in root.findall(city_path):
        populations = city.findall('./population')
        # Skip cities with no <population> value. Swallowing the IndexError
        # instead would leave the previous city's population in place and
        # record it under this city's name.
        if not populations or populations[-1].text is None:
            continue
        # Look the name up by tag rather than by child position.
        dict_2['Name'].append(city.find('name').text)
        # Use the last <population> entry; presumably the most recent
        # figure -- TODO confirm against the dataset's `year` attributes.
        dict_2['Population'].append(float(populations[-1].text))

In [8]:
# Sort cities by population (ascending) and keep the last ten rows,
# so the most populous city is at the bottom.
df2 = pd.DataFrame(dict_2)
df2.sort_values('Population').tail(10)

Unnamed: 0,Name,Population
1740,Lahore,11126285.0
2911,São Paulo,11152344.0
3144,Kinshasa,11575000.0
1677,Beijing,11716620.0
859,Moskva,11979529.0
1857,Mumbai,12442373.0
1140,Istanbul,13710512.0
3222,Lagos,13745000.0
1761,Karachi,14916456.0
1678,Shanghai,22315474.0


### Question 3

In [9]:
# Longest River
# Collect name, country code(s) and length of every river with a length.
dict_3 = {'Name' : [], 'Country Code': [], 'Length': []}

for river in root.findall("./river"):
    length_node = river.find("./length")
    # Skip rivers with no <length> value. Swallowing the AttributeError
    # instead would leave the previous river's length in place and record it
    # under this river's name.
    if length_node is None or length_node.text is None:
        continue
    dict_3['Name'].append(river.find("./name").text)
    # The `country` attribute is taken verbatim from the XML.
    dict_3['Country Code'].append(river.attrib["country"])
    dict_3['Length'].append(float(length_node.text))

In [10]:
# Sort rivers by length (ascending); the final row is the longest river.
df3 = pd.DataFrame(dict_3)
df3.sort_values('Length').tail(1)

Unnamed: 0,Name,Country Code,Length
214,Yangtze,CN,6380.0


**Answer:** the longest river is the Yangtze, in China.

In [11]:
# Largest Lake
# Collect name, country code(s) and area of every lake that has an area.
dict_4 = {'Name' : [], 'Country Code': [], 'Area': []}

for lake in root.findall("./lake"):
    area_node = lake.find("./area")
    # Skip lakes with no <area> value. Swallowing the AttributeError instead
    # would leave the previous lake's area in place and record it under this
    # lake's name.
    if area_node is None or area_node.text is None:
        continue
    dict_4['Name'].append(lake.find("./name").text)
    # The `country` attribute can hold several space-separated codes
    # (e.g. a lake bordering multiple countries); it is kept verbatim.
    dict_4['Country Code'].append(lake.attrib["country"])
    dict_4['Area'].append(float(area_node.text))

In [12]:
# Sort lakes by area (ascending); the final row is the largest lake.
df4 = pd.DataFrame(dict_4)
df4.sort_values('Area').tail(1)

Unnamed: 0,Name,Country Code,Area
59,Caspian Sea,R AZ KAZ IR TM,386400.0


**Answer:** the largest lake is the Caspian Sea, bordered by Russia, Azerbaijan, Kazakhstan, Iran and Turkmenistan.

In [13]:
# Airport at Highest Elevation
# Collect name, country code and elevation of every airport with an elevation.
dict_5 = {'Name' : [], 'Country Code': [], 'Elevation': []}

for airport in root.findall("./airport"):
    elevation_node = airport.find("./elevation")
    # Skip airports with no <elevation> value. Swallowing the AttributeError
    # instead would leave the previous airport's elevation in place and
    # record it under this airport's name.
    if elevation_node is None or elevation_node.text is None:
        continue
    dict_5['Name'].append(airport.find("./name").text)
    # The `country` attribute is taken verbatim from the XML.
    dict_5['Country Code'].append(airport.attrib["country"])
    dict_5['Elevation'].append(float(elevation_node.text))

In [14]:
# Sort airports by elevation (ascending); the final row is the highest one.
df5 = pd.DataFrame(dict_5)
df5.sort_values('Elevation').tail(1)

Unnamed: 0,Name,Country Code,Elevation
81,El Alto Intl,BOL,4063.0


**Answer:** the airport at the highest elevation is El Alto International Airport, in Bolivia.