# XML exercise

Using data from [**mondial database**](https://drive.google.com/file/d/14lFT4nWHgwN36ij4XZh6OUuup-K9qLgR/view?usp=sharing) find the answers to following questions:

1. 10 countries with the lowest infant mortality rates
2. 10 cities with the largest population
3. name and country of a) longest river, b) largest lake and c) airport at highest elevation

[Answer](https://notebook.community/norsween/data-science/springboard-answers-to-exercises/sliderule_dsi_xml_exercise-answers%20to%20exercises)

In [1]:
import pandas as pd
import xml.etree.ElementTree as et

In [2]:
tree = et.parse('mondial.xml')
root = tree.getroot()

## 1. 10 countries with the lowest infant mortality rates

In [575]:
country_mort = {'name': [],
               'infant_mortality_rate': []}

for country in root:
    name_value = country.findtext('name')
    country_mort['name'].append(name_value)
    
    mort_value = country.findtext('infant_mortality')
    country_mort['infant_mortality_rate'].append(mort_value)

In [576]:
df_country_mort = pd.DataFrame(country_mort)
df_country_mort.sort_values(by = 'infant_mortality_rate').head(10)

Unnamed: 0,name,infant_mortality_rate
38,Monaco,1.81
30,Romania,10.16
153,Fiji,10.2
69,Brunei,10.48
132,Grenada,10.5
237,Mauritius,10.59
124,Panama,10.7
243,Seychelles,10.77
102,United Arab Emirates,10.92
113,Barbados,10.93


## 2. 10 cities with the largest population

In [577]:
city_pop = {'city': [],
           'population': []}

for city_ele in root.findall('.//city/[population]'):
# for city_ele in root.findall('.//city'):
    city_value = city_ele.findtext('name')
    city_pop['city'].append(city_value)
    
    pop_value = int(city_ele.findtext('population'))
    city_pop['population'].append(pop_value)

In [578]:
df_city_pop = pd.DataFrame(city_pop)
df_city_pop.sort_values(by = 'population', ascending = False).head(10)

Unnamed: 0,city,population
1761,Seoul,10229262
1422,Mumbai,9925891
2596,São Paulo,9412894
1627,Jakarta,8259266
1251,Shanghai,8205598
1942,Ciudad de México,8092449
448,Moskva,8010954
1723,Tokyo,7843000
1250,Beijing,7362426
1469,Delhi,7206704


## 3. Name and country of a) longest river, b) largest lake and c) airport at highest elevation


In [612]:
river = {'name': [], 'country': [], 'length': []}

country = {'code': [], 'countryname': []}

for r_ele in root.findall('./river/[length]'):
    r_name = r_ele.findtext('name')
    river['name'].append(r_name)
    
    r_con = r_ele.get('country')
    r_con = r_con.split(' ')[0]
    river['country'].append(r_con)
    
    r_length = float(r_ele.findtext('length'))
    river['length'].append(r_length)
    
for c_ele in root.findall('./country'):
    c_code = c_ele.get('car_code')
    country['code'].append(c_code)
    
    c_name = c_ele.findtext('name')
    country['countryname'].append(c_name)

In [613]:
df_r = pd.DataFrame(river)
df_m = pd.DataFrame(country)

In [618]:
df_river = pd.merge(df_m, df_r, left_on = 'code', right_on = 'country').drop('country', axis = 1)
df_river.sort_values(by = 'length', ascending = False).head(1)

Unnamed: 0,code,countryname,name,length
191,CN,China,Yangtze,6380.0


In [657]:
lake = {'name': [], 'country': [], 'area': []}

for l_ele in root.findall('./lake/[area]'):
    l_name = l_ele.findtext('name')
    lake['name'].append(l_name)
    
    l_con = l_ele.get('country')
    l_con = l_con.split(' ')[0]
    lake['country'].append(l_con)
    
    l_area = float(l_ele.findtext('area'))
    lake['area'].append(l_area)

In [658]:
df_l = pd.DataFrame(lake)
df_lake = pd.merge(df_m, df_l, left_on = 'code', right_on = 'country').drop('country', axis = 1)
df_lake.sort_values(by = 'area', ascending = False).head(1)

Unnamed: 0,code,countryname,name,area
36,R,Russia,Caspian Sea,386400.0


In [659]:
airport = {'name': [], 'country': [], 'elevation': []}

for a_ele in root.findall('./airport/[elevation]'):
    a_name = a_ele.findtext('name')
    airport['name'].append(a_name)
    
    a_con = a_ele.get('country')
    airport['country'].append(a_con)
    
    a_eleva = float(a_ele.findtext('elevation'))
    airport['elevation'].append(a_eleva)

In [665]:
df_a = pd.DataFrame(airport)
df_airport = pd.merge(df_m, df_a, left_on = 'code', right_on = 'country').drop('country', axis = 1)
df_airport.sort_values(by = 'elevation', ascending = False).head(1)

Unnamed: 0,code,countryname,name,elevation
1059,BOL,Bolivia,El Alto Intl,4063.0
