In [103]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

In [104]:
def get_value(cell):
    
    """ 
        Extract text from the element, and convert them into numbers when possible. 
        Take a cell tag.
        Return the field value.
    """
    
    text = ''.join(cell.find_all(text=True, recursive=False)).strip().replace(u'\xa0', u'')
    
    if text.replace(" ", "").isnumeric():
        return int(text.replace(" ", ""))
    
    elif text.replace(" ", "").replace(".", "").isnumeric():
        return float(text.replace(" ", ""))
    
    elif text.replace(" ", "").replace(".", "").replace("/", "").isnumeric():
        return text.replace(" ", "")
    
    else:
        return text 

In [105]:
def general_info(soup):
    
    """ 
        Extract General Information from the UNdata database.
        Take a bs4.BeautifulSoup object generated from querying the country's profile page.
        Return a dataframe containing the country's General Information
    """
    
    tables = soup.find_all("table")
    rows = tables[1].find_all("tr")
    
    index = [[get_value(r.find_all("td")[0]) for r in rows]]
    value = [[get_value(r.find_all("td")[-1]) for r in rows]]
    
    data = dict(zip(['Index', 'last'], index+value))
    
    df = pd.DataFrame(data)
    
    return df

In [106]:
def indicator(soup, indicator):
    
    """ 
        Extract Indicators information from the UNdata database.
        Take a bs4.BeautifulSoup object generated from querying the country's profile page
           & the name of the indicator.
        Return a dataframe containing the country's indicators data (3 years). 
    """
    
    tables = soup.find_all("table")
    
    if indicator == 'Economic indicators':
        table = tables[2]
    elif indicator == 'Social indicators':
        table = tables[3]
    elif indicator == 'Environment and infrastructure indicators':
        table = tables[4]
    
    rows = table.find_all("tr")
    col_num = len(rows[0].find_all("td"))
        
    year = [get_value(rows[0].find_all("td")[i]) for i in range(col_num)][1:] 

    col_names = ['Index','Unit']+year
    
    index = [[get_value(r.find_all("td")[0]) for r in rows[1:]]] 

    unit = [[get_value(r.find_all("td")[0].small) for r in rows[1:]]]
    
    value = [[(get_value(r.find_all("td")[i].small)) for r in rows[1:]] for i in range(-col_num+1,0)]
    
        
    data = dict(zip(col_names, index+unit+value))
    
    df = pd.DataFrame(data)
    
    return df

In [107]:
def get_profile(url):
    
    """ 
        Take a url that is a country's profile page.
        Return a list of two dataframes, 
               which contains the country's General Information and 
               Indicators data (3 years) respectively. 
    """
    response = requests.get(url)
    
    soup = BeautifulSoup(response.content,'lxml')

    section = [i.get_text() for i in soup.find_all("summary")]

    GI_df = general_info(soup)

    Indicators_df = pd.concat([indicator(soup, i) for i in section[1:]], keys=section[1:])
    
    Indicators_df['last'] = Indicators_df[[2010, 2015, 2020]].apply(lambda x: x[2020] if (x[2020]!='...' and x[2020]!='... / ...') else x[2015] if (x[2015]!='...' and x[2015]!='... / ...') else x[2010], axis=1)

    return [GI_df, Indicators_df]

In [108]:
def removeAccents(word):
    repl = {'à': 'a', 'á': 'a', 'â': 'a', 'ã': 'a',
            'é': 'e', 'ê': 'e',
            'í': 'i',
            'ó': 'o', 'ô': 'o', 'õ': 'o',
            'ú': 'u', 'ü': 'u'}

    new_word = ''.join([repl[c] if c in repl else c for c in word])
    return new_word

In [109]:
def hack_recon_relation(text):
    if text == "Oceania":
        return "Australia and New Zealand"
    else:
        return text

In [110]:
index_url = "http://data.un.org/en/index.html"
response = requests.get(index_url)
soup = BeautifulSoup(response.content,'lxml')

In [111]:
urls = ["http://data.un.org/en/"+u['href'] for u in soup.section.find_all('a', href=True)]
countries_list = [c.previousSibling for c in soup.section.find_all("br")]
countries_list = [removeAccents(str(i).split(' (')[0]) for i in countries_list]
print(len(urls) == len(countries_list))

True


In [112]:
countries_list

['Afghanistan',
 'Albania',
 'Algeria',
 'American Samoa',
 'Andorra',
 'Angola',
 'Anguilla',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bermuda',
 'Bhutan',
 'Bolivia',
 'Bonaire, St. Eustatius & Saba',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'British Virgin Islands',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Cayman Islands',
 'Central African Republic',
 'Chad',
 'Channel Islands',
 'Chile',
 'China',
 'China, Hong Kong SAR',
 'China, Macao SAR',
 'Colombia',
 'Comoros',
 'Congo',
 'Cook Islands',
 'Costa Rica',
 'Cote d’Ivoire',
 'Croatia',
 'Cuba',
 'Curaçao',
 'Cyprus',
 'Czechia',
 "Dem. People's Rep. Korea",
 'Dem. Rep. of the Congo',
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equ

In [113]:
total = dict()
for i in range(len(urls)):
    profile = get_profile(urls[i])
    total[countries_list[i]] = profile
    print("Finished:" + countries_list[i] + "(" + str(i+1) + "/"+ str(len(urls))+")")

Finished:Afghanistan(1/232)
Finished:Albania(2/232)
Finished:Algeria(3/232)
Finished:American Samoa(4/232)
Finished:Andorra(5/232)
Finished:Angola(6/232)
Finished:Anguilla(7/232)
Finished:Antigua and Barbuda(8/232)
Finished:Argentina(9/232)
Finished:Armenia(10/232)
Finished:Aruba(11/232)
Finished:Australia(12/232)
Finished:Austria(13/232)
Finished:Azerbaijan(14/232)
Finished:Bahamas(15/232)
Finished:Bahrain(16/232)
Finished:Bangladesh(17/232)
Finished:Barbados(18/232)
Finished:Belarus(19/232)
Finished:Belgium(20/232)
Finished:Belize(21/232)
Finished:Benin(22/232)
Finished:Bermuda(23/232)
Finished:Bhutan(24/232)
Finished:Bolivia(25/232)


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Finished:Bonaire, St. Eustatius & Saba(26/232)
Finished:Bosnia and Herzegovina(27/232)
Finished:Botswana(28/232)
Finished:Brazil(29/232)
Finished:British Virgin Islands(30/232)
Finished:Brunei Darussalam(31/232)
Finished:Bulgaria(32/232)
Finished:Burkina Faso(33/232)
Finished:Burundi(34/232)
Finished:Cabo Verde(35/232)
Finished:Cambodia(36/232)
Finished:Cameroon(37/232)
Finished:Canada(38/232)
Finished:Cayman Islands(39/232)
Finished:Central African Republic(40/232)
Finished:Chad(41/232)
Finished:Channel Islands(42/232)
Finished:Chile(43/232)
Finished:China(44/232)
Finished:China, Hong Kong SAR(45/232)
Finished:China, Macao SAR(46/232)
Finished:Colombia(47/232)
Finished:Comoros(48/232)
Finished:Congo(49/232)
Finished:Cook Islands(50/232)
Finished:Costa Rica(51/232)
Finished:Cote d’Ivoire(52/232)
Finished:Croatia(53/232)
Finished:Cuba(54/232)
Finished:Curaçao(55/232)
Finished:Cyprus(56/232)
Finished:Czechia(57/232)
Finished:Dem. People's Rep. Korea(58/232)
Finished:Dem. Rep. of the Cong

In [114]:
for c in countries_list:
    total[c][1] = total[c][1][['Index', 'Unit', 'last']] # get 2020 indicators
    total[c][0].columns = ['Index', c]
    total[c][1].columns = ['Index', 'Unit', c]

In [115]:
gi = [total[c][0] for c in countries_list]
GI = gi[0]
for i in range(len(gi))[1:]:
    GI = pd.merge(GI, gi[i], how="outer", on="Index")

In [116]:
GIt = GI.transpose()[1:]
GIt.columns = GI.Index

In [117]:
ind = [total[c][1] for c in countries_list]
Ind = ind[0]
for i in range(len(ind))[1:]:
    Ind = pd.merge(Ind, ind[i], how="outer", on= ["Index", "Unit"])

In [118]:
Indt = Ind.transpose()[2:]
Indt.columns = Ind.Index + Ind.Unit
countries_df = pd.concat([GIt, Indt], axis=1)
countries_df

Unnamed: 0,Region,"Population(000, 2020)","Pop. density(per km2, 2020)",Capital city,"Capital city pop.(000, 2020)",UN membership date,Surface area(km2),Sex ratio(m per 100 f),National currency,Exchange rate(per US$),...,"Energy production, primary(Petajoules)",Energy supply per capita(Gigajoules),Important sites for terrestrial biodiversity protected(%),Net Official Development Assist. received(% of GNI),Research & Development expenditure(% of GDP),CO2 emission estimates(million tons/tons per capita),Tourist/visitor arrivals at national borders(000),Pop. using safely managed sanitation(urban/rural %),"Pop. using safely managed drinking water(urban/rural, %)",Net Official Development Assist. disbursed(% of GNI)
Afghanistan,Southern Asia,38928,59.6,Kabul,4114,19-Nov-46,652864,105.4,Afghani (AFN),78.4,...,82,3,5.7,19.46,,,,,,
Albania,Southern Europe,2878,105,Tirana,484.6,14-Dec-55,28748,103.7,Lek (ALL),108.6,...,69,34,57.2,2.29,0.2,4.3/1.5,5340,40.2/39.4,,
Algeria,Northern Africa,43851,18.4,Algiers,2729.3,08-Oct-62,2381741,102.1,Algerian Dinar (DZD),119.2,...,6285,55,16.6,0.08,0.5,130.5/3.2,2657,16.5/20.8,,
American Samoa,Polynesia,56,279,Pago Pago,48.5,,199,103.6,US Dollar (USD),,...,,,71.1,,0.4,,20,,,
Andorra,Southern Europe,77,164.2,Andorra la Vella,22.6,28-Jul-93,468,102.3,Euro (EUR),0.9,...,1,117,26.1,,,,3042,100.0/100.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wallis and Futuna Islands,Polynesia,12,82.5,Matu-Utu,1,,142,93.4,CFP Franc (XPF),106.2,...,0,30,,,,,,,,
Western Sahara,Northern Africa,597,2.2,El Aai?n,232.4,,266000,109.5,Moroccan Dirham (MAD),9.6,...,,,,,,,,,,
Yemen,Western Asia,29826,56.5,Sana'a,2874.4,30-Sep-47,527968,101.5,Yemeni Rial (YER),,...,77,5,19.4,29.67,,8.9/0.3,367,67.0/...,,
Zambia,Eastern Africa,18384,24.7,Lusaka,2646.6,01-Dec-64,752612,98.1,Zambian Kwacha (ZMW),14.1,...,448,29,45.5,3.84,0.3,6.0/0.4,1072,,46.2/...,


In [176]:
countries_df['Currency+ISO-4217'] = countries_df['National currency'].apply(lambda x: str(x)[-4:-1]) 

In [177]:
countries_df[]

Unnamed: 0,Region,"Population(000, 2020)","Pop. density(per km2, 2020)",Capital city,"Capital city pop.(000, 2020)",UN membership date,Surface area(km2),Sex ratio(m per 100 f),National currency,Exchange rate(per US$),...,Important sites for terrestrial biodiversity protected(%),Net Official Development Assist. received(% of GNI),Research & Development expenditure(% of GDP),CO2 emission estimates(million tons/tons per capita),Tourist/visitor arrivals at national borders(000),Pop. using safely managed sanitation(urban/rural %),"Pop. using safely managed drinking water(urban/rural, %)",Net Official Development Assist. disbursed(% of GNI),Index,Currency+ISO-4217
Afghanistan,Southern Asia,38928,59.6,Kabul,4114,19-Nov-46,652864,105.4,Afghani (AFN),78.4,...,5.7,19.46,,,,,,,Afghanistan,AFN
Albania,Southern Europe,2878,105,Tirana,484.6,14-Dec-55,28748,103.7,Lek (ALL),108.6,...,57.2,2.29,0.2,4.3/1.5,5340,40.2/39.4,,,Albania,ALL
Algeria,Northern Africa,43851,18.4,Algiers,2729.3,08-Oct-62,2381741,102.1,Algerian Dinar (DZD),119.2,...,16.6,0.08,0.5,130.5/3.2,2657,16.5/20.8,,,Algeria,DZD
American Samoa,Polynesia,56,279,Pago Pago,48.5,,199,103.6,US Dollar (USD),,...,71.1,,0.4,,20,,,,American Samoa,USD
Andorra,Southern Europe,77,164.2,Andorra la Vella,22.6,28-Jul-93,468,102.3,Euro (EUR),0.9,...,26.1,,,,3042,100.0/100.0,,,Andorra,EUR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wallis and Futuna Islands,Polynesia,12,82.5,Matu-Utu,1,,142,93.4,CFP Franc (XPF),106.2,...,,,,,,,,,Wallis and Futuna Islands,XPF
Western Sahara,Northern Africa,597,2.2,El Aai?n,232.4,,266000,109.5,Moroccan Dirham (MAD),9.6,...,,,,,,,,,Western Sahara,MAD
Yemen,Western Asia,29826,56.5,Sana'a,2874.4,30-Sep-47,527968,101.5,Yemeni Rial (YER),,...,19.4,29.67,,8.9/0.3,367,67.0/...,,,Yemen,YER
Zambia,Eastern Africa,18384,24.7,Lusaka,2646.6,01-Dec-64,752612,98.1,Zambian Kwacha (ZMW),14.1,...,45.5,3.84,0.3,6.0/0.4,1072,,46.2/...,,Zambia,ZMW


In [169]:
countries_df.to_csv('countries_df.csv')

In [149]:
countries_df['Index'] = countries_df.index 
temp = countries_df [['Index', 'Region']]
recon_relation = temp.groupby('Region')['Index'].apply(list).reset_index()
recon_relation.columns = ['Index', 'Countries']
recon_relation.index = recon_relation ['Index']
recon_relation
recon_relation.index = recon_relation.index.map(hack_recon_relation)
recon_relation = recon_relation.drop(columns=['Index'])
recon_relation

Unnamed: 0_level_0,Countries
Index,Unnamed: 1_level_1
Caribbean,"[Anguilla, Antigua and Barbuda, Aruba, Bahamas..."
Central America,"[Belize, Costa Rica, El Salvador, Guatemala, H..."
Central Asia,"[Kazakhstan, Kyrgyzstan, Tajikistan, Turkmenis..."
Eastern Africa,"[Burundi, Comoros, Djibouti, Eritrea, Ethiopia..."
Eastern Asia,"[China, China, Hong Kong SAR, China, Macao SAR..."
Eastern Europe,"[Belarus, Bulgaria, Czechia, Hungary, Poland, ..."
Melanesia,"[Fiji, New Caledonia, Papua New Guinea, Solomo..."
Micronesia,"[Guam, Kiribati, Marshall Islands, Micronesia,..."
Middle Africa,"[Angola, Cameroon, Central African Republic, C..."
Northern Africa,"[Algeria, Egypt, Libya, Morocco, Sudan, Tunisi..."


# Regions

In [138]:
index_url = "http://data.un.org/en/regions.html"
response = requests.get(index_url)
soup = BeautifulSoup(response.content,'lxml')

In [139]:
urls = ["http://data.un.org/en/"+u['href'] for u in soup.section.find_all('a', href=True)]
regions_list = [str(i).replace('<td>','').replace('</td>','') for i in [soup.find_all("td")[i*5] for i in range(1,31)] ]
print(len(urls) == len(regions_list))

True


In [140]:
total = dict()
for i in range(len(urls)):
    profile = get_profile(urls[i])
    total[regions_list[i]] = profile
    print("Finished:" + regions_list[i] + "(" + str(i+1) + "/"+ str(len(urls))+")")

Finished:World(1/30)
Finished:Africa(2/30)
Finished:Northern Africa(3/30)
Finished:Sub-Saharan Africa(4/30)
Finished:Eastern Africa(5/30)
Finished:Middle Africa(6/30)
Finished:Southern Africa(7/30)
Finished:Western Africa(8/30)
Finished:Americas(9/30)
Finished:Northern America(10/30)
Finished:Latin America and the Caribbean(11/30)
Finished:Caribbean(12/30)
Finished:Central America(13/30)
Finished:South America(14/30)
Finished:Asia(15/30)
Finished:Central Asia(16/30)
Finished:Eastern Asia(17/30)
Finished:South-eastern Asia(18/30)
Finished:Southern Asia(19/30)
Finished:Western Asia(20/30)
Finished:Europe(21/30)
Finished:Eastern Europe(22/30)
Finished:Northern Europe(23/30)
Finished:Southern Europe(24/30)
Finished:Western Europe(25/30)
Finished:Oceania(26/30)
Finished:Australia and New Zealand(27/30)
Finished:Melanesia(28/30)
Finished:Micronesia(29/30)
Finished:Polynesia(30/30)


In [141]:
for c in regions_list:
    total[c][1] = total[c][1][['Index', 'Unit', 'last']] # get 2020 indicators
    total[c][0].columns = ['Index', c]
    total[c][1].columns = ['Index', 'Unit', c]

In [154]:
gi = [total[c][0] for c in regions_list]
GI = gi[0]
for i in range(len(gi))[1:]:
    GI = pd.merge(GI, gi[i], how="outer", on="Index")

In [155]:
GIt = GI.transpose()[1:]
GIt.columns = GI.Index

In [156]:
ind = [total[c][1] for c in regions_list]
Ind = ind[0]
for i in range(len(ind))[1:]:
    Ind = pd.merge(Ind, ind[i], how="outer", on= ["Index", "Unit"])

In [157]:
Indt = Ind.transpose()[2:]
Indt.columns = Ind.Index + Ind.Unit

In [158]:
dfm = pd.concat([GIt, Indt], axis=1)

In [161]:
temp=dfm[ dfm['Region'] != 'World']
regions_df =temp[temp.index!='World']
regions_df['Index'] = regions_df.index
regions_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,"Population(000, 2020)","Pop. density(per km2, 2020)",Surface area(km2),Sex ratio(m per 100 f),Region,GDP: Gross domestic product(million current US$),"GDP growth rate(annual %, const. 2015 prices)",GDP per capita(current US$),Employment in agriculture(% of employed),Employment in industry(% of employed),...,Individuals using the Internet(per 100 inhabitants),Research & Development expenditure(% of GDP),Forested area(% of land area),"Energy production, primary(Petajoules)",Energy supply per capita(Gigajoules),Important sites for terrestrial biodiversity protected(%),"Pop. using safely managed drinking water(urban/rural, %)",Pop. using safely managed sanitation(urban/rural %),Net Official Development Assist. received(% of GNI),Index
Northern Africa,246232,31.7,7880000,101.0,Africa,666216.0,3.6,2814.3,24.8,25.6,...,44.1,0.6,3.4,,,34.9,,39.3/...,0.85,Northern Africa
Sub-Saharan Africa,1094366,50.0,22431000,99.6,Africa,1699027.0,3.0,1637.6,52.6,11.2,...,26.0,0.4,28.0,,,41.0,50.0/11.6,19.8/17.5,2.9,Sub-Saharan Africa
Eastern Africa,445406,66.8,7005000,98.5,Sub-Saharan Africa,393669.0,6.0,934.1,63.7,8.7,...,17.6,,32.6,,,,53.2/8.9,,,Eastern Africa
Middle Africa,179595,27.6,6613000,99.6,Sub-Saharan Africa,250276.0,1.0,1479.9,,,...,13.9,,46.7,,,,,,,Middle Africa
Southern Africa,67504,25.5,2675000,96.9,Sub-Saharan Africa,408569.0,0.9,6215.0,6.6,23.0,...,54.9,,10.4,,,,81.9/...,,,Southern Africa
Western Africa,401861,66.3,6138000,101.4,Sub-Saharan Africa,646513.0,3.2,1696.0,39.9,13.9,...,35.8,,11.1,,,,33.6/15.4,21.9/17.0,,Western Africa
Northern America,368870,19.8,21776000,98.0,Americas,22302188.0,2.8,61220.9,1.3,19.7,...,89.2,2.7,35.5,,,41.4,99.6/...,90.2/...,,Northern America
Latin America and the Caribbean,653962,32.5,20546000,96.8,Americas,5565516.0,0.5,8682.9,13.7,20.3,...,66.3,0.7,46.5,,,38.0,82.3/41.7,37.0/...,,Latin America and the Caribbean
Caribbean,43532,192.6,234000,97.5,Latin America & Caribbean,371468.0,1.0,8800.3,16.0,15.3,...,58.7,,32.2,,,,,,,Caribbean
Central America,179670,73.3,2480000,96.1,Latin America & Caribbean,1492059.0,2.1,8503.1,16.0,24.2,...,,,35.2,,,,.../41.6,44.0/...,,Central America


In [163]:
regions_df_final=regions_df.merge(recon_relation, how='outer', on='Index')
regions_df_final.index = regions_df_final['Index']
regions_df_final

Unnamed: 0_level_0,"Population(000, 2020)","Pop. density(per km2, 2020)",Surface area(km2),Sex ratio(m per 100 f),Region,GDP: Gross domestic product(million current US$),"GDP growth rate(annual %, const. 2015 prices)",GDP per capita(current US$),Employment in agriculture(% of employed),Employment in industry(% of employed),...,Research & Development expenditure(% of GDP),Forested area(% of land area),"Energy production, primary(Petajoules)",Energy supply per capita(Gigajoules),Important sites for terrestrial biodiversity protected(%),"Pop. using safely managed drinking water(urban/rural, %)",Pop. using safely managed sanitation(urban/rural %),Net Official Development Assist. received(% of GNI),Index,Countries
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Northern Africa,246232,31.7,7880000,101.0,Africa,666216.0,3.6,2814.3,24.8,25.6,...,0.6,3.4,,,34.9,,39.3/...,0.85,Northern Africa,"[Algeria, Egypt, Libya, Morocco, Sudan, Tunisi..."
Sub-Saharan Africa,1094366,50.0,22431000,99.6,Africa,1699027.0,3.0,1637.6,52.6,11.2,...,0.4,28.0,,,41.0,50.0/11.6,19.8/17.5,2.9,Sub-Saharan Africa,
Eastern Africa,445406,66.8,7005000,98.5,Sub-Saharan Africa,393669.0,6.0,934.1,63.7,8.7,...,,32.6,,,,53.2/8.9,,,Eastern Africa,"[Burundi, Comoros, Djibouti, Eritrea, Ethiopia..."
Middle Africa,179595,27.6,6613000,99.6,Sub-Saharan Africa,250276.0,1.0,1479.9,,,...,,46.7,,,,,,,Middle Africa,"[Angola, Cameroon, Central African Republic, C..."
Southern Africa,67504,25.5,2675000,96.9,Sub-Saharan Africa,408569.0,0.9,6215.0,6.6,23.0,...,,10.4,,,,81.9/...,,,Southern Africa,"[Botswana, Eswatini, Lesotho, Namibia, South A..."
Western Africa,401861,66.3,6138000,101.4,Sub-Saharan Africa,646513.0,3.2,1696.0,39.9,13.9,...,,11.1,,,,33.6/15.4,21.9/17.0,,Western Africa,"[Benin, Burkina Faso, Cabo Verde, Cote d’Ivoir..."
Northern America,368870,19.8,21776000,98.0,Americas,22302188.0,2.8,61220.9,1.3,19.7,...,2.7,35.5,,,41.4,99.6/...,90.2/...,,Northern America,"[Bermuda, Canada, Greenland, Saint Pierre and ..."
Latin America and the Caribbean,653962,32.5,20546000,96.8,Americas,5565516.0,0.5,8682.9,13.7,20.3,...,0.7,46.5,,,38.0,82.3/41.7,37.0/...,,Latin America and the Caribbean,
Caribbean,43532,192.6,234000,97.5,Latin America & Caribbean,371468.0,1.0,8800.3,16.0,15.3,...,,32.2,,,,,,,Caribbean,"[Anguilla, Antigua and Barbuda, Aruba, Bahamas..."
Central America,179670,73.3,2480000,96.1,Latin America & Caribbean,1492059.0,2.1,8503.1,16.0,24.2,...,,35.2,,,,.../41.6,44.0/...,,Central America,"[Belize, Costa Rica, El Salvador, Guatemala, H..."


In [170]:
regions_df_final.to_csv('regions_df_final.csv')

In [164]:
regions_df['Index'] = regions_df.index 
temp = regions_df [['Index', 'Region']]
recontin_realtion = temp.groupby('Region')['Index'].apply(list).reset_index()
recontin_realtion.columns = ['Index', 'Regions']
# recontin_realtion.index = recontin_realtion ['Index']
# recon_relation.drop(columns=['Index'], inplace=True)
recontin_realtion

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Index,Regions
0,Africa,"[Northern Africa, Sub-Saharan Africa]"
1,Americas,"[Northern America, Latin America and the Carib..."
2,Asia,"[Central Asia, Eastern Asia, South-eastern Asi..."
3,Europe,"[Eastern Europe, Northern Europe, Southern Eur..."
4,Latin America & Caribbean,"[Caribbean, Central America, South America]"
5,Oceania,"[Australia and New Zealand, Melanesia, Microne..."
6,Sub-Saharan Africa,"[Eastern Africa, Middle Africa, Southern Afric..."


# continents

In [165]:
continents_df =dfm[ dfm['Region'] == 'World']
continents_df['Index']= continents_df.index
continents_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,"Population(000, 2020)","Pop. density(per km2, 2020)",Surface area(km2),Sex ratio(m per 100 f),Region,GDP: Gross domestic product(million current US$),"GDP growth rate(annual %, const. 2015 prices)",GDP per capita(current US$),Employment in agriculture(% of employed),Employment in industry(% of employed),...,Individuals using the Internet(per 100 inhabitants),Research & Development expenditure(% of GDP),Forested area(% of land area),"Energy production, primary(Petajoules)",Energy supply per capita(Gigajoules),Important sites for terrestrial biodiversity protected(%),"Pop. using safely managed drinking water(urban/rural, %)",Pop. using safely managed sanitation(urban/rural %),Net Official Development Assist. received(% of GNI),Index
Africa,1340598,45.2,30311000,99.9,World,2365243.0,3.2,1856.2,48.7,13.3,...,,,21.0,45629.0,26.0,,,,2.44,Africa
Americas,1022832,24.2,42322000,97.2,World,27867704.0,2.3,27721.7,9.0,20.0,...,,,41.2,,,,,,0.23,Americas
Asia,4641055,149.6,31915000,104.7,World,31839900.0,4.5,6982.1,,,...,47.3,,19.1,268302.0,62.0,,85.8/59.8,42.7/46.0,0.24,Asia
Europe,747636,33.8,23049000,93.4,World,21908700.0,2.0,29278.7,,,...,82.0,1.9,45.9,104101.0,144.0,65.6,,81.1/53.3,0.61,Europe
Oceania,42678,5.0,8564000,100.2,World,1711770.0,2.0,41469.2,,,...,68.2,,20.4,17969.0,163.0,33.7,96.1/...,66.7/23.5,6.98,Oceania


In [166]:
continents_df_final=continents_df.merge(recontin_realtion, how='inner', on='Index')
continents_df_final.index = continents_df_final['Index']
continents_df_final

Unnamed: 0_level_0,"Population(000, 2020)","Pop. density(per km2, 2020)",Surface area(km2),Sex ratio(m per 100 f),Region,GDP: Gross domestic product(million current US$),"GDP growth rate(annual %, const. 2015 prices)",GDP per capita(current US$),Employment in agriculture(% of employed),Employment in industry(% of employed),...,Research & Development expenditure(% of GDP),Forested area(% of land area),"Energy production, primary(Petajoules)",Energy supply per capita(Gigajoules),Important sites for terrestrial biodiversity protected(%),"Pop. using safely managed drinking water(urban/rural, %)",Pop. using safely managed sanitation(urban/rural %),Net Official Development Assist. received(% of GNI),Index,Regions
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Africa,1340598,45.2,30311000,99.9,World,2365243.0,3.2,1856.2,48.7,13.3,...,,21.0,45629.0,26.0,,,,2.44,Africa,"[Northern Africa, Sub-Saharan Africa]"
Americas,1022832,24.2,42322000,97.2,World,27867704.0,2.3,27721.7,9.0,20.0,...,,41.2,,,,,,0.23,Americas,"[Northern America, Latin America and the Carib..."
Asia,4641055,149.6,31915000,104.7,World,31839900.0,4.5,6982.1,,,...,,19.1,268302.0,62.0,,85.8/59.8,42.7/46.0,0.24,Asia,"[Central Asia, Eastern Asia, South-eastern Asi..."
Europe,747636,33.8,23049000,93.4,World,21908700.0,2.0,29278.7,,,...,1.9,45.9,104101.0,144.0,65.6,,81.1/53.3,0.61,Europe,"[Eastern Europe, Northern Europe, Southern Eur..."
Oceania,42678,5.0,8564000,100.2,World,1711770.0,2.0,41469.2,,,...,,20.4,17969.0,163.0,33.7,96.1/...,66.7/23.5,6.98,Oceania,"[Australia and New Zealand, Melanesia, Microne..."


In [89]:
continents_df_final.to_csv('continents_df_final.csv')