In [1]:
# libraries 
import pandas as pd 
import numpy as np
import os

# root folder of the per-country output tree; save_country joins
# <country>/demographic/Nk_10.csv onto it
data_dir = "../../basins/"

# UN population estimates, read from a CSV next to this notebook
# (source: https://population.un.org/wpp/Download/Standard/Population/)
un = pd.read_csv("./un_estimates.csv")

# countries to process, one per line; each name is also used as the
# sub-folder name under data_dir
countries = [
    'Sri Lanka',
    'El Salvador',
    'Morocco',
    'Bolivia',
    'Honduras',
    'Philippines',
    'Indonesia',
    'Pakistan',
    'Rwanda',
    'Bangladesh',
    'Kyrgyzstan',
    'Egypt',
    'Mozambique',
    'Afghanistan',
    'Angola',
    'Ghana',
    'Zambia',
    "Côte d'Ivoire",
    'Kenya',
    'Uganda',
]
             
              
# Output age group -> source age-band columns that are summed into it.
# The order of this table is the row order of the saved CSV.
_AGE_GROUP_COLUMNS = (
    ("0-9", ("0-4", "5-9")),
    ("10-19", ("10-14", "15-19")),
    ("20-24", ("20-24",)),
    ("25-29", ("25-29",)),
    ("30-39", ("30-34", "35-39")),
    ("40-49", ("40-44", "45-49")),
    ("50-59", ("50-54", "55-59")),
    ("60-69", ("60-64", "65-69")),
    ("70-79", ("70-74", "75-79")),
    ("80+", ("80-84", "85-89", "90-94", "95-99", "100+")),
)

# Short country name used in this project -> name used in the UN table.
_UN_COUNTRY_NAMES = {"Bolivia": "Bolivia (Plurinational State of)"}


def _parse_thousands(value):
    """Turn one table cell such as "1 234" (space-grouped, in thousands) into a head count."""
    # str() lets cells that were already parsed as integers go through the same path
    return 1000 * int(str(value).replace(" ", ""))


def save_country(df, country, year=2020, out_dir=None):
    """
    Compute the population of a country over 10 age groups and save it as CSV.

    The source age-band columns are summed into the groups 0-9, 10-19, 20-24,
    25-29, 30-39, 40-49, 50-59, 60-69, 70-79 and 80+. The total is printed in
    millions and the table is written to
    ``<out_dir>/<country>/demographic/Nk_10.csv`` with columns ``group`` and
    ``value``. The target directory is not created here and must already exist.

    :param df: UN estimates DataFrame
    :param country: country name (also the name of the output sub-folder)
    :param year: reference year to select (default 2020)
    :param out_dir: root output folder; defaults to the module-level ``data_dir``
    :raises IndexError: if no row matches the country and year
    :raises KeyError: if an expected column is missing from ``df``
    """
    un_name = _UN_COUNTRY_NAMES.get(country, country)

    # loc country
    selection = df.loc[(df["Region, subregion, country or area *"] == un_name)
                       & (df["Reference date (as of 1 July)"] == year)]
    if selection.empty:
        # IndexError is what the unguarded `.values[0]` lookup raised before,
        # so the exception type is unchanged; only the message is clearer.
        raise IndexError("no row for country {0!r} and reference year {1}".format(un_name, year))
    row = selection.iloc[0]

    # pop by age group; labels are plain strings for every group, so the
    # "group" column is uniform (previously nine of ten were one-element lists)
    age_groups = [label for label, _ in _AGE_GROUP_COLUMNS]
    pop = [sum(_parse_thousands(row[band]) for band in bands)
           for _, bands in _AGE_GROUP_COLUMNS]

    # save
    print("{0}: {1} (Millions)".format(country, sum(pop) / 10**6))
    df_age = pd.DataFrame(data={"group": age_groups, "value": pop})
    base_dir = data_dir if out_dir is None else out_dir
    df_age.to_csv(os.path.join(base_dir, country, "demographic/Nk_10.csv"), index=False)

In [2]:
# compute and save the 10-group age distribution of every listed country
for country in countries:
    save_country(df=un, country=country)

Sri Lanka: 21.414 (Millions)
El Salvador: 6.488 (Millions)
Morocco: 36.911 (Millions)
Bolivia: 11.673 (Millions)
Honduras: 9.906 (Millions)
Philippines: 109.581 (Millions)
Indonesia: 273.523 (Millions)
Pakistan: 220.893 (Millions)
Rwanda: 12.952 (Millions)
Bangladesh: 164.688 (Millions)
Kyrgyzstan: 6.525 (Millions)
Egypt: 102.334 (Millions)
Mozambique: 31.258 (Millions)
Afghanistan: 38.927 (Millions)
Angola: 32.866 (Millions)
Ghana: 31.07 (Millions)
Zambia: 18.385 (Millions)
Côte d'Ivoire: 26.378 (Millions)
Kenya: 53.772 (Millions)
Uganda: 45.741 (Millions)
