In [1]:
%load_ext lab_black

In [2]:
import requests
import json
import datetime
import pandas as pd
from pathlib import Path

In [206]:
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", None)

In [207]:
# the UN's API includes many entities which are not sovereign states,
# yet are listed as "Country". This workaround is a manuel fix for if
# one is only interested in countries by the classic definition
not_countries = [
    "American Samoa",
    "Bermuda",
    "British Virgin Islands",
    "Cayman Islands",
    "Mayotte",
    "Cook Islands",
    "Faroe Islands",
    "Falkland Islands (Malvinas)",
    "French Guiana",
    "French Polynesia",
    "Gibraltar",
    "Greenland",
    "Guadeloupe",
    "Guam",
    "China, Hong Kong SAR",
    "China, Macao SAR",
    "Martinique",
    "Montserrat",
    "Curaçao",
    "Aruba",
    "Sint Maarten (Dutch part)",
    "Bonaire, Sint Eustatius and Saba",
    "New Caledonia",
    "Niue",
    "Northern Mariana Islands",
    "Puerto Rico",
    "Réunion",
    "Saint Helena",
    "Anguilla",
    "Saint Pierre and Miquelon",
    "Tokelau",
    "Turks and Caicos Islands",
    "Isle of Man",
    "United States Virgin Islands",
    "Wallis and Futuna Islands",
]

# FUNCION PARA llamar a la API

In [10]:
# Define a function that will take a relative path as an input, call the API, and return a dataframe
def callAPI(relative_path: str, topic_list: bool = False) -> pd.DataFrame:
    base_url = "https://population.un.org/dataportalapi/api/v1"
    target = base_url + relative_path
    # Query string parameters may be appended here or directly in the provided relative path
    # Calls the API
    response = requests.get(target)
    # Reformats response into a JSON object
    j = response.json()
    # The block below will deal with paginated results.
    # If results not paginated, this will be skipped.
    try:
        # If results are paginated, they are transformed into a python dictionary.
        # The data may be accessed using the 'data' key of the dictionary.
        df = pd.json_normalize(j["data"])
        # As long as the nextPage key of the dictionary contains an address for the next API call, the function will continue to call the API and append the results to the dataframe.
        while j["nextPage"] is not None:
            response = requests.get(j["nextPage"])
            j = response.json()
            df_temp = pd.json_normalize(j["data"])
            df = pd.concat([df, df_temp], ignore_index=True)
    except:
        if topic_list:
            df = pd.json_normalize(j, "indicators")
        else:
            df = pd.DataFrame(j)
    return df

In [11]:
df_locations = callAPI("/locations/")
df_locations.head(5)

Unnamed: 0,id,name,iso3,iso2,longitude,latitude
0,4,Afghanistan,AFG,AF,67.709953,33.93911
1,8,Albania,ALB,AL,20.168331,41.153332
2,12,Algeria,DZA,DZ,1.659626,28.033886
3,16,American Samoa,ASM,AS,-170.696182,-14.306021
4,20,Andorra,AND,AD,1.521801,42.506287


### Relacion entre el DATASET PAISES del BANCO MUNDIAL y la ONU

In [12]:
paises = pd.read_csv("paises_mariel.csv").drop(columns="Unnamed: 0")
paises

Unnamed: 0,name,id,region.value,incomeLevel.value
0,Aruba,ABW,Latin America & Caribbean,High income
1,Andorra,AND,Europe & Central Asia,High income
2,United Arab Emirates,ARE,Middle East & North Africa,High income
3,Antigua and Barbuda,ATG,Latin America & Caribbean,High income
4,Australia,AUS,East Asia & Pacific,High income
5,Austria,AUT,Europe & Central Asia,High income
6,Belgium,BEL,Europe & Central Asia,High income
7,Bahrain,BHR,Middle East & North Africa,High income
8,"Bahamas, The",BHS,Latin America & Caribbean,High income
9,Bermuda,BMU,North America,High income


In [13]:
country_codes = [code for code in paises["id"].values]

In [14]:
ONU_paises_id = df_locations.loc[df_locations.iso3.isin(country_codes)].sort_values(
    "name"
)
ONU_paises_id

Unnamed: 0,id,name,iso3,iso2,longitude,latitude
0,4,Afghanistan,AFG,AF,67.709953,33.93911
1,8,Albania,ALB,AL,20.168331,41.153332
3,16,American Samoa,ASM,AS,-170.696182,-14.306021
4,20,Andorra,AND,AD,1.521801,42.506287
5,24,Angola,AGO,AO,17.873886,-11.202692
6,28,Antigua and Barbuda,ATG,AG,-61.78918,17.07867
8,32,Argentina,ARG,AR,-63.616673,-38.416096
14,51,Armenia,ARM,AM,45.038189,40.069099
142,533,Aruba,ABW,AW,-69.968338,12.52111
9,36,Australia,AUS,AU,133.775131,-25.274399


In [106]:
country_list = [str(pais) for pais in ONU_paises_id["name"].values]
country_list

['Afghanistan',
 'Albania',
 'American Samoa',
 'Andorra',
 'Angola',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bermuda',
 'Bhutan',
 'Bolivia (Plurinational State of)',
 'Bosnia and Herzegovina',
 'Brazil',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cameroon',
 'Central African Republic',
 'Comoros',
 'Congo',
 "Côte d'Ivoire",
 'Dem. Rep. of the Congo',
 'Eritrea',
 'Ethiopia',
 'Gambia',
 'Guinea',
 'Guinea-Bissau',
 'United Arab Emirates']

In [108]:
country_codes = [str(code) for code in ONU_paises_id["id"].values]
# Converts country code list into a string to be used in later API call
country_selection_string = ",".join(country_codes)

country_selection_string

'4,8,16,20,24,28,32,51,533,36,40,31,44,48,50,112,56,84,204,60,64,68,70,76,100,854,108,132,120,140,174,178,384,180,232,231,270,324,624,784'

## Geographical areas or locations With Aggregates

In [210]:
# Define target URL.
base_url = "https://population.un.org/dataportalapi/api/v1/locationsWithAggregates?pageNumber=1"

# Call the API and convert the resquest into JSON object.
response = requests.get(base_url).json()

# Convert JSON object to data frame.
df = pd.json_normalize(response)

# Get the response, which includes the first pages. Only 3.
pages = 3

# Converts call into JSON and concat to the previous data frame.
for page in range(2, pages + 1):
    # Reset the target to the next page
    target = f"https://population.un.org/dataportalapi/api/v1/locationsWithAggregates?pageNumber={page}"

    # Each iteration call the API and convert the resquest into JSON object.
    response = requests.get(target).json()

    # Each iteration convert JSON object to data frame.
    df_temp = pd.json_normalize(response)

    # Each iteration concat the data frames.
    df = pd.concat([df, df_temp], ignore_index=True)

print("df.shape", df.shape, end="\n")
df

# Display only relevant data. (Drop NaN, )
df_paises = (
    df[
        [
            "Id",
            "Name",
            "Iso2",
            "Iso3",
            "Longitude",
            "Latitude",
            "Region",
            "SubRegion",
            "WorldBankIncomeGroup",
            "UNDevelopmentGroup",
        ]
    ]
    .copy()
    .dropna()
)
df_paises.info()

df.shape (278, 12)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 227 entries, 0 to 236
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Id                    227 non-null    int64  
 1   Name                  227 non-null    object 
 2   Iso2                  227 non-null    object 
 3   Iso3                  227 non-null    object 
 4   Longitude             227 non-null    float64
 5   Latitude              227 non-null    float64
 6   Region                227 non-null    object 
 7   SubRegion             227 non-null    object 
 8   WorldBankIncomeGroup  227 non-null    object 
 9   UNDevelopmentGroup    227 non-null    object 
dtypes: float64(2), int64(1), object(7)
memory usage: 19.5+ KB


In [211]:
country_codes = [str(code) for code in df_paises["Id"].values]
# Converts country code list into a string to be used in later API call
country_selection_string = ",".join(country_codes)

country_selection_string

'4,8,12,16,20,24,28,31,32,36,40,44,48,50,51,52,56,64,68,70,72,76,84,90,92,96,100,104,108,112,116,120,132,136,140,144,148,152,156,158,170,174,175,178,180,184,188,191,192,196,203,204,208,212,214,218,222,226,231,232,233,234,238,242,246,250,254,258,262,266,268,270,275,276,288,292,296,300,308,312,316,320,324,328,332,336,340,344,348,352,356,360,364,368,372,376,380,384,388,392,398,400,404,408,410,414,417,418,422,426,428,430,434,438,440,442,446,450,454,458,462,466,470,474,478,480,484,492,496,498,499,500,504,508,512,516,520,524,528,531,533,534,535,540,548,554,558,562,566,570,578,580,583,584,585,586,591,598,600,604,608,616,620,624,626,630,634,638,642,643,646,654,659,660,662,670,674,678,682,686,688,690,694,702,703,704,705,706,710,716,724,728,729,732,740,748,752,756,760,762,764,768,772,776,780,784,788,792,795,796,798,800,804,807,818,826,833,834,850,854,858,860,862,876,882,887,894'

## Anios 

In [212]:
start_year = str(1990)
end_year = str(2020)

## Family planing

In [273]:
df_family_planing = callAPI("/topics/5/indicators").iloc[[1, 9, 11]]
df_family_planing

Unnamed: 0,topicId,topicName,topicShortName,indicatorId,indicatorName,indicatorDisplayName,indicatorShortName,indicatorDescription,dimAge,dimSex,dimVariant,dimCategory,defaultAgeId,defaultSexId,defaultVariantId,defaultCategoryId,unitScaling,formatString,unitLongLabel,unitShortLabel,variableType,sourceId,sourceName,sourceYear,sourceStartYear,sourceEndYear,sourceCitation,sourceUrl
1,5,Family Planning,FP,1,Contraceptive prevalence: Any method (Percent),Any,CPAnyP,Percentage of women of reproductive age (15-49 years) who are currently using any method of contraception,False,False,True,True,31,2,4,100,0.01,#0.0,per cent,%,relative,23,Estimates and Projections of Family Planning Indicators,2022,1970,2030,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>Model-based Estimates and Projections of Family Planning Indicators 2022</i>, custom data acquired via website.",https://www.un.org/development/desa/pd/data/family-planning-indicators
9,5,Family Planning,FP,6,Total demand for family planning (Percent),Total demand,DEMTot,Percentage of women of reproductive age (15-49 years) who are currently using any method of contraception or are having unmet need for family planning,False,False,True,True,31,2,4,100,0.01,#0.0,per cent,%,relative,23,Estimates and Projections of Family Planning Indicators,2022,1970,2030,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>Model-based Estimates and Projections of Family Planning Indicators 2022</i>, custom data acquired via website.",https://www.un.org/development/desa/pd/data/family-planning-indicators
11,5,Family Planning,FP,4,Unmet need for family planning: Any method (Percent),Unmet need,UNMP,Percentage of women of reproductive age (15-49 years) who want to stop or delay childbearing but are not using a method of contraception,False,False,True,True,31,2,4,100,0.01,#0.0,per cent,%,relative,23,Estimates and Projections of Family Planning Indicators,2022,1970,2030,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>Model-based Estimates and Projections of Family Planning Indicators 2022</i>, custom data acquired via website.",https://www.un.org/development/desa/pd/data/family-planning-indicators


### Percentage of women of reproductive age (15-49 years) who are currently using any method of contraception

In [68]:
# Calls the API to return the indicator values for the selected indicators and countries.
df_women_using_MC = callAPI(
    f"/data/indicators/{1}/locations/{country_selection_string}/start/{start_year}/end/{end_year}"
)

In [69]:
df_women_using_MC.head(1)
# location, iso3,indicator,timeLabel,category == All women, variant = Median

Unnamed: 0,locationId,location,iso3,iso2,locationTypeId,indicatorId,indicator,indicatorDisplayName,sourceId,source,revision,variantId,variant,variantShortName,variantLabel,timeId,timeLabel,timeMid,categoryId,category,estimateTypeId,estimateType,estimateMethodId,estimateMethod,sexId,sex,ageId,ageLabel,ageStart,ageEnd,ageMid,value
0,4,Afghanistan,AFG,AF,4,1,Contraceptive prevalence: Any method (Percent),Any,23,Estimates and Projections of Family Planning Indicators,0,1,95% lower bound,LOWER95,95% Lower,41,1990,1990.5,99,All women,1,Model-based Estimates,2,Interpolation,2,Female,31,15-49,15,49,32.5,3.5


In [205]:
# Finally, filters the returned results to only include median values for All Women, and limits the number of columns retained in the new dataframe.
df_women_using_MC_1 = df_women_using_MC.loc[
    (df_women_using_MC["variant"] == "Median")
    & (df_women_using_MC["category"] == "All women"),
    ["location", "iso3", "timeLabel", "indicator", "value"],
]
df_women_using_MC_1

Unnamed: 0,location,iso3,timeLabel,indicator,value
6,Afghanistan,AFG,1990,Contraceptive prevalence: Any method (Percent),5.0
21,Afghanistan,AFG,1991,Contraceptive prevalence: Any method (Percent),5.2
38,Afghanistan,AFG,1992,Contraceptive prevalence: Any method (Percent),5.4
51,Afghanistan,AFG,1993,Contraceptive prevalence: Any method (Percent),5.6
68,Afghanistan,AFG,1994,Contraceptive prevalence: Any method (Percent),5.8
81,Afghanistan,AFG,1995,Contraceptive prevalence: Any method (Percent),5.9
98,Afghanistan,AFG,1996,Contraceptive prevalence: Any method (Percent),6.1
113,Afghanistan,AFG,1997,Contraceptive prevalence: Any method (Percent),6.3
126,Afghanistan,AFG,1998,Contraceptive prevalence: Any method (Percent),6.5
143,Afghanistan,AFG,1999,Contraceptive prevalence: Any method (Percent),6.8


### Percentage of women of reproductive age (15-49 years) who want to stop or delay childbearing but are not using a method of contraception

In [274]:
# Calls the API to return the indicator values for the selected indicators and countries.
df_family_planing_4 = callAPI(
    f"/data/indicators/{4}/locations/{country_selection_string}/start/{start_year}/end/{end_year}"
)

In [73]:
# Finally, filters the returned results to only include median values for All Women, and limits the number of columns retained in the new dataframe.
df_family_planing_4_ = df_family_planing_4.loc[
    (df_family_planing_4["variant"] == "Median")
    & (df_family_planing_4["category"] == "All women"),
    ["location", "iso3", "timeLabel", "indicator", "value"],
]
df_family_planing_4_.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1188 entries, 6 to 17811
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   location   1188 non-null   object 
 1   iso3       1188 non-null   object 
 2   timeLabel  1188 non-null   object 
 3   indicator  1188 non-null   object 
 4   value      1188 non-null   float64
dtypes: float64(1), object(4)
memory usage: 55.7+ KB


## Marital status

In [213]:
df_marital_status = callAPI("/topics/6/indicators")

In [214]:
df_marital_status

Unnamed: 0,topicId,topicName,topicShortName,indicatorId,indicatorName,indicatorDisplayName,indicatorShortName,indicatorDescription,dimAge,dimSex,dimVariant,dimCategory,defaultAgeId,defaultSexId,defaultVariantId,defaultCategoryId,unitScaling,formatString,unitLongLabel,unitShortLabel,variableType,sourceId,sourceName,sourceYear,sourceStartYear,sourceEndYear,sourceCitation,sourceUrl
0,6,Marital Status,MarStat,43,Currently married (Number),Currently married (Number),CURN15_49,Number of married or in-union women of reproductive age (15-49 years),False,False,True,True,31,2,4,100,1.0,### ### ##0,persons,persons,absolute,26,Estimates and Projections of Women of Reproductive Age Who Are Married or in a Union 2022,2022,1970,2030,"United Nations Department of Economic and Social Affairs, Population Division (2022). Estimates and Projections of Women of Reproductive Age Who Are Married or in a Union 2022",https://www.un.org/development/desa/pd/content/fertility-and-marriage-0
1,6,Marital Status,MarStat,42,Currently married (Percent),Currently married (Percent),CURP15_49,Percentage of married or in-union among women of reproductive age (15-49 years),False,False,True,True,31,2,4,100,0.01,#0.0,per cent,%,relative,26,Estimates and Projections of Women of Reproductive Age Who Are Married or in a Union 2022,2022,1970,2030,"United Nations Department of Economic and Social Affairs, Population Division (2022). Estimates and Projections of Women of Reproductive Age Who Are Married or in a Union 2022",https://www.un.org/development/desa/pd/content/fertility-and-marriage-0
2,6,Marital Status,MarStat,45,Currently married by age of woman (Number),Currently married by age of woman (Number),CURN5Y,Number of married or in-union women of reproductive age (15-49 years) by 5-year age groups,True,False,True,True,67,2,4,100,1.0,### ### ##0,persons,persons,absolute,26,Estimates and Projections of Women of Reproductive Age Who Are Married or in a Union 2022,2022,1970,2030,"United Nations Department of Economic and Social Affairs, Population Division (2022). Estimates and Projections of Women of Reproductive Age Who Are Married or in a Union 2022",https://www.un.org/development/desa/pd/content/fertility-and-marriage-0
3,6,Marital Status,MarStat,44,Currently married by age of woman (Percent),Currently married by age of woman (Percent),CURP5Y,Percentage of married or in-union among women of reproductive age (15-49 years) by 5-year age groups,True,False,True,True,67,2,4,100,0.01,#0.0,per cent,%,relative,26,Estimates and Projections of Women of Reproductive Age Who Are Married or in a Union 2022,2022,1970,2030,"United Nations Department of Economic and Social Affairs, Population Division (2022). Estimates and Projections of Women of Reproductive Age Who Are Married or in a Union 2022",https://www.un.org/development/desa/pd/content/fertility-and-marriage-0


### Percentage of married or in-union among women of reproductive age (15-49 years)	

In [215]:
# Calls the API to return the indicator values for the selected indicators and countries.
df_marital_status_42 = callAPI(
    f"/data/indicators/{42}/locations/{country_selection_string}/start/{start_year}/end/{end_year}"
)

In [249]:
# Finally, filters the returned results to only include median values for All Women, and limits the number of columns retained in the new dataframe.
df_marital_status_42_ = df_marital_status_42.loc[
    (df_marital_status_42["variant"] == "Median"),
    ["location", "iso3", "timeLabel", "indicatorId", "indicator", "value"],
]
df_marital_status_42_.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6758 entries, 0 to 6757
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   location     6758 non-null   object 
 1   iso3         6758 non-null   object 
 2   timeLabel    6758 non-null   object 
 3   indicatorId  6758 non-null   int64  
 4   indicator    6758 non-null   object 
 5   value        6758 non-null   float64
dtypes: float64(1), int64(1), object(4)
memory usage: 369.6+ KB


In [250]:
df_marital_status_42_.head()

Unnamed: 0,location,iso3,timeLabel,indicatorId,indicator,value
0,Afghanistan,AFG,1990,42,Currently married (Percent),75.204014
1,Afghanistan,AFG,1991,42,Currently married (Percent),74.907219
2,Afghanistan,AFG,1992,42,Currently married (Percent),74.618856
3,Afghanistan,AFG,1993,42,Currently married (Percent),74.332852
4,Afghanistan,AFG,1994,42,Currently married (Percent),74.065406


In [251]:
df_marital_status_42_.to_parquet(f"../datasets/df_UNPD_{42}.parquet")

## Child Mortality

In [220]:
df_child_mortality = callAPI("/topics/8/indicators")
df_child_mortality

Unnamed: 0,topicId,topicName,topicShortName,indicatorId,indicatorName,indicatorDisplayName,indicatorShortName,indicatorDescription,dimAge,dimSex,dimVariant,dimCategory,defaultAgeId,defaultSexId,defaultVariantId,defaultCategoryId,unitScaling,formatString,unitLongLabel,unitShortLabel,variableType,sourceId,sourceName,sourceYear,sourceStartYear,sourceEndYear,sourceCitation,sourceUrl
0,8,Child Mortality,IGME,22,Infant mortality rate (IMR),Infant mortality rate (IMR),IMR,Probability of dying between birth and exactly one year of age (1q0),False,True,True,False,188,3,4,0,0.001,##0.0,"per 1,000 live births",per live births,relative,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
1,8,Child Mortality,IGME,24,Under-five mortality rate (U5MR),Under-five mortality rate (U5MR),U5MR,Probability of dying between birth and exactly five years of age (5q0),False,True,True,False,188,3,4,0,0.001,##0.0,"per 1,000 live births",per live births,relative,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/


### Mortalidad al nacer

In [221]:
# Calls the API to return the indicator values for the selected indicators and countries.
df_child_mortality_22 = callAPI(
    f"/data/indicators/{'22'}/locations/{country_selection_string}/start/{start_year}/end/{end_year}"
)

In [252]:
# Finally, filters the returned results to only include and limits the number of columns retained in the new dataframe.
df_child_mortality_22_ = df_child_mortality_22[
    [
        "location",
        "iso3",
        "timeLabel",
        "indicatorId",
        "indicator",
        "sexId",
        "sex",
        "value",
    ]
]
df_child_mortality_22_.head(5)

Unnamed: 0,location,iso3,timeLabel,indicatorId,indicator,sexId,sex,value
0,Afghanistan,AFG,1990,22,Infant mortality rate (IMR),1,Male,134.21248
1,Afghanistan,AFG,1990,22,Infant mortality rate (IMR),2,Female,119.60013
2,Afghanistan,AFG,1990,22,Infant mortality rate (IMR),3,Both sexes,126.95578
3,Afghanistan,AFG,1991,22,Infant mortality rate (IMR),1,Male,130.38545
4,Afghanistan,AFG,1991,22,Infant mortality rate (IMR),2,Female,116.3709


In [253]:
df_child_mortality_22_.to_parquet(f"../datasets/df_UNPD_{22}.parquet")

### Mortalidad antes de los 5 anios

In [225]:
df_child_mortality_24 = callAPI(
    f"/data/indicators/{'24'}/locations/{country_selection_string}/start/{start_year}/end/{end_year}"
)

In [254]:
# Finally, filters the returned results to only include and limits the number of columns retained in the new dataframe.
df_child_mortality_24_ = df_child_mortality_24[
    [
        "location",
        "iso3",
        "timeLabel",
        "indicatorId",
        "indicator",
        "sexId",
        "sex",
        "value",
    ]
]
df_child_mortality_24_.head(5)

Unnamed: 0,location,iso3,timeLabel,indicatorId,indicator,sexId,sex,value
0,Afghanistan,AFG,1990,24,Under-five mortality rate (U5MR),1,Male,198.40969
1,Afghanistan,AFG,1990,24,Under-five mortality rate (U5MR),2,Female,189.829974
2,Afghanistan,AFG,1990,24,Under-five mortality rate (U5MR),3,Both sexes,194.103407
3,Afghanistan,AFG,1991,24,Under-five mortality rate (U5MR),1,Male,191.957315
4,Afghanistan,AFG,1991,24,Under-five mortality rate (U5MR),2,Female,183.570334


In [255]:
df_child_mortality_24_.to_parquet(f"../datasets/df_UNPD_{24}.parquet")

## Fertilidad

In [242]:
df_fertilidad = callAPI("/topics/2/indicators")

In [241]:
df_fertilidad

Unnamed: 0,topicId,topicName,topicShortName,indicatorId,indicatorName,indicatorDisplayName,indicatorShortName,indicatorDescription,dimAge,dimSex,dimVariant,dimCategory,defaultAgeId,defaultSexId,defaultVariantId,defaultCategoryId,unitScaling,formatString,unitLongLabel,unitShortLabel,variableType,sourceId,sourceName,sourceYear,sourceStartYear,sourceEndYear,sourceCitation,sourceUrl
0,2,Fertility,Fert,74,Births by age of mother (1-year),Live births by age of mother (and sex of child) - complete,Births1,Number of live births over a given period classified by age group of mother. - complete,True,False,True,False,86,3,4,0,1,### ### ##0,live births,live births,absolute,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
1,2,Fertility,Fert,73,Births by age of mother (5-year),Live births by age of mother (and sex of child),Births5,Number of live births over a given period classified by age group of mother.,True,False,True,False,67,3,4,0,1,### ### ##0,live births,live births,absolute,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
2,2,Fertility,Fert,55,Crude birth rate,"Crude birth rate (births per 1,000 population)",CBR,Number of live births over a given period divided by the person-years lived by the population over that period.,False,False,True,False,188,3,4,0,1000,#0.0,"births per 1,000 population","per 1,000 population",relative,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
3,2,Fertility,Fert,68,Fertility rates by age of mother (1-year),Fertility rates by age of mother (1-year),ASFR1,"Number of live births to women at a particular age, divided by the number of women (i.e., person-years) at that age over a given period.",True,False,True,False,86,3,4,0,1,#0.0,per thousand,thousands,relative,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
4,2,Fertility,Fert,17,Fertility rates by age of mother (5-year),Fertility rates by age of mother (5-year),ASFR5,Age-specific fertility rates (ASFR),True,False,True,False,67,3,4,0,1,#0.0,per thousand,thousands,relative,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
5,2,Fertility,Fert,18,Mean age of childbearing (5-year),Mean age of childbearing (5-year),MAC5,Mean age of childbearing,False,False,True,False,188,3,4,0,1,#0.0,years,years,duration,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
6,2,Fertility,Fert,56,Net Reproduction Rate,Net reproduction rate (daughters per woman),NRR,The average number of daughters a hypothetical cohort of women would have at the end of their reproductive period if they were subject during their whole lives to the fertility rates and the mortality rates of a given period. It is expressed as number of daughters per woman.,False,False,True,False,188,3,4,0,1,#0.00,number of daughters per woman,per woman,relative,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
7,2,Fertility,Fert,58,Sex ratio at birth,Sex ratio at birth (per female newborn),SRB,Number of male births per one female birth.,False,False,True,False,188,3,4,0,1,#0.000,per female newborn,per female newborn,relative,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
8,2,Fertility,Fert,57,Total births by sex,Total number of live births by sex,TBirths,Number of live births over a given period.,False,True,True,False,188,3,4,0,1,### ### ##0,newborns,newborns,absolute,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
9,2,Fertility,Fert,19,Total fertility rate,Total fertility rate,TFR5,Total fertility (children per woman),False,False,True,False,188,3,4,0,1,#0.00,per woman,per woman,relative,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/


### Cantidad de hijos por mujer (19)

In [232]:
# Calls the API to return the indicator values for the selected indicators and countries.
df_fert_rate = callAPI(
    f"/data/indicators/{'19'}/locations/{country_selection_string}/start/{start_year}/end/{end_year}"
)

In [236]:
df_fert_rate.tail(1)

Unnamed: 0,locationId,location,iso3,iso2,locationTypeId,indicatorId,indicator,indicatorDisplayName,sourceId,source,revision,variantId,variant,variantShortName,variantLabel,timeId,timeLabel,timeMid,categoryId,category,estimateTypeId,estimateType,estimateMethodId,estimateMethod,sexId,sex,ageId,ageLabel,ageStart,ageEnd,ageMid,value
35029,894,Zambia,ZMB,ZM,4,19,Total fertility rate,Total fertility rate,25,World Population Prospects,0,6,95% upper bound,UPPER95,95% Upper,71,2020,2020.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,5.0


In [262]:
# Finally, filters the returned results to only include and limits the number of columns retained in the new dataframe.
df_fert_rate_ = df_fert_rate.loc[
    (df_fert_rate["variant"] == "Median"),
    ["location", "iso3", "timeLabel", "indicatorId", "indicator", "value"],
]
df_fert_rate_.head(5)

Unnamed: 0,location,iso3,timeLabel,indicatorId,indicator,value
2,Afghanistan,AFG,1990,19,Total fertility rate,7.56457
7,Afghanistan,AFG,1991,19,Total fertility rate,7.60559
12,Afghanistan,AFG,1992,19,Total fertility rate,7.66482
17,Afghanistan,AFG,1993,19,Total fertility rate,7.718583
22,Afghanistan,AFG,1994,19,Total fertility rate,7.716534


In [257]:
df_fert_rate_.to_parquet(f"../datasets/df_UNPD_{19}.parquet")

## International migration

In [245]:
df_imigration = callAPI("/topics/4/indicators")

In [246]:
df_imigration

Unnamed: 0,topicId,topicName,topicShortName,indicatorId,indicatorName,indicatorDisplayName,indicatorShortName,indicatorDescription,dimAge,dimSex,dimVariant,dimCategory,defaultAgeId,defaultSexId,defaultVariantId,defaultCategoryId,unitScaling,formatString,unitLongLabel,unitShortLabel,variableType,sourceId,sourceName,sourceYear,sourceStartYear,sourceEndYear,sourceCitation,sourceUrl
0,4,International Migration,iMigration,66,Crude rate of net migration,Crude rate of net migration,TNetMigRT,The crude rate of net migration is defined as the ratio of net migration during the year to the average population in that year. The value is expressed per 1000 persons.,False,False,True,False,188,3,4,0,1,### ### ##0,persons,persons,absolute,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/
1,4,International Migration,iMigration,65,Total net-migration,Total net number of migrants,TNetMigration,"Net number of migrants, that is, the number of immigrants minus the number of emigrants.",False,False,True,False,188,3,4,0,1,### ### ##0,persons,persons,absolute,25,World Population Prospects,2022,1950,2100,"United Nations, Department of Economic and Social Affairs, Population Division (2022). <i>World Population Prospects: The 2022 Revision</i>, custom data acquired via website.",https://population.un.org/wpp/


### Crude rate of net migration

In [247]:
# Calls the API to return the indicator values for the selected indicators and countries.
df_migr_rate = callAPI(
    f"/data/indicators/{'66'}/locations/{country_selection_string}/start/{start_year}/end/{end_year}"
)

In [248]:
df_migr_rate.head(5)

Unnamed: 0,locationId,location,iso3,iso2,locationTypeId,indicatorId,indicator,indicatorDisplayName,sourceId,source,revision,variantId,variant,variantShortName,variantLabel,timeId,timeLabel,timeMid,categoryId,category,estimateTypeId,estimateType,estimateMethodId,estimateMethod,sexId,sex,ageId,ageLabel,ageStart,ageEnd,ageMid,value
0,4,Afghanistan,AFG,AF,4,66,Crude rate of net migration,Crude rate of net migration,25,World Population Prospects,0,4,Median,Median,Median,41,1990,1990.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,-60.375
1,4,Afghanistan,AFG,AF,4,66,Crude rate of net migration,Crude rate of net migration,25,World Population Prospects,0,4,Median,Median,Median,42,1991,1991.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,3.721
2,4,Afghanistan,AFG,AF,4,66,Crude rate of net migration,Crude rate of net migration,25,World Population Prospects,0,4,Median,Median,Median,43,1992,1992.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,164.629
3,4,Afghanistan,AFG,AF,4,66,Crude rate of net migration,Crude rate of net migration,25,World Population Prospects,0,4,Median,Median,Median,44,1993,1993.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,87.304
4,4,Afghanistan,AFG,AF,4,66,Crude rate of net migration,Crude rate of net migration,25,World Population Prospects,0,4,Median,Median,Median,45,1994,1994.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,43.986


In [265]:
# Finally, filters the returned results to only include and limits the number of columns retained in the new dataframe.
df_migr_rate_ = df_migr_rate[
    ["location", "iso3", "timeLabel", "indicatorId", "indicator", "sex", "value"]
]

df_migr_rate_.head(5)

Unnamed: 0,location,iso3,timeLabel,indicatorId,indicator,sex,value
0,Afghanistan,AFG,1990,66,Crude rate of net migration,Both sexes,-60.375
1,Afghanistan,AFG,1991,66,Crude rate of net migration,Both sexes,3.721
2,Afghanistan,AFG,1992,66,Crude rate of net migration,Both sexes,164.629
3,Afghanistan,AFG,1993,66,Crude rate of net migration,Both sexes,87.304
4,Afghanistan,AFG,1994,66,Crude rate of net migration,Both sexes,43.986


In [266]:
df_migr_rate_.to_parquet(f"../datasets/df_UNPD_{66}.parquet")

### Total net-migration

In [269]:
# Calls the API to return the indicator values for the selected indicators and countries.
df_migr_number = callAPI(
    f"/data/indicators/{'65'}/locations/{country_selection_string}/start/{start_year}/end/{end_year}"
)

In [270]:
df_migr_number.head(5)

Unnamed: 0,locationId,location,iso3,iso2,locationTypeId,indicatorId,indicator,indicatorDisplayName,sourceId,source,revision,variantId,variant,variantShortName,variantLabel,timeId,timeLabel,timeMid,categoryId,category,estimateTypeId,estimateType,estimateMethodId,estimateMethod,sexId,sex,ageId,ageLabel,ageStart,ageEnd,ageMid,value
0,4,Afghanistan,AFG,AF,4,65,Total net-migration,Total net number of migrants,25,World Population Prospects,0,4,Median,Median,Median,41,1990,1990.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,-666001
1,4,Afghanistan,AFG,AF,4,65,Total net-migration,Total net number of migrants,25,World Population Prospects,0,4,Median,Median,Median,42,1991,1991.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,39926
2,4,Afghanistan,AFG,AF,4,65,Total net-migration,Total net number of migrants,25,World Population Prospects,0,4,Median,Median,Median,43,1992,1992.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,1834556
3,4,Afghanistan,AFG,AF,4,65,Total net-migration,Total net number of migrants,25,World Population Prospects,0,4,Median,Median,Median,44,1993,1993.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,1171818
4,4,Afghanistan,AFG,AF,4,65,Total net-migration,Total net number of migrants,25,World Population Prospects,0,4,Median,Median,Median,45,1994,1994.5,0,Not applicable,1,Model-based Estimates,2,Interpolation,3,Both sexes,188,Total,0,-1,0,665410


In [271]:
# Finally, filters the returned results to only include and limits the number of columns retained in the new dataframe.
df_migr_number_ = df_migr_number[
    ["location", "iso3", "timeLabel", "indicatorId", "indicator", "sex", "value"]
]

In [272]:
df_migr_number_.to_parquet(f"../datasets/df_UNPD_{65}.parquet")

## Maternay mortality

In [93]:
df_maternal_mortality = callAPI("/topics/9/indicators")
df_maternal_mortality.info()

<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Empty DataFrame

### World Bank

In [308]:
def extraer(api_url, lista, indicador, tiempo):
    # creamos el argumento paises
    lista3 = lista.copy()
    b = lista3[0]
    lista3.pop(0)
    for a in lista3:
        b = (
            b + ";" + a
        )  # la API devuelve los datos de varios paises pasando el argumento: AVW;AND;ETH...
    path = f"/country/{b}/indicator/{indicador}"
    url = api_url + path
    args = {"format": "json", "prefix": "Getdata", "date": tiempo, "per_page": 2000}
    s = requests.get(url, params=args)
    salida = pd.json_normalize(s.json()[1])
    return salida[["countryiso3code", "date", "value"]]

In [312]:
country_list = [str(code) for code in df_paises["Iso3"].values]
country_list

['AFG',
 'ALB',
 'DZA',
 'ASM',
 'AND',
 'AGO',
 'ATG',
 'AZE',
 'ARG',
 'AUS',
 'AUT',
 'BHS',
 'BHR',
 'BGD',
 'ARM',
 'BRB',
 'BEL',
 'BTN',
 'BOL',
 'BIH',
 'BWA',
 'BRA',
 'BLZ',
 'SLB',
 'VGB',
 'BRN',
 'BGR',
 'MMR',
 'BDI',
 'BLR',
 'KHM',
 'CMR',
 'CPV',
 'CYM',
 'CAF',
 'LKA',
 'TCD',
 'CHL',
 'CHN',
 'TWN',
 'COL',
 'COM',
 'MYT',
 'COG',
 'COD',
 'COK',
 'CRI',
 'HRV',
 'CUB',
 'CYP',
 'CZE',
 'BEN',
 'DNK',
 'DMA',
 'DOM',
 'ECU',
 'SLV',
 'GNQ',
 'ETH',
 'ERI',
 'EST',
 'FRO',
 'FLK',
 'FJI',
 'FIN',
 'FRA',
 'GUF',
 'PYF',
 'DJI',
 'GAB',
 'GEO',
 'GMB',
 'PSE',
 'DEU',
 'GHA',
 'GIB',
 'KIR',
 'GRC',
 'GRD',
 'GLP',
 'GUM',
 'GTM',
 'GIN',
 'GUY',
 'HTI',
 'VAT',
 'HND',
 'HKG',
 'HUN',
 'ISL',
 'IND',
 'IDN',
 'IRN',
 'IRQ',
 'IRL',
 'ISR',
 'ITA',
 'CIV',
 'JAM',
 'JPN',
 'KAZ',
 'JOR',
 'KEN',
 'PRK',
 'KOR',
 'KWT',
 'KGZ',
 'LAO',
 'LBN',
 'LSO',
 'LVA',
 'LBR',
 'LBY',
 'LIE',
 'LTU',
 'LUX',
 'MAC',
 'MDG',
 'MWI',
 'MYS',
 'MDV',
 'MLI',
 'MLT',
 'MTQ',
 'MRT',


In [313]:
url = "http://api.worldbank.org/v2"
indicador = "SH.STA.MMRT"
tiempo = "2000:2017"
df = extraer(api_url=url, lista=country_list, indicador=indicador, tiempo=tiempo)
df.info()

IndexError: list index out of range

## Infants exclusively breastfed for the first six months of life (%)

In [314]:
OMS = "https://ghoapi.azureedge.net/api/"

In [315]:
IndicatorCode = "WHOSIS_000006"
target = OMS + IndicatorCode + "?pageNumber=1"
response = requests.get(target)
j = response.json()
df = pd.json_normalize(j["value"])
pages = 10
# Converts call into JSON and concat to the previous data frame.
for page in range(2, pages + 1):
    # Reset the target to the next page
    target = OMS + IndicatorCode + f"?pageNumber={page}"

    # Each iteration call the API and convert the resquest into JSON object.
    response = requests.get(target).json()

    # Each iteration convert JSON object to data frame.
    df_temp = pd.json_normalize(response)

    # Each iteration concat the data frames.
    df = pd.concat([df, df_temp], ignore_index=True)

print("df.shape", df.shape, end="\n")

df.shape (624, 25)


In [316]:
df[df["SpatialDim"] == "PER"]

Unnamed: 0,Id,IndicatorCode,SpatialDimType,SpatialDim,TimeDimType,TimeDim,Dim1Type,Dim1,Dim2Type,Dim2,Dim3Type,Dim3,DataSourceDimType,DataSourceDim,Value,NumericValue,Low,High,Comments,Date,TimeDimensionValue,TimeDimensionBegin,TimeDimensionEnd,@odata.context,value
408,27252886.0,WHOSIS_000006,COUNTRY,PER,YEAR,1986.0,,,,,,,,,28,28.0,,,"Perú Encuesta Demográfica y de Salud Familiar - ENDES 1986. Lima, Perú: INEI/Perú.",2021-10-08T15:33:05.143+02:00,1986,1986-01-01T00:00:00+01:00,1986-12-31T00:00:00+01:00,,
409,27252887.0,WHOSIS_000006,COUNTRY,PER,YEAR,1991.0,,,,,,,,,48,48.0,,,"Encuesta demografica y de salud familiar 1991/1992. Demographic and Health Surveys. Lima, Peru, 1992",2021-10-08T15:33:05.16+02:00,1991,1991-01-01T00:00:00+01:00,1991-12-31T00:00:00+01:00,,
410,27252888.0,WHOSIS_000006,COUNTRY,PER,YEAR,1996.0,,,,,,,,,53,53.0,,,"Peru demographic and health survey 1996. Demographic and Health Surveys. Instituto Nacional de Estadistica e Informacion. Lima, Peru, 1997",2021-10-08T15:33:05.19+02:00,1996,1996-01-01T00:00:00+01:00,1996-12-31T00:00:00+01:00,,
411,27252889.0,WHOSIS_000006,COUNTRY,PER,YEAR,2000.0,,,,,,,,,66.6 [62.9-70.1],66.58,62.9,70.1,"Encuesta demografica y de salud familiar 2000. Demographic and Health Surveys. Lima, Peru and Calverton, Maryland, USA: Instituto Nacional de Estadistica e Informacion y ORC Macro, 2001",2021-10-08T15:33:05.207+02:00,2000,2000-01-01T00:00:00+01:00,2000-12-31T00:00:00+01:00,,
412,27252890.0,WHOSIS_000006,COUNTRY,PER,YEAR,2005.0,,,,,,,,,63.3,63.3,,,"Peru, encuesta demografica y de salud familiar. Informe principal: ENDES continua 2004-2006. Demographic and Health Surveys. Lima, Peru: INEI, USAID and ORC Macro, 2007",2021-10-08T15:33:05.223+02:00,2005,2005-01-01T00:00:00+01:00,2005-12-31T00:00:00+01:00,,
413,27252891.0,WHOSIS_000006,COUNTRY,PER,YEAR,2007.0,,,,,,,,,66.5 [60.7-71.8],66.48,60.7,71.8,"Encuesta demografica y de salud familiar. Informe principal: ENDES continua 2007-2008. Demographic and Health Surveys. Lima, Peru: INEI, USAID y ORC Macro, 2009",2021-10-08T15:33:05.24+02:00,2007,2007-01-01T00:00:00+01:00,2007-12-31T00:00:00+01:00,,
414,27252892.0,WHOSIS_000006,COUNTRY,PER,YEAR,2009.0,,,,,,,,,68.3 [64.1-72.2],68.26,64.1,72.2,"Encuesta demografica y de salud familiar - ENDES continua 2009. Demographic and Health Surveys. Lima, Peru: INEI, USAID y ORC Macro, 2010",2021-10-08T15:33:05.253+02:00,2009,2009-01-01T00:00:00+01:00,2009-12-31T00:00:00+01:00,,
415,27252893.0,WHOSIS_000006,COUNTRY,PER,YEAR,2010.0,,,,,,,,,68.3 [64.0-72.3],68.28,64.0,72.3,"Encuesta demografica y de salud familiar - ENDES continua 2010. Demographic and Health Surveys. Lima, Peru: INEI, USAID y ORC Macro, 2011",2021-10-08T15:33:05.253+02:00,2010,2010-01-01T00:00:00+01:00,2010-12-31T00:00:00+01:00,,
416,27252894.0,WHOSIS_000006,COUNTRY,PER,YEAR,2011.0,,,,,,,,,70.1 [65.1-74.6],70.09,65.1,74.6,"Encuesta demografica y de salud familiar - ENDES continua 2011. Demographic and Health Surveys. Lima, Peru: INEI, USAID y ORC Macro, 2012",2021-10-08T15:33:05.27+02:00,2011,2011-01-01T00:00:00+01:00,2011-12-31T00:00:00+01:00,,
417,27252895.0,WHOSIS_000006,COUNTRY,PER,YEAR,2012.0,,,,,,,,,67.4 [62.7-71.8],67.41,62.7,71.8,"Encuesta demografica y de salud familiar - ENDES continua 2012. Demographic and Health Surveys. Lima, Peru: INEI, USAID y ORC Macro, 2013",2021-10-08T15:33:05.3+02:00,2012,2012-01-01T00:00:00+01:00,2012-12-31T00:00:00+01:00,,
