In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
# For async requests
import aiohttp
import asyncio
import nest_asyncio


In [2]:
# Download the GDP-per-capita indicator (NY.GDP.PCAP.PP.KD) for every country/year
# and reduce it to three columns: date, GDP_Per_Capita, Country_Name.
endpoint = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD"
params = {
    'format': "json",
    "per_page": 16758
}

# requests never times out on its own, so a stalled connection would hang the
# cell forever; raise_for_status() turns an HTTP error into an immediate exception
# instead of a confusing unpacking failure below.
response = requests.get(endpoint, params=params, timeout=30)
response.raise_for_status()
metadata, data = response.json()

# per_page is a fixed guess at the total row count. If the API reports more than
# one page, pull the remaining pages so rows are not silently dropped.
for page in range(2, metadata["pages"] + 1):
    extra_response = requests.get(endpoint, params={**params, "page": page}, timeout=30)
    extra_response.raise_for_status()
    data.extend(extra_response.json()[1])

gdp_percapita_df = (
    pd.DataFrame(data)
    # "country" holds a nested dict per row; its "value" entry is the display name.
    .assign(Country_Name=lambda frame: frame["country"].map(lambda c: c["value"]))
    .rename(columns={"value": "GDP_Per_Capita"})
    .loc[:, ["date", "GDP_Per_Capita", "Country_Name"]]
)

In [3]:
# Preview the first rows of the trimmed GDP-per-capita frame.
gdp_percapita_df.head()

Unnamed: 0,date,GDP_Per_Capita,Country_Name
0,2022,3553.91337,Africa Eastern and Southern
1,2021,3519.17484,Africa Eastern and Southern
2,2020,3455.023119,Africa Eastern and Southern
3,2019,3648.220302,Africa Eastern and Southern
4,2018,3661.360566,Africa Eastern and Southern


In [4]:
# Download the life-expectancy indicator (SP.DYN.LE00.IN) for every country/year
# and reduce it to three columns: date, Life_Expectancy, Country_Name.
endpoint = "https://api.worldbank.org/v2/country/all/indicator/SP.DYN.LE00.IN"
params = {
    'format': "json",
    "per_page": 16758
}

# requests never times out on its own, so a stalled connection would hang the
# cell forever; raise_for_status() turns an HTTP error into an immediate exception
# instead of a confusing unpacking failure below.
response = requests.get(endpoint, params=params, timeout=30)
response.raise_for_status()
metadata, data = response.json()

# per_page is a fixed guess at the total row count. If the API reports more than
# one page, pull the remaining pages so rows are not silently dropped.
for page in range(2, metadata["pages"] + 1):
    extra_response = requests.get(endpoint, params={**params, "page": page}, timeout=30)
    extra_response.raise_for_status()
    data.extend(extra_response.json()[1])

# NOTE(review): despite the "gdp_" prefix this frame holds life expectancy; the
# name is kept because later cells refer to it.
gdp_le_df = (
    pd.DataFrame(data)
    # "country" holds a nested dict per row; its "value" entry is the display name.
    .assign(Country_Name=lambda frame: frame["country"].map(lambda c: c["value"]))
    .rename(columns={"value": "Life_Expectancy"})
    .loc[:, ["date", "Life_Expectancy", "Country_Name"]]
)


In [5]:
# Preview the first rows of the trimmed life-expectancy frame.
gdp_le_df.head()

Unnamed: 0,date,Life_Expectancy,Country_Name
0,2022,,Africa Eastern and Southern
1,2021,62.45459,Africa Eastern and Southern
2,2020,63.31386,Africa Eastern and Southern
3,2019,63.755678,Africa Eastern and Southern
4,2018,63.365863,Africa Eastern and Southern


In [6]:
# Pair each (country, year) GDP figure with the matching life-expectancy figure,
# then discard every row where either side is missing a value.
merged_df = (
    gdp_percapita_df
    .merge(gdp_le_df, on=["Country_Name", "date"])
    .dropna()
)

In [7]:
# Country metadata (ISO code, capital city, coordinates) from the country endpoint.
# HTTPS is used to match the indicator requests made elsewhere in this notebook.
endpoint = "https://api.worldbank.org/v2/country"
params = {
    'format': "json",
    "per_page": 297
}
# requests never times out on its own; raise_for_status() surfaces HTTP errors
# here rather than as an unpacking failure on the next line.
response = requests.get(endpoint, params=params, timeout=30)
response.raise_for_status()
metadata, data = response.json()
countries_df = (
    pd.DataFrame(data)
    # Keep only entries with a non-empty capitalCity (presumably this filters out
    # regional/income aggregates - confirm), and only the columns used later.
    .loc[
        lambda frame: frame["capitalCity"].astype(bool),
        ['iso2Code', 'name', 'capitalCity', 'longitude', 'latitude'],
    ]
    .rename(columns={"name": "Country_Name"})
)

In [8]:
# Attach country metadata to the indicator rows; the inner join keeps only names
# present in both frames.
triple_merged_df = countries_df.merge(merged_df, how="inner", on="Country_Name")

In [9]:
# Preview the first rows of the combined country + indicator frame.
triple_merged_df.head()

Unnamed: 0,iso2Code,Country_Name,capitalCity,longitude,latitude,date,GDP_Per_Capita,Life_Expectancy
0,AW,Aruba,Oranjestad,-70.0167,12.5167,2021,38866.333486,74.626
1,AW,Aruba,Oranjestad,-70.0167,12.5167,2020,33155.243239,75.723
2,AW,Aruba,Oranjestad,-70.0167,12.5167,2019,40780.516485,76.248
3,AW,Aruba,Oranjestad,-70.0167,12.5167,2018,40706.749216,76.072
4,AW,Aruba,Oranjestad,-70.0167,12.5167,2017,38865.188195,75.903


In [10]:
#Only US
# Total-population indicator (SP.POP.TOTL) for the United States, up to 63 rows.
# HTTPS is used to match the indicator requests made elsewhere in this notebook.
endpoint = "https://api.worldbank.org/v2/country/us/indicator/SP.POP.TOTL"
params = {
    'format': "json",
    'per_page': 63
}
# requests never times out on its own; raise_for_status() surfaces HTTP errors
# here rather than as an unpacking failure on the next line.
response = requests.get(endpoint, params=params, timeout=30)
response.raise_for_status()
metadata, data = response.json()
df = pd.DataFrame(data)

In [11]:
#Only US 2021
# Same indicator, restricted to a single year through the "date" query parameter.
# HTTPS is used to match the indicator requests made elsewhere in this notebook.
endpoint = "https://api.worldbank.org/v2/country/us/indicator/SP.POP.TOTL"
params = {
    'format': "json",
    "date": 2021
}
# requests never times out on its own; raise_for_status() surfaces HTTP errors
# here rather than as an unpacking failure on the next line.
response = requests.get(endpoint, params=params, timeout=30)
response.raise_for_status()
metadata, data = response.json()
df = pd.DataFrame(data)

In [12]:
#Only US 2000:2021
# Same indicator, restricted to a year range written as "start:end".
# HTTPS is used to match the indicator requests made elsewhere in this notebook.
endpoint = "https://api.worldbank.org/v2/country/us/indicator/SP.POP.TOTL"
params = {
    'format': "json",
    "date": "2000:2021"
}
# requests never times out on its own; raise_for_status() surfaces HTTP errors
# here rather than as an unpacking failure on the next line.
response = requests.get(endpoint, params=params, timeout=30)
response.raise_for_status()
metadata, data = response.json()
df = pd.DataFrame(data)
# metadata["total"] is the number of records matching the query.
print(metadata["total"])

22


In [13]:
#Only US and Canada 2000:2021
# Several countries can be requested at once by joining their codes with ";".
# HTTPS is used to match the indicator requests made elsewhere in this notebook.
endpoint = "https://api.worldbank.org/v2/country/us;ca/indicator/SP.POP.TOTL"
params = {
    'format': "json",
    "date": "2000:2021"
}
# requests never times out on its own; raise_for_status() surfaces HTTP errors
# here rather than as an unpacking failure on the next line.
response = requests.get(endpoint, params=params, timeout=30)
response.raise_for_status()
metadata, data = response.json()
df = pd.DataFrame(data)
# metadata["total"] is the number of records matching the query.
print(metadata["total"])

44


In [14]:
#Synchronous solution
# Collect every page of the 2010-2021 GDP-per-capita results, one request after
# another, into response_data.
response_data = []
endpoint = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD"
params = {
    'format': "json",
    'page': 1,
    "date": "2010:2021"
}
# A Session reuses the underlying connection across the page requests instead of
# opening a new one per page; the context manager closes it when the loop ends.
with requests.Session() as session:
    # The first page is needed up front: its metadata says how many pages exist.
    response = session.get(endpoint, params=params, timeout=30)
    response.raise_for_status()
    metadata, data = response.json()
    response_data.extend(data)
    total_pages = metadata["pages"]

    for page in range(2, total_pages + 1):
        params["page"] = page
        # The timeout prevents an indefinite hang; raise_for_status() stops the
        # loop on an HTTP error instead of failing later while unpacking.
        response = session.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        metadata, data = response.json()
        response_data.extend(data)
        # Light progress indicator: report every 50th page.
        if page % 50 == 0:
            print(f"recieved page {page} of {total_pages}")



recieved page 50 of 64


In [15]:
#async solution

# Module-level accumulator; fetch_all() extends it with the records of every page.
response_data = []
# Reset the page count here; fetch_all() reads the actual number of pages from
# the first page's metadata.
total_pages = 0

async def fetch_data(session, page):
    """Fetch one page of the 2010-2021 GDP-per-capita results for all countries.

    Args:
        session: Open HTTP client session whose ``get()`` returns an async
            context manager (an ``aiohttp.ClientSession`` in this notebook).
        page: Page number to request; the caller starts at 1.

    Returns:
        The decoded JSON body, which callers unpack as ``[metadata, data]``.

    Raises:
        Whatever ``response.raise_for_status()`` raises on an HTTP error status
        (``aiohttp.ClientResponseError`` for an aiohttp session).
    """
    endpoint = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD"
    params = {
        'format': "json",
        'page': page,
        "date": "2010:2021"
    }
    async with session.get(endpoint, params=params) as response:
        # Check the status before decoding: with many requests in flight an error
        # response should surface as an HTTP error, not as a JSON decoding or
        # unpacking failure further down.
        response.raise_for_status()
        return await response.json()

async def fetch_all():
    """Download every result page concurrently and append the records to ``response_data``.

    The first page is awaited on its own because its metadata carries the page
    count; the remaining pages are then requested together and their records
    appended in page order.

    Side effects:
        Extends the module-level ``response_data`` list, sets the module-level
        ``total_pages``, and prints two progress messages.
    """
    # Without this declaration the assignment below would bind a function-local
    # name and leave the module-level total_pages at its initial value.
    global total_pages
    async with aiohttp.ClientSession() as session:
        metadata, data = await fetch_data(session, 1)
        response_data.extend(data)
        total_pages = metadata["pages"]

        tasks = [fetch_data(session, page) for page in range(2, total_pages + 1)]
        # gather() returns results in the order the awaitables were passed, so the
        # records are appended in page order regardless of completion order.
        results = await asyncio.gather(*tasks)
        print("All data recieved!....Please wait as I put it together.....")
        for _page_metadata, page_data in results:
            response_data.extend(page_data)
        print('Done!')

# asyncio.run() refuses to start while an event loop is already running (presumably
# the case inside this notebook kernel - confirm); nest_asyncio.apply() patches
# asyncio so the nested run is allowed.
nest_asyncio.apply()
asyncio.run(fetch_all())



All data recieved!....Please wait as I put it together.....
Done!
