In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt

In [34]:
# Download the NY.GDP.PCAP.PP.KD indicator (GDP per capita) for every entity
# the World Bank API exposes and reduce it to date / GDP_Per_Capita / Country_Name.
endpoint = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD"
params = {
    "format": "json",
    # Presumably sized to hold the whole series in one page when this was
    # written; the loop below keeps working if the series outgrows it.
    "per_page": 16758,
}

# Follow the page count reported in the metadata instead of trusting that a
# single page contains everything (a short page would silently truncate data).
records = []
page = 1
while True:
    # The timeout makes a stalled connection fail instead of hanging the kernel.
    response = requests.get(endpoint, params={**params, "page": page}, timeout=60)
    # Surface HTTP errors here rather than as a confusing JSON/unpack failure.
    response.raise_for_status()
    [metadata, data] = response.json()
    records.extend(data or [])
    if page >= metadata["pages"]:
        break
    page += 1

gdp_percapita_df = pd.DataFrame(records)
# "country" holds a nested dict per row; its "value" entry is the display name.
gdp_percapita_df["Country_Name"] = gdp_percapita_df["country"].apply(lambda x: x["value"])
gdp_percapita_df = gdp_percapita_df.rename(columns={"value": "GDP_Per_Capita"})
keep_cols = ["Country_Name", "date", "GDP_Per_Capita"]
cols_to_drop = [col for col in gdp_percapita_df.columns if col not in keep_cols]
gdp_percapita_df = gdp_percapita_df.drop(columns=cols_to_drop)

In [35]:
# Preview the first rows of the tidied GDP-per-capita frame.
gdp_percapita_df.head()

Unnamed: 0,date,GDP_Per_Capita,Country_Name
0,2022,3553.91337,Africa Eastern and Southern
1,2021,3519.17484,Africa Eastern and Southern
2,2020,3455.023119,Africa Eastern and Southern
3,2019,3648.220302,Africa Eastern and Southern
4,2018,3661.360566,Africa Eastern and Southern


In [None]:
# Stale scratch cell: the GDP fetch cell above already derives Country_Name and
# renames "value". It is kept runnable and idempotent so a top-to-bottom run
# does not fail here.
if "country" in gdp_percapita_df.columns:
    # Only possible while the raw nested "country" column is still present.
    gdp_percapita_df["Country_Name"] = gdp_percapita_df["country"].apply(lambda x: x["value"])
# rename() needs columns= to target column labels, and it returns a new frame,
# so the result must be assigned. A missing "value" column is simply ignored.
gdp_percapita_df = gdp_percapita_df.rename(columns={"value": "GDP_Per_Capita"})

In [26]:
# Keep only the country name, the date and the indicator value. The value
# column is accepted under both its raw name ("value") and its renamed form
# ("GDP_Per_Capita"), so running this cell after the rename does not drop it.
keep_cols = ["Country_Name", "date", "value", "GDP_Per_Capita"]
cols_to_drop = [x for x in gdp_percapita_df.columns if x not in keep_cols]
gdp_percapita_df = gdp_percapita_df.drop(columns=cols_to_drop)

In [36]:
# Download the SP.DYN.LE00.IN indicator (stored below as Life_Expectancy) for
# every entity and reduce it to date / Life_Expectancy / Country_Name.
endpoint = "https://api.worldbank.org/v2/country/all/indicator/SP.DYN.LE00.IN"
params = {
    "format": "json",
    # Presumably sized to hold the whole series in one page when this was
    # written; the loop below keeps working if the series outgrows it.
    "per_page": 16758,
}

# Follow the page count reported in the metadata instead of trusting that a
# single page contains everything (a short page would silently truncate data).
records = []
page = 1
while True:
    # The timeout makes a stalled connection fail instead of hanging the kernel.
    response = requests.get(endpoint, params={**params, "page": page}, timeout=60)
    # Surface HTTP errors here rather than as a confusing JSON/unpack failure.
    response.raise_for_status()
    [metadata, data] = response.json()
    records.extend(data or [])
    if page >= metadata["pages"]:
        break
    page += 1

# NOTE: despite the "gdp_" prefix this frame holds life expectancy only; the
# name is kept because later cells refer to it.
gdp_le_df = pd.DataFrame(records)
# "country" holds a nested dict per row; its "value" entry is the display name.
gdp_le_df["Country_Name"] = gdp_le_df["country"].apply(lambda x: x["value"])
gdp_le_df = gdp_le_df.rename(columns={"value": "Life_Expectancy"})
keep_cols = ["Country_Name", "date", "Life_Expectancy"]
cols_to_drop = [col for col in gdp_le_df.columns if col not in keep_cols]
gdp_le_df = gdp_le_df.drop(columns=cols_to_drop)


In [37]:
# Preview the first rows of the tidied life-expectancy frame.
gdp_le_df.head()

Unnamed: 0,date,Life_Expectancy,Country_Name
0,2022,,Africa Eastern and Southern
1,2021,62.45459,Africa Eastern and Southern
2,2020,63.31386,Africa Eastern and Southern
3,2019,63.755678,Africa Eastern and Southern
4,2018,63.365863,Africa Eastern and Southern


In [38]:
# Join the two indicator frames on (Country_Name, date) using merge's default
# join type, then discard every row that contains a missing value.
merged_df = (
    gdp_percapita_df
    .merge(gdp_le_df, on=["Country_Name", "date"])
    .dropna()
)

In [95]:
# Download the country catalogue and keep the identifying / location columns.
# https is used for consistency with the indicator requests in this notebook.
endpoint = "https://api.worldbank.org/v2/country"
params = {
    "format": "json",
    "per_page": 297,
}
# The timeout makes a stalled connection fail instead of hanging the kernel.
response = requests.get(endpoint, params=params, timeout=60)
# Surface HTTP errors here rather than as a confusing JSON/unpack failure.
response.raise_for_status()
[metadata, data] = response.json()
countries_df = pd.DataFrame(data)
# An empty capitalCity string is falsy, so this drops rows without a capital.
# NOTE(review): presumably those rows are regional/income aggregates — confirm.
countries_df = countries_df[countries_df["capitalCity"].astype(bool)]
countries_df = countries_df[["iso2Code", "name", "capitalCity", "longitude", "latitude"]]
# Align the key column name with the indicator frames so they can be merged.
countries_df = countries_df.rename(columns={"name": "Country_Name"})

In [96]:
# Attach the country catalogue columns to every indicator row. The inner join
# keeps only Country_Name values that appear in both frames.
triple_merged_df = countries_df.merge(merged_df, how="inner", on="Country_Name")

In [97]:
# Preview the first rows of the combined country / GDP / life-expectancy frame.
triple_merged_df.head()

Unnamed: 0,iso2Code,Country_Name,capitalCity,longitude,latitude,date,GDP_Per_Capita,Life_Expectancy
0,AW,Aruba,Oranjestad,-70.0167,12.5167,2021,38866.333486,74.626
1,AW,Aruba,Oranjestad,-70.0167,12.5167,2020,33155.243239,75.723
2,AW,Aruba,Oranjestad,-70.0167,12.5167,2019,40780.516485,76.248
3,AW,Aruba,Oranjestad,-70.0167,12.5167,2018,40706.749216,76.072
4,AW,Aruba,Oranjestad,-70.0167,12.5167,2017,38865.188195,75.903


In [105]:
# Only US: the SP.POP.TOTL indicator for a single country.
# https is used for consistency with the indicator requests earlier on.
endpoint = "https://api.worldbank.org/v2/country/us/indicator/SP.POP.TOTL"
params = {
    "format": "json",
    # NOTE(review): 63 presumably matched the number of yearly records — confirm.
    "per_page": 63,
}
# The timeout makes a stalled connection fail instead of hanging the kernel.
response = requests.get(endpoint, params=params, timeout=30)
# Surface HTTP errors here rather than as a confusing JSON/unpack failure.
response.raise_for_status()
[metadata, data] = response.json()
df = pd.DataFrame(data)

In [106]:
# Only US, 2021: the "date" query parameter restricts the series to one year.
# https is used for consistency with the indicator requests earlier on.
endpoint = "https://api.worldbank.org/v2/country/us/indicator/SP.POP.TOTL"
params = {
    "format": "json",
    "date": 2021,
}
# The timeout makes a stalled connection fail instead of hanging the kernel.
response = requests.get(endpoint, params=params, timeout=30)
# Surface HTTP errors here rather than as a confusing JSON/unpack failure.
response.raise_for_status()
[metadata, data] = response.json()
df = pd.DataFrame(data)

In [110]:
# Only US, 2000:2021: "date" also accepts a "start:end" range.
# https is used for consistency with the indicator requests earlier on.
endpoint = "https://api.worldbank.org/v2/country/us/indicator/SP.POP.TOTL"
params = {
    "format": "json",
    "date": "2000:2021",
}
# The timeout makes a stalled connection fail instead of hanging the kernel.
response = requests.get(endpoint, params=params, timeout=30)
# Surface HTTP errors here rather than as a confusing JSON/unpack failure.
response.raise_for_status()
[metadata, data] = response.json()
df = pd.DataFrame(data)
# Total number of records the API reports for this query.
print(metadata["total"])

22


In [113]:
# Only US and Canada, 2000:2021: several country codes are joined with ";".
# https is used for consistency with the indicator requests earlier on.
endpoint = "https://api.worldbank.org/v2/country/us;ca/indicator/SP.POP.TOTL"
params = {
    "format": "json",
    "date": "2000:2021",
}
# The timeout makes a stalled connection fail instead of hanging the kernel.
response = requests.get(endpoint, params=params, timeout=30)
# Surface HTTP errors here rather than as a confusing JSON/unpack failure.
response.raise_for_status()
[metadata, data] = response.json()
df = pd.DataFrame(data)
# Total number of records the API reports for this query.
print(metadata["total"])

44


In [121]:
# Synchronous solution: fetch page 1 to learn the page count, then request the
# remaining pages one at a time, accumulating every record in response_data.
response_data = []
endpoint = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD"
params = {
    "format": "json",
    "page": 1,
    "date": "2010:2021",
}
# The timeout makes a stalled connection fail instead of hanging the kernel.
initial_response = requests.get(endpoint, params=params, timeout=60)
initial_response.raise_for_status()
# Parse THIS request's body. Reading the leftover `response` from an earlier
# cell would take page 1 and the page count from an unrelated query.
[metadata, data] = initial_response.json()
response_data.extend(data)
total_pages = metadata["pages"]
page = 1
for page in range(2, total_pages + 1):
    params["page"] = page
    response = requests.get(endpoint, params=params, timeout=60)
    response.raise_for_status()
    [metadata, data] = response.json()
    response_data.extend(data)
    print(f"recieved page {page} of {total_pages}")



recieved page 2 of 64
recieved page 3 of 64
recieved page 4 of 64
recieved page 5 of 64
recieved page 6 of 64
recieved page 7 of 64
recieved page 8 of 64
recieved page 9 of 64
recieved page 10 of 64
recieved page 11 of 64
recieved page 12 of 64
recieved page 13 of 64
recieved page 14 of 64
recieved page 15 of 64
recieved page 16 of 64
recieved page 17 of 64
recieved page 18 of 64
recieved page 19 of 64
recieved page 20 of 64
recieved page 21 of 64
recieved page 22 of 64
recieved page 23 of 64
recieved page 24 of 64
recieved page 25 of 64
recieved page 26 of 64
recieved page 27 of 64
recieved page 28 of 64
recieved page 29 of 64
recieved page 30 of 64
recieved page 31 of 64
recieved page 32 of 64
recieved page 33 of 64
recieved page 34 of 64
recieved page 35 of 64
recieved page 36 of 64
recieved page 37 of 64
recieved page 38 of 64
recieved page 39 of 64
recieved page 40 of 64
recieved page 41 of 64
recieved page 42 of 64
recieved page 43 of 64
recieved page 44 of 64
recieved page 45 of

In [141]:
import aiohttp
import asyncio
import nest_asyncio

# Module-level accumulator; main() extends it with the records of every page.
# Re-running this cell resets it to an empty list.
response_data = []
# Initialised to None; main() reads the page count from the API metadata into
# a variable of this same name.
total_pages = None

async def fetch_data(session, page):
    """Request one page of the NY.GDP.PCAP.PP.KD indicator and return its JSON.

    Parameters
    ----------
    session
        Object whose ``get(url, params=...)`` returns an async context manager
        yielding a response (``main`` passes an ``aiohttp.ClientSession``).
    page
        Page number sent as the ``page`` query parameter.

    Returns
    -------
    The decoded JSON body exactly as ``response.json()`` produces it.
    """
    url = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD"
    query = {"format": "json", "page": page}
    async with session.get(url, params=query) as resp:
        return await resp.json()

async def main():
    """Fetch every page of the indicator concurrently into ``response_data``.

    Page 1 is requested first to learn the page count from its metadata. The
    remaining pages are then requested together and appended to the
    module-level ``response_data`` list. The page count is also published in
    the module-level ``total_pages``.
    """
    # Without this declaration the assignment below would create a local and
    # leave the module-level total_pages stuck at None.
    global total_pages

    async with aiohttp.ClientSession() as session:
        initial_response = await fetch_data(session, 1)
        [metadata, data] = initial_response
        # `or []` guards against a null data payload on an empty result.
        response_data.extend(data or [])
        total_pages = metadata["pages"]

        # One coroutine per remaining page; the list is empty for a single page.
        tasks = [fetch_data(session, page) for page in range(2, total_pages + 1)]
        # gather() returns results in the order the awaitables were passed, so
        # pages are appended in page order regardless of completion order.
        results = await asyncio.gather(*tasks)
        print("All data recived!....Please wait as I put it together.....")
        for result in results:
            [metadata, data] = result
            response_data.extend(data or [])
        print('Done!')

# nest_asyncio.apply() is called before asyncio.run() so the coroutine can be
# driven from inside the notebook.
# NOTE(review): presumably required because the kernel already has a running
# event loop — confirm.
nest_asyncio.apply()
# Run main() to completion; it fills the module-level response_data list.
asyncio.run(main())



All data recived!....Please wait as I put it together.....
Done!


In [143]:
# Total number of records collected in response_data.
len(response_data)

16758