In [397]:
import requests
import json
import pandas as pd
import aiohttp
import asyncio
import time
from aiohttp import TCPConnector

In [398]:
endpoint = 'http://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD'
params = {
    'format': 'json',
    'per_page': 16758
}
response = requests.get(endpoint, params)

data = response.json()

gdp_df = pd.json_normalize(data[1])

print(gdp_df.tail(10).to_string())



      countryiso3code  date  value unit obs_status  decimal       indicator.id                                      indicator.value country.id country.value
16748             ZWE  1969    NaN                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         ZW      Zimbabwe
16749             ZWE  1968    NaN                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         ZW      Zimbabwe
16750             ZWE  1967    NaN                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         ZW      Zimbabwe
16751             ZWE  1966    NaN                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         ZW      Zimbabwe
16752             ZWE  1965    NaN                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         ZW      Zimbabwe
16753             ZWE  1964    NaN                        

In [399]:
endpoint = 'http://api.worldbank.org/v2/country/all/indicator/SP.DYN.LE00.IN'
params = {
    'format': 'json',
    'per_page': 16758
}
response = requests.get(endpoint, params)

data = response.json()

le_df = pd.json_normalize(data[1])

print(le_df.head(10).to_string())

  countryiso3code  date      value unit obs_status  decimal    indicator.id                          indicator.value country.id                country.value
0             AFE  2022        NaN                        0  SP.DYN.LE00.IN  Life expectancy at birth, total (years)         ZH  Africa Eastern and Southern
1             AFE  2021  62.454590                        0  SP.DYN.LE00.IN  Life expectancy at birth, total (years)         ZH  Africa Eastern and Southern
2             AFE  2020  63.313860                        0  SP.DYN.LE00.IN  Life expectancy at birth, total (years)         ZH  Africa Eastern and Southern
3             AFE  2019  63.755678                        0  SP.DYN.LE00.IN  Life expectancy at birth, total (years)         ZH  Africa Eastern and Southern
4             AFE  2018  63.365863                        0  SP.DYN.LE00.IN  Life expectancy at birth, total (years)         ZH  Africa Eastern and Southern
5             AFE  2017  62.922390                        

In [400]:
columns_to_keep = ['country.value', 'date', 'value']

gdp_df = gdp_df[columns_to_keep]
le_df = le_df[columns_to_keep]

gdp_df = gdp_df.rename(columns={'value': 'gdp_per_cap'})
le_df = le_df.rename(columns={'value': 'life_exp'})
merged_df = gdp_df.merge(le_df, on=['country.value', 'date'], how='inner')

merged_df.dropna(inplace=True)
len(merged_df)

7365

In [401]:
endpoint = 'http://api.worldbank.org/v2/country'

params = {
    'format': 'json',
    'per_page': 16758
}
response = requests.get(endpoint, params)

data = response.json()

country_df = pd.json_normalize(data[1])

columns_to_keep = ['name', 'capitalCity', 'region.value', 'incomeLevel.value']
country_df = country_df[columns_to_keep]
country_df = country_df.rename(columns={'name': 'country.value'})


print(country_df.tail(10).to_string())

                                             country.value capitalCity                region.value    incomeLevel.value
287                                                Vietnam       Hanoi         East Asia & Pacific  Lower middle income
288                                                Vanuatu   Port-Vila         East Asia & Pacific  Lower middle income
289                                                  World                              Aggregates           Aggregates
290                                                  Samoa        Apia         East Asia & Pacific  Lower middle income
291                                                 Kosovo    Pristina       Europe & Central Asia  Upper middle income
292  Sub-Saharan Africa excluding South Africa and Nigeria                              Aggregates           Aggregates
293                                            Yemen, Rep.      Sana'a  Middle East & North Africa           Low income
294                                     

In [402]:
merged_df = merged_df.merge(country_df, on=['country.value'], how='inner')

merged_df = merged_df[merged_df['region.value'] != 'Aggregates']

merged_df.head(100)

Unnamed: 0,country.value,date,gdp_per_cap,life_exp,capitalCity,region.value,incomeLevel.value
1455,Afghanistan,2021,1517.016266,61.982,Kabul,South Asia,Low income
1456,Afghanistan,2020,1968.341002,62.575,Kabul,South Asia,Low income
1457,Afghanistan,2019,2079.921861,63.565,Kabul,South Asia,Low income
1458,Afghanistan,2018,2060.698973,63.081,Kabul,South Asia,Low income
1459,Afghanistan,2017,2096.093111,63.016,Kabul,South Asia,Low income
...,...,...,...,...,...,...,...
1550,Angola,2010,7689.820735,56.726,Luanda,Sub-Saharan Africa,Lower middle income
1551,Angola,2009,7646.144358,55.752,Luanda,Sub-Saharan Africa,Lower middle income
1552,Angola,2008,7866.184345,54.633,Luanda,Sub-Saharan Africa,Lower middle income
1553,Angola,2007,7340.388909,53.642,Luanda,Sub-Saharan Africa,Lower middle income


In [403]:
endpoint = 'http://api.worldbank.org/v2/country/US/indicator/NY.GDP.PCAP.PP.KD'
params = {
    'format': 'json',
    'per_page': 16758
}
response = requests.get(endpoint, params)

data = response.json()

gdp_df = pd.json_normalize(data[1])

print(gdp_df.tail(10).to_string())



   countryiso3code  date  value unit obs_status  decimal       indicator.id                                      indicator.value country.id  country.value
53             USA  1969    NaN                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States
54             USA  1968    NaN                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States
55             USA  1967    NaN                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States
56             USA  1966    NaN                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States
57             USA  1965    NaN                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States
58             USA  1964    NaN                        0  NY.GDP.PCAP.

In [404]:
endpoint = 'http://api.worldbank.org/v2/country/US/indicator/NY.GDP.PCAP.PP.KD?date=2021'
params = {
    'format': 'json',
    'per_page': 16758
}
response = requests.get(endpoint, params)

data = response.json()

gdp_df = pd.json_normalize(data[1])

print(gdp_df.tail(10).to_string())

  countryiso3code  date        value unit obs_status  decimal       indicator.id                                      indicator.value country.id  country.value
0             USA  2021  63635.82381                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States


In [405]:
endpoint = 'http://api.worldbank.org/v2/country/USA/indicator/NY.GDP.PCAP.PP.KD?date=2000:2021'
params = {
    'format': 'json',
    'per_page': 16758
}
response = requests.get(endpoint, params)

data = response.json()

gdp_df = pd.json_normalize(data[1])

print(gdp_df.tail(10).to_string())

   countryiso3code  date         value unit obs_status  decimal       indicator.id                                      indicator.value country.id  country.value
12             USA  2009  53514.931797                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States
13             USA  2008  55427.178273                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States
14             USA  2007  55885.646174                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States
15             USA  2006  55307.719149                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States
16             USA  2005  54331.658336                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         US  United States
17             USA  2004  52

In [406]:
endpoint = 'http://api.worldbank.org/v2/country/us;ca/indicator/NY.GDP.PCAP.PP.KD?date=2000:2021'
params = {
    'format': 'json',
    'per_page': 16758
}
response = requests.get(endpoint, params)

data = response.json()

gdp_df = pd.json_normalize(data[1])

print(gdp_df.head(10).to_string())

  countryiso3code  date         value unit obs_status  decimal       indicator.id                                      indicator.value country.id country.value
0             CAN  2021  48218.038316                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         CA        Canada
1             CAN  2020  46181.757555                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         CA        Canada
2             CAN  2019  49175.677050                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         CA        Canada
3             CAN  2018  48962.481511                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         CA        Canada
4             CAN  2017  48317.174584                        0  NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2017 international $)         CA        Canada
5             CAN  2016  47457.585346   

In [449]:
endpoint = 'http://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD?format=json'

total_pages = data[0]['pages']
print('number of pages: ' + str(total_pages))
all_records = []
# start_time = time.time()
def get_data_sequentially():
    start_time = time.time()
    for page_num in range(1, total_pages + 1):
        res = requests.get(endpoint + 'page=' + str(page_num))
        if res.status_code == 200:
            data = res.json()
            all_records.extend(data[1])  
        else:
            print(f"Failed to fetch data for page {page_num}. Status Code: {res.status_code}")
    duration = time.time() - start_time
    df = pd.DataFrame(all_records)
    print(f'it took {round(duration, 2)} seconds to fetch {len(df)} rows from {total_pages} pages')
    return df

df = get_data_sequentially()
df.head(5)

number of pages: 336
it took 29.82 seconds to fetch 16800 rows from 336 pages


Unnamed: 0,indicator,country,countryiso3code,date,value,unit,obs_status,decimal
0,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2022,3553.91337,,,0
1,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2021,3519.17484,,,0
2,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2020,3455.023119,,,0
3,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2019,3648.220302,,,0
4,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2018,3661.360566,,,0


In [451]:
endpoint = 'http://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD?format=json'

# fetch a single page
async def fetch_page(session, page_num):
    async with session.get(endpoint + '&page=' + str(page_num)) as res:
        if res.status == 200:
            data = await res.json()
            return data[1]  
        else:
            print(f"Failed to fetch data for page {page_num}. Status Code: {res.status}")
            return []

async def main(total_pages):
    connector = TCPConnector(limit=300)  # Limit connections
    all_records = []
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = []
        page_num = 1
        while page_num <= total_pages:
            tasks.append(fetch_page(session, page_num))
            page_num += 1

        pages_data = await asyncio.gather(*tasks)
        for page_data in pages_data:
            all_records.extend(page_data)
    return all_records

res = requests.get(endpoint)
data = res.json()
total_pages = data[0]['pages']
print('number of pages: ' + str(total_pages))

# Call the main function using await
start_time = time.time()
all_records = await main(total_pages)
duration = time.time() - start_time
df = pd.DataFrame(all_records)

print(f'it took {round(duration, 2)} seconds to fetch {len(df)} rows from {total_pages} pages')
df.head(5)


number of pages: 336
it took 0.54 seconds to fetch 16758 rows from 336 pages


Unnamed: 0,indicator,country,countryiso3code,date,value,unit,obs_status,decimal
0,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2022,3553.91337,,,0
1,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2021,3519.17484,,,0
2,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2020,3455.023119,,,0
3,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2019,3648.220302,,,0
4,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2018,3661.360566,,,0


In [409]:
endpoint = 'http://api.worldbank.org/v2/indicator'
params = {
    'format': 'json',
    'per_page': 16758
}
response = requests.get(endpoint, params)

data = response.json()

df = pd.json_normalize(data[1])

# print(df.head(25).to_string)
# print(df.head(10).to_string())
# print(df['name'])
substring = 'Public Expenditure on Education'
filtered_df = df[df['name'].str.contains(substring, case=False, na=False)] 
print(len(filtered_df))

print(filtered_df['id'])

1
16623    SE.XPD.EDUC.ZS
Name: id, dtype: object
