# UN Data API Exercise

In [108]:
import requests
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

1. Use the API to get all available data for the GDP per capita, PPP (constant 2017 international $) indicator. Hint: this indicator has code "NY.GDP.PCAP.PP.KD". Adjust the query parameters so that you can retrieve all available rows. Convert the results to a DataFrame.

2. Now, use the API to get all available data for Life expectancy at birth, total (years). This indicator has code "SP.DYN.LE00.IN". Again, convert the results to a DataFrame.

In [109]:
# Define the base URL for the World Bank API
base_url = 'https://api.worldbank.org/v2/country/all/indicator/'

# Define multiple indicators
gdp_per_capita_indicator = 'NY.GDP.PCAP.PP.KD'
life_expectancy_at_birth_indicator = 'SP.DYN.LE00.IN'
indicators = {
    'GDP_per_capita': gdp_per_capita_indicator,
    'Life_expectancy_at_birth': life_expectancy_at_birth_indicator
}

format_type = 'json'
params = {
    'format': format_type,
    'per_page': 20000  # High number to get all rows in one request
}

# Initialize an empty list to store DataFrames
worlddata_dfs = []

# Loop through each indicator and fetch data
for indicator_name, indicator_code in indicators.items():
    # Fetch the data from the World Bank API for the current indicator
    response = requests.get(f"{base_url}{indicator_code}", params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        
        # Check if the response contains data
        if len(data) > 1:
            # Convert the data into a pandas DataFrame
            indicator_df = pd.DataFrame(data[1])
            
            # Add a column to indicate which indicator the data belongs to
            indicator_df['Indicator'] = indicator_name
            
            # Append this DataFrame to the list
            worlddata_dfs.append(indicator_df)
        else:
            print(f"No data found for the {indicator_name} indicator.")
    else:
        print(f"Failed to retrieve data for {indicator_name}. Status code: {response.status_code}")
worldbank_gdp_df = worlddata_dfs[0]
worldbank_le_df = worlddata_dfs[1]

In [110]:
worldbank_gdp_df

Unnamed: 0,indicator,country,countryiso3code,date,value,unit,obs_status,decimal,Indicator
0,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2023,4047.007031,,,0,GDP_per_capita
1,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2022,4038.638689,,,0,GDP_per_capita
2,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2021,3994.171654,,,0,GDP_per_capita
3,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2020,3919.499230,,,0,GDP_per_capita
4,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2019,4130.057222,,,0,GDP_per_capita
...,...,...,...,...,...,...,...,...,...
17019,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZW', 'value': 'Zimbabwe'}",ZWE,1964,,,,0,GDP_per_capita
17020,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZW', 'value': 'Zimbabwe'}",ZWE,1963,,,,0,GDP_per_capita
17021,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZW', 'value': 'Zimbabwe'}",ZWE,1962,,,,0,GDP_per_capita
17022,"{'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per ...","{'id': 'ZW', 'value': 'Zimbabwe'}",ZWE,1961,,,,0,GDP_per_capita


In [111]:
worldbank_le_df

Unnamed: 0,indicator,country,countryiso3code,date,value,unit,obs_status,decimal,Indicator
0,"{'id': 'SP.DYN.LE00.IN', 'value': 'Life expect...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2023,,,,0,Life_expectancy_at_birth
1,"{'id': 'SP.DYN.LE00.IN', 'value': 'Life expect...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2022,62.899031,,,0,Life_expectancy_at_birth
2,"{'id': 'SP.DYN.LE00.IN', 'value': 'Life expect...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2021,62.454590,,,0,Life_expectancy_at_birth
3,"{'id': 'SP.DYN.LE00.IN', 'value': 'Life expect...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2020,63.313860,,,0,Life_expectancy_at_birth
4,"{'id': 'SP.DYN.LE00.IN', 'value': 'Life expect...","{'id': 'ZH', 'value': 'Africa Eastern and Sout...",AFE,2019,63.755678,,,0,Life_expectancy_at_birth
...,...,...,...,...,...,...,...,...,...
17019,"{'id': 'SP.DYN.LE00.IN', 'value': 'Life expect...","{'id': 'ZW', 'value': 'Zimbabwe'}",ZWE,1964,54.994000,,,0,Life_expectancy_at_birth
17020,"{'id': 'SP.DYN.LE00.IN', 'value': 'Life expect...","{'id': 'ZW', 'value': 'Zimbabwe'}",ZWE,1963,54.549000,,,0,Life_expectancy_at_birth
17021,"{'id': 'SP.DYN.LE00.IN', 'value': 'Life expect...","{'id': 'ZW', 'value': 'Zimbabwe'}",ZWE,1962,54.071000,,,0,Life_expectancy_at_birth
17022,"{'id': 'SP.DYN.LE00.IN', 'value': 'Life expect...","{'id': 'ZW', 'value': 'Zimbabwe'}",ZWE,1961,53.619000,,,0,Life_expectancy_at_birth


Explode country column so we get country names for gdp dataframe. Then concatenate with gdp data frame. Finally, isolate the year 2021.

In [112]:
country_exploded = (
    pd.json_normalize(worldbank_gdp_df['country'])
    .rename(columns={'value':'Country'})
    .drop(columns=['id'])
)
worldbank_country_exp_df = (pd.concat([worldbank_gdp_df.drop(columns=['country']), country_exploded], axis=1)
                            .drop(columns=['countryiso3code','unit','obs_status','decimal','Indicator','indicator'])
                            .rename(columns={'value':'GDP_Per_Capita','date':'Year'})
)
worldbank_country_exp_df['Year'] = worldbank_country_exp_df['Year'].astype('int16')
worldbank_country_exp_df_2021 = worldbank_country_exp_df[worldbank_country_exp_df['Year']==2021]
worldbank_country_exp_df_2021

Unnamed: 0,Year,GDP_Per_Capita,Country
2,2021,3994.171654,Africa Eastern and Southern
66,2021,4756.308419,Africa Western and Central
130,2021,15776.984852,Arab World
194,2021,19757.122425,Caribbean small states
258,2021,39902.215832,Central Europe and the Baltics
...,...,...,...
16706,2021,46238.059654,Virgin Islands (U.S.)
16770,2021,5663.121607,West Bank and Gaza
16834,2021,,"Yemen, Rep."
16898,2021,3526.507418,Zambia


Explode country column so we get country names for le dataframe. Then concatenate with le data frame. Finally, isolate the year 2021.

In [113]:
country_exploded = (
    pd.json_normalize(worldbank_le_df['country'])
    .rename(columns={'value':'Country'})
    .drop(columns=['id'])
)
worldbank_country_exp_le_df = (pd.concat([worldbank_le_df.drop(columns=['country']), country_exploded], axis=1)
                            .drop(columns=['countryiso3code','unit','obs_status','decimal','Indicator','indicator'])
                            .rename(columns={'value':'Life_Expectancy_at_Birth','date':'Year'})
)
worldbank_country_exp_le_df['Year'] = worldbank_country_exp_le_df['Year'].astype('int16')
worldbank_country_exp_le_df_2021 = worldbank_country_exp_le_df[worldbank_country_exp_le_df['Year']==2021] #getting all data for 2021
worldbank_country_exp_le_df_2021

Unnamed: 0,Year,Life_Expectancy_at_Birth,Country
2,2021,62.454590,Africa Eastern and Southern
66,2021,56.988657,Africa Western and Central
130,2021,70.814483,Arab World
194,2021,70.481791,Caribbean small states
258,2021,74.797401,Central Europe and the Baltics
...,...,...,...
16706,2021,80.068293,Virgin Islands (U.S.)
16770,2021,73.473000,West Bank and Gaza
16834,2021,63.753000,"Yemen, Rep."
16898,2021,61.223000,Zambia


3. Merge the two results DataFrames together. You may want to rename or drop columns prior to merging.

In [114]:
worldbank_gdp_le_2021 = pd.merge(worldbank_country_exp_df_2021, worldbank_country_exp_le_df_2021, how='outer')
worldbank_gdp_le_2021 = worldbank_gdp_le_2021[['Country','Year','GDP_Per_Capita','Life_Expectancy_at_Birth']]
worldbank_gdp_le_2021

Unnamed: 0,Country,Year,GDP_Per_Capita,Life_Expectancy_at_Birth
0,Afghanistan,2021,2138.870247,61.982000
1,Africa Eastern and Southern,2021,3994.171654,62.454590
2,Africa Western and Central,2021,4756.308419,56.988657
3,Albania,2021,16261.804554,76.463000
4,Algeria,2021,14690.916230,76.377000
...,...,...,...,...
261,West Bank and Gaza,2021,5663.121607,73.473000
262,World,2021,19724.129207,71.327157
263,"Yemen, Rep.",2021,,63.753000
264,Zambia,2021,3526.507418,61.223000


4. You can also get more information about the available countries (region, capital city, income level classification, etc.) by using the Country API. Use this API to pull in all available data. Merge this with your other datasets. Use this to now remove the rows that correspond to regions and not countries.

In [115]:
url = 'https://api.worldbank.org/v2/country/'
format_type = 'json'
params = {
    'format': format_type,
    'per_page': 10000  # High number to get all rows in one request
}
response = requests.get(url,params=params)    # Check if the request was successful
if response.status_code == 200:
    data = response.json()
        
    # Check if the response contains data
    if len(data) > 1:
        # Convert the data into a pandas DataFrame
        country_df = pd.DataFrame(data[1])
    else:
        print(f'No data could be found for {url}')
else:
    print(f"Failed to retrieve data for {url}. Status code: {response.status_code}")
country_df = (
    country_df.replace('', np.nan)
    .dropna(subset='capitalCity')
    .drop(columns=['region', 'adminregion', 'incomeLevel' ,'lendingType'])
    .rename(columns={'name':'Country', 'capitalCity':'Capital_City'})
)
country_df #should only have regions with capital cities, i.e. countries

Unnamed: 0,id,iso2Code,Country,Capital_City,longitude,latitude
0,ABW,AW,Aruba,Oranjestad,-70.0167,12.5167
2,AFG,AF,Afghanistan,Kabul,69.1761,34.5228
5,AGO,AO,Angola,Luanda,13.242,-8.81155
6,ALB,AL,Albania,Tirane,19.8172,41.3317
7,AND,AD,Andorra,Andorra la Vella,1.5218,42.5075
...,...,...,...,...,...,...
290,XKX,XK,Kosovo,Pristina,20.926,42.565
292,YEM,YE,"Yemen, Rep.",Sana'a,44.2075,15.352
293,ZAF,ZA,South Africa,Pretoria,28.1871,-25.746
294,ZMB,ZM,Zambia,Lusaka,28.2937,-15.3982


In [116]:
countries_gdp_le_2021 = pd.merge(country_df, worldbank_gdp_le_2021, how='left')
countries_gdp_le_2021

Unnamed: 0,id,iso2Code,Country,Capital_City,longitude,latitude,Year,GDP_Per_Capita,Life_Expectancy_at_Birth
0,ABW,AW,Aruba,Oranjestad,-70.0167,12.5167,2021,38226.146157,74.626
1,AFG,AF,Afghanistan,Kabul,69.1761,34.5228,2021,2138.870247,61.982
2,AGO,AO,Angola,Luanda,13.242,-8.81155,2021,7414.278958,61.643
3,ALB,AL,Albania,Tirane,19.8172,41.3317,2021,16261.804554,76.463
4,AND,AD,Andorra,Andorra la Vella,1.5218,42.5075,2021,58829.850072,
...,...,...,...,...,...,...,...,...,...
206,XKX,XK,Kosovo,Pristina,20.926,42.565,2021,12362.704808,76.806
207,YEM,YE,"Yemen, Rep.",Sana'a,44.2075,15.352,2021,,63.753
208,ZAF,ZA,South Africa,Pretoria,28.1871,-25.746,2021,14172.544251,62.341
209,ZMB,ZM,Zambia,Lusaka,28.2937,-15.3982,2021,3526.507418,61.223


## Bonus Questions

Adjust your request so that it returns data just for the United States.

In [133]:
usa_url = 'https://api.worldbank.org/v2/country/usa/indicator/'
gdp_per_capita_indicator = 'NY.GDP.PCAP.PP.KD'
life_expectancy_at_birth_indicator = 'SP.DYN.LE00.IN'
indicator_list = [gdp_per_capita_indicator, life_expectancy_at_birth_indicator]
format_type = 'json'
params = {
    'format': format_type,
    'per_page': 10000  # High number to get all rows in one request
}
request_str = usa_url
for indicator in indicator_list:
    if indicator_list.index(indicator) != len(indicator_list)-1:
        request_str = request_str + indicator + ';'
    else:
        request_str = request_str + indicator + '?source=' + str(len(indicator_list))
response = requests.get(request_str, params=params)
data = response.json()
data[1]

[{'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2023',
  'value': 73637.3027885905,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2022',
  'value': 72165.4834696586,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2021',
  'value': 71055.87619383,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': '

2. Adjust your request so that it returns data just for the United States for the year 2021.

In [118]:
usa_url = 'https://api.worldbank.org/v2/country/usa/indicator/'
gdp_per_capita_indicator = 'NY.GDP.PCAP.PP.KD'
life_expectancy_at_birth_indicator = 'SP.DYN.LE00.IN'
indicator_list = [gdp_per_capita_indicator, life_expectancy_at_birth_indicator]
date = 2020
format_type = 'json'
params = {
    'format': format_type,
    'per_page': 10000  # High number to get all rows in one request
}
request_str = usa_url
for indicator in indicator_list:
    if indicator_list.index(indicator) != len(indicator_list)-1:
        request_str = request_str + indicator + ';'
    else:
        request_str = request_str + indicator + '?source=' + str(len(indicator_list))
response = requests.get(request_str+'&date='+str(date), params=params)
data = response.json()
data[1]

[{'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2020',
  'value': 67266.187874108,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'SP.DYN.LE00.IN',
   'value': 'Life expectancy at birth, total (years)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2020',
  'value': 76.9804878048781,
  'unit': '',
  'obs_status': '',
  'decimal': 0}]

3. Adjust your request so that it returns data just for the United States for the years 2000 through 2021.

In [119]:
usa_url = 'https://api.worldbank.org/v2/country/usa/indicator/'
gdp_per_capita_indicator = 'NY.GDP.PCAP.PP.KD'
life_expectancy_at_birth_indicator = 'SP.DYN.LE00.IN'
indicator_list = [gdp_per_capita_indicator, life_expectancy_at_birth_indicator]
date_range = '2000:2021'
format_type = 'json'
params = {
    'format': format_type,
    'per_page': 10000  # High number to get all rows in one request
}
request_str = usa_url
for indicator in indicator_list:
    if indicator_list.index(indicator) != len(indicator_list)-1:
        request_str = request_str + indicator + ';'
    else:
        request_str = request_str + indicator + '?source=' + str(len(indicator_list))
response = requests.get(request_str+'&date='+date_range, params=params)
data = response.json()
data[1]

[{'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2021',
  'value': 71055.87619383,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2020',
  'value': 67266.187874108,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'US', 'value': 'United States'},
  'countryiso3code': 'USA',
  'date': '2019',
  'value': 69458.6101827166,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'U

4. Adjust your request so that it returns data for the United States and Canada for the years 2000 through 2021.

In [120]:
countries_url = 'https://api.worldbank.org/v2/country/usa;can/indicator/'
gdp_per_capita_indicator = 'NY.GDP.PCAP.PP.KD'
life_expectancy_at_birth_indicator = 'SP.DYN.LE00.IN'
indicator_list = [gdp_per_capita_indicator, life_expectancy_at_birth_indicator]
date_range = '2000:2021'
format_type = 'json'
params = {
    'format': format_type,
    'per_page': 10000  # High number to get all rows in one request
}
request_str = countries_url
for indicator in indicator_list:
    if indicator_list.index(indicator) != len(indicator_list)-1:
        request_str = request_str + indicator + ';'
    else:
        request_str = request_str + indicator + '?source=' + str(len(indicator_list))
response = requests.get(request_str+'&date='+date_range, params=params)
data = response.json()
data[1]

[{'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'CA', 'value': 'Canada'},
  'countryiso3code': 'CAN',
  'date': '2021',
  'value': 55781.6990272912,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'CA', 'value': 'Canada'},
  'countryiso3code': 'CAN',
  'date': '2020',
  'value': 53274.9109302286,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'CA', 'value': 'Canada'},
  'countryiso3code': 'CAN',
  'date': '2019',
  'value': 56713.0888767094,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'CA', 'value': 'Cana

Let's try to generalize with a function. We will begin by creating a dictionary with countries and their ids as key value pairs.

In [121]:
gdp_le = pd.merge(worldbank_country_exp_df, worldbank_country_exp_le_df)
gdp_le_country = pd.merge(gdp_le, country_df, how='right')
country_id_dict = gdp_le_country.set_index('Country')['id'].to_dict()

worldbank_api_query is a way to request info from the worldbank api. It incorporates an initial request with a 'per_page' parameter of 1, discovers the total number of pages possible, and then adjusts the parameter accordingly. *Note: for some reason, I am having trouble utilyzing indicator IDs other than GDP Per Capita and Life Expectancy in this function.

In [122]:
import json
import difflib  #Importing the difflib module for string matching

def find_closest_dict_match(country, country_id_dict):
    #Find the closest match for a given country name in the country_id_dict.
    keys = list(country_id_dict.keys())
    closest_matches = difflib.get_close_matches(country, keys, n=1, cutoff=0.8)  # Adjust cutoff for match sensitivity
    return closest_matches[0] if closest_matches else None

def worldbank_api_query(countries=None,  # takes a list of country names
                        indicators=None,  # list of indicators
                        date=None, date_range_begin=None, date_range_end=None,
                        format_type='json'):
    init_request = True
    base_url = 'https://api.worldbank.org/v2/'
    init_params = {
        'format': format_type,
        'per_page': 1
    }
    
    # Check if countries are valid
    if countries:
        country_ids = []
        for country in countries:
            if country in country_id_dict:  # Check if country exists in country_id_dict
                country_ids.append(country_id_dict[country])
            else:
                # Suggest closest match
                closest_match = find_closest_dict_match(country, country_id_dict)
                if closest_match:
                    print(f"Warning: '{country}' not found. Did you mean '{closest_match}'?")
                    country_ids.append(country_id_dict[closest_match])  # Append the closest match ID
                else:
                    print(f"Error: '{country}' not found and no suggestions available.")
                    return None
                
        country_ids_str = ';'.join(country_ids)
        url = f'{base_url}country/{country_ids_str}/'
    else:
        url = f'{base_url}country/all'

    #indicators parameter
    if indicators:
        if len(indicators)>1:
            indicators_str = ';'.join(indicators)
            url = f'{url}indicator/{indicators_str}?source={len(indicators)}'
        else:
            url = f'{url}indicator/{indicators[0]}?'
    else:
        raise ValueError('indicators parameter cannot be None')

    #date parameter
    if date is not None and (date_range_begin is None or date_range_end is None):
        date_param = date
    elif date_range_begin is not None and date_range_end is not None:
        date_param = f"{date_range_begin}:{date_range_end}"
    else:
        date_param = None
    # Add date parameter to URL if it exists
    if date_param is not None:
        prefix = '&' if len(indicators) > 1 else ''
        url = f"{url}{prefix}date={date_param}"

    print(url)
    
    if init_request:
        response = requests.get(url, params=init_params)
        data = response.json()
        new_params = {
        'format': format_type,
        'per_page': data[0]['total']
        }
        init_request = False
    if not init_request:
        response = requests.get(url, params=new_params)
        # Check if the request was successful
        if response.status_code != 200:
            print(f"Error: Received status code {response.status_code}. Response: {response.text}")
            return None
        
        # Decode the response text to handle BOM and load JSON
        try:
            data = response.content.decode('utf-8-sig')  # Handle BOM
            return json.loads(data)  # Parse JSON using the json module
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError: {e}. Response: {data}")  # Print the raw response data
            return None

countries = ['United States', 'Netherlands']
gdp_per_capita_indicator = 'NY.GDP.PCAP.PP.KD'
life_expectancy_at_birth_indicator = 'SP.DYN.LE00.IN'
indicator_list = [gdp_per_capita_indicator, life_expectancy_at_birth_indicator]
date_range_begin = 2000
date_range_end = 2021
data = worldbank_api_query(countries=countries, indicators=indicator_list, 
                           date_range_begin=date_range_begin, 
                           date_range_end=date_range_end)
data[1]

https://api.worldbank.org/v2/country/USA;NLD/indicator/NY.GDP.PCAP.PP.KD;SP.DYN.LE00.IN?source=2&date=2000:2021


[{'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'NL', 'value': 'Netherlands'},
  'countryiso3code': 'NLD',
  'date': '2021',
  'value': 67693.3264311302,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'NL', 'value': 'Netherlands'},
  'countryiso3code': 'NLD',
  'date': '2020',
  'value': 64080.8302893499,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'NL', 'value': 'Netherlands'},
  'countryiso3code': 'NLD',
  'date': '2019',
  'value': 67043.1699696165,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2021 international $)'},
  'country': {'id': 'NL',

Turns api request into a data frame, all cleaned up!

In [123]:
def turn_query_into_df(data):
    query_df = pd.DataFrame(data[1])
    query_country_exploded = (
        pd.json_normalize(query_df['country'])
        .rename(columns={'value':'Country'})
        .drop(columns=['id'])
    )
    explode_indicator = (
        pd.json_normalize(query_df['indicator'])
        .drop(columns=['id'])
        .rename(columns={'value':'DataKind'})
    )
    query_df = (
        pd.concat([query_country_exploded, explode_indicator, 
                    query_df.drop(columns=['indicator', 'country'])], axis=1)
        .groupby(['Country','date','DataKind'])['value'].first().unstack(fill_value=0).reset_index()
        .rename(columns={'DataKind':'index'})
    )
    return query_df

In [124]:
countries = ['United States', 'Netherlands', 'Guinea', 'Luxembourg']
gdp_per_capita_indicator = 'NY.GDP.PCAP.PP.KD'
life_expectancy_at_birth_indicator = 'SP.DYN.LE00.IN'
indicator_list = [gdp_per_capita_indicator, life_expectancy_at_birth_indicator]
date_range_begin = 1990
date_range_end = 2021
turn_query_into_df(worldbank_api_query(countries=countries, indicators=indicator_list, 
                           date_range_begin=date_range_begin, 
                           date_range_end=date_range_end))

https://api.worldbank.org/v2/country/USA;NLD;GIN;LUX/indicator/NY.GDP.PCAP.PP.KD;SP.DYN.LE00.IN?source=2&date=1990:2021


DataKind,Country,date,"GDP per capita, PPP (constant 2021 international $)","Life expectancy at birth, total (years)"
0,Guinea,1990,2132.116671,47.004000
1,Guinea,1991,2101.329836,47.546000
2,Guinea,1992,2101.310205,48.363000
3,Guinea,1993,2140.295084,48.962000
4,Guinea,1994,2159.075290,49.171000
...,...,...,...,...
123,United States,2017,66482.893439,78.539024
124,United States,2018,68095.686806,78.639024
125,United States,2019,69458.610183,78.787805
126,United States,2020,67266.187874,76.980488


A way to search for indicator name and will return its id.

In [134]:
def create_name_index(data_list):
    """Create a dictionary mapping names to their indices for fast lookup."""
    return {item['name']: index for index, item in enumerate(data_list)}

def find_closest_matches(name_index, name_search, n=5):
    """Find the closest matches using the name index dictionary."""
    closest_matches = difflib.get_close_matches(name_search, name_index.keys(), n=n, cutoff=0.4)
    return closest_matches

def find_index_of_name(data_list, name_search):
    name_index = create_name_index(data_list)
    
    # Check for an exact match
    if name_search in name_index:
        return name_index[name_search]
    
    # If there's no exact match, find the closest matches
    closest_matches = find_closest_matches(name_index, name_search, n=5)
    
    if closest_matches:
        print(f"No exact match found for '{name_search}'. Did you mean one of the following? Assuming first one.")
        for match in closest_matches:
            print(f"- {match}")
        return name_index[closest_matches[0]]  # Return the index of the first closest match
    
    print(f"No match found for '{name_search}'.")
    return -1

def find_indicator_id_for_name(data, name):
    index = find_index_of_name(data, name)
    print(data[index])
    return data[index]['id']

print(data[1]) #need to find data[1] which includes 
find_indicator_id_for_name(data[1], 'gross national income')

[{'indicator': {'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per capita, PPP (constant 2021 international $)'}, 'country': {'id': 'US', 'value': 'United States'}, 'countryiso3code': 'USA', 'date': '2023', 'value': 73637.3027885905, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per capita, PPP (constant 2021 international $)'}, 'country': {'id': 'US', 'value': 'United States'}, 'countryiso3code': 'USA', 'date': '2022', 'value': 72165.4834696586, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per capita, PPP (constant 2021 international $)'}, 'country': {'id': 'US', 'value': 'United States'}, 'countryiso3code': 'USA', 'date': '2021', 'value': 71055.87619383, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'NY.GDP.PCAP.PP.KD', 'value': 'GDP per capita, PPP (constant 2021 international $)'}, 'country': {'id': 'US', 'value': 'United States'}, 'countryiso3code': 'USA', '

KeyError: 'name'

In [126]:
gdp_per_capita_indicator = 'NY.GDP.PCAP.PP.KD'
life_expectancy_at_birth_indicator = 'SP.DYN.LE00.IN'
gni = 'C1.7'
indicator_list = [life_expectancy_at_birth_indicator, gdp_per_capita_indicator]
date_range_begin = 1990
date_range_end = 2021
q = worldbank_api_query(countries=['United States', 'Netherlands', 'Ireland', 'China'], indicators=indicator_list, date_range_begin=date_range_begin, date_range_end=date_range_end)
turn_query_into_df(q)

https://api.worldbank.org/v2/country/USA;NLD;IRL;CHN/indicator/SP.DYN.LE00.IN;NY.GDP.PCAP.PP.KD?source=2&date=1990:2021


DataKind,Country,date,"GDP per capita, PPP (constant 2021 international $)","Life expectancy at birth, total (years)"
0,China,1990,1645.578944,68.005000
1,China,1991,1773.639411,68.169000
2,China,1992,2001.254274,68.734000
3,China,1993,2253.052020,69.216000
4,China,1994,2518.154888,69.520000
...,...,...,...,...
123,United States,2017,66482.893439,78.539024
124,United States,2018,68095.686806,78.639024
125,United States,2019,69458.610183,78.787805
126,United States,2020,67266.187874,76.980488
