# UNData API Exercise

In [23]:
import requests
import pandas as pd

API to use:
https://datahelpdesk.worldbank.org/knowledgebase/articles/889392-about-the-indicators-api-documentation
Basic call structure:
https://datahelpdesk.worldbank.org/knowledgebase/articles/898581

### Question 1
Use the API to get all available data for the GDP per capita, PPP (constant 2017 international $) indicator. Hint: this indicator has code "NY.GDP.PCAP.PP.KD". Adjust the query parameters so that you can retrieve all available rows. Convert the results to a DataFrame.

In [35]:
# Query the World Bank Indicators API for GDP per capita, PPP (constant 2017 international $).
# The indicator code is part of the URL path; format/paging options go in the query string.
endpoint = 'http://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD'
params = {
    'format': 'json',
    'per_page': 50
}

# Pass the query parameters by keyword, and set a timeout so a stalled connection
# cannot hang the notebook indefinitely (requests applies no timeout by default).
response = requests.get(endpoint, params=params, timeout=30)
# Fail here with a clear HTTPError on a 4xx/5xx reply instead of failing later while parsing.
response.raise_for_status()

In [36]:
# Display the Response object to confirm the request succeeded (its repr shows the HTTP status code).
response

<Response [200]>

In [37]:
# Parse the JSON body of the response into Python objects.
res = response.json()

In [38]:
# res[0] holds the paging metadata for the query.
# Use its 'page' and 'pages' values to drive a dynamic loop over every page.
# The alternative would be to set the per_page parameter to the current total (16758),
# which works today but is probably not resilient if the total changes.
res[0]

{'page': 1,
 'pages': 336,
 'per_page': 50,
 'total': 16758,
 'sourceid': '2',
 'lastupdated': '2023-09-19'}

In [43]:
# Current page number reported in the paging metadata.
res[0]['page']

1

In [44]:
# Total number of pages reported in the paging metadata.
res[0]['pages']

336

In [39]:
# res[1] is the list of record dicts returned for this page (one dict per observation).
res[1]

[{'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2017 international $)'},
  'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'},
  'countryiso3code': 'AFE',
  'date': '2022',
  'value': 3553.91337005893,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2017 international $)'},
  'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'},
  'countryiso3code': 'AFE',
  'date': '2021',
  'value': 3519.17483992362,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 2017 international $)'},
  'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'},
  'countryiso3code': 'AFE',
  'date': '2020',
  'value': 3455.02311899819,
  'unit': '',
  'obs_status': '',
  'decimal': 0},
 {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
   'value': 'GDP per capita, PPP (constant 20

In [40]:
# Convert the page's list of record dicts to a DataFrame. json_normalize flattens the nested
# 'indicator' and 'country' dicts into dotted columns (e.g. indicator.id, country.value).
df_1 = pd.json_normalize(res[1])

In [41]:
# Sanity check on the dimensions: expect one row per record on the page (per_page is 50).
df_1.shape

(50, 10)

In [42]:
# Preview the first rows to check that the nested fields unpacked into flat columns;
# no further processing is needed.
df_1.head()

Unnamed: 0,countryiso3code,date,value,unit,obs_status,decimal,indicator.id,indicator.value,country.id,country.value
0,AFE,2022,3553.91337,,,0,NY.GDP.PCAP.PP.KD,"GDP per capita, PPP (constant 2017 internation...",ZH,Africa Eastern and Southern
1,AFE,2021,3519.17484,,,0,NY.GDP.PCAP.PP.KD,"GDP per capita, PPP (constant 2017 internation...",ZH,Africa Eastern and Southern
2,AFE,2020,3455.023119,,,0,NY.GDP.PCAP.PP.KD,"GDP per capita, PPP (constant 2017 internation...",ZH,Africa Eastern and Southern
3,AFE,2019,3648.220302,,,0,NY.GDP.PCAP.PP.KD,"GDP per capita, PPP (constant 2017 internation...",ZH,Africa Eastern and Southern
4,AFE,2018,3661.360566,,,0,NY.GDP.PCAP.PP.KD,"GDP per capita, PPP (constant 2017 internation...",ZH,Africa Eastern and Southern


plan:
- initial api call to get number of pages (res[0]['pages'])
- use pages value to create dynamic loop (for page in range(1, pages + 1), since the API's pages are 1-indexed) (test with static value like 3)
- within loop get res[1] which is list of dictionaries
- concat the lists together (much more efficient than appending dataframes)
- return concat list of lists
- flatten to remove the first list layer (resulting in single list with all dictionaries)
- to dataframe with pd.json_normalize()

In [48]:
# these will be arguments when turning into function
# indicator doesn't work in the parameters
endpoint = 'http://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD'
params = {
    'format': 'json',
    'per_page': 50
}

In [51]:
# generate response (with a timeout so a stalled connection cannot hang the notebook),
# fail fast on an HTTP error status, then save the parsed JSON to a variable
response = requests.get(endpoint, params=params, timeout=30)
response.raise_for_status()
res = response.json()

In [53]:
# Read the total page count from the paging metadata (res[0]) and save it to a variable;
# per the plan, this value will bound the page loop.
pages = res[0]['pages']

In [54]:
# create empty list to be appended to; each element is one page's list of record dicts
lst = []

# function arguments
endpoint = 'http://api.worldbank.org/v2/country/all/indicator/NY.GDP.PCAP.PP.KD'
params = {
    'format': 'json',
    'per_page': 50
}

# need to have initial api call outside loop to get number of pages
# (timeout so a stalled connection cannot hang the notebook; raise_for_status so an
# HTTP error surfaces here rather than as an indexing error on the parsed body)
response = requests.get(endpoint, params=params, timeout=30)
response.raise_for_status()
res = response.json()
pages = res[0]['pages']
# append res[1] (the first page's records) to list
lst.append(res[1])

# TODO: for loop over the remaining pages (2..pages, pages are 1-indexed), adding a 'page' parameter

lst

[[{'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
    'value': 'GDP per capita, PPP (constant 2017 international $)'},
   'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'},
   'countryiso3code': 'AFE',
   'date': '2022',
   'value': 3553.91337005893,
   'unit': '',
   'obs_status': '',
   'decimal': 0},
  {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
    'value': 'GDP per capita, PPP (constant 2017 international $)'},
   'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'},
   'countryiso3code': 'AFE',
   'date': '2021',
   'value': 3519.17483992362,
   'unit': '',
   'obs_status': '',
   'decimal': 0},
  {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
    'value': 'GDP per capita, PPP (constant 2017 international $)'},
   'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'},
   'countryiso3code': 'AFE',
   'date': '2020',
   'value': 3455.02311899819,
   'unit': '',
   'obs_status': '',
   'decimal': 0},
  {'indicator': {'id': 'NY.GDP.PCAP.PP.KD',
    'value': 'GDP