In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import requests
import time
%matplotlib inline

def get_indicator(ind_code, ind_text, timeout=30):
    """Download one indicator from the GHO OData API as a DataFrame.

    The request asks only for observations whose ``TimeDimensionBegin`` is on
    or after 2000-01-01. In the result, ``NumericValue`` is renamed to
    ``ind_text``, ``SpatialDim`` to ``country_code`` and ``TimeDim`` to
    ``year``; rows whose ``SpatialDimType`` is ``REGION`` or
    ``WORLDBANKINCOMEGROUP`` are dropped.

    Parameters
    ----------
    ind_code : str
        Indicator code appended to the API base URL (e.g. 'WHOSIS_000001').
    ind_text : str
        Column name to use for the indicator's numeric value.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would block the kernel forever.

    Returns
    -------
    pandas.DataFrame or None
        The filtered data, or ``None`` when the request fails or the server
        answers with a non-OK status (a message is printed in that case).
    """
    BASE_URL = 'https://ghoapi.azureedge.net/api/'
    DATE_2000S = '?$filter=date(TimeDimensionBegin) ge 2000-01-01'
    service_url = BASE_URL + ind_code + DATE_2000S

    # Transport problems (timeout, DNS, connection reset) follow the same
    # "report and return None" contract as a bad HTTP status.
    try:
        response = requests.get(service_url, timeout=timeout)
    except requests.RequestException as err:
        print("Response was not OK", err)
        return None

    # make sure we got a valid response
    if not response.ok:
        print("Response was not OK", response)
        return None

    data_j = response.json()
    # SpatialDim = country_code, TimeDim = year, NumericValue = ind_text
    data = pd.DataFrame(data_j["value"]).rename(
        columns={'NumericValue': ind_text, 'SpatialDim': 'country_code', 'TimeDim': 'year'})

    # An empty "value" list yields a frame without columns; only filter when
    # the column is actually there instead of failing with AttributeError.
    if 'SpatialDimType' in data.columns:
        aggregates = data['SpatialDimType'].isin(['REGION', 'WORLDBANKINCOMEGROUP'])
        data = data[~aggregates]

    print("Data for \"{}\" loaded, set {} rows {} columns".format(ind_text, data.shape[0], data.shape[1]))
    return data

def remove_duplicates(data_set):
    """Return ``data_set`` with one row per (country_code, year) pair.

    When several rows share the same pair, the last one in row order is the
    one that is kept. The input frame is not modified and the surviving rows
    keep their original index labels.
    """
    return data_set.drop_duplicates(subset=["country_code", "year"], keep='last')

def test_dump(data_set):
    print(data_set.shape)
    print(data_set.info())
    print(data_set[data_set['country_code'] == 'BEL'])

    col_names = list(data_set.columns.values)
    for name in col_names:
        print(name,data_set[name].nunique())



In [3]:
# Fetch the COUNTRY dimension values (code, title, parent region) from the GHO API.
service_url0 = 'https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues/'
# A timeout keeps a stalled connection from blocking the kernel indefinitely.
response0 = requests.get(service_url0, timeout=30)

# make sure we got a valid response
print(response0)
if not response0.ok:
    print("Response was not OK")
    # Stop here with the real HTTP error; continuing would only fail later
    # with a NameError because data0j was never assigned.
    response0.raise_for_status()

# get the full data from the response
data0j = response0.json()
print(data0j.keys())

data0a = pd.DataFrame(data0j["value"])
# Drop the entries whose Title is the 'SPATIAL_SYNONYM' placeholder.
data0a = data0a[data0a['Title'] != 'SPATIAL_SYNONYM']

# Country codes singled out for removal; the code that consumes this list is
# not in this part of the notebook -- NOTE(review): confirm it is still used.
remove_list = ['PRI', 'KNA', 'DMA', 'PSE', 'AND', 'SMR', 'MCO', 'LIE', 'COK', 
               'TUV', 'PLW', 'TKL', 'MHL', 'NIU', 'NRU', 'ME1', 'SDF']


<Response [200]>
dict_keys(['@odata.context', 'value'])


In [5]:
# Preview the first rows of the country dimension table.
data0a.head()

Unnamed: 0,Code,Title,ParentDimension,Dimension,ParentCode,ParentTitle
0,ABW,Aruba,REGION,COUNTRY,AMR,Americas
1,AFG,Afghanistan,REGION,COUNTRY,EMR,Eastern Mediterranean
2,AGO,Angola,REGION,COUNTRY,AFR,Africa
3,AIA,Anguilla,REGION,COUNTRY,AMR,Americas
4,ALB,Albania,REGION,COUNTRY,EUR,Europe


In [9]:

# Indicator WHOSIS_000001, stored under the column name 'life_expect'.
ind_code, ind_text = 'WHOSIS_000001', 'life_expect'
data_raw = get_indicator(ind_code, ind_text)

# Keep the rows whose Dim1 is 'BTSX', reduce them to the key columns plus the
# indicator value, then collapse to one row per (country_code, year).
data01 = (
    data_raw
    .loc[data_raw['Dim1'] == 'BTSX', ['country_code', 'year', ind_text]]
    .pipe(remove_duplicates)
)
test_dump(data01)

Data for "life_expect" loaded, set 2196 rows 23 columns
(732, 3)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 732 entries, 8 to 2195
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   country_code  732 non-null    object 
 1   year          732 non-null    int64  
 2   life_expect   732 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 22.9+ KB
None
    country_code  year  life_expect
140          BEL  2000     77.66169
141          BEL  2010     79.84602
142          BEL  2015     80.71184
143          BEL  2019     81.42305
country_code 183
year 4
life_expect 732


In [11]:
# Show the first 20 rows of the filtered, de-duplicated life_expect frame.
data01.head(20)

Unnamed: 0,country_code,year,life_expect
8,AFG,2000,54.98949
9,AFG,2010,59.94055
10,AFG,2015,61.65429
11,AFG,2019,63.2099
20,AGO,2000,49.30265
21,AGO,2010,58.06943
22,AGO,2015,61.71907
23,AGO,2019,63.06044
32,ALB,2000,73.54528
33,ALB,2010,76.24688


In [13]:
# Lift the column display limit so wide frames are shown without truncated columns.
pd.set_option('display.max_columns', None)

In [16]:
# Inspect the unfiltered indicator rows for country code AFG (all columns, every Dim1 value).
data_raw[data_raw.country_code=="AFG"]

Unnamed: 0,Id,IndicatorCode,SpatialDimType,country_code,TimeDimType,year,Dim1Type,Dim1,Dim2Type,Dim2,Dim3Type,Dim3,DataSourceDimType,DataSourceDim,Value,life_expect,Low,High,Comments,Date,TimeDimensionValue,TimeDimensionBegin,TimeDimensionEnd
0,24614313,WHOSIS_000001,COUNTRY,AFG,YEAR,2000,SEX,MLE,,,,,,,54.6,54.57449,,,,2020-12-04T16:59:42.513+01:00,2000,2000-01-01T00:00:00+01:00,2000-12-31T00:00:00+01:00
1,24614317,WHOSIS_000001,COUNTRY,AFG,YEAR,2010,SEX,MLE,,,,,,,59.6,59.60036,,,,2020-12-04T16:59:43.013+01:00,2010,2010-01-01T00:00:00+01:00,2010-12-31T00:00:00+01:00
2,24614321,WHOSIS_000001,COUNTRY,AFG,YEAR,2015,SEX,MLE,,,,,,,61.0,61.03658,,,,2020-12-04T16:59:43.423+01:00,2015,2015-01-01T00:00:00+01:00,2015-12-31T00:00:00+01:00
3,24614325,WHOSIS_000001,COUNTRY,AFG,YEAR,2019,SEX,MLE,,,,,,,63.3,63.28709,,,,2020-12-04T16:59:43.533+01:00,2019,2019-01-01T00:00:00+01:00,2019-12-31T00:00:00+01:00
4,24614329,WHOSIS_000001,COUNTRY,AFG,YEAR,2000,SEX,FMLE,,,,,,,55.4,55.41726,,,,2020-12-04T16:59:43.61+01:00,2000,2000-01-01T00:00:00+01:00,2000-12-31T00:00:00+01:00
5,24614333,WHOSIS_000001,COUNTRY,AFG,YEAR,2010,SEX,FMLE,,,,,,,60.3,60.2972,,,,2020-12-04T16:59:43.72+01:00,2010,2010-01-01T00:00:00+01:00,2010-12-31T00:00:00+01:00
6,24614337,WHOSIS_000001,COUNTRY,AFG,YEAR,2015,SEX,FMLE,,,,,,,62.3,62.34584,,,,2020-12-04T16:59:43.847+01:00,2015,2015-01-01T00:00:00+01:00,2015-12-31T00:00:00+01:00
7,24614341,WHOSIS_000001,COUNTRY,AFG,YEAR,2019,SEX,FMLE,,,,,,,63.2,63.15551,,,,2020-12-04T16:59:43.927+01:00,2019,2019-01-01T00:00:00+01:00,2019-12-31T00:00:00+01:00
8,24614345,WHOSIS_000001,COUNTRY,AFG,YEAR,2000,SEX,BTSX,,,,,,,55.0,54.98949,,,,2020-12-04T16:59:44.053+01:00,2000,2000-01-01T00:00:00+01:00,2000-12-31T00:00:00+01:00
9,24614349,WHOSIS_000001,COUNTRY,AFG,YEAR,2010,SEX,BTSX,,,,,,,59.9,59.94055,,,,2020-12-04T16:59:44.18+01:00,2010,2010-01-01T00:00:00+01:00,2010-12-31T00:00:00+01:00


In [18]:
# Same country in the filtered frame, for comparison with the raw rows.
data01[data01.country_code=="AFG"]

Unnamed: 0,country_code,year,life_expect
8,AFG,2000,54.98949
9,AFG,2010,59.94055
10,AFG,2015,61.65429
11,AFG,2019,63.2099


In [19]:
# Indicator WHOSIS_000015, stored under the column name 'life_exp60'.
ind_code, ind_text = 'WHOSIS_000015', 'life_exp60'
data_raw = get_indicator(ind_code, ind_text)

# Keep the rows whose Dim1 is 'BTSX', reduce them to the key columns plus the
# indicator value, then collapse to one row per (country_code, year).
data01a = (
    data_raw
    .loc[data_raw['Dim1'] == 'BTSX', ['country_code', 'year', ind_text]]
    .pipe(remove_duplicates)
)
test_dump(data01a)

Data for "life_exp60" loaded, set 2196 rows 23 columns
(732, 3)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 732 entries, 8 to 2195
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   country_code  732 non-null    object 
 1   year          732 non-null    int64  
 2   life_exp60    732 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 22.9+ KB
None
    country_code  year  life_exp60
140          BEL  2000    21.63166
141          BEL  2010    23.10626
142          BEL  2015    23.58500
143          BEL  2019    24.02222
country_code 183
year 4
life_exp60 732


In [20]:
# Indicator WHOSIS_000004, stored under the column name 'adult_mortality'.
ind_code, ind_text = 'WHOSIS_000004', 'adult_mortality'
data_raw = get_indicator(ind_code, ind_text)

# Keep the rows whose Dim1 is 'BTSX', reduce them to the key columns plus the
# indicator value, then collapse to one row per (country_code, year).
data02 = (
    data_raw
    .loc[data_raw['Dim1'] == 'BTSX', ['country_code', 'year', ind_text]]
    .pipe(remove_duplicates)
)
test_dump(data02)

Data for "adult_mortality" loaded, set 9333 rows 23 columns
(3111, 3)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3111 entries, 308 to 9689
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   country_code     3111 non-null   object 
 1   year             3111 non-null   int64  
 2   adult_mortality  3111 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 97.2+ KB
None
    country_code  year  adult_mortality
869          BEL  2000         99.34914
872          BEL  2001         98.10686
875          BEL  2002         96.57720
878          BEL  2003         93.56242
881          BEL  2004         89.45644
884          BEL  2005         88.14542
887          BEL  2006         84.60459
890          BEL  2007         85.75175
893          BEL  2008         85.98980
896          BEL  2009         84.16703
899          BEL  2010         81.23653
902          BEL  2011         80.80521
905 