### Importing dependencies

In [2]:
import requests
import json
import pandas as pd

### Data source: World Health Organization website - odata api
### https://www.who.int/data/gho/info/gho-odata-api
#### Retrieving indicators containing the word "anaemia"  https://ghoapi.azureedge.net/api/Indicator?$filter=contains(IndicatorName,%27Anaemia%27)  
##### For this project we will work with indicators for anaemia in children: "NUTRITION_ANAEMIA_CHILDREN_PREV", "NUTRITION_ANAEMIA_CHILDREN_NUM" 

## Getting the list of countries from the COUNTRY dimension endpoint

In [3]:
# Download the WHO GHO "COUNTRY" dimension values and flatten them into a DataFrame.
# `measure` is not interpolated into the URL below; it is kept only so the
# notebook's module-level state matches the indicator cells.
measure = "COUNTRIES"
url = "https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues"
# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error into a clear exception instead of a
# confusing JSON-decoding or KeyError failure further down.
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()
response = http_response.json()
# The records live under the "value" key of the payload.
countries = pd.json_normalize(response['value'])
countries.head()

Unnamed: 0,Code,Title,ParentDimension,Dimension,ParentCode,ParentTitle
0,ABW,Aruba,REGION,COUNTRY,AMR,Americas
1,AFG,Afghanistan,REGION,COUNTRY,EMR,Eastern Mediterranean
2,AGO,Angola,REGION,COUNTRY,AFR,Africa
3,AIA,Anguilla,REGION,COUNTRY,AMR,Americas
4,ALB,Albania,REGION,COUNTRY,EUR,Europe


## Anaemia data retrieval and transformation

### Anaemia in children

In [5]:
# Download the prevalence indicator; it is joined later with the counts from
# the NUTRITION_ANAEMIA_CHILDREN_NUM indicator.
measure = "NUTRITION_ANAEMIA_CHILDREN_PREV"
url = f"https://ghoapi.azureedge.net/api/{measure}"
# Fail fast on a stalled connection or an HTTP error rather than surfacing an
# opaque JSON/KeyError later in the cell.
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()
response = http_response.json()
children_prev = pd.json_normalize(response['value'])
# Keep only the join keys (SpatialDim, TimeDim) and the numeric measurement.
children_prev_cleaned = children_prev[["SpatialDim", "TimeDim", "NumericValue"]]
# Preview the raw frame; as the last expression of the cell it is actually rendered.
children_prev.head()

In [6]:
# Download the NUTRITION_ANAEMIA_CHILDREN_NUM indicator, the companion of the
# prevalence indicator.
measure = "NUTRITION_ANAEMIA_CHILDREN_NUM"
url = f"https://ghoapi.azureedge.net/api/{measure}"
# Fail fast on a stalled connection or an HTTP error rather than surfacing an
# opaque JSON/KeyError later in the cell.
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()
response = http_response.json()
children_num = pd.json_normalize(response['value'])
# Keep only the join keys (SpatialDim, TimeDim) and the numeric measurement.
children_num_cleaned = children_num[["SpatialDim", "TimeDim", "NumericValue"]]
# Preview the raw frame; as the last expression of the cell it is actually rendered.
children_num.head()

##### Merging the two anaemia indicators, then joining the result with the country info

In [7]:
# Step 1: line up prevalence and count on (SpatialDim, TimeDim). It is a left
# join, so every prevalence row is kept; the two NumericValue columns are told
# apart by the "_prev" / "_num" suffixes.
children_anaemia_merged = children_prev_cleaned.merge(
    children_num_cleaned,
    how="left",
    on=["SpatialDim", "TimeDim"],
    suffixes=("_prev", "_num"),
)
# Step 2: attach the country metadata by matching SpatialDim against the
# countries table's Code column (left join again, so unmatched rows get NaN).
children_anaemia = pd.merge(
    children_anaemia_merged,
    countries,
    how="left",
    left_on="SpatialDim",
    right_on="Code",
)
children_anaemia.head()

Unnamed: 0,SpatialDim,TimeDim,NumericValue_prev,NumericValue_num,Code,Title,ParentDimension,Dimension,ParentCode,ParentTitle
0,AFG,2000,51.5,2145.418,AFG,Afghanistan,REGION,COUNTRY,EMR,Eastern Mediterranean
1,AFG,2001,49.8,2155.114,AFG,Afghanistan,REGION,COUNTRY,EMR,Eastern Mediterranean
2,AFG,2002,48.3,2167.643,AFG,Afghanistan,REGION,COUNTRY,EMR,Eastern Mediterranean
3,AFG,2003,47.1,2188.923,AFG,Afghanistan,REGION,COUNTRY,EMR,Eastern Mediterranean
4,AFG,2004,46.2,2212.253,AFG,Afghanistan,REGION,COUNTRY,EMR,Eastern Mediterranean


In [8]:
# Keep the country identifiers, region, year and the two indicator values, and
# give the WHO column names readable labels.
# The former "Value" -> "Income_level" mapping was removed: no "Value" column is
# selected here, so rename() silently ignored it; the income level is attached
# later from the GDP table.
# NOTE: this cell overwrites `children_anaemia`; re-running it without first
# re-running the merge cell raises KeyError because "SpatialDim" etc. no longer exist.
children_anaemia = children_anaemia[
    ["SpatialDim", "Title", "ParentTitle", "TimeDim", "NumericValue_prev", "NumericValue_num"]
].rename(columns={
    "SpatialDim": "Country_code",
    "Title": "Country_name",
    "ParentTitle": "Region",
    "TimeDim": "Year",
})

### Merging with countries_gdp_health to get the income group

In [9]:
# Read the GDP/health workbook from the notebook's working directory; the cells
# below use its "country code" and "income group" columns.
gdp_df = pd.read_excel(io="countries_gdp_health.xlsx")

In [10]:
# groupby country and income group
gdp_df = gdp_df.groupby(["country code","income group"],as_index=False).last()
# dropping cols not needed
gdp_df = gdp_df[["country code","income group"]]

In [11]:
# Attach the income group to every anaemia row (left join on the country code),
# then discard any row that has a missing value in any column.
# Note that this rebinds `children_anaemia_merged`, which an earlier cell used
# for the prevalence/count merge.
children_anaemia_merged = pd.merge(
    children_anaemia,
    gdp_df,
    how="left",
    left_on="Country_code",
    right_on="country code",
).dropna()

In [12]:
# Display the merged frame to inspect the result of the income-group join;
# both "Country_code" and "country code" are still present at this point.
children_anaemia_merged

Unnamed: 0,Country_code,Country_name,Region,Year,NumericValue_prev,NumericValue_num,country code,income group
0,AFG,Afghanistan,Eastern Mediterranean,2000,51.5,2145.418,AFG,Low
1,AFG,Afghanistan,Eastern Mediterranean,2001,49.8,2155.114,AFG,Low
2,AFG,Afghanistan,Eastern Mediterranean,2002,48.3,2167.643,AFG,Low
3,AFG,Afghanistan,Eastern Mediterranean,2003,47.1,2188.923,AFG,Low
4,AFG,Afghanistan,Eastern Mediterranean,2004,46.2,2212.253,AFG,Low
...,...,...,...,...,...,...,...,...
3835,ZWE,Zimbabwe,Africa,2015,41.7,941.070,ZWE,Low-Mid
3836,ZWE,Zimbabwe,Africa,2016,40.5,922.765,ZWE,Low-Mid
3837,ZWE,Zimbabwe,Africa,2017,39.6,891.498,ZWE,Low-Mid
3838,ZWE,Zimbabwe,Africa,2018,38.5,846.109,ZWE,Low-Mid


In [13]:
# Final shaping: pick the output columns (dropping the duplicate "country code"
# key), apply the published column names, then order by country and year.
children_anaemia_merged_clean = (
    children_anaemia_merged
    .loc[:, [
        "Country_code",
        "Country_name",
        "income group",
        "Region",
        "Year",
        "NumericValue_prev",
        "NumericValue_num",
    ]]
    .rename(columns={
        "income group": "Income_level",
        "NumericValue_prev": "Anaemia_Prevalence",
        "NumericValue_num": "Anaemia_Num(Thousands)",
    })
)
# Sort and renumber in one expression so the result carries a clean 0..n-1 index.
children_anaemia_df = (
    children_anaemia_merged_clean
    .sort_values(by=["Country_code", "Year"])
    .reset_index(drop=True)
)

In [14]:
# Persist the final table next to the notebook.
# NOTE(review): with the default index=True the 0..n-1 row index is written as
# an unnamed first column — confirm downstream readers expect it.
children_anaemia_df.to_csv(path_or_buf="anaemia_children.csv")