### Importing dependencies

In [1]:
import requests
import json
import pandas as pd

### Data source: World Health Organization website - odata api
### https://www.who.int/data/gho/info/gho-odata-api
#### Retrieving indicators containing the word "anaemia"  https://ghoapi.azureedge.net/api/Indicator?$filter=contains(IndicatorName,%27Anaemia%27)  
##### For this project we will work with indicators for anaemia in women of reproductive age: "NUTRITION_ANAEMIA_REPRODUCTIVEAGE_PREV", "NUTRITION_ANAEMIA_REPRODUCTIVEAGE_NUM" 

## Getting the list of countries from the COUNTRY dimension endpoint

In [2]:
# Download the WHO country dimension values (country code, title and the
# parent region columns shown in the output below).
url = "https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues"
# Use a timeout so a stalled connection cannot hang the kernel, and fail on an
# HTTP error status instead of trying to parse an error body.
http_response = requests.get(url, timeout=60)
http_response.raise_for_status()
response = http_response.json()
countries = pd.json_normalize(response["value"])
countries.head()

Unnamed: 0,Code,Title,ParentDimension,Dimension,ParentCode,ParentTitle
0,ABW,Aruba,REGION,COUNTRY,AMR,Americas
1,AFG,Afghanistan,REGION,COUNTRY,EMR,Eastern Mediterranean
2,AGO,Angola,REGION,COUNTRY,AFR,Africa
3,AIA,Anguilla,REGION,COUNTRY,AMR,Americas
4,ALB,Albania,REGION,COUNTRY,EUR,Europe


## Anaemia data retrieval and transformation

### Anaemia in women of reproductive age (aged 15-49)

In [3]:
# Prevalence indicator; it is joined further down with the values of the
# NUTRITION_ANAEMIA_REPRODUCTIVEAGE_NUM indicator.
measure = "NUTRITION_ANAEMIA_REPRODUCTIVEAGE_PREV"
url = f"https://ghoapi.azureedge.net/api/{measure}"
# Use a timeout so a stalled connection cannot hang the kernel, and fail on an
# HTTP error status instead of trying to parse an error body.
http_response = requests.get(url, timeout=60)
http_response.raise_for_status()
response = http_response.json()
reproductive_prev = pd.json_normalize(response["value"])
# Keep only the columns used as merge keys (SpatialDim, TimeDim) plus the value.
reproductive_prev_cleaned = reproductive_prev[["SpatialDim", "TimeDim", "NumericValue"]]

In [4]:
# Second anaemia indicator (the "_NUM" series), fetched the same way as the
# prevalence series.
measure = "NUTRITION_ANAEMIA_REPRODUCTIVEAGE_NUM"
url = f"https://ghoapi.azureedge.net/api/{measure}"
# Use a timeout so a stalled connection cannot hang the kernel, and fail on an
# HTTP error status instead of trying to parse an error body.
http_response = requests.get(url, timeout=60)
http_response.raise_for_status()
response = http_response.json()
reproductive_num = pd.json_normalize(response["value"])
# Keep only the columns used as merge keys (SpatialDim, TimeDim) plus the value.
reproductive_num_cleaned = reproductive_num[["SpatialDim", "TimeDim", "NumericValue"]]

##### Merging the two anaemia indicators, then joining the result with the countries info

In [5]:
# Left-join the two indicator tables on SpatialDim + TimeDim; the clashing
# NumericValue columns are disambiguated with the _prev / _num suffixes.
reproductive_anaemia_merged = reproductive_prev_cleaned.merge(
    reproductive_num_cleaned,
    how="left",
    on=["SpatialDim", "TimeDim"],
    suffixes=("_prev", "_num"),
)
# Left-join the country metadata by matching SpatialDim against the country Code.
reproductive_anaemia = pd.merge(
    reproductive_anaemia_merged,
    countries,
    how="left",
    left_on="SpatialDim",
    right_on="Code",
)
reproductive_anaemia.head()

Unnamed: 0,SpatialDim,TimeDim,NumericValue_prev,NumericValue_num,Code,Title,ParentDimension,Dimension,ParentCode,ParentTitle
0,AGO,2003,50.9,2069.667,AGO,Angola,REGION,COUNTRY,AFR,Africa
1,AGO,2012,45.9,2619.674,AGO,Angola,REGION,COUNTRY,AFR,Africa
2,AGO,2015,45.0,2856.631,AGO,Angola,REGION,COUNTRY,AFR,Africa
3,AGO,2013,45.6,2696.526,AGO,Angola,REGION,COUNTRY,AFR,Africa
4,AGO,2016,44.8,2949.494,AGO,Angola,REGION,COUNTRY,AFR,Africa


In [6]:
# Keep only the columns needed downstream and give them readable names.
# The former "Value" -> "Income_level" entry was removed from the rename map:
# no "Value" column exists in this selection, so it never had any effect.
reproductive_anaemia = (
    reproductive_anaemia[
        ["SpatialDim", "Title", "ParentTitle", "TimeDim", "NumericValue_prev", "NumericValue_num"]
    ]
    .rename(
        columns={
            "SpatialDim": "Country_code",
            "Title": "Country_name",
            "ParentTitle": "Region",
            "TimeDim": "Year",
        }
    )
)

### Merging with countries_gdp_health to get the income group

In [7]:
# Load the local GDP/health spreadsheet (path is relative to the working
# directory); only its "country code" and "income group" columns are used below.
gdp_df = pd.read_excel("countries_gdp_health.xlsx")

In [8]:
# Reduce the spreadsheet to exactly one income group per country.
# Rows missing either value are ignored. If a country is listed with more than
# one income group, the last one in file order is kept (presumably the most
# recent — verify the spreadsheet's sort order), so the later merge on country
# code cannot duplicate anaemia rows.
gdp_df = (
    gdp_df[["country code", "income group"]]
    .dropna()
    .drop_duplicates(subset="country code", keep="last")
    .sort_values("country code")
    .reset_index(drop=True)
)

In [9]:
# Attach the income group by country code. The left join is followed by
# dropna(), so every row with a missing value in any column is discarded.
reproductive_anaemia_merged = pd.merge(
    reproductive_anaemia,
    gdp_df,
    how="left",
    left_on="Country_code",
    right_on="country code",
).dropna()

In [10]:
# Inspect the merged table (rows containing any missing value were already
# dropped when it was built).
reproductive_anaemia_merged

Unnamed: 0,Country_code,Country_name,Region,Year,NumericValue_prev,NumericValue_num,country code,income group
0,AGO,Angola,Africa,2003,50.9,2069.667,AGO,Low-Mid
1,AGO,Angola,Africa,2012,45.9,2619.674,AGO,Low-Mid
2,AGO,Angola,Africa,2015,45.0,2856.631,AGO,Low-Mid
3,AGO,Angola,Africa,2013,45.6,2696.526,AGO,Low-Mid
4,AGO,Angola,Africa,2016,44.8,2949.494,AGO,Low-Mid
...,...,...,...,...,...,...,...,...
3835,WSM,Samoa,Western Pacific,2000,24.7,9.623,WSM,Up-Mid
3836,WSM,Samoa,Western Pacific,2003,24.4,9.774,WSM,Up-Mid
3837,WSM,Samoa,Western Pacific,2001,24.5,9.653,WSM,Up-Mid
3838,WSM,Samoa,Western Pacific,2019,26.8,11.561,WSM,Up-Mid


In [11]:
# Final column order for the exported table (drops the duplicate "country code" key).
final_columns = [
    "Country_code",
    "Country_name",
    "income group",
    "Region",
    "Year",
    "NumericValue_prev",
    "NumericValue_num",
]
# Presentation names for the income and indicator columns.
presentation_names = {
    "income group": "Income_level",
    "NumericValue_prev": "Anaemia_Prevalence",
    "NumericValue_num": "Anaemia_Num(Thousands)",
}
reproductive_anaemia_merged_clean = reproductive_anaemia_merged[final_columns].rename(
    columns=presentation_names
)
# Order by country then year and renumber the rows from zero.
reproductive_anaemia_df = (
    reproductive_anaemia_merged_clean
    .sort_values(by=["Country_code", "Year"])
    .reset_index(drop=True)
)

In [12]:
# Write the final table to a CSV file in the working directory.
# NOTE(review): with the default index=True the row index is also written as an
# unnamed first column; pass index=False if downstream readers do not expect it.
reproductive_anaemia_df.to_csv("anaemia_women.csv")