In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path

In [2]:
# Country reference table. 'Path('__file__')' wraps the literal string '__file__' (not the
# module attribute), so `.parent` is '.' and the CSV is looked up under ./data relative to
# the current working directory. The first CSV column is used as the row index.
countries = pd.read_csv(Path('__file__').parent.joinpath('data', 'country_dict.csv'), index_col=0)

In [3]:
# Healthy life expectancy (HALE)
# WHO indicator metadata: https://www.who.int/data/gho/indicator-metadata-registry/imr-details/66

file_path = Path('__file__').parent / 'data' / 'data_Healthy_life_expectancy_(HALE)_at_birth_(years).csv'
hale = pd.read_csv(file_path)

# Keep only the columns used downstream, rename them to the notebook's common schema,
# attach each country's World Bank income code (inner join on CountryCode), fix the
# column order, and add the '<Year><CountryCode>' key used for the later merges.
hale_clear = (
    hale[['Indicator', 'ParentLocationCode', 'ParentLocation', 'SpatialDimValueCode',
          'Location', 'Period', 'Dim1', 'FactValueNumeric']]
    .rename(columns={
        'ParentLocationCode': 'RegionCode',
        'ParentLocation': 'Region',
        'SpatialDimValueCode': 'CountryCode',
        'Location': 'Country',
        'Period': 'Year',
        'Dim1': 'Sex',
        'FactValueNumeric': 'Value',
    })
    .merge(right=countries[['CountryCode', 'WBIncomeCode']], on='CountryCode')
    .loc[:, ['Indicator', 'RegionCode', 'Region', 'WBIncomeCode', 'CountryCode',
             'Country', 'Year', 'Sex', 'Value']]
    .assign(Index=lambda frame: frame['Year'].astype(str) + frame['CountryCode'])
)

In [None]:
# Current health expenditure (CHE) as percentage of gross domestic product (GDP) (%)

# Current health expenditure as a share of GDP provides an indication on the level of resources channelled to health relative 
# to other uses. It shows the importance of the health sector in the whole economy and indicates the societal priority 
# which health is given measured in monetary terms.

# Read from the relative `data` folder, the same way the country table and HALE cells do,
# instead of an absolute path that only exists on one machine.
# NOTE(review): assumes the notebook is run from the directory that contains `data/` - confirm.
file_path = Path('__file__').parent / 'data' / 'data_CHE_as_percentage_of_GDP.csv'
che = pd.read_csv(file_path)

# Keep only the columns used downstream, rename them to the notebook's common schema,
# attach each country's World Bank income code (inner join on CountryCode), fix the
# column order, and add the '<Year><CountryCode>' key used for the later merges.
che_clear = (
    che[['Indicator', 'ParentLocationCode', 'ParentLocation', 'SpatialDimValueCode',
         'Location', 'Period', 'Dim1', 'FactValueNumeric']]
    .rename(columns={
        'ParentLocationCode': 'RegionCode',
        'ParentLocation': 'Region',
        'SpatialDimValueCode': 'CountryCode',
        'Location': 'Country',
        'Period': 'Year',
        'Dim1': 'Parameter',
        'FactValueNumeric': 'Value',
    })
    .merge(right=countries[['CountryCode', 'WBIncomeCode']], on='CountryCode')
    .loc[:, ['Indicator', 'RegionCode', 'Region', 'WBIncomeCode', 'CountryCode',
             'Country', 'Year', 'Parameter', 'Value']]
    .assign(Index=lambda frame: frame['Year'].astype(str) + frame['CountryCode'])
)

In [5]:
# Age-standardized prevalence of tobacco use among persons 15 years and older (%)

# Read from the relative `data` folder, the same way the country table and HALE cells do,
# instead of an absolute path that only exists on one machine.
# NOTE(review): assumes the notebook is run from the directory that contains `data/` - confirm.
file_path = Path('__file__').parent / 'data' / 'data_tobacco.csv'
tbc = pd.read_csv(file_path)

# This file already carries the income group and a 'Country' column, so only the region
# code has to come from the country table (inner join on CountryCode). The former
# 'Location' -> 'Country' rename entry was dropped: 'Location' is not among the selected
# columns, so that entry never matched anything.
tbc_clear = (
    tbc[['Indicator', 'Year', 'Country ISO 3 code', 'Country', 'WHO region',
         'World bank income group', 'Sex', 'Value Numeric']]
    .rename(columns={
        'World bank income group': 'WBIncomeCode',
        'Value Numeric': 'Value',
        'Country ISO 3 code': 'CountryCode',
        'WHO region': 'Region',
    })
    .merge(right=countries[['CountryCode', 'RegionCode']], on='CountryCode')
    .loc[:, ['Indicator', 'RegionCode', 'Region', 'WBIncomeCode', 'CountryCode',
             'Country', 'Year', 'Sex', 'Value']]
    # '<Year><CountryCode>' key used for the later merges.
    .assign(Index=lambda frame: frame['Year'].astype(str) + frame['CountryCode'])
)

In [6]:
# Alcohol data (data_alcohol.csv)

# Read from the relative `data` folder, the same way the country table and HALE cells do,
# instead of an absolute path that only exists on one machine.
# NOTE(review): assumes the notebook is run from the directory that contains `data/` - confirm.
file_path = Path('__file__').parent / 'data' / 'data_alcohol.csv'
alc = pd.read_csv(file_path)

# Keep only the columns used downstream, rename them to the notebook's common schema,
# attach each country's World Bank income code (inner join on CountryCode), fix the
# column order, and add the '<Year><CountryCode>' key used for the later merges.
alc_clear = (
    alc[['Indicator', 'ParentLocationCode', 'ParentLocation', 'SpatialDimValueCode',
         'Location', 'Period', 'Dim1', 'FactValueNumeric']]
    .rename(columns={
        'ParentLocationCode': 'RegionCode',
        'ParentLocation': 'Region',
        'SpatialDimValueCode': 'CountryCode',
        'Location': 'Country',
        'Period': 'Year',
        'Dim1': 'Sex',
        'FactValueNumeric': 'Value',
    })
    .merge(right=countries[['CountryCode', 'WBIncomeCode']], on='CountryCode')
    .loc[:, ['Indicator', 'RegionCode', 'Region', 'WBIncomeCode', 'CountryCode',
             'Country', 'Year', 'Sex', 'Value']]
    .assign(Index=lambda frame: frame['Year'].astype(str) + frame['CountryCode'])
)

In [7]:
# Household health-spending data (data_expend10.csv)

# Read from the relative `data` folder, the same way the country table and HALE cells do,
# instead of an absolute path that only exists on one machine.
# NOTE(review): assumes the notebook is run from the directory that contains `data/` - confirm.
file_path = Path('__file__').parent / 'data' / 'data_expend10.csv'
expend = pd.read_csv(file_path)

# Keep only the columns used downstream, rename them to the notebook's common schema,
# attach each country's World Bank income code (inner join on CountryCode), fix the
# column order, and add the '<Year><CountryCode>' key used for the later merges.
expend_clear = (
    expend[['Indicator', 'ParentLocationCode', 'ParentLocation', 'SpatialDimValueCode',
            'Location', 'Period', 'Dim1', 'FactValueNumeric']]
    .rename(columns={
        'ParentLocationCode': 'RegionCode',
        'ParentLocation': 'Region',
        'SpatialDimValueCode': 'CountryCode',
        'Location': 'Country',
        'Period': 'Year',
        'Dim1': 'Parameter',
        'FactValueNumeric': 'Value',
    })
    .merge(right=countries[['CountryCode', 'WBIncomeCode']], on='CountryCode')
    .loc[:, ['Indicator', 'RegionCode', 'Region', 'WBIncomeCode', 'CountryCode',
             'Country', 'Year', 'Parameter', 'Value']]
    .assign(Index=lambda frame: frame['Year'].astype(str) + frame['CountryCode'])
)

In [8]:
# Build the country x year scaffold that the indicator columns are merged onto:
# one copy of the country table for every year from 2000 through 2021.
leading_columns = ['CountryCode', 'Country', 'Region', 'RegionCode', 'WBIncomeCode', 'Year']

# `assign` returns a new frame, so `countries` itself never receives a `Year` column
# (the previous `tmp = countries` was an alias and modified the shared table in place).
# All yearly copies are concatenated in one call instead of growing `dataset` in a loop.
dataset = pd.concat([countries.assign(Year=year) for year in range(2000, 2022)])

# Keep the earlier column layout: the six key columns first, then any other `countries` columns.
dataset = dataset.reindex(
    columns=leading_columns + [col for col in dataset.columns if col not in leading_columns]
)

# '<Year><CountryCode>' key matched against the `Index` column of the indicator tables.
dataset['Index'] = dataset['Year'].astype(str) + dataset['CountryCode']

In [9]:
# Add the two HALE columns, using only the 'Both sexes' rows of each indicator.
# Left merges keep every row of `dataset`, whether or not a value is available.
sex = hale_clear['Sex'].eq('Both sexes')

indicator = hale_clear['Indicator'].eq('Healthy life expectancy (HALE) at birth (years)')
hale_at_birth = hale_clear.loc[sex & indicator, ['Index', 'Value']]
dataset = dataset.merge(right=hale_at_birth, on='Index', how='left')
dataset = dataset.rename(columns={'Value': 'HALE at birth (years)'})

indicator = hale_clear['Indicator'].eq('Healthy life expectancy (HALE) at age 60 (years)')
hale_at_60 = hale_clear.loc[sex & indicator, ['Index', 'Value']]
dataset = dataset.merge(right=hale_at_60, on='Index', how='left')
dataset = dataset.rename(columns={'Value': 'HALE at age 60 (years)'})

In [None]:
# CHE as percentage of GDP (%)
# NOTE(review): unlike the other indicator merges, no row filter is applied here; if
# `che_clear` holds more than one row per `Index`, the left merge repeats `dataset` rows - confirm.
che_values = che_clear[['Index', 'Value']]
dataset = dataset.merge(right=che_values, on='Index', how='left')
dataset = dataset.rename(columns={'Value': 'CHE as percentage of GDP (%)'})

In [None]:
# Age-standardized prevalence of current tobacco smoking among persons aged 15 years and older:
# the percentage of the population aged 15 years and over who currently use any tobacco product.

# Only the 'Both sexes' rows are merged; the left merge keeps every row of `dataset`.
sex = tbc_clear['Sex'].eq('Both sexes')
tobacco_values = tbc_clear.loc[sex, ['Index', 'Value']]
dataset = dataset.merge(right=tobacco_values, on='Index', how='left')
dataset = dataset.rename(columns={'Value': 'Share of smokers (%)'})

In [None]:
# Alcohol, total per capita (15+) consumption (in litres of pure alcohol) (SDG Indicator 3.5.2), three-year average

# Only the 'Both sexes' rows are merged; the left merge keeps every row of `dataset`.
sex = alc_clear['Sex'].eq('Both sexes')
alcohol_values = alc_clear.loc[sex, ['Index', 'Value']]
dataset = dataset.merge(right=alcohol_values, on='Index', how='left')
dataset = dataset.rename(columns={'Value': 'Alcohol, total per capita (litres)'})

In [None]:
# Population with household spending on health greater than 10% of total household budget (SDG 3.8.2, reported data) (%)

# Only rows whose Parameter is 'Adults with older persons' are merged;
# the left merge keeps every row of `dataset`.
parameter = expend_clear['Parameter'].eq('Adults with older persons')
spending_values = expend_clear.loc[parameter, ['Index', 'Value']]
dataset = dataset.merge(right=spending_values, on='Index', how='left')
dataset = dataset.rename(columns={'Value': 'Spending more 10% (%)'})

In [16]:
# Write the combined table to data/health_data.csv. 'Path('__file__')' wraps a literal
# string, so the target is resolved relative to the current working directory.
# The frame's row index is written as the first CSV column (to_csv default).
dataset.to_csv(Path('__file__').parent.joinpath('data', 'health_data.csv'))

----------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------