In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under /kaggle/input, one per line
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
pip install pycountry-convert

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams["figure.figsize"] = [10,5]

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)

import pycountry
import pycountry_convert as pc

# Alpha-2 codes that the pycountry_convert lookup rejects, mapped to the
# continent code to use instead.
_CONTINENT_FALLBACK = {'TL': 'AS'}


def convert_continent(x):
    """Return the continent code for an alpha-2 country code.

    Parameters
    ----------
    x : str
        Two-letter country code, as produced by
        ``pc.country_name_to_country_alpha2``.

    Returns
    -------
    str or None
        The continent code reported by ``pycountry_convert``. When the
        library does not know the code, the value from
        ``_CONTINENT_FALLBACK`` is returned, or ``None`` if there is no
        fallback either.
    """
    try:
        return pc.country_alpha2_to_continent_code(x)
    except KeyError:
        # Only the "unknown code" error is handled here; anything else
        # (for example a missing import) should surface instead of
        # silently turning into a missing continent.
        return _CONTINENT_FALLBACK.get(x)

# Data explore

## Maternal mortality ratio

In [None]:
maternalMortalityRatio = pd.read_csv('../input/who-worldhealth-statistics-2020-complete/maternalMortalityRatio.csv', parse_dates =['Period'])
maternalMortalityRatio.info()

In [None]:
maternalMortalityRatio.sample(5)

In [None]:
# Period of data
maternalMortalityRatio['Period'].value_counts().sort_index()

In [None]:
# Take the same period as maternal death

In [None]:
# No. of countries
len(maternalMortalityRatio['Location'].unique())

Interesting: there is one more unique location than there are rows in any single year, so some country is probably listed under two different names.

In [None]:
maternalMortalityRatio['Location'].unique()

A regex could be used to look for near-duplicate country names, but a quick visual scan already shows that Sudan appears twice ("Sudan" and "Sudan (until 2011)").

In [None]:
# Fold "Sudan (until 2011)" into "Sudan", then recount the distinct locations
maternalMortalityRatio['Location'] = maternalMortalityRatio['Location'].replace({"Sudan (until 2011)": 'Sudan'})
maternalMortalityRatio['Location'].nunique(dropna=False)

* The `First Tooltip` column holds the value, but it also contains a bracketed range (what the range measures is not documented here). We need to keep only the part before the `[`.
* Also rename the column to `ratio`.

In [None]:
maternalMortalityRatio = maternalMortalityRatio.rename(columns={'First Tooltip': 'ratio'})

In [None]:
# Keep only the point estimate: everything from the first "[" onwards is the
# bracketed range and is dropped. The vectorised .str accessor replaces the
# row-wise apply, and casting to str first keeps the cell safe to re-run once
# the column has already been converted to integers.
maternalMortalityRatio['ratio'] = (
    maternalMortalityRatio['ratio']
    .astype(str)
    .str.split("[").str[0]
    .str.strip()
    .astype(int)
)

### By country

In [None]:
# Colour the five countries with the highest recorded ratio; draw the rest in grey.
ratio_sorted = maternalMortalityRatio.sort_values(['ratio'], ascending = False)
# unique() keeps first-appearance order, so locations are ranked by their highest ratio
locations = ratio_sorted['Location'].unique()
top_locations = list(locations[:5])
colorlist = ['red','orange','green','blue','purple']
palette = dict(zip(top_locations, colorlist))
print(palette)
palette.update({c: 'lightgrey' for c in locations[5:]})

sns.set(style="darkgrid")

ax = sns.lineplot(x='Period', y='ratio', hue='Location', palette = palette, data = ratio_sorted)
# Build the legend from the plotted artists instead of a hard-coded list of
# country names, so labels and colours always match the data being shown.
handles, labels = ax.get_legend_handles_labels()
top_entries = [(handle, label) for handle, label in zip(handles, labels) if label in top_locations]
ax.legend([handle for handle, _ in top_entries], [label for _, label in top_entries])
plt.xticks(rotation=45)
plt.ylabel('Maternal mortality ratio (per 100,000 live births)')
plt.show()

### By continent

In [None]:
# Map the dataset's country names onto the spellings pycountry recognises
location_aliases = {
    "Bolivia (Plurinational State of)": 'Bolivia, Plurinational State of',
    "Côte d’Ivoire": 'Ivory Coast',
    "Iran (Islamic Republic of)": 'Iran, Islamic Republic of',
    "Micronesia (Federated States of)": 'Micronesia',
    "Republic of Korea": 'Korea, Republic of',
    "The former Yugoslav Republic of Macedonia": 'North Macedonia',
    "Venezuela (Bolivarian Republic of)": 'Venezuela, Bolivarian Republic of',
}
maternalMortalityRatio['Location'] = maternalMortalityRatio['Location'].replace(location_aliases)

In [None]:
# Derive the alpha-2 country code from the name, then the continent code from it
maternalMortalityRatio['country_code'] = maternalMortalityRatio['Location'].map(pc.country_name_to_country_alpha2)
maternalMortalityRatio['continent'] = maternalMortalityRatio['country_code'].map(convert_continent)

In [None]:
# Continent-level maternal mortality ratio by year.
# Each row is a per-country ratio, so the countries are averaged rather than
# summed: a sum grows with the number of countries in a continent and is no
# longer a value "per 100,000 live births". Only the numeric column is
# aggregated; the text columns have no meaningful aggregate.
continent_fplot = maternalMortalityRatio.groupby(['Period','continent'])[['ratio']].mean()
plot_df = continent_fplot.unstack('continent').loc[:,'ratio']
plot_df.index = pd.PeriodIndex(plot_df.index.tolist(), freq="A")
ax = plot_df.plot()
ax.set_xlabel('Year')
ax.set_ylabel("Maternal mortality per 100,000 live births")

Remarks:
* Countries in Africa have had the highest maternal mortality since 2000

# Infant mortality rate
Infant mortality rate is the probability of a child born in a specific year or period dying before reaching the age of one, if subject to age-specific mortality rates of that period. Infant mortality rate is strictly speaking not a rate (i.e. the number of deaths divided by the number of population at risk during a certain period of time) but a probability of death derived from a life table and expressed as rate per 1000 live births. 

https://www.who.int/data/gho/data/indicators/indicator-details/GHO/infant-mortality-rate-(probability-of-dying-between-birth-and-age-1-per-1000-live-births)

In [None]:
infantMortalityRate = pd.read_csv('../input/who-worldhealth-statistics-2020-complete/infantMortalityRate.csv', parse_dates=['Period'])
infantMortalityRate.info()

In [None]:
print(infantMortalityRate['Period'].value_counts().sort_index())

In [None]:
# Map the dataset's country names onto the spellings pycountry recognises
location_aliases = {
    "Sudan (until 2011)": 'Sudan',
    "Bolivia (Plurinational State of)": 'Bolivia, Plurinational State of',
    "Côte d’Ivoire": 'Ivory Coast',
    "Iran (Islamic Republic of)": 'Iran, Islamic Republic of',
    "Micronesia (Federated States of)": 'Micronesia',
    "Republic of Korea": 'Korea, Republic of',
    "The former Yugoslav Republic of Macedonia": 'North Macedonia',
    "Venezuela (Bolivarian Republic of)": 'Venezuela, Bolivarian Republic of',
    "Germany, Federal Republic (former)": 'Germany',
    "India (until 1975)": 'India',
    "Kiribati (until 1984)": 'Kiribati',
    "South Viet Nam (former)": 'Viet Nam',
}
infantMortalityRate['Location'] = infantMortalityRate['Location'].replace(location_aliases)

In [None]:
# Derive the alpha-2 country code from the name, then the continent code from it
infantMortalityRate['country_code'] = infantMortalityRate['Location'].map(pc.country_name_to_country_alpha2)
infantMortalityRate['continent'] = infantMortalityRate['country_code'].map(convert_continent)

In [None]:
infantMortalityRate = infantMortalityRate.rename(columns={'Dim1':'Sex', 'First Tooltip':'mortality_rate'})
infantMortalityRate.head()

In [None]:
# Keep only the point estimate: everything from the first "[" onwards is the
# bracketed range and is dropped. The vectorised .str accessor replaces the
# row-wise apply, and casting to str first keeps the cell safe to re-run once
# the column has already been converted to floats.
infantMortalityRate['mortality_rate'] = (
    infantMortalityRate['mortality_rate']
    .astype(str)
    .str.split("[").str[0]
    .str.strip()
    .astype(float)
)

In [None]:
infantMortalityRate.sample(10)

In [None]:
# One frame per value of the Sex column
infantMortalityRate_bothsex = infantMortalityRate.query("Sex == 'Both sexes'")
infantMortalityRate_male = infantMortalityRate.query("Sex == 'Male'")
infantMortalityRate_female = infantMortalityRate.query("Sex == 'Female'")

In [None]:
# One panel per sex: continent-level mean infant mortality rate by year.
panels = [
    ('Both sexes', infantMortalityRate_bothsex),
    ('Male', infantMortalityRate_male),
    ('Female', infantMortalityRate_female),
]
# Size the figure once, when it is created, instead of on every plot call
fig, axes = plt.subplots(nrows=len(panels), ncols=1, figsize=(10, 25))

for panel_ax, (panel_title, sex_df) in zip(axes, panels):
    # Aggregate only the numeric column: averaging the whole frame raises on
    # the text columns in pandas >= 2.0.
    continent_fplot = sex_df.groupby(['Period','continent'])[['mortality_rate']].mean()
    plot_df = continent_fplot.unstack('continent').loc[:,'mortality_rate']
    plot_df.index = pd.PeriodIndex(plot_df.index.tolist(), freq="A")
    plot_df.plot(ax=panel_ax, ylim=(0,180), title=panel_title)
    panel_ax.set_xlabel('Year')
    panel_ax.set_ylabel("probability of dying between birth and age 1 per 1000 live births")

Comments:
<br/>
The infant mortality rate has decreased over the years for both sexes (combined and individually), with countries in Africa having the highest rates.

# Neonatal mortality rate

In [None]:
neonatalMortalityRate = pd.read_csv('../input/who-worldhealth-statistics-2020-complete/neonatalMortalityRate.csv', parse_dates=['Period']) 
neonatalMortalityRate.info()

In [None]:
# Map the dataset's country names onto the spellings pycountry recognises
location_aliases = {
    "Sudan (until 2011)": 'Sudan',
    "Bolivia (Plurinational State of)": 'Bolivia, Plurinational State of',
    "Côte d’Ivoire": 'Ivory Coast',
    "Iran (Islamic Republic of)": 'Iran, Islamic Republic of',
    "Micronesia (Federated States of)": 'Micronesia',
    "Republic of Korea": 'Korea, Republic of',
    "The former Yugoslav Republic of Macedonia": 'North Macedonia',
    "Venezuela (Bolivarian Republic of)": 'Venezuela, Bolivarian Republic of',
    "Germany, Federal Republic (former)": 'Germany',
    "India (until 1975)": 'India',
    "Kiribati (until 1984)": 'Kiribati',
    "South Viet Nam (former)": 'Viet Nam',
    'Yemen Arab Republic (until 1990)': 'Yemen',
}
neonatalMortalityRate['Location'] = neonatalMortalityRate['Location'].replace(location_aliases)

In [None]:
# Derive the alpha-2 country code from the name, then the continent code from it
neonatalMortalityRate['country_code'] = neonatalMortalityRate['Location'].map(pc.country_name_to_country_alpha2)
neonatalMortalityRate['continent'] = neonatalMortalityRate['country_code'].map(convert_continent)

In [None]:
neonatalMortalityRate = neonatalMortalityRate.rename(columns={'Dim1':'Sex', 'First Tooltip':'mortality_rate'})
neonatalMortalityRate.head()

In [None]:
# Keep only the point estimate: everything from the first "[" onwards is the
# bracketed range and is dropped. The vectorised .str accessor replaces the
# row-wise apply, and casting to str first keeps the cell safe to re-run once
# the column has already been converted to floats.
neonatalMortalityRate['mortality_rate'] = (
    neonatalMortalityRate['mortality_rate']
    .astype(str)
    .str.split("[").str[0]
    .str.strip()
    .astype(float)
)

In [None]:
neonatalMortalityRate['Sex'].value_counts()

In [None]:
neonatalMortalityRate.sample(10)

In [None]:
# Continent-level mean neonatal mortality rate by year.
# The aggregate has to be recomputed from neonatalMortalityRate in this cell;
# with the line commented out, continent_fplot still held the aggregate left
# over from the previous plotting cell and that was drawn under a neonatal label.
# Only the numeric column is averaged (the text columns raise in pandas >= 2.0).
continent_fplot = neonatalMortalityRate.groupby(['Period','continent'])[['mortality_rate']].mean()
plot_df = continent_fplot.unstack('continent').loc[:,'mortality_rate']
plot_df.index = pd.PeriodIndex(plot_df.index.tolist(), freq="A")
ax = plot_df.plot()
ax.set_xlabel('Year')
ax.set_ylabel("Neonatal mortality rate (per 1000 live births)")

# Under 5 mortality rate

In [None]:
under5MortalityRate = pd.read_csv('../input/who-worldhealth-statistics-2020-complete/under5MortalityRate.csv', parse_dates=['Period']) 
under5MortalityRate.info()

In [None]:
# Map the dataset's country names onto the spellings pycountry recognises
location_aliases = {
    "Sudan (until 2011)": 'Sudan',
    "Bolivia (Plurinational State of)": 'Bolivia, Plurinational State of',
    "Côte d’Ivoire": 'Ivory Coast',
    "Iran (Islamic Republic of)": 'Iran, Islamic Republic of',
    "Micronesia (Federated States of)": 'Micronesia',
    "Republic of Korea": 'Korea, Republic of',
    "The former Yugoslav Republic of Macedonia": 'North Macedonia',
    "Venezuela (Bolivarian Republic of)": 'Venezuela, Bolivarian Republic of',
    "Germany, Federal Republic (former)": 'Germany',
    "India (until 1975)": 'India',
    "Kiribati (until 1984)": 'Kiribati',
    "South Viet Nam (former)": 'Viet Nam',
    'Yemen Arab Republic (until 1990)': 'Yemen',
}
under5MortalityRate['Location'] = under5MortalityRate['Location'].replace(location_aliases)

# Derive the alpha-2 country code from the name, then the continent code from it
under5MortalityRate['country_code'] = under5MortalityRate['Location'].map(pc.country_name_to_country_alpha2)
under5MortalityRate['continent'] = under5MortalityRate['country_code'].map(convert_continent)

In [None]:
under5MortalityRate = under5MortalityRate.rename(columns={'Dim1':'Sex', 'First Tooltip':'mortality_rate'})
under5MortalityRate.head()

In [None]:
# Keep only the point estimate: everything from the first "[" onwards is the
# bracketed range and is dropped. The vectorised .str accessor replaces the
# row-wise apply, and casting to str first keeps the cell safe to re-run once
# the column has already been converted to floats.
under5MortalityRate['mortality_rate'] = (
    under5MortalityRate['mortality_rate']
    .astype(str)
    .str.split("[").str[0]
    .str.strip()
    .astype(float)
)

In [None]:
under5MortalityRate.sample(10)

In [None]:
# One frame per value of the Sex column
under5MortalityRate_bothsex = under5MortalityRate.query("Sex == 'Both sexes'")
under5MortalityRate_male = under5MortalityRate.query("Sex == 'Male'")
under5MortalityRate_female = under5MortalityRate.query("Sex == 'Female'")

In [None]:
# One panel per sex: continent-level mean under-5 mortality rate by year.
panels = [
    ('Both sexes', under5MortalityRate_bothsex),
    ('Male', under5MortalityRate_male),
    ('Female', under5MortalityRate_female),
]
# Size the figure once, when it is created, instead of on every plot call
fig, axes = plt.subplots(nrows=len(panels), ncols=1, figsize=(10, 25))

for panel_ax, (panel_title, sex_df) in zip(axes, panels):
    # Aggregate only the numeric column: averaging the whole frame raises on
    # the text columns in pandas >= 2.0.
    continent_fplot = sex_df.groupby(['Period','continent'])[['mortality_rate']].mean()
    plot_df = continent_fplot.unstack('continent').loc[:,'mortality_rate']
    plot_df.index = pd.PeriodIndex(plot_df.index.tolist(), freq="A")
    plot_df.plot(ax=panel_ax, ylim=(0,300), title=panel_title)
    panel_ax.set_xlabel('Year')
    # The original label started with an unmatched "(" that showed up on the axis
    panel_ax.set_ylabel("probability of dying by age 5 per 1000 live births")

# Medical conditions

## Births Attended by Skilled Personnel

In [None]:
# Load the table, parse Period as dates and give the value column a descriptive name
birthAttendedBySkilledPersonal = (
    pd.read_csv(
        '../input/who-worldhealth-statistics-2020-complete/birthAttendedBySkilledPersonal.csv',
        parse_dates=['Period'],
    )
    .rename(columns={'First Tooltip': 'birth_attended_rate'})
)
birthAttendedBySkilledPersonal.info()

In [None]:
birthAttendedBySkilledPersonal.sample(10)

In [None]:
birthAttendedBySkilledPersonal['Period'].value_counts().sort_index()

In [None]:
# Map the dataset's country names onto the spellings pycountry recognises
location_aliases = {
    "Sudan (until 2011)": 'Sudan',
    "Bolivia (Plurinational State of)": 'Bolivia, Plurinational State of',
    "Côte d’Ivoire": 'Ivory Coast',
    "Iran (Islamic Republic of)": 'Iran, Islamic Republic of',
    "Micronesia (Federated States of)": 'Micronesia',
    "Republic of Korea": 'Korea, Republic of',
    "The former Yugoslav Republic of Macedonia": 'North Macedonia',
    "Venezuela (Bolivarian Republic of)": 'Venezuela, Bolivarian Republic of',
    "Germany, Federal Republic (former)": 'Germany',
    "India (until 1975)": 'India',
    "Kiribati (until 1984)": 'Kiribati',
    "South Viet Nam (former)": 'Viet Nam',
    'Yemen Arab Republic (until 1990)': 'Yemen',
}
birthAttendedBySkilledPersonal['Location'] = birthAttendedBySkilledPersonal['Location'].replace(location_aliases)

# Keep only rows whose location also occurs in the maternal mortality table,
# printing the distinct-location count before and after and the number of rows kept
print(birthAttendedBySkilledPersonal['Location'].nunique(dropna=False))
NID = birthAttendedBySkilledPersonal['Location'].isin(maternalMortalityRatio['Location'])
print(NID.sum())
birthAttendedBySkilledPersonal = birthAttendedBySkilledPersonal[NID]
print(birthAttendedBySkilledPersonal['Location'].nunique(dropna=False))

In [None]:
merge_df = pd.merge(maternalMortalityRatio, birthAttendedBySkilledPersonal, how='left', on=['Period','Location'])
merge_df.head()

In [None]:
ax = sns.relplot(x='ratio', y='birth_attended_rate',data = merge_df, hue='continent')
ax.set(xlabel='Maternal mortality per 100,000 live births', ylabel='Births attended by skilled health personnel (%)')
plt.show()

In [None]:
merge_df = pd.merge(infantMortalityRate, birthAttendedBySkilledPersonal, how='left', on=['Period','Location'])
merge_df = merge_df[merge_df['Sex']=='Both sexes']
merge_df.head()

In [None]:
# relplot creates its own figure and returns a grid object, so no extra
# plt.figure() call is needed; the stray call only produced an empty figure.
grid = sns.relplot(x='mortality_rate', y='birth_attended_rate',data = merge_df, hue='continent')
grid.set(xlabel='Probability of dying between birth and age 1 per 1000 live births', ylabel='Births attended by skilled health personnel (%)')
plt.show()

In [None]:
# Join neonatal mortality with skilled birth attendance on year and country.
merge_df = pd.merge(neonatalMortalityRate, birthAttendedBySkilledPersonal, how='left', on=['Period','Location'])
# Keep the combined-sex rows only, as the other mortality/resource merges do,
# so a country-year is not plotted once per sex.
merge_df = merge_df[merge_df['Sex']=='Both sexes']
merge_df.head()

In [None]:
# relplot creates its own figure and returns a grid object; calling
# plt.figure(figsize=...) afterwards does not resize it and only produced an
# extra empty figure. (To change the size, pass height/aspect to relplot.)
grid = sns.relplot(x='mortality_rate', y='birth_attended_rate',data = merge_df, hue='continent')
grid.set(xlabel='Neonatal mortality rate (per 1000 live births)', ylabel='Births attended by skilled health personnel (%)')
plt.show()

## Medical Doctors

In [None]:
# Load the table, parse Period as dates and give the value column a descriptive name
medicalDoctors = (
    pd.read_csv(
        '../input/who-worldhealth-statistics-2020-complete/medicalDoctors.csv',
        parse_dates=['Period'],
    )
    .rename(columns={'First Tooltip': 'doctor_avail'})
)
medicalDoctors.info()

In [None]:
medicalDoctors['Period'].value_counts().sort_index()

In [None]:
# Map the dataset's country names onto the spellings pycountry recognises
location_aliases = {
    "Sudan (until 2011)": 'Sudan',
    "Bolivia (Plurinational State of)": 'Bolivia, Plurinational State of',
    "Côte d’Ivoire": 'Ivory Coast',
    "Iran (Islamic Republic of)": 'Iran, Islamic Republic of',
    "Micronesia (Federated States of)": 'Micronesia',
    "Republic of Korea": 'Korea, Republic of',
    "The former Yugoslav Republic of Macedonia": 'North Macedonia',
    "Venezuela (Bolivarian Republic of)": 'Venezuela, Bolivarian Republic of',
    "Germany, Federal Republic (former)": 'Germany',
    "India (until 1975)": 'India',
    "Kiribati (until 1984)": 'Kiribati',
    "South Viet Nam (former)": 'Viet Nam',
    'Yemen Arab Republic (until 1990)': 'Yemen',
}
medicalDoctors['Location'] = medicalDoctors['Location'].replace(location_aliases)

# Keep only rows whose location also occurs in the maternal mortality table,
# printing the distinct-location count before and after and the number of rows kept
print(medicalDoctors['Location'].nunique(dropna=False))
NID = medicalDoctors['Location'].isin(maternalMortalityRatio['Location'])
print(NID.sum())
medicalDoctors = medicalDoctors[NID]
print(medicalDoctors['Location'].nunique(dropna=False))

In [None]:
merge_df = pd.merge(maternalMortalityRatio, medicalDoctors, how='left', on=['Period','Location'])
merge_df.head()

In [None]:
ax = sns.relplot(x='ratio', y='doctor_avail',data = merge_df, hue='continent')
ax.set(xlabel='Maternal mortality per 100,000 live births', ylabel='Medical doctors (per 10,000)')
plt.show()

In [None]:
merge_df = pd.merge(infantMortalityRate, medicalDoctors, how='left', on=['Period','Location'])
merge_df = merge_df[merge_df['Sex']=='Both sexes']
merge_df.head()

In [None]:
ax = sns.relplot(x='mortality_rate', y='doctor_avail',data = merge_df, hue='continent')
ax.set(xlabel='Probability of dying between birth and age 1 per 1000 live births', ylabel='Medical doctors (per 10,000)')
plt.show()

In [None]:
merge_df = pd.merge(neonatalMortalityRate, medicalDoctors, how='left', on=['Period','Location'])
merge_df = merge_df[merge_df['Sex']=='Both sexes']
merge_df.head()

In [None]:
ax = sns.relplot(x='mortality_rate', y='doctor_avail',data = merge_df, hue='continent')
ax.set(xlabel='Neonatal mortality rate (per 1000 live births)', ylabel='Medical doctors (per 10,000)')
plt.show()

## Nursing and Midwife

In [None]:
# Load the table, parse Period as dates and give the value column a descriptive name
nursingAndMidwife = (
    pd.read_csv(
        '../input/who-worldhealth-statistics-2020-complete/nursingAndMidwife.csv',
        parse_dates=['Period'],
    )
    .rename(columns={'First Tooltip': 'midwife_avail'})
)
nursingAndMidwife.info()

In [None]:
# Belize rows with a value above 50 are treated as mis-scaled and divided by 100
belize_outliers = (nursingAndMidwife['Location'] == 'Belize') & (nursingAndMidwife['midwife_avail'] > 50)
nursingAndMidwife.loc[belize_outliers, 'midwife_avail'] = nursingAndMidwife.loc[belize_outliers, 'midwife_avail'] / 100

In [None]:
nursingAndMidwife['Period'].value_counts().sort_index()

In [None]:
# Map the dataset's country names onto the spellings pycountry recognises
location_aliases = {
    "Sudan (until 2011)": 'Sudan',
    "Bolivia (Plurinational State of)": 'Bolivia, Plurinational State of',
    "Côte d’Ivoire": 'Ivory Coast',
    "Iran (Islamic Republic of)": 'Iran, Islamic Republic of',
    "Micronesia (Federated States of)": 'Micronesia',
    "Republic of Korea": 'Korea, Republic of',
    "The former Yugoslav Republic of Macedonia": 'North Macedonia',
    "Venezuela (Bolivarian Republic of)": 'Venezuela, Bolivarian Republic of',
    "Germany, Federal Republic (former)": 'Germany',
    "India (until 1975)": 'India',
    "Kiribati (until 1984)": 'Kiribati',
    "South Viet Nam (former)": 'Viet Nam',
    'Yemen Arab Republic (until 1990)': 'Yemen',
}
nursingAndMidwife['Location'] = nursingAndMidwife['Location'].replace(location_aliases)

# Keep only rows whose location also occurs in the maternal mortality table,
# printing the distinct-location count before and after and the number of rows kept
print(nursingAndMidwife['Location'].nunique(dropna=False))
NID = nursingAndMidwife['Location'].isin(maternalMortalityRatio['Location'])
print(NID.sum())
nursingAndMidwife = nursingAndMidwife[NID]
print(nursingAndMidwife['Location'].nunique(dropna=False))

In [None]:
merge_df = pd.merge(maternalMortalityRatio, nursingAndMidwife, how='left', on=['Period','Location'])
merge_df.head()

In [None]:
ax = sns.relplot(x='ratio', y='midwife_avail',data = merge_df, hue='continent')
ax.set(xlabel='Maternal mortality per 100,000 live births', ylabel='Nursing and midwifery personnel (per 10,000)')
plt.show()

In [None]:
merge_df = pd.merge(infantMortalityRate, nursingAndMidwife, how='left', on=['Period','Location'])
merge_df = merge_df[merge_df['Sex'] == 'Both sexes']
merge_df.head()

In [None]:
ax = sns.relplot(x='mortality_rate', y='midwife_avail',data = merge_df, hue='continent')
ax.set(xlabel='Probability of dying between birth and age 1 per 1000 live births', ylabel='Nursing and midwifery personnel (per 10,000)')
plt.show()

In [None]:
# Join neonatal mortality with nursing/midwifery availability on year and country.
merge_df = pd.merge(neonatalMortalityRate, nursingAndMidwife, how='left', on=['Period','Location'])
# Keep the combined-sex rows only, as the other mortality/resource merges do,
# so a country-year is not plotted once per sex.
merge_df = merge_df[merge_df['Sex'] == 'Both sexes']
merge_df.head()

In [None]:
ax = sns.relplot(x='mortality_rate', y='midwife_avail',data = merge_df, hue='continent')
ax.set(xlabel='Neonatal mortality rate (per 1000 live births)', ylabel='Nursing and midwifery personnel (per 10,000)')
plt.show()

## Pharmacists

In [None]:
# Load the table, parse Period as dates and give the value column a descriptive name
pharmacists = (
    pd.read_csv(
        '../input/who-worldhealth-statistics-2020-complete/pharmacists.csv',
        parse_dates=['Period'],
    )
    .rename(columns={'First Tooltip': 'pharmacist_avail'})
)
pharmacists.info()

In [None]:
pharmacists['Period'].value_counts().sort_index()

In [None]:
# Map the dataset's country names onto the spellings pycountry recognises
location_aliases = {
    "Sudan (until 2011)": 'Sudan',
    "Bolivia (Plurinational State of)": 'Bolivia, Plurinational State of',
    "Côte d’Ivoire": 'Ivory Coast',
    "Iran (Islamic Republic of)": 'Iran, Islamic Republic of',
    "Micronesia (Federated States of)": 'Micronesia',
    "Republic of Korea": 'Korea, Republic of',
    "The former Yugoslav Republic of Macedonia": 'North Macedonia',
    "Venezuela (Bolivarian Republic of)": 'Venezuela, Bolivarian Republic of',
    "Germany, Federal Republic (former)": 'Germany',
    "India (until 1975)": 'India',
    "Kiribati (until 1984)": 'Kiribati',
    "South Viet Nam (former)": 'Viet Nam',
    'Yemen Arab Republic (until 1990)': 'Yemen',
}
pharmacists['Location'] = pharmacists['Location'].replace(location_aliases)

# Keep only rows whose location also occurs in the maternal mortality table,
# printing the distinct-location count before and after and the number of rows kept
print(pharmacists['Location'].nunique(dropna=False))
NID = pharmacists['Location'].isin(maternalMortalityRatio['Location'])
print(NID.sum())
pharmacists = pharmacists[NID]
print(pharmacists['Location'].nunique(dropna=False))

In [None]:
merge_df = pd.merge(maternalMortalityRatio, pharmacists, how='left', on=['Period','Location'])
merge_df.head()

In [None]:
ax = sns.relplot(x='ratio', y='pharmacist_avail',data = merge_df, hue='continent')
ax.set(xlabel='Maternal mortality per 100,000 live births', ylabel='Pharmacists  (per 10,000)')
plt.show()

In [None]:
merge_df = pd.merge(infantMortalityRate, pharmacists, how='left', on=['Period','Location'])
merge_df = merge_df[merge_df['Sex']=='Both sexes']
merge_df.head()

In [None]:
ax = sns.relplot(x='mortality_rate', y='pharmacist_avail',data = merge_df, hue='continent')
ax.set(xlabel='Infant mortality rate (probability of dying between birth and age 1 per 1000 live births)', ylabel='Pharmacists  (per 10,000)')
plt.show()

In [None]:
merge_df = pd.merge(neonatalMortalityRate, pharmacists, how='left', on=['Period','Location'])
merge_df = merge_df[merge_df['Sex']=='Both sexes']
merge_df.head()

In [None]:
ax = sns.relplot(x='mortality_rate', y='pharmacist_avail',data = merge_df, hue='continent')
ax.set(xlabel='Neonatal mortality rate (per 1000 live births)', ylabel='Pharmacists  (per 10,000)')
plt.show()

# Hygiene: water and energy

In [None]:
# Load the table, keep the key columns plus the value, and name the value after the indicator
basicDrinkingWaterServices = (
    pd.read_csv(
        '../input/who-worldhealth-statistics-2020-complete/basicDrinkingWaterServices.csv',
        parse_dates=['Period'],
    )
    .loc[:, ['Location','Period','First Tooltip']]
    .rename(columns={'First Tooltip':'basicDrinkingWaterServices'})
)
basicDrinkingWaterServices.info()

In [None]:
# Load the table, keep the 'Total' rows and the key columns plus the value,
# and name the value after the indicator
atLeastBasicSanitizationServices = (
    pd.read_csv(
        '../input/who-worldhealth-statistics-2020-complete/atLeastBasicSanitizationServices.csv',
        parse_dates=['Period'],
    )
    .loc[lambda frame: frame['Dim1'] == 'Total', ['Location','Period','First Tooltip']]
    .rename(columns={'First Tooltip':'atLeastBasicSanitizationServices'})
)
atLeastBasicSanitizationServices.info()

In [None]:
# Load the table, keep the 'Total' rows and the key columns plus the value,
# and name the value after the indicator
safelySanitization = (
    pd.read_csv(
        '../input/who-worldhealth-statistics-2020-complete/safelySanitization.csv',
        parse_dates=['Period'],
    )
    .loc[lambda frame: frame['Dim1'] == 'Total', ['Location','Period','First Tooltip']]
    .rename(columns={'First Tooltip':'safelySanitization'})
)
safelySanitization.info()

In [None]:
# Load the hand-washing indicator, keep the rows whose Dim1 is 'Total', and reduce
# the table to the join keys plus the value column renamed after the indicator.
basicHandWashing = (
    pd.read_csv(
        '../input/who-worldhealth-statistics-2020-complete/basicHandWashing.csv',
        parse_dates=['Period'],
    )
    .loc[lambda frame: frame['Dim1'] == 'Total', ['Location', 'Period', 'First Tooltip']]
    .rename(columns={'First Tooltip': 'basicHandWashing'})
)
basicHandWashing.info()

In [None]:
# Combine all hygiene indicators into one table keyed on (Period, Location).
# The drinking-water table is the base of every left join, so each of its rows is
# kept and an indicator with no matching row shows up as NaN.
df_hygiene = (
    basicDrinkingWaterServices
    .merge(atLeastBasicSanitizationServices, how='left', on=['Period', 'Location'])
    .merge(safelySanitization, how='left', on=['Period', 'Location'])
    .merge(basicHandWashing, how='left', on=['Period', 'Location'])
)
df_hygiene.head(20)

In [None]:
# Summary statistics of the merged hygiene table; the count row shows how many
# values each indicator actually has after the left joins.
df_hygiene.describe()

# Finance-universal health coverage (UHC)

## UHC service coverage index 
Coverage of essential health services (defined as the average coverage of essential services based on tracer interventions that include reproductive, maternal, newborn and child health, infectious diseases, non-communicable diseases and service capacity and access, among the general and the most disadvantaged population). The indicator is an index reported on a unitless scale of 0 to 100, which is computed as the geometric mean of 14 tracer indicators of health service coverage. The tracer indicators are organized by four components of service coverage: (1) reproductive, maternal, newborn and child health; (2) infectious diseases; (3) non-communicable diseases; and (4) service capacity and access. See the 2019 monitoring report for the tracer indicators within each component.

In [None]:
# Load the UHC service coverage index (Period parsed as dates) and print the
# column / dtype / non-null overview of the raw table.
uhcCoverage = pd.read_csv('../input/who-worldhealth-statistics-2020-complete/uhcCoverage.csv', parse_dates=['Period'])
uhcCoverage.info()

In [None]:
# Number of rows per reporting period, ordered chronologically.
uhcCoverage['Period'].value_counts().sort_index()

In [None]:
# Some WHO location labels have to be renamed before pycountry can resolve them.
# A single mapping-driven replace does this in one pass over the column; labels
# that do not occur in this dataset are simply left untouched.
uhc_location_renames = {
    "Sudan (until 2011)": 'Sudan',
    "Bolivia (Plurinational State of)": 'Bolivia, Plurinational State of',
    "Côte d’Ivoire": 'Ivory Coast',
    "Iran (Islamic Republic of)": 'Iran, Islamic Republic of',
    "Micronesia (Federated States of)": 'Micronesia',
    "Republic of Korea": 'Korea, Republic of',
    "The former Yugoslav Republic of Macedonia": 'North Macedonia',
    "Venezuela (Bolivarian Republic of)": 'Venezuela, Bolivarian Republic of',
    "Germany, Federal Republic (former)": 'Germany',
    "India (until 1975)": 'India',
    "Kiribati (until 1984)": 'Kiribati',
    "South Viet Nam (former)": 'Viet Nam',
    'Yemen Arab Republic (until 1990)': 'Yemen',
}
uhcCoverage['Location'] = uhcCoverage['Location'].replace(uhc_location_renames)

# Keep only the locations that also appear in the maternal-mortality table.
# Printed, in order: distinct locations before filtering, number of rows kept,
# distinct locations after filtering.
print(len(uhcCoverage['Location'].unique()))
NID = uhcCoverage['Location'].isin(maternalMortalityRatio['Location'])
print(NID.sum())  # vectorised count of True values (builtin sum() walks the Series element by element)
uhcCoverage = uhcCoverage.loc[NID]
print(len(uhcCoverage['Location'].unique()))

In [None]:
# Resolve each location name to its ISO alpha-2 code, then bucket the code into a
# continent with the notebook's convert_continent helper (passed directly; it
# already takes a single code, so no lambda wrapper is needed).
uhcCoverage['country_code'] = uhcCoverage['Location'].apply(
    pc.country_name_to_country_alpha2
)
uhcCoverage['continent'] = uhcCoverage['country_code'].apply(convert_continent)

In [None]:
# Left-join the UHC index onto the maternal-mortality rows; only the join keys and
# the Indicator / 'First Tooltip' columns are taken from uhcCoverage.
merge_df = maternalMortalityRatio.merge(
    uhcCoverage[['Period', 'Location', 'Indicator', 'First Tooltip']],
    how='left',
    on=['Period', 'Location'],
)
merge_df.head()

In [None]:
# Scatter of the maternal mortality ratio against the UHC index, coloured by continent.
# relplot hands back a seaborn FacetGrid, so the labels are set through the grid.
ax = sns.relplot(
    data=merge_df,
    x='ratio',
    y='First Tooltip',
    hue='continent',
)
ax.set_axis_labels(
    'Maternal mortality ratio (per 100 000 live births)',
    'UHC index of essential service coverage',
)
plt.show()

## Monitoring Sustainable Development Goals–Indicator 3.8.2
https://www.who.int/health_financing/topics/financial-protection/monitoring-sdg/en/
![](https://www.who.int/health_financing/topics/financial-protection/sdg-target-figure-491.jpg)

In [None]:
# Load the SDG 3.8.2 (10% threshold) indicator, keep the rows whose Dim1 is 'Total',
# and name the value column after the indicator.
population10SDG = (
    pd.read_csv(
        '../input/who-worldhealth-statistics-2020-complete/population10SDG3.8.2.csv',
        parse_dates=['Period'],
    )
    .loc[lambda frame: frame['Dim1'] == 'Total']
    .rename(columns={'First Tooltip': 'population10SDG'})
)
population10SDG.head()

In [None]:
# Load the SDG 3.8.2 (25% threshold) indicator, keep the rows whose Dim1 is 'Total',
# and name the value column after the indicator.
population25SDG = (
    pd.read_csv(
        '../input/who-worldhealth-statistics-2020-complete/population25SDG3.8.2.csv',
        parse_dates=['Period'],
    )
    .loc[lambda frame: frame['Dim1'] == 'Total']
    .rename(columns={'First Tooltip': 'population25SDG'})
)
population25SDG.head()

In [None]:
# Put the 10% and 25% thresholds side by side. merge defaults to an inner join, so
# only (Location, Period) pairs present in both tables survive; afterwards just the
# keys and the two value columns are kept.
populationSDG = (
    population10SDG
    .merge(population25SDG, on=['Location', 'Period'])
    .loc[:, ['Location', 'Period', 'population10SDG', 'population25SDG']]
)
populationSDG.describe()

In [None]:
# Number of rows per reporting period, ordered chronologically.
populationSDG['Period'].value_counts().sort_index()

In [None]:
# Some WHO location labels have to be renamed before pycountry can resolve them.
# A single mapping-driven replace does this in one pass over the column; labels
# that do not occur in this dataset are simply left untouched.
sdg_location_renames = {
    "Sudan (until 2011)": 'Sudan',
    "Bolivia (Plurinational State of)": 'Bolivia, Plurinational State of',
    "Côte d’Ivoire": 'Ivory Coast',
    "Iran (Islamic Republic of)": 'Iran, Islamic Republic of',
    "Micronesia (Federated States of)": 'Micronesia',
    "Republic of Korea": 'Korea, Republic of',
    "The former Yugoslav Republic of Macedonia": 'North Macedonia',
    "Venezuela (Bolivarian Republic of)": 'Venezuela, Bolivarian Republic of',
    "Germany, Federal Republic (former)": 'Germany',
    "India (until 1975)": 'India',
    "Kiribati (until 1984)": 'Kiribati',
    "South Viet Nam (former)": 'Viet Nam',
    'Yemen Arab Republic (until 1990)': 'Yemen',
}
populationSDG['Location'] = populationSDG['Location'].replace(sdg_location_renames)

# Keep only the locations that also appear in the maternal-mortality table.
# Printed, in order: distinct locations before filtering, number of rows kept,
# distinct locations after filtering.
print(len(populationSDG['Location'].unique()))
NID = populationSDG['Location'].isin(maternalMortalityRatio['Location'])
print(NID.sum())  # vectorised count of True values (builtin sum() walks the Series element by element)
populationSDG = populationSDG.loc[NID]
print(len(populationSDG['Location'].unique()))

In [None]:
# Resolve each location name to its ISO alpha-2 code, then bucket the code into a
# continent with the notebook's convert_continent helper (passed directly; it
# already takes a single code, so no lambda wrapper is needed).
populationSDG['country_code'] = populationSDG['Location'].apply(
    pc.country_name_to_country_alpha2
)
populationSDG['continent'] = populationSDG['country_code'].apply(convert_continent)

In [None]:
# Left-join both SDG 3.8.2 expenditure shares onto the maternal-mortality rows;
# only the join keys and the two value columns are taken from populationSDG.
merge_df = maternalMortalityRatio.merge(
    populationSDG[['Period', 'Location', 'population10SDG', 'population25SDG']],
    how='left',
    on=['Period', 'Location'],
)
merge_df.head()

In [None]:
# Scatter of the maternal mortality ratio against the share of the population spending
# more than 10% on health, coloured by continent. relplot hands back a seaborn
# FacetGrid, so the labels are set through the grid.
ax = sns.relplot(
    data=merge_df,
    x='ratio',
    y='population10SDG',
    hue='continent',
)
ax.set_axis_labels(
    'Maternal mortality ratio (per 100 000 live births)',
    'Population with household expenditures on health\n greater than 10% of total household expenditure or income (SDG 3.8.2) (%)',
)
plt.show()

In [None]:
# Scatter of the maternal mortality ratio against the share of the population spending
# more than 25% on health, coloured by continent. relplot hands back a seaborn
# FacetGrid, so the labels are set through the grid.
ax = sns.relplot(
    data=merge_df,
    x='ratio',
    y='population25SDG',
    hue='continent',
)
ax.set_axis_labels(
    'Maternal mortality ratio (per 100 000 live births)',
    'Population with household expenditures on health\n greater than 25% of total household expenditure or income (SDG 3.8.2) (%)',
)
plt.show()