# **<font color="#566573">World Health Statistics 2020</font>**

1. [Drinking Water](#DW)
    * [Drinking Water Services](#dws)
    * [Basic Sanitization Services](#bss)
2. [Clean Household Energy](#CHE)
3. [Mortality from Environment Pollution](#MREP)
    * [Air Pollution Death Rate](#apdr)
    * [Mortality Rate of Unsafe WASH](#mrouw)
    * [Mortality Rate Poisoning](#mrp)
4. [Tobacco Control](#TC)
5. [Maternal Mortality](#MM)
    * [Maternal Mortality Ratio](#mmr)

In [None]:
from mpl_toolkits.basemap import Basemap
from geopy.geocoders import Nominatim
import matplotlib.patches as mpatches #create legend
from matplotlib import animation
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import itertools

In [None]:
# ---- Shared plot configuration -------------------------------------------
# Base matplotlib theme used by every figure in this notebook.
plt.style.use("fivethirtyeight")

# Legend captions: a "Year" heading followed by one caption per year bucket.
legend_lis = [
    "Year",
    "2000-2003",
    "2004-2007",
    "2008-2011",
    "2012-2015",
    "2016-2017",
]
# Same captions for data whose last bucket is labelled through 2018.
legend_lis_ = [
    "Year",
    "2000-2003",
    "2004-2007",
    "2008-2011",
    "2012-2015",
    "2016-2018",
]

# Five colours, passed as the hue palette in the plots below.
palette = [
    "#ede6b9",
    "#a7beae",
    "#9bc472",
    "#3a6b35",
    "#b85042",
]

# Global font settings (bold weight intentionally left disabled:
# plt.rcParams["font.weight"] = "bold").
plt.rcParams.update({
    "font.family": "Comic Sans MS",
    "font.size": "18",
})

# Face colour applied to figures, axes and legends.
background_color = "#EFEBDD"

#function
def merge_region(data, continents):
    """Attach region information to a WHO table and normalise its year column.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with at least the columns ``Location`` and ``Period``.
    continents : pandas.DataFrame
        Lookup table with at least the columns ``name``, ``region`` and
        ``sub-region``.

    Returns
    -------
    pandas.DataFrame
        A new frame in which ``Period`` is renamed to ``Year`` (cast to
        ``str``) and ``region`` / ``sub-region`` are added as ``Region`` /
        ``SubRegion``. The merge uses pandas' default join, so rows whose
        ``Location`` has no matching ``name`` are not kept.
    """
    # Only the join key and the two region columns are taken from the lookup.
    lookup = continents[["name", "region", "sub-region"]]

    merged = data.merge(lookup, left_on="Location", right_on="name")
    merged = merged.drop(columns=["name"])  # duplicate of "Location"
    merged = merged.rename(
        columns={
            "Period": "Year",
            "region": "Region",
            "sub-region": "SubRegion",
        }
    )

    merged["Year"] = merged["Year"].astype("str")
    return merged

def merge_year(data, col_lis, num, times, entend):
    """Average ``First Tooltip`` per region over consecutive year buckets.

    The rows are first averaged per ``Year`` / ``Region`` (and any extra
    ``col_lis`` columns) and ordered by ``Region`` then ``Year``. Within each
    region the ordered rows are then labelled with a bucket number: the first
    ``times`` rows get 0, the next ``times`` rows get 1, and so on up to
    ``num - 1``; the remaining ``entend`` rows get ``num``. Finally the values
    are averaged per ``Region`` / ``YearGroup`` (and ``col_lis``).

    Parameters
    ----------
    data : pandas.DataFrame
        Table with the columns ``Year``, ``Region``, ``First Tooltip`` and
        every column named in ``col_lis``.
    col_lis : list of str
        Extra grouping columns (e.g. ``["Dim1"]``); may be empty.
    num : int
        Number of full-size buckets; also the label of the trailing bucket.
    times : int
        Rows per full-size bucket within one region.
    entend : int
        Rows in the trailing bucket within one region.

    Returns
    -------
    pandas.DataFrame
        Columns ``Region``, ``YearGroup``, ``col_lis`` and the averaged
        ``First Tooltip``.

    Raises
    ------
    ValueError
        If any region does not have exactly ``num * times + entend`` rows
        after the per-year averaging, because the bucket labels could then
        not be aligned with the years.
    """
    # Bucket label for each consecutive row of a single region.
    year_lis = list(itertools.chain.from_iterable([i] * times for i in range(num)))
    year_lis.extend([num] * entend)

    data = data.groupby(["Year", "Region"] + col_lis)["First Tooltip"].mean().reset_index()
    data = data.sort_values(by=["Region", "Year"]).reset_index(drop=True)

    # The labels are tiled once per region, so every region must contribute
    # exactly len(year_lis) rows; otherwise labels would drift across regions.
    rows_per_region = data.groupby("Region").size()
    if (rows_per_region != len(year_lis)).any():
        raise ValueError(
            "merge_year expects {} rows per region, got {}".format(
                len(year_lis), rows_per_region.to_dict()
            )
        )

    # Use the number of regions actually present instead of a fixed count.
    data["YearGroup"] = year_lis * len(rows_per_region)
    data = data.groupby(["Region", "YearGroup"] + col_lis)["First Tooltip"].mean().reset_index()

    return data

### **<a id="DW"><font color="#566573">Drinking Water</font></a>**
**<a id="dws"><font color="#566573">Drinking Water Services</font></a>**

The dataset covers 175 locations over the period 2000 to 2017. Because there are many countries and years, I group them by region and by 4-year period, then average『population using at least basic drinking water services (%)』. The results may differ from the true regional values because each country's total population is unknown (the averages are not population-weighted), but I think we can still learn some interesting information from them.

* Europe has the highest share of the population using drinking water services in every period, and Africa has the lowest.
* Although the share differs from region to region, it increases from period to period in every region.

In [None]:
# Load the drinking-water table and the country -> region lookup table.
bdws = pd.read_csv(
    "../input/who-worldhealth-statistics-2020-complete/basicDrinkingWaterServices.csv"
)
continents = pd.read_csv(
    "../input/country-mapping-iso-continent-region/continents2.csv"
)

# Attach Region / SubRegion to every row and preview the result.
bdws_year_region = merge_region(bdws, continents)
bdws_year_region.head()

In [None]:
# Bucket the yearly rows per region: four buckets of 4 rows plus a final
# bucket of 2 rows.
bdws_year_region = merge_year(bdws_year_region, [], 4, 4, 2)

# Single-panel figure.
fig = plt.figure(figsize=(12, 6))
gs = fig.add_gridspec(1, 1)
ax = fig.add_subplot(gs[0, 0])

# Title drawn as text in data coordinates, above the plotted points.
ax.text(
    -0.25, 105, "Drinking Water Services",
    color="#255f85", fontsize=24, fontweight="bold",
)

fig.patch.set_facecolor(background_color)
ax.set_facecolor(background_color)

# One marker per region and year bucket, coloured by bucket.
sns.scatterplot(
    x="Region",
    y="First Tooltip",
    hue="YearGroup",
    s=500,
    ax=ax,
    palette=palette,
    data=bdws_year_region,
)
ax.grid(color="gray", linestyle=":", axis="x", dashes=(1, 3))

# Re-create the legend outside the axes and overwrite its entries, in order,
# with the readable captions.
leg = ax.legend(bbox_to_anchor=(1, 1), fontsize=11, facecolor=background_color)
for entry, caption in zip(leg.get_texts(), legend_lis):
    entry.set_text(caption)

ax.set_ylabel("Population using drinking-water services (%)", size=14)
ax.set_xlabel("Region", size=16)

# Remove the frame around the plot.
for s in ("top", "right", "left", "bottom"):
    ax.spines[s].set_visible(False)


**<a id="bss"><font color="#566573">Basic Sanitization Services</font></a>**

The dataset records 195 locations over the period 2000 to 2017. As above, I group them by region and then average『population using at least basic sanitation services (%)』by 4-year period, region and Dim1 (Rural/Urban/Total).

* Every region has a higher percentage of the population using basic sanitation services in urban areas than in rural areas.
* The share of the population using basic sanitation services increases from period to period.
* Europe has the highest percentage of the population using these services and Africa has the lowest.

In [None]:
# Load the basic-sanitation table, attach region columns and preview it.
albss = pd.read_csv(
    "../input/who-worldhealth-statistics-2020-complete/atLeastBasicSanitizationServices.csv"
)
albss_year_region = merge_region(albss, continents)
albss_year_region.head()

In [None]:
# Bucket the yearly rows per region and Dim1: four buckets of 12 rows plus a
# final bucket of 6 rows.
# NOTE(review): 12 and 6 presumably correspond to 4 and 2 years times the three
# Dim1 values (Rural/Urban/Total) plotted below - confirm against the data.
albss_year_region = merge_year(albss_year_region, ["Dim1"], 4, 12, 6)

# Three side-by-side panels sharing the y axis of the first one.
fig = plt.figure(figsize=(20, 6))
gs = fig.add_gridspec(1, 3)
gs.update(wspace=0.2, hspace=0.2)
ax0 = fig.add_subplot(gs[0])
ax1 = fig.add_subplot(gs[1], sharey=ax0)
ax2 = fig.add_subplot(gs[2], sharey=ax0)

fig.patch.set_facecolor(background_color)
for ax in [ax0, ax1, ax2]:
    ax.set_facecolor(background_color)

# Figure title (on the first panel) and one caption per panel, drawn as text
# in data coordinates above the points.
ax0.text(0, 112, "Basic Sanitization Services", color="#255f85", fontsize=30, fontweight="bold")
for ax, place in zip([ax0, ax1, ax2], ["Rural", "Urban", "Total"]):
    ax.text(0, 105, place, color="#424F67", fontsize=24, fontweight="bold")

for ax, place in zip([ax0, ax1, ax2], ["Rural", "Urban", "Total"]):
    ax.grid(color="gray", linestyle=":", axis="x", dashes=(1, 5))

    # Only the rows of this panel's Dim1 value.
    year_region = albss_year_region[albss_year_region["Dim1"] == place]
    sns.scatterplot(x="Region", y="First Tooltip", hue="YearGroup", s=500, palette=palette, ax=ax, data=year_region)

    ax.set_xlabel("")
    ax.set_ylabel("")
    # Per-panel legends are hidden; a single legend is added on ax2 below.
    ax.legend().set_visible(False)

    for s in ["top", "right", "left", "bottom"]:
        ax.spines[s].set_visible(False)

# One shared legend on the last panel, relabelled in order with the captions.
leg = ax2.legend(bbox_to_anchor=(1, 1.05), fontsize=13, facecolor=background_color)
for text, legend in zip(leg.get_texts(), legend_lis):
    text.set_text(legend)

ax1.set_xlabel("Region", size=20)
# This chart shows sanitation data; the label previously repeated the
# drinking-water wording from the chart above.
ax0.set_ylabel("Population using basic sanitation services (%)", size=16);

### **<a id="CHE"><font color="#566573">Clean Household Energy</font></a>**

**<font color="#566573">Clean Fuel and Tech</font>**

191 locations in the dataset with period range from 2000 to 2018.

* Europe has the highest usage of clean fuel and technology, followed by the Americas and Asia.
* Oceania's usage is only about fifty percent, and it has barely grown over the years.
* Africa has the lowest usage, with a slight increase over the years.


In [None]:
# Load the clean-fuel-and-technology table, attach region columns and preview it.
cfat = pd.read_csv(
    "../input/who-worldhealth-statistics-2020-complete/cleanFuelAndTech.csv"
)
cfat_year_region = merge_region(cfat, continents)
cfat_year_region.head()

In [None]:
# Bucket the yearly rows per region: four buckets of 4 rows plus a final
# bucket of 3 rows.
cfat_year_region = merge_year(cfat_year_region, [], 4, 4, 3)

# Single-panel figure.
fig = plt.figure(figsize=(12, 6))
gs = fig.add_gridspec(1, 1)
ax = fig.add_subplot(gs[0])
# Title drawn as text in data coordinates.
ax.text(-0.25, 100, "Clean Fuel and Tech", color="#255f85", fontsize=24, fontweight="bold")

fig.patch.set_facecolor(background_color)
ax.set_facecolor(background_color)

# One marker per region and year bucket, coloured by bucket.
sns.scatterplot(x="Region", y="First Tooltip", hue="YearGroup", s=500, palette=palette, ax=ax, data=cfat_year_region)
ax.grid(color="gray", linestyle=":", axis="x", dashes=(1, 5))

# Legend outside the axes, relabelled in order with the 2016-2018 captions.
leg = ax.legend(bbox_to_anchor=(1.15, 1), fontsize=12, facecolor=background_color)
for text, legend in zip(leg.get_texts(), legend_lis_):
    text.set_text(legend)

# The label previously started with a stray, unbalanced "(".
plt.ylabel("Population with clean fuels and technologies (%)", size=14)
plt.xlabel("Region", size=20)

# Remove the frame around the plot.
for s in ["top", "right", "left", "bottom"]:
    ax.spines[s].set_visible(False)

### **<a id="MREP"><font color="#566573">Mortality from Environment Pollution</font></a>**

**<a id="apdr"><font color="#566573">Air Pollution Death Rate</font></a>**

The dataset contains only one year (2016). It gives the death rate (per 100,000 population), by gender, for 5 different diseases caused by air pollution.

* The death rate from heart disease caused by air pollution is the highest in every region except Africa.
* In Africa, lower respiratory infections have the highest death rate among the diseases.
* Overall, males have a higher death rate regardless of the disease.

In [None]:
# Load the air-pollution death-rate table, attach region columns and preview it.
apdr = pd.read_csv(
    "../input/who-worldhealth-statistics-2020-complete/airPollutionDeathRate.csv"
)
apdr_region = merge_region(apdr, continents)
apdr_region.head()

In [None]:
# "First Tooltip" is text here; keep only the token before the first space and
# convert it to float. Assigning the whole column (rather than writing through
# `.loc[:, ...]`) guarantees a real float64 column, so the mean below does not
# depend on how pandas treats an object-dtype column.
# NOTE(review): the text after the first space is discarded - presumably an
# interval such as "[lo-hi]"; confirm against the CSV.
apdr_region["First Tooltip"] = (
    apdr_region["First Tooltip"].str.split(" ").str[0].astype(float)
)

# Average per region, disease (Dim2) and sex (Dim1) ...
apdr_region = apdr_region.groupby(["Region", "Dim2", "Dim1"])["First Tooltip"].mean().reset_index()
# ... and drop the aggregate rows so only Male/Female and single diseases remain.
apdr_region = apdr_region[(apdr_region["Dim1"] != "Both sexes") & (apdr_region["Dim2"] != "Total")].reset_index(drop=True)

In [None]:
# 2x3 grid with five panels in use (one per disease), all sharing ax0's y axis.
fig = plt.figure(figsize=(20, 12))
gs = fig.add_gridspec(2, 3)
gs.update(wspace=0.15, hspace=0.3)
ax0 = fig.add_subplot(gs[0, 0])
ax1 = fig.add_subplot(gs[0, 1], sharey=ax0)
ax2 = fig.add_subplot(gs[0, 2], sharey=ax0)
ax3 = fig.add_subplot(gs[1, 0], sharey=ax0)
ax4 = fig.add_subplot(gs[1, 1], sharey=ax0)

axes = [ax0, ax1, ax2, ax3, ax4]
disease = apdr_region.Dim2.unique().tolist()

fig.patch.set_facecolor(background_color)
for ax in axes:
    ax.set_facecolor(background_color)

# Figure title on the first panel and the disease name above each panel,
# drawn as text in data coordinates.
ax0.text(-0.5, 80, "Air Pollution Death Rate", color="#255f85", fontsize=24, fontweight="bold")
for ax, place in zip(axes, disease):
    ax.text(-0.5, 65, place, color="#424F67", fontsize=16, fontweight="bold")

for ax, dis in zip(axes, disease):
    ax.grid(color="gray", linestyle=":", axis="y", dashes=(1, 5))

    # Bars for this disease only, split by sex (Dim1).
    disease_region = apdr_region.query("Dim2=='{}'".format(dis))
    sns.barplot(x="Region", y="First Tooltip", hue="Dim1", palette=["#b85042", "#3a6b35"], data=disease_region, ax=ax)
    # Per-panel legends are hidden; one legend is added on ax0 below.
    ax.legend().set_visible(False)
    ax.set_xlabel("")
    ax.set_ylabel("")

leg = ax0.legend(loc="upper center", ncol=2, bbox_to_anchor=(0.22, 1.22), fontsize=12, facecolor=background_color)
# Axis explanation shared by all panels; the rate is per 100,000 (the caption
# previously read "100,00").
ax0.text(-0.5, 75, "X : Region  Y : Death Rate per 100,000 Population", fontsize=12, color="#255f85", fontweight="bold")

# Remove the frame around every panel.
for ax in axes:
    for s in ["top", "right", "left", "bottom"]:
        ax.spines[s].set_visible(False)

**<a id="mrouw"><font color="#566573">Mortality Rate of Unsafe WASH</font></a>**

**Unsafe WASH:** Exposure to unsafe water, sanitation and lack of hygiene.

Dataset contains only one year period(2016) with information about the mortality rate(per 100,000 population) attributed to exposure to unsafe WASH services.

* Africa has the highest mortality rate of unsafe WASH and Europe has the lowest.
* In Europe, Asia and Oceania, females have a higher mortality rate than males; in the Americas and Africa, males have the higher mortality rate.

In [None]:
# Load the unsafe-WASH mortality table, attach region columns and preview it.
mruw = pd.read_csv(
    "../input/who-worldhealth-statistics-2020-complete/mortalityRateUnsafeWash.csv"
)
mruw_region = merge_region(mruw, continents)
mruw_region.head()

In [None]:
# Average the mortality rate per region and sex (Dim1).
mruw_region=mruw_region.groupby(["Region", "Dim1"])["First Tooltip"].mean().reset_index()

# Look up one coordinate pair per region name with the Nominatim geocoder
# (one geocoding request per region).
geolocator = Nominatim(user_agent="geoapiExercises")
latitude, longitude, region=[], [], []
for r in mruw_region.Region.unique().tolist():
    location=geolocator.geocode(r)
    # NOTE(review): geopy documents `location[1]` as `(latitude, longitude)`.
    # If so, the list named `longitude` receives latitudes and `latitude`
    # receives longitudes. The map cell further down passes the "Latitude"
    # column as the first (x) argument and "Longitude" as the second (y), so it
    # appears to rely on this swap - do not rename here without changing that
    # cell too.
    longitude.append(location[1][0])
    latitude.append(location[1][1])
    region.append(r)

# One row per region with its coordinates, joined back onto the averages.
region_=pd.DataFrame({"Region":region, "Latitude":latitude, "Longitude":longitude})
mruw_region=mruw_region.merge(region_, on="Region")

# Manual offset applied only to the "Americas" rows: +35 on the "Longitude"
# column and -25 on the "Latitude" column.
# NOTE(review): presumably this moves the marker to a better spot on the map -
# confirm the intent.
mruw_region.loc[mruw_region.Region=="Americas", "Longitude"]=mruw_region[mruw_region["Region"]=="Americas"]["Longitude"]+35
mruw_region.loc[mruw_region.Region=="Americas", "Latitude"]=mruw_region[mruw_region["Region"]=="Americas"]["Latitude"]-25

In [None]:
# Single-panel figure holding the world map.
fig = plt.figure(figsize=(15, 15))
gs = fig.add_gridspec(1, 1)
ax = fig.add_subplot(gs[0])
# Title drawn as text in map coordinates, above the top edge of the map.
ax.text(-180, 105, "Mortality Rate of Unsafe WASH", fontsize=24, color="#255f85", fontweight="bold")
fig.patch.set_facecolor(background_color)
ax.set_facecolor(background_color)

# World map spanning longitudes -180..180 and latitudes -70..90.
m = Basemap(llcrnrlat=-70, llcrnrlon=-180, urcrnrlat=90, urcrnrlon=180, ax=ax)
m.drawcoastlines()
m.drawcountries()
m.drawmapboundary(fill_color="#335c67")
m.fillcontinents(color=background_color, lake_color="#a8dadc")

# One bubble per region and sex; `sw` shifts the male bubble by +5 and the
# female bubble by -5 along x so they sit side by side.
# NOTE(review): the "Latitude" column is used as the x coordinate and
# "Longitude" as y, matching how the geocoding cell fills those columns -
# confirm before renaming either.
for gender, color, sw in zip(["Male", "Female"], ["#3a6b35", "#b85042"], [5, -5]):
    gender_df = mruw_region.query("Dim1=='{}'".format(gender))
    lat, long = m(gender_df.Latitude + sw, gender_df.Longitude)
    s = gender_df["First Tooltip"]

    # Bubble area is proportional to the mortality rate.
    m.scatter(lat, long, marker="o", s=s * 300, color=color, alpha=0.95, zorder=3)

    # Value labels: the female label is drawn 5 units lower so the two labels
    # of a region do not overlap.
    l = 5 if gender == "Female" else 0
    for x, y, label in zip(gender_df.Latitude, gender_df.Longitude - l, round(gender_df["First Tooltip"], 2)):
        # `size` and `fontsize` are aliases; passing both is rejected by
        # current matplotlib, so only fontsize=11 is kept.
        ax.text(x, y, gender + ":" + str(label), ha="center", fontweight="bold", color="#ff9e00", fontsize=11)

# Manual legend, since the bubbles carry no labels.
male = mpatches.Patch(color="#3a6b35", label="Male")
female = mpatches.Patch(color="#b85042", label="Female")
ax.legend(handles=[male, female], ncol=2, loc="upper center", bbox_to_anchor=(0.081, 1.07), fontsize=11, facecolor=background_color);

**<a id="mrp"><font color="#566573">Mortality Rate Poisoning</font></a>**

Dataset contains five different years (2000/2005/2010/2015/2016), information with mortality rate(per 100,000 population) attributed to unintentional poisoning.

* The mortality rate decreases from year to year in every region. Asia shows a large decrease from 2000 to 2005.
* From the x-axis tick scale we can see that males have a higher death rate than females.
* The Americas and Europe are very close: females in Europe have a lower mortality rate but males have a higher one; if we combine both genders, the Americas have the lowest mortality rate overall.
* Another thing we can see from the plot is that the mortality rate in Oceania dropped only a little over the period.

In [None]:
# Load the unintentional-poisoning mortality table, attach region columns and preview it.
mrp = pd.read_csv(
    "../input/who-worldhealth-statistics-2020-complete/mortalityRatePoisoning.csv"
)
mrp_region = merge_region(mrp, continents)
mrp_region.head()

In [None]:
# Average the rate per region, year and sex (Dim1) ...
mrp_region = (
    mrp_region
    .groupby(["Region", "Year", "Dim1"])["First Tooltip"]
    .mean()
    .reset_index()
)

# ... then keep one table per sex for the two halves of the chart.
mrp_region_male = mrp_region[mrp_region["Dim1"] == "Male"]
mrp_region_female = mrp_region[mrp_region["Dim1"] == "Female"]

In [None]:
# Two panels with no gap between them: male bars on the left, female on the right.
fig = plt.figure(figsize=(15, 5))
gs = fig.add_gridspec(1, 2)
gs.update(wspace=0)
ax0 = fig.add_subplot(gs[0])
ax1 = fig.add_subplot(gs[1])

# Title and region names are written as text on the left panel (data coordinates).
ax0.text(
    5, -1.4, "Mortality Rate Poisoning",
    fontsize=24, color="#255f85", fontweight="bold",
)
for i, reg in zip(range(5), mrp_region.Region.unique()):
    ax0.text(5, i, reg, fontsize=16)

fig.patch.set_facecolor(background_color)
ax0.set_facecolor(background_color)
ax1.set_facecolor(background_color)

# Horizontal bars per region, one bar per year; the y tick labels are hidden
# because the region names were already written as text above.
for ax, data in zip([ax0, ax1], [mrp_region_male, mrp_region_female]):
    sns.barplot(
        x="First Tooltip",
        y="Region",
        hue="Year",
        palette=palette,
        ax=ax,
        data=data,
    )
    ax.grid(color="gray", linestyle=":", axis="x", dashes=(1, 5))
    plt.setp(ax.get_yticklabels(), visible=False)
    ax.set_ylabel("")

ax0.set_xlabel("Male", size=14)
ax1.set_xlabel("Female", size=14)
# Drop the first tick of the right panel.
ax1.set_xticks(ax1.get_xticks()[1:])
ax1.legend().set_visible(False)
# Flip the left panel's x axis so its bars extend to the left.
ax0.invert_xaxis()
ax0.legend(bbox_to_anchor=(0.75, 1.15), ncol=5, fontsize=12, facecolor=background_color)

# Hide every spine on both panels, then restore only the divider between them.
for s in ("top", "right", "left", "bottom"):
    ax0.spines[s].set_visible(False)
    ax1.spines[s].set_visible(False)
divider = ax0.spines["right"]
divider.set_visible(True)
divider.set_color("#d8e2dc")

### **<a id="TC"><font color="#566573">Tobacco Control</font></a>**
**<a id="ta"><font color="#566573">Tobacco Age</font></a>**

This dataset is about tobacco use among persons aged 15 years and older. The charts below show the differences between regions and genders, and the change from 2000 to 2015.

* Males have higher tobacco usage than females regardless of region.
* Africa has the lowest usage compared to the other regions, and it decreased from year to year.
* Overall tobacco use in the Americas decreased during the period, but the percentage of female users has increased compared to the beginning.
* Tobacco use in Asia decreased slightly in 2005 and went up afterwards in 2010, but the proportion of female users has decreased.
* Tobacco use in Europe changed little during the period, but you can see a large gain in female users in 2015.
* Oceania has the highest tobacco use at all times, and it has more female users compared to other regions.

In [None]:
# Load the tobacco-use (age 15+) table, attach region columns and preview it.
ta = pd.read_csv(
    "../input/who-worldhealth-statistics-2020-complete/tobaccoAge15.csv"
)
ta_region = merge_region(ta, continents)
ta_region.head()

In [None]:
# Average tobacco use per year, sex (Dim1) and region, then keep the four
# years that are plotted.
ta_region = ta_region.groupby(["Year", "Dim1", "Region"])["First Tooltip"].mean().reset_index()
ta_region_year = ta_region[ta_region["Year"].isin(["2000", "2005", "2010", "2015"])]

# Build the (year, region) sequence consumed cell by cell by the pie grid in
# the next cell, which walks a grid with one row per region and one column per
# year in row-major order. Each region must therefore be repeated once per
# *year*; repeating it five times (as before) shifted the regions against
# their rows from the second row onwards.
ta_years = ta_region_year.Year.unique().tolist()
ta_regions = ta_region_year.Region.unique().tolist()
Year = ta_years * len(ta_regions)
Region = [region_name for region_name in ta_regions for _ in ta_years]

In [None]:
# 5 x 4 grid of pies with no spacing between the cells.
fig, axes = plt.subplots(5, 4, figsize=(15, 6))
fig.subplots_adjust(wspace=0, hspace=0)
fig.patch.set_facecolor(background_color)

# Title, written as text relative to the top-left pie.
axes[0, 0].text(-4, 2.5, "Tobacco Control", fontsize=24, color="#255f85", fontweight="bold")

# Grid positions in row-major order: (0, 0), (0, 1), ..., (4, 3).
axes_lis = [(x, y) for x in range(5) for y in range(4)]

for cell, year, region in zip(axes_lis, Year, Region):
    # Rows for this year and region.
    data = ta_region_year[(ta_region_year["Year"] == year) & (ta_region_year["Region"] == region)]
    data_gender = data[data["Dim1"] != "Both sexes"]
    nparray = np.array(data_gender["First Tooltip"])  # not used further in this cell
    # Slices come from the per-sex rows; the pie radius is the
    # "Both sexes" value divided by 30.
    both_sexes = data[data["Dim1"] == "Both sexes"]["First Tooltip"].values
    axes[cell].pie(
        data_gender["First Tooltip"],
        colors=["#b85042", "#3a6b35"],
        radius=both_sexes / 30,
    )

# Column captions (years) under the bottom row of pies.
for col, caption in enumerate(["2000", "2005", "2010", "2015"]):
    axes[4, col].text(-0.6, -2, caption, fontsize=14)

# Row captions (regions) to the left of the first column.
for row, caption in enumerate(["Africa", "Americas", "Asia", "Europe", "Oceania"]):
    axes[row, 0].text(-4, 0, caption, fontsize=14)

# Manual legend for the two slice colours.
male = mpatches.Patch(color="#3a6b35", label="Male")
female = mpatches.Patch(color="#b85042", label="Female")
axes[0, 0].legend(handles=[male, female], ncol=2, bbox_to_anchor=(1.45, 1.4), fontsize=12, facecolor=background_color);

### **<a id="MM"><font color="#566573">Maternal Mortality</font></a>**

**<a id="mmr"><font color="#566573">Maternal Mortality Ratio</font></a>**

The dataset covers the period 2000-2017 and gives the maternal mortality ratio per 100,000 births.

* Africa has the highest ratio; it has decreased slightly over the years but is still quite high.

* Europe has the lowest ratio at all time.

* Compared to the Americas, Asia and Oceania show a bigger drop during the period.

In [None]:
# Load the maternal-mortality-ratio table, attach region columns and preview it.
mmr = pd.read_csv(
    "../input/who-worldhealth-statistics-2020-complete/maternalMortalityRatio.csv"
)
mmr_region = merge_region(mmr, continents)
mmr_region.head()

In [None]:
# "First Tooltip" is text here; keep only the token before the first space and
# convert it to int. Assigning the whole column (rather than writing through
# `.loc[:, ...]`) guarantees a real integer column, so the averaging in
# merge_year does not depend on how pandas treats an object-dtype column.
# NOTE(review): the text after the first space is discarded - presumably an
# interval such as "[lo-hi]"; confirm against the CSV.
mmr_region["First Tooltip"] = (
    mmr_region["First Tooltip"].str.split(" ").str[0].astype(int)
)

# Bucket the yearly rows per region: four buckets of 4 rows plus a final
# bucket of 2 rows.
mmr_region_year = merge_year(mmr_region, [], 4, 4, 2)

In [None]:
# 2x3 grid with five panels in use (one per region), all sharing ax0's y axis.
fig = plt.figure(figsize=(20, 12))
gs = fig.add_gridspec(2, 3)
gs.update(wspace=0.15, hspace=0.3)
ax0 = fig.add_subplot(gs[0, 0])
ax1 = fig.add_subplot(gs[0, 1], sharey=ax0)
ax2 = fig.add_subplot(gs[0, 2], sharey=ax0)
ax3 = fig.add_subplot(gs[1, 0], sharey=ax0)
ax4 = fig.add_subplot(gs[1, 1], sharey=ax0)

axes = [ax0, ax1, ax2, ax3, ax4]

fig.patch.set_facecolor(background_color)
for panel in axes:
    panel.set_facecolor(background_color)

# Figure title, written as text on the first panel (data coordinates).
ax0.text(
    -0.45, 770, "Maternal Mortality Ratio",
    color="#255f85", fontsize=24, fontweight="bold",
)

# One bar chart per region: a bar per year bucket, coloured with the reversed palette.
for ax, region in zip(axes, mmr_region_year.Region.unique().tolist()):
    ax.grid(color="gray", linestyle=":", axis="y", dashes=(1, 5))

    df_region = mmr_region_year[mmr_region_year["Region"] == region]

    sns.barplot(
        x="YearGroup",
        y="First Tooltip",
        palette=palette[::-1],
        data=df_region,
        ax=ax,
    )
    # Tick labels are blanked; the shared legend below names the buckets.
    ax.set_xticklabels([], size=12)
    ax.legend().set_visible(False)
    # The region name doubles as the x-axis label of its panel.
    ax.set_xlabel("{}".format(region))
    ax.set_ylabel("")

# Manual legend: one colour patch per year-bucket caption (the "Year" heading is skipped).
legend = [
    mpatches.Patch(color=color, label=year)
    for color, year in zip(palette[::-1], legend_lis[1:])
]
ax0.legend(handles=legend, ncol=5, bbox_to_anchor=(1.87, 1.1), fontsize=14, facecolor=background_color);

# Remove the frame around every panel.
for ax in axes:
    for s in ("top", "right", "left", "bottom"):
        ax.spines[s].set_visible(False)

In [None]:
# fig=plt.figure(figsize=(12, 6))
# plt.subplots_adjust(bottom = 0.2, top = 0.9) #ensuring the dates (on the x-axis) fit in the screen
# plt.ylabel("population using at least basic sanitation services (%)", size=12)
# plt.xlabel("Region", size=12);
# plt.legend(year_lis)

# year_lis=bdws_year_region["Year"].unique().tolist()

# def chart(i=int):
#     plt.legend(year_lis, bbox_to_anchor=(1.1, 1), prop={"size":6})
#     year_region=bdws_year_region[bdws_year_region["Year"]==year_lis[i]]
#     plt.scatter(x="Region", y="First Tooltip", s=500, color=palette[i], data=year_region)
#     #sns.scatterplot(x="Region", y="First Tooltip", s=500, data=year_region)

    

# animator = ani.FuncAnimation(fig, chart, frames = 18)
# html = HTML(animator.to_jshtml())
# display(html)
# plt.close() 