In [None]:
#Author Anna Durbanova
#Theme: Life Expectancy
#Date 16.08.2020

import pandas as pd
import os
import glob
import numpy as np
import holoviews as hv
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
# Activate the bokeh plotting backend for holoviews.
hv.extension('bokeh')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
!pip install hvplot


In [None]:
import hvplot
from hvplot import hvPlot
import hvplot.pandas

In [None]:
!pip install pingouin


In [None]:
import pingouin as pg
import difflib as dfl
from functools import partial


# Information about our data

In [None]:
# Print the full path of every file found under the Kaggle input directory.
input_files = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# Read the life-expectancy CSV and summarise its columns, dtypes and null counts.
data = pd.read_csv('/kaggle/input/life-expectancy-who/Life Expectancy Data.csv')

data.info()

## Description of each column


|Field|Description|
|---:|:---|
|Country|Country|
|Year|Year|
|Status|Developed or Developing status|
|Life expectancy|Life expectancy in years|
|Adult Mortality|Adult Mortality Rates of both sexes (probability of dying between 15 and 60 years per 1000 population)|
|infant deaths|Number of Infant Deaths per 1000 population|
|Alcohol|Alcohol, recorded per capita (15+) consumption (in litres of pure alcohol)|
|percentage expenditure|Expenditure on health as a percentage of Gross Domestic Product per capita (%)|
|Hepatitis B|Hepatitis B (HepB) immunization coverage among 1-year-olds (%)|
|Measles|Measles - number of reported cases per 1000 population|
|BMI|Average Body Mass Index of entire population|
|under-five deaths|Number of under-five deaths per 1000 population|
|Polio|Polio (Pol3) immunization coverage among 1-year-olds (%)|
|Total expenditure|General government expenditure on health as a percentage of total government expenditure (%)|
|Diphtheria|Diphtheria tetanus toxoid and pertussis (DTP3) immunization coverage among 1-year-olds (%)|
|HIV/AIDS|Deaths per 1 000 live births HIV/AIDS (0-4 years)|
|GDP|Gross Domestic Product per capita (in USD)|
|Population|Population of the country|
|thinness 1-19 years|Prevalence of thinness among children and adolescents for Age 10 to 19 (%)|
|thinness 5-9 years|Prevalence of thinness among children for Age 5 to 9(%)|
|Income composition of resources|Income composition of resources|
|Schooling|Number of years of Schooling(years)|

In [None]:
# Treat 0.0 in these two columns as missing by replacing it with NaN.
# The result is assigned back to the frame: calling `replace(..., inplace=True)` on a
# column selection acts on an intermediate object, which emits a warning on recent
# pandas and leaves `data` untouched when copy-on-write is enabled.
zero_as_missing = ["Income composition of resources", "Schooling"]
data[zero_as_missing] = data[zero_as_missing].replace(0.0, np.nan)

# Order rows by country and then by year.
data = data.sort_values(["Country", "Year"])
data

# Life Expectancy Overview for All countries for 2015

In [None]:
# Life expectancy per country for 2015, highest first.
# The column is selected with a list (double brackets): tuple-style selection such as
# groupby(...)["a", "b"] is rejected by current pandas, and the grouping key "Country"
# is text, so it must not be passed to median().
data_2015 = (
    data[data.Year == 2015]
    .groupby("Country")[["Life expectancy "]]
    .median()
    .sort_values(by="Life expectancy ", ascending=False)
)

# reset_index() turns the "Country" index back into a column for the bar chart's x axis.
data_2015.reset_index().hvplot.bar(
    x="Country",
    y="Life expectancy ",
    rot=90,
    width=2000,
    height=550,
    title="Life Expectancy for ALL Countries for 2015",
)

# The Effect of different factors on the Life Expectancy

In [None]:
mask = data.Year == 2015
plt.rcParams["figure.figsize"] = (20, 20)

# Correlate numeric columns only: the frame also holds text columns (Country, Status),
# and DataFrame.corr() raises on them in pandas versions that no longer drop
# non-numeric columns silently.
numeric_2015 = data[mask].select_dtypes(include="number")
sns.heatmap(numeric_2015.corr(), cmap="BuPu", annot=True).set_title("Correlation Table for all columns, 2015");

## Correlation between values - Statistical Significance (p-values)

- Life Expectancy & Income Composition of Resources
- Life Expectancy & Schooling
- Under 5 Years & Infant Deaths
- Alcohol & Hepatitis B
- Polio & Diphtheria
- Total Expenditure & Population
- Schooling & GDP


### Life Expectancy & Income Composition of Resources


In [None]:
# Correlation statistics (pingouin defaults) between life expectancy and
# income composition of resources, over all rows.
pg.corr(
    x=data["Life expectancy "],
    y=data["Income composition of resources"],
)

In [None]:
plt.rcParams.update({"figure.figsize": (12, 8)})

# Scatter plot with a fitted regression line over all countries and years.
ax = sns.regplot(data=data, x="Income composition of resources", y="Life expectancy ")
ax.set_title("The Effect of Income Composition of Resources on Life expectancy for all data");

Correlation between Income composition of Resources and Life Expectancy, taking into account all countries for all years. 

We can clearly see that there is a positive correlation: the higher a country's income composition of resources, the longer its population is expected to live.

In [None]:
plt.rcParams.update({"figure.figsize": (12, 8)})

# One regression panel per year, three panels per row.
g = sns.FacetGrid(data=data, col="Year", col_wrap=3, height=15)
regplot_columns = {"x": "Income composition of resources", "y": "Life expectancy "}
g.map_dataframe(sns.regplot, **regplot_columns);

Correlation between Income Composition of Resources and Life Expectancy, shown separately for each year. We can see that the correlation is positive.

### Correlation between Schooling and Life Expectancy

In [None]:
# Correlation statistics (pingouin defaults) between life expectancy and
# years of schooling, over all rows.
pg.corr(
    x=data["Life expectancy "],
    y=data["Schooling"],
)

In [None]:
# Scatter plot with a fitted regression line over all countries and years.
ax = sns.regplot(data=data, x="Schooling", y="Life expectancy ")
ax.set_title("Schooling has an effect on Life Expectancy, 2000-2015");

In [None]:
# Keep a reference to the newly created grid. Without the assignment, `g` still points
# at a grid created by an earlier cell (or is undefined on a fresh kernel), so the
# Schooling panels would be drawn on the wrong figure.
g = sns.FacetGrid(data=data, col="Year", col_wrap=4, height=20)

# One regression panel per year, four panels per row.
g.map_dataframe(sns.regplot, x="Schooling", y="Life expectancy ");

From the scatter plots we can see that schooling is indeed associated with life expectancy.

The more years of schooling people have, the longer they tend to live.

## 3. Correlation between Infant Deaths and Under-Five Deaths

In [None]:
# Correlation statistics (pingouin defaults) between under-five deaths and
# infant deaths, over all rows.
pg.corr(
    x=data["under-five deaths "],
    y=data["infant deaths"],
)

In [None]:
# Scatter plot with a fitted regression line over all countries and years.
ax = sns.regplot(x="infant deaths", y="under-five deaths ", data=data)
ax.set_title("Correlation between Infant Deaths and Under-Five-Deaths, all years");

A pretty obvious positive correlation: the more infant deaths, the more under-five deaths.

## 4. Hepatitis B & Life Expectancy

In [None]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=[12, 4])

# (axis, year, title) for each side-by-side panel.
panels = (
    (ax1, 2010, "Hepatitis B has an effect on Life Expectancy, 2010"),
    (ax2, 2015, "Hepatitis B has an effect on Life Expectancy, 2015"),
)
for axis, year, title in panels:
    yearly = data[data.Year == year]
    sns.regplot(data=yearly, x="Hepatitis B", y="Life expectancy ", ax=axis).set_title(title)

From 2000 - 2015 the correlation between Hepatitis B and Life Expectancy has been changing from flat to positive correlation

## 5. Polio & Diphtheria on Life expectancy

In [None]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=[12, 4])

# (axis, year, title) for each side-by-side panel.
panels = (
    (ax1, 2000, "Polio has an effect on Life Expectancy, 2000"),
    (ax2, 2015, "Polio has an effect on Life Expectancy, 2015"),
)
for axis, year, title in panels:
    yearly = data[data.Year == year]
    sns.regplot(data=yearly, x="Polio", y="Life expectancy ", ax=axis).set_title(title)

There is a correlation between Polio and Life expectancy

In [None]:
# Scatter plot with a fitted regression line over all countries and years.
ax = sns.regplot(x="Diphtheria ", y="Life expectancy ", data=data)
ax.set_title("Diphtheria has an effect on Life Expectancy, all years");

In [None]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=[12, 4])

# (axis, year, title) for each side-by-side panel.
panels = (
    (ax1, 2000, "Diphtheria has an effect on Life Expectancy, 2000"),
    (ax2, 2015, "Diphtheria has an effect on Life Expectancy, 2015"),
)
for axis, year, title in panels:
    yearly = data[data.Year == year]
    sns.regplot(data=yearly, x="Diphtheria ", y="Life expectancy ", ax=axis).set_title(title)

There is a positive correlation between Diphtheria vaccination coverage and life expectancy over the years 2000-2015.

## 6. Alcohol and Life Expectancy

In [None]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=[12, 4])

# (axis, year, title) for each side-by-side panel.
panels = (
    (ax1, 2000, "Alcohol has an effect on Life Expectancy, 2000"),
    (ax2, 2015, "Alcohol has an effect on Life Expectancy, 2015"),
)
for axis, year, title in panels:
    yearly = data[data.Year == year]
    sns.regplot(data=yearly, x="Alcohol", y="Life expectancy ", ax=axis).set_title(title)

There is a positive correlation between alcohol consumption and life expectancy.

## 7. Total Expenditure & Life Expectancy for 2000

In [None]:
# Regression of life expectancy on total health expenditure, year 2000 only.
data_2000 = data[data.Year == 2000]
ax = sns.regplot(data=data_2000, x="Total expenditure", y="Life expectancy ")
ax.set_title("Positive effect of expenditure on healthcare on the life expectancy, 2000");


According to the graph, the more money a country spends on healthcare, the longer its life expectancy.

## 8. Infant Mortality & Life Expectancy


In [None]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=[12, 4])

# (axis, year, title) for each side-by-side panel.
panels = (
    (ax1, 2000, "Infant Mortality has an effect on Life Expectancy, 2000"),
    (ax2, 2015, "Infant Mortality has an effect on Life Expectancy, 2015"),
)
for axis, year, title in panels:
    yearly = data[data.Year == year]
    sns.regplot(data=yearly, x="infant deaths", y="Life expectancy ", ax=axis).set_title(title)

Infant deaths show a slight negative correlation with life expectancy: the more children die, the lower the life expectancy.

## 9. Adult Mortality & Life Expectancy


In [None]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=[12, 4])

# (axis, year, title) for each side-by-side panel.
panels = (
    (ax1, 2000, "Adult Mortality has an effect on Life Expectancy, 2000"),
    (ax2, 2015, "Adult Mortality has an effect on Life Expectancy, 2015"),
)
for axis, year, title in panels:
    yearly = data[data.Year == year]
    sns.regplot(data=yearly, x="Adult Mortality", y="Life expectancy ", ax=axis).set_title(title)

The more adults die, the lower the life expectancy.

# Country-View

## Which country has the longest life expectancy?

In [None]:
def _life_expectancy_ranking(year, agg, ascending, n=10):
    """Return the first `n` countries of `year` ranked by life expectancy.

    Parameters
    ----------
    year : int
        Value of the "Year" column to keep.
    agg : str
        Name of the per-country aggregation (e.g. "median" or "mean").
    ascending : bool
        True ranks the shortest life expectancy first, False the longest.
    n : int, default 10
        Number of countries to return.

    Returns
    -------
    pandas.DataFrame
        Indexed by "Country", with columns "Life expectancy " and "Year".
    """
    return (
        data[data.Year == year]
        # Select columns with a list: tuple-style selection on a groupby object is
        # rejected by current pandas, and the text grouping key "Country" cannot be
        # aggregated numerically, so it is left out of the selection.
        .groupby("Country")[["Life expectancy ", "Year"]]
        .agg(agg)
        .sort_values("Life expectancy ", ascending=ascending)
        .head(n)
    )


# The aggregations match the original analysis: median for 2000, mean for 2015.
top_life_exp_2000 = _life_expectancy_ranking(2000, "median", ascending=False)
top_life_exp_2015 = _life_expectancy_ranking(2015, "mean", ascending=False)
bottom_life_exp_2000 = _life_expectancy_ranking(2000, "median", ascending=True)
bottom_life_exp_2015 = _life_expectancy_ranking(2015, "mean", ascending=True)

plot_long_2000 = top_life_exp_2000.hvplot.bar(x="Country", y="Life expectancy ", stacked=True, rot=45, title="Countries with the longest and shortest life expectancy, 2000")
plot_long_2015 = top_life_exp_2015.hvplot.bar(x="Country", y="Life expectancy ", stacked=True, rot=45, title="Countries with the longest and shortest life expectancy, 2015")

plot_short_2000 = bottom_life_exp_2000.hvplot.bar(x="Country", y="Life expectancy ", stacked=True, rot=45)
plot_short_2015 = bottom_life_exp_2015.hvplot.bar(x="Country", y="Life expectancy ", stacked=True, rot=45)

# Overlay the top-10 and bottom-10 bar charts for 2000 in one figure.
plot_long_2000*plot_short_2000

In 2000, the country with the longest life expectancy was Japan (81), and the country with the shortest life expectancy was Sierra Leone (39).

In [None]:
# Overlay the 2015 top-10 and bottom-10 bar charts in one figure.
plot_long_2015*plot_short_2015


In 2015, the country with the longest life expectancy was Slovenia (88), and the country with the shortest life expectancy was Sierra Leone (51).

## Status

In [None]:
# Number of rows per development status for 2000 and for 2015.
status_2000, status_2015 = (
    data[data.Year == year].groupby("Status")[["Country"]].count()
    for year in (2000, 2015)
)

f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

# Left pie: 2000 (carries the shared title); right pie: 2015.
pie_2000 = status_2000.plot.pie(y="Country", autopct='%1.0f%%', ax=ax1, figsize=(12,7))
pie_2000.set_title("There were 83% of developing countries and 17% of developed countries, 2000 and 2015")
status_2015.plot.pie(y="Country", autopct='%1.0f%%', ax=ax2, figsize=(12,7));

According to the pie charts, the shares of developing and developed countries did not change between 2000 and 2015: 83% of the countries are developing and 17% are developed.

In [None]:
# Status of the previous row within the same country (NaN for each country's first row).
previous_status = data.groupby("Country")["Status"].shift()
# True where the status equals the previous row's status; False otherwise,
# including each country's first row.
data["Compare Status"] = data["Status"].eq(previous_status)

In [None]:
# Display the frame again, now including the "Compare Status" column.
data

In [None]:
# Rows whose status differs from the previous row of the same country, ignoring
# year 2000, where no earlier row exists to compare against.
status_differs = data["Compare Status"].eq(False)
not_first_year = data["Year"].ne(2000)
mask = status_differs & not_first_year
data.loc[mask]

These countries appear in the data only for 2013, so they have no previous year to compare against and are flagged for that reason alone.

Result: none of the countries changed status.

# Conclusion
- In 2015 the country with the longest life expectancy (88 years old) was Slovenia
- In 2015 the country with the shortest life expectancy (51 years old) was Sierra Leone

- Various factors affect life expectancy:
    - Income Composition of Resources
    - Schooling
    -  Alcohol 
    - Polio & Diphtheria
    - Total Expenditure
    - Infant Mortality
    - Adult Mortality
    
- 83% of the countries are developing and 17% are developed, in both 2000 and 2015
- There was no change in country status: over the 15 years, no country moved from developing to developed
    