# Table of Contents
* [Explore a subset of countries](#1)
* [All countries](#2)
* [Analysis of individual country](#3)

In [None]:
# packages

# standard
import numpy as np
import pandas as pd
import time

# plots
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns

In [None]:
# Read the life expectancy table into `df` and preview its first rows.
# Later cells take the country name from the `geo` column and treat the
# remaining columns as one column per year.
csv_path = '../input/life-expectancy-global-trend/life_expectancy_years.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Collect every entry of the `geo` column as a plain Python list and print it,
# so the available country names can be looked up.
all_countries = df['geo'].tolist()
print(all_countries)

<a id='1'></a>
# Explore a subset of countries

In [None]:
# Countries to compare in the plots of this section
select_countries = [
    'China',
    'India',
    'France',
    'Germany',
    'Japan',
    'United Kingdom',
    'United States',
]

# Keep only the rows whose `geo` value is in the list and renumber them from 0
is_selected = df['geo'].isin(select_countries)
df_sel = df.loc[is_selected].reset_index(drop=True)
df_sel

In [None]:
# Reshape for plotting: one row per year, one column per country.
# Everything after the first column is transposed, so the former column
# headers become the index.
df_sel4plot = df_sel.iloc[:, 1:].T
# Label the new columns with the country names taken from `geo`
df_sel4plot.columns = df_sel['geo'].to_numpy()
# The index labels are strings at this point; make them ints so they can be
# compared numerically (e.g. index >= 1950)
df_sel4plot.index = df_sel4plot.index.astype(int)
df_sel4plot.head()

In [None]:
# Line plot for the selected countries, all years from 1800..2018.
# rc_context applies the larger figure size only inside the `with` block and
# restores the previous rcParams on exit, even if plotting raises, so no
# manual save/restore of plt.rcParams is needed.
with plt.rc_context({'figure.figsize': (16, 7)}):
    df_sel4plot.plot.line()
    plt.title('Development of life expectancy for selected countries')
    plt.grid()
    plt.show()

In [None]:
# Same line plot as before, restricted to years >= 1950.
# rc_context applies the larger figure size only inside the `with` block and
# restores the previous rcParams on exit, even if plotting raises, so no
# manual save/restore of plt.rcParams is needed.
with plt.rc_context({'figure.figsize': (16, 7)}):
    df_sel4plot[df_sel4plot.index >= 1950].plot.line()
    plt.title('Development of life expectancy for selected countries - 1950+')
    plt.grid()
    plt.show()

In [None]:
# Summary statistics per column. The year is first copied from the index into
# an explicit 'Year' column (this modifies df_sel4plot in place), so it is
# summarised together with the country columns.
df_sel4plot['Year'] = df_sel4plot.index
sel_summary = df_sel4plot.describe()
sel_summary

<a id='2'></a>
# All countries

In [None]:
# Reshape the full table for plotting: one row per year, one column per country.
# Everything after the first column is transposed, so the former column
# headers become the index.
df_all4plot = df.iloc[:, 1:].T
# Label the new columns with the country names taken from `geo`
df_all4plot.columns = df['geo'].to_numpy()
# Convert this frame's OWN index from string to int. The index was previously
# borrowed from df_sel4plot, which only worked because both frames happen to
# have the same year labels and made this cell depend on the subset section.
df_all4plot.index = df_all4plot.index.astype(int)
df_all4plot.head()

In [None]:
# Copy the year from the index into an explicit 'Year' column, so that it can
# serve as the id variable when the frame is melted
df_all4plot['Year'] = df_all4plot.index.values

In [None]:
# Wide -> long: keep 'Year' as the identifier and stack all country columns
# into a single column. No custom names are passed, so the stacked values end
# up in the default 'value' column that the plots below use.
df_all4plot_compact = df_all4plot.melt(id_vars='Year', value_vars=all_countries)
df_all4plot_compact

In [None]:
# One box per year, pooling the values of all countries
fig, ax = plt.subplots(figsize=(20, 6))
sns.boxplot(data=df_all4plot_compact, x='Year', y='value', ax=ax)
# Limit the number of x ticks, otherwise every single year gets a label.
# NOTE(review): confirm the remaining tick labels still show the correct
# years with the installed seaborn/matplotlib versions.
ax.xaxis.set_major_locator(mtick.MaxNLocator(20))
ax.set_title('Life Expectancy by Year - All Countries included')
ax.grid()
plt.show()

In [None]:
# One box per year across all countries, restricted to years >= 1950
since_1950 = df_all4plot_compact.loc[df_all4plot_compact['Year'] >= 1950]

fig, ax = plt.subplots(figsize=(20, 6))
sns.boxplot(data=since_1950, x='Year', y='value', ax=ax)
ax.xaxis.set_major_locator(mtick.MaxNLocator(15))  # limit the number of x ticks
ax.set_title('Life Expectancy by Year - All Countries included - 1950+')
ax.grid()
plt.show()

In [None]:
# Same data as the 1950+ box plot, drawn as one violin per year
since_1950 = df_all4plot_compact.loc[df_all4plot_compact['Year'] >= 1950]

fig, ax = plt.subplots(figsize=(20, 6))
sns.violinplot(data=since_1950, x='Year', y='value', ax=ax)
ax.xaxis.set_major_locator(mtick.MaxNLocator(15))  # limit the number of x ticks
ax.set_title('Life Expectancy by Year - All Countries included - 1950+')
ax.grid()
plt.show()

<a id='3'></a>
# Analysis of individual country

In [None]:
# select country of interest
# The value is compared with `==` against df's `geo` column in the next cell,
# so it has to match an entry there exactly; it is also used as the prefix of
# every plot title in this section.
select_country = 'United States'

In [None]:
# Build a per-year frame for the selected country.
# First pick the row(s) whose `geo` equals select_country ...
country_row = df.loc[df['geo'] == select_country]
# ... then drop the first column and transpose, so the former column headers
# become the index and the values form a single column
df_c = country_row.iloc[:, 1:].T
df_c.columns = ['LifeExp']
df_c.index = df_c.index.astype(int)  # index labels are strings up to here

life_exp = df_c['LifeExp']
# absolute change versus the previous row
df_c['YoY_diff'] = life_exp.diff()
# relative change versus the previous row, as a fraction (0.01 == 1%)
df_c['YoY_rel'] = life_exp.pct_change()

# show the ten most recent rows
df_c.tail(10)

In [None]:
# Life expectancy of the selected country over all available years
fig, ax = plt.subplots(figsize=(16, 6))
df_c['LifeExp'].plot(ax=ax)
ax.set_title(f'{select_country} - Life Expectancy')
ax.grid()
plt.show()

In [None]:
# Year-on-year absolute difference over all available years
fig, ax = plt.subplots(figsize=(16, 6))
df_c['YoY_diff'].plot(ax=ax)
ax.set_title(f'{select_country} - Life Expectancy - YoY difference')
ax.grid()
plt.show()

In [None]:
# Year-on-year absolute difference, restricted to years >= 1950
yoy_diff_recent = df_c.loc[df_c.index >= 1950, 'YoY_diff']

fig, ax = plt.subplots(figsize=(16, 6))
yoy_diff_recent.plot(ax=ax)
ax.set_title(f'{select_country} - Life Expectancy - YoY difference - 1950+')
ax.grid()
plt.show()

In [None]:
# Year-on-year relative change, restricted to years >= 1950
yoy_rel_recent = df_c.loc[df_c.index >= 1950, 'YoY_rel']

fig, ax = plt.subplots(figsize=(16, 6))
yoy_rel_recent.plot(ax=ax)
# YoY_rel holds fractions, so xmax=1 makes 1.0 display as 100%; the other
# PercentFormatter arguments are left at their defaults
ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))
ax.set_title(f'{select_country} - Life Expectancy - YoY rel.change - 1950+')
ax.grid()
plt.show()