# Gapminder project

This is a very small project to learn the basics of pandas and to visualize the Gapminder dataset.

First of all import pandas

In [None]:
# Uncomment to install pandas into the active kernel's environment:
# %pip install pandas

In [None]:
import os

import pandas as pd

Read in the data for fertility, life expectancy and population.

In [None]:
# Load the three Gapminder tables; the first column of each file becomes the index.
# Fertility is stored as CSV, the other two tables as Excel workbooks.
fert = pd.read_csv('data/gapminder_total_fertility.csv', index_col=0)
life, pop = (
    pd.read_excel(path, index_col=0)
    for path in ('data/gapminder_lifeexpectancy.xlsx', 'data/gapminder_population.xlsx')
)

In [None]:
# Preview the first rows of the fertility table.
fert.head()

In [None]:
# Preview the first rows of the life-expectancy table.
life.head()

In [None]:
# Preview the first rows of the population table.
pop.head()

To merge the data, one needs information about the shape.

In [None]:
# Report (rows, columns) of each raw table before any cleaning.
for label, frame in (("life shape: ", life), ("fert shape: ", fert), ("pop shape: ", pop)):
    print(label, frame.shape)

Now drop all rows and columns that contain only NaN values.

In [None]:
# Drop rows (axis=0) and then columns (axis=1) that contain nothing but NaN.
# The result is re-assigned instead of mutated with inplace=True, so each
# frame's lineage stays explicit and the two drops can be chained.
pop = pop.dropna(axis=0, how="all").dropna(axis=1, how="all")
fert = fert.dropna(axis=0, how="all").dropna(axis=1, how="all")
life = life.dropna(axis=0, how="all").dropna(axis=1, how="all")

# Shapes after cleaning, for comparison with the raw shapes printed earlier.
print("life shape: ", life.shape)
print("fert shape: ", fert.shape)
print("pop shape: ", pop.shape)

Now check the datatype of the columns and convert them if necessary.

In [None]:
# Show the column labels (and their dtype) of every table.
for label, frame in (("life columns: ", life), ("fert columns: ", fert), ("pop columns: ", pop)):
    print(label, frame.columns)

The "str" columns had no values inside, so they were removed by the drop above. The remaining column labels are years; convert them to integers for `fert` and `pop`.

In [None]:
# Cast the column labels of the fertility and population tables to integers.
# NOTE(review): `life` is not converted here - presumably its labels are
# already integers; confirm against the column printout above.
for frame in (fert, pop):
    frame.columns = frame.columns.astype(int)

In [None]:
# Give the index of every table the same name so it becomes a 'country'
# column once the index is reset.
for frame in (fert, life, pop):
    frame.index.name = 'country'

In [None]:
# Check that the index of the fertility table is now labelled 'country'.
fert.head()

In [None]:
# Move the 'country' index into a regular column for all three tables.
# Run this cell only once per kernel session: resetting an already-reset
# frame would insert an extra 'index' column.
fert, life, pop = (frame.reset_index() for frame in (fert, life, pop))

In [None]:
# After reset_index, 'country' should show up as an ordinary column.
fert.head()

In [None]:
# Reshape each table from wide (one column per year) to long format:
# one row per (country, year) pair with a single value column.
long_format = dict(id_vars='country', var_name='year')
fert = fert.melt(value_name='fertility_rate', **long_format)
pop = pop.melt(value_name='total_population', **long_format)
life = life.melt(value_name='life_expectancy', **long_format)

In [None]:
# Join fertility and population on their shared keys. Naming the keys and the
# join type explicitly (instead of relying on pandas' inference from the
# overlapping column names) keeps the merge from silently changing if another
# column name ever overlaps between the two tables.
df = fert.merge(pop, on=['country', 'year'], how='inner')

In [None]:
# Add life expectancy to the combined table. The join keys and join type are
# spelled out so the result does not depend on which column names happen to
# overlap between the two frames.
df = df.merge(life, on=['country', 'year'], how='inner')

In [None]:
# Display the merged table.
df

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Use matplotlib's 'ggplot' style sheet for the figures created below.
plt.style.use('ggplot')

In [None]:
# Static snapshot: fertility rate against life expectancy for the year 2000.
is_year_2000 = df['year'] == 2000
df_subset = df.loc[is_year_2000]
sns.scatterplot(
    data=df_subset,
    x='life_expectancy',
    y='fertility_rate',
    alpha=0.6,
)

In [None]:
# Keep only the rows whose year is later than 1959.
after_1959 = df["year"] > 1959
df_60 = df.loc[after_1959]

In [None]:
# Group the filtered rows by year; iterating `g` yields (year, sub-frame) pairs.
g = df_60.groupby("year")

## Create an image for every year with Matplotlib and make a GIF

In [None]:
# Render one bubble chart per year and save it as a PNG frame.
# Create the output folder first so savefig does not fail when it is missing.
os.makedirs("plots", exist_ok=True)

for year, data in g:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.set_xlabel("Life expectancy")
    ax.set_ylabel("Fertility rate")
    # Fixed axis limits keep every frame on the same scale.
    ax.axis((25, 90, 0.5, 9))

    # Marker area is relative to this year's mean population
    # (a country at the mean gets s == 100).
    tot_pop = data["total_population"]
    s = 100 * tot_pop / tot_pop.mean()

    ax.set_title(year)
    # The row index is used as the colour value of each point.
    ax.scatter(x='life_expectancy', y='fertility_rate', s=s, c=data.index,
               data=data, alpha=0.6)

    filename = "plots/gap_{}.png".format(year)
    fig.savefig(filename)
    # Close the figure explicitly so figures do not pile up in memory.
    plt.close(fig)

In [None]:
# Uncomment to install imageio into the active kernel's environment:
# %pip install imageio

In [None]:
import imageio

# Collect the per-year PNG frames in the order the groupby yields the years
# and write them out as a single animated GIF.
# NOTE(review): newer imageio releases may warn about the top-level `imread`
# and the `fps` argument - confirm against the installed version.
frame_paths = ['plots/gap_{}.png'.format(year) for year, _ in g]
images = [imageio.imread(path) for path in frame_paths]

imageio.mimsave('output.gif', images, fps=8)

![SegmentLocal](output.gif "segment")

## Use Plotly for Visualization

Plotly.express has many nice visualization functions. See here

https://plotly.com/python/plotly-express/

In [None]:
# Uncomment to install plotly into the active kernel's environment:
# %pip install plotly

In [None]:
import plotly.express as px

In [None]:
# Work on a copy so df_60 stays untouched, and replace missing populations
# with 0 because the column drives the marker size below.
# NOTE(review): presumably px.scatter rejects NaN marker sizes - confirm.
dF = df_60.copy()
dF['total_population'] = dF['total_population'].fillna(0)

# Animated bubble chart with one frame per year; the last expression of the
# cell is the figure, so the notebook renders it.
px.scatter(
    dF,
    x="life_expectancy",
    y="fertility_rate",
    size="total_population",
    size_max=60,
    hover_name="country",
    animation_frame="year",
    log_x=True,
    range_x=[25,90],
    range_y=[0.5,9],
)