# Goal: Recreate Gapminder's Bubble Chart*

![Gapminder](Gapminder.png)

*As much as possible...

See https://www.gapminder.org/tools/

## Import libraries

In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

In [3]:
import plotly
# Print the installed plotly version so it is recorded in the notebook output.
print(plotly.__version__)

6.0.0


## Load the data

In [4]:
# One row per (Country, Year); the file is a plain comma-separated CSV.
df = pd.read_csv('Gapminder-data.csv')

In [5]:
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,Country,Region,Year,Income,Life expectancy,Population,Fertility,Child mortality
0,Afghanistan,Asia,1900,1088.0,29.41,4832414.0,7.001,481.77
1,Afghanistan,Asia,1901,1106.0,29.47,4879685.0,7.001,481.36
2,Afghanistan,Asia,1902,1124.0,29.53,4935122.0,7.001,480.87
3,Afghanistan,Asia,1903,1143.0,29.6,4998861.0,7.001,480.3
4,Afghanistan,Asia,1904,1162.0,29.66,5063419.0,7.001,481.08


In [6]:
# Summary statistics of the numeric columns (value ranges, row counts).
df.describe()

Unnamed: 0,Year,Income,Life expectancy,Population,Fertility,Child mortality
count,22424.0,22424.0,22424.0,22424.0,22424.0,22424.0
mean,1960.114877,7759.2335,52.851484,20231490.0,4.807106,189.525802
std,34.893923,13587.506024,16.849974,86304550.0,1.931171,152.436433
min,1900.0,312.0,1.1,16409.0,0.877,1.58
25%,1930.0,1385.0,35.82,861798.0,2.91,45.8075
50%,1960.0,2915.5,53.91,3561738.0,5.392,157.185
75%,1990.0,7885.25,68.5325,10549410.0,6.5,328.5325
max,2020.0,178635.0,85.29,1439324000.0,9.223,756.29


In [7]:
# Per-column plotting metadata; the first CSV column (the column name) becomes the index
# so that values can be looked up with df_info.loc[<column>, <property>].
df_info = pd.read_csv('Gapminder-info.csv', index_col=0)

In [8]:
# Show the whole metadata table: axis range (Min/Max), midpoint, log-scale flag and label text.
df_info

Unnamed: 0,Min,Max,Mid,LogScale,Meaning
Year,1900,2020,1960,False,
Income,300,180000,8000,True,"Per person (GDP/capita, PPP$ inflation-adjusted)"
Life expectancy,0,90,45,False,Years
Population,15000,2000000000,5000000,True,Total
Fertility,0,10,5,False,Babies per woman
Child mortality,1,800,40,True,0-5 year-olds dying per 1000 born


In [9]:
# Within each year, put the most populous countries first
# (presumably so the largest bubbles are drawn first and smaller ones stay visible on top).
df = df.sort_values(by=['Year', 'Population'], ascending=[True, False])

In [10]:
# Confirm the new ordering: earliest year first, largest population first.
df.head()

Unnamed: 0,Country,Region,Year,Income,Life expectancy,Population,Fertility,Child mortality
4114,China,Asia,1900,776.0,31.98,401579661.0,5.5,416.53
9025,India,Asia,1900,797.0,18.42,291979136.0,5.726,536.09
21335,United States,Americas,1900,6252.0,48.95,78763706.0,3.853,231.7
16381,Russia,Europe,1900,3087.0,30.75,64946671.0,7.36,409.33
7502,Germany,Europe,1900,6029.0,43.94,55185341.0,4.932,372.17


## Create the bubble chart

In [11]:
# Shared styling for every chart in this notebook.

# Fill colour per region (hex codes; presumably picked to match the Gapminder tool).
color_map = {
    'Asia': '#ff798e',
    'Europe': '#ffeb33',
    'Africa': '#33dded',
    'Americas': '#98ef33',
}

# Slightly transparent bubbles with a thin black outline.
marker_dict = {
    'opacity': 0.8,
    'line': {'color': 'black', 'width': 0.8},
}

# White plotting area with gray text.
layout_dict = {
    'plot_bgcolor': 'white',
    'font': {'color': 'dimgray'},
}

# Applied to both the x and the y axis.
axes_dict = {
    'gridcolor': 'lightgray',
    'showline': True,
    'linecolor': 'dimgray',
    'linewidth': 1,
    # Spike lines run from the hovered point to the axis; plotly leaves them off by default.
    'showspikes': True,
    'spikethickness': 1,
    'spikecolor': 'dimgray',
}

In [12]:
# Static bubble chart for a single year (2020), used to settle the styling.
fig = px.scatter(
    df[df['Year'] == 2020],
    x='Income',
    y='Life expectancy',
    log_x=True,
    size='Population',
    size_max=60,
    color='Region',
    color_discrete_map=color_map,
    hover_name='Country',
    # Switch every column off in the hover box so only the country name is shown.
    hover_data=dict.fromkeys(df.columns, False),
    title='Gapminder<br><sup>Data by gapminder.org, CC-BY license</sup>'
)
# Bubble appearance, then overall layout, then both axes (same settings for x and y).
fig.update_traces(marker=marker_dict)
fig.update_layout(layout_dict)
fig.update_xaxes(axes_dict).update_yaxes(axes_dict)
fig.show()

## Add animation

In [13]:
def background_year(year, xaxis, yaxis):
    """Return the properties of a text-only trace that prints `year` behind the bubbles.

    The label is anchored at the 'Mid' value that `df_info` lists for the
    `xaxis` and `yaxis` columns. A plain dict is returned (not a trace object),
    so it can be wrapped in `go.Scatter(...)` for `add_trace` or placed
    directly into a frame's data.
    """
    anchor_x = df_info.loc[xaxis, 'Mid']
    anchor_y = df_info.loc[yaxis, 'Mid']
    year_font = {'size': 200, 'color': 'lightgray'}
    return {
        'x': [anchor_x],
        'y': [anchor_y],
        'mode': 'text',
        'text': [str(year)],
        'showlegend': False,
        'textfont': year_font,
        'textposition': 'middle center',
    }

In [15]:
def gapminder_fig(xaxis='Income', yaxis='Life expectancy', start_year=2010):
    """Build the animated Gapminder bubble chart for a pair of indicators.

    Reads the notebook-level `df` (data), `df_info` (per-column metadata) and
    the shared styling dictionaries `color_map`, `marker_dict`, `layout_dict`
    and `axes_dict`.

    Parameters
    ----------
    xaxis, yaxis : str
        Columns of `df` to plot. Each must also be a row label of `df_info`,
        which supplies the axis range ('Min'/'Max'), the log-scale flag
        ('LogScale'), the background-year anchor ('Mid') and the axis
        annotation text ('Meaning').
    start_year : int, default 2010
        First year included in the animation; rows with an earlier 'Year'
        are dropped.

    Returns
    -------
    The plotly figure, with one animation frame per year.

    Raises
    ------
    ValueError
        If no rows of `df` have 'Year' >= `start_year`.
    KeyError
        If `xaxis` or `yaxis` is not a row label of `df_info`.
    """
    data = df.query('Year >= @start_year')
    if data.empty:
        raise ValueError(f'No data available from year {start_year} onwards')

    fig = px.scatter(
        data,
        x=xaxis,
        y=yaxis,
        color='Region',
        size='Population',
        hover_name='Country',
        color_discrete_map=color_map,
        size_max=60,
        # Cast to a plain bool: the lookup returns whatever scalar type pandas stored.
        log_x=bool(df_info.loc[xaxis, 'LogScale']),
        log_y=bool(df_info.loc[yaxis, 'LogScale']),
        # Switch every column off in the hover box so only the country name is shown.
        hover_data={c: False for c in df.columns},
        title='Gapminder<br><sup>Data by gapminder.org, CC-BY license</sup>',
        animation_frame='Year',
        animation_group='Country',
        # Fixed ranges keep the axes still while the animation plays.
        range_x=[df_info.loc[xaxis, 'Min'], df_info.loc[xaxis, 'Max']],
        range_y=[df_info.loc[yaxis, 'Min'], df_info.loc[yaxis, 'Max']]
    )

    # Adjust markers
    fig.update_traces(marker=marker_dict)
    # Adjust figure layout
    fig.update_layout(layout_dict)
    # Adjust axes/grid
    fig.update_xaxes(axes_dict)
    fig.update_yaxes(axes_dict)

    # Show the year in the background.
    # It is added to fig.data as well, so the year is visible before Play is clicked.
    frame_year = fig.frames[0].name
    fig.add_trace(go.Scatter(background_year(frame_year, xaxis, yaxis)))
    # Move the new trace to the front of the tuple so it is drawn underneath the bubbles.
    fig.data = (fig.data[-1], ) + fig.data[:-1]
    # Prepend the matching year label to every animation frame.
    for frame in fig.frames:
        frame.data = (background_year(frame.name, xaxis, yaxis), ) + frame.data
    fig.update(frames=fig.frames)

    # Annotate each axis with the meaning of its indicator.
    # 'x domain' / 'y domain' anchor the text to the plot area, so it does not move when zooming.
    axis_labels = (
        (xaxis, dict(x=1, y=0, align='right')),
        (yaxis, dict(x=0, y=1, valign='top', textangle=-90)),
    )
    for axis, placement in axis_labels:
        meaning = df_info.loc[axis, 'Meaning']
        # Some columns (e.g. 'Year') have no meaning text; skip them rather than
        # handing a missing value (NaN) to the annotation.
        if pd.isna(meaning):
            continue
        fig.add_annotation(
            text=meaning,
            xref='x domain', yref='y domain',
            showarrow=False,
            **placement
        )

    return fig
    
# Render the animated chart for the default pair of indicators.
fig = gapminder_fig(xaxis='Income', yaxis='Life expectancy')
fig.show()

# Use Dash 
Let the user dynamically choose the x and y variables from drop-down menus.