In [1]:
import pandas as pd
import numpy as np
import math
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import warnings

In [2]:
# read data
df_income = pd.read_csv('../data/world-banks-income-groups.csv')
df_emission = pd.read_csv('../data/ghg-emissions-by-sector.csv')
df_emission_percapita = pd.read_csv('../data/per-capita-ghg-emissions.csv')
df_pop = pd.read_csv('../data/population-and-demography.csv')

# modify income data: keep the merge keys plus the classification column,
# and give the classification a short name
df_income = (
    df_income
    .loc[:, ['Entity','Year','Income classifications (World Bank (2021))']]
    .rename(columns={'Income classifications (World Bank (2021))': 'Income_clf'})
)

# modify emission data
# Columns 3..13 are treated as the per-sector values (positions taken from the
# original slice `iloc[:,3:14]`). They are divided by 10**6 and then summed
# into a single `Sum` column per row.
# Whole columns are replaced by label rather than written through `iloc`, so
# the result is float regardless of the dtype the CSV was parsed with.
sector_cols = df_emission.columns[3:14]
df_emission[sector_cols] = df_emission[sector_cols] / (10.00**6)
df_emission['Sum'] = df_emission[sector_cols].sum(axis=1)
df_emission = df_emission.drop(columns='Code')

# modify emission per capita data
# NOTE(review): unlike the sector columns above, this value is NOT rescaled.
df_emission_percapita = (
    df_emission_percapita
    .drop(columns='Code')
    .rename(columns={'Greenhouse gas emissions per person': 'Emission_percapita'})
)

# modify population data
# Take an explicit copy of the first three columns before adding/overwriting a
# column, so we never assign into a slice of the frame that was just loaded.
df_pop = df_pop.iloc[:,0:3].copy()
df_pop['Population'] = df_pop['Population']/(10**6)
df_pop = df_pop.rename(columns={'Country name':'Entity'})


# data frame merge
# Inner joins (pandas default) on (Entity, Year): only rows present in all four
# tables survive.
merge_keys = ['Entity','Year']
df = df_emission
for other in (df_income, df_pop, df_emission_percapita):
    df = pd.merge(df, other, on=merge_keys)

# other data
# `unique()` returns values in order of first appearance, which depends on how
# the merged rows happen to be ordered. The slider steps and animation frames
# are built by iterating `years`, so sort to guarantee chronological order.
years = sorted(df.Year.unique().tolist())

# Income levels are listed by hand (instead of `df.Income_clf.unique()`)
# because this list is also used as the category order of the bar chart and as
# the order of the manual legend.
incomes = ['High income','Upper-middle income','Lower-middle income','Low income','Not categorized']
colors = ['#d53e4f','#fc8d59','#e6f598','#99d594','#3288bd']

# income level -> colour, pairing the two lists above position by position
color_mapper = dict(zip(incomes, colors))


In [4]:
# define data for scatter plot and bar plot
# scatter: one row per (Entity, Year) with the columns the markers and hover text need
scatter_columns = ['Year','Sum','Entity','Emission_percapita','Population','Income_clf']
scatter_df = df.loc[:, scatter_columns]

# bar: total `Sum` per income group and year
bar_df = (
    df.groupby(['Income_clf','Year'])[['Sum']]
    .sum()
    .reset_index()
)

# make subplots: two stacked panels in a single column,
# the top one taking 75% of the height and the bottom one 25%
panel_titles = [
    'CO₂ Emissions in Countries and Income Groups',
    'CO₂ Emissions in Different Income Groups',
]
fig = make_subplots(
    rows=2,
    cols=1,
    row_heights=[0.75, 0.25],
    subplot_titles=panel_titles,
)

# initialize data (year = 1990)
# Filter once instead of re-evaluating the same boolean mask for every attribute.
scatter_1990 = scatter_df[scatter_df['Year'] == 1990]

scatter = go.Scatter(
    x = scatter_1990['Sum'],
    y = scatter_1990['Emission_percapita'],
    # customdata[0] = population, customdata[1] = income group (used in the hover text)
    customdata = [list(pair) for pair in zip(scatter_1990['Population'], scatter_1990['Income_clf'])],
    text = scatter_1990['Entity'],
    name = 'Scatter Plot',
    showlegend=False,
    mode = 'markers',
    marker = dict(
        # square root keeps the largest populations from dwarfing the rest
        size = scatter_1990['Population']**0.5,
        #sizeref = 12,
        color = scatter_1990['Income_clf'].map(color_mapper)
    ),
    # The Emission line now carries the ' million tons' unit, matching the
    # hover text of the animation frames; previously the unit vanished only
    # in the initial (pre-animation) state.
    # NOTE(review): the per-capita column is not rescaled in the data-prep
    # cell, so the 'million tons' unit on that line looks suspicious - confirm
    # against the source data before changing it here and in the frames.
    hovertemplate = '<br>Country: %{text}'+
                    '<br>Emission: %{x} million tons'+
                    '<br>Emission per capita: %{y} million tons'+
                    '<br>Income group: %{customdata[1]}'+
                    '<br>Population: %{customdata[0]} million'
)

# initialize data (year = 1990)
# rows of the per-income-group totals that belong to the starting year
bar_1990 = bar_df[bar_df['Year'] == 1990]
bar_hover = '<br>Income group: %{y}' + '<br>Total emission: %{x} million tons'

bar = go.Bar(
    x=bar_1990['Sum'],
    y=bar_1990['Income_clf'],
    width=0.7,
    name='Bar Plot',
    showlegend=False,
    orientation='h',  # horizontal bars: one per income group
    marker=dict(color=bar_1990['Income_clf'].map(color_mapper)),
    hovertemplate=bar_hover,
)

# add to figure: scatter in the top panel, bar in the bottom panel
fig.add_trace(scatter, row=1, col=1)
fig.add_trace(bar, row=2, col=1)

# modify axes
# Top panel: both axes are logarithmic with one-significant-digit tick labels.
# On a log axis `range` is given as powers of ten, so [1, 5] spans
# 10^1 = 10 to 10^5 = 100000.
log_axis = dict(type='log', tickformat='.1r')
fig.update_xaxes(range=[1, 5], row=1, col=1, **log_axis)
fig.update_yaxes(row=1, col=1, **log_axis)

# Bottom panel: fixed x range so bar lengths stay comparable between frames,
# and income groups shown in the order given by `incomes`.
fig.update_xaxes(range=[0, 25000], row=2, col=1)
fig.update_yaxes(
    categoryorder='array',
    categoryarray=incomes,
    row=2,
    col=1,
)

# define an update menu (Play / Pause buttons) for the animation
# Play: animate through all frames (target None), continuing from the current one.
play_button = {
    'label': '► Play',
    'method': 'animate',
    'args': [
        None,
        {
            'frame': {'duration': 450, 'redraw': True},
            'fromcurrent': True,
            'transition': {'duration': 0},
        },
    ],
}

# Pause: animating to [None] in immediate mode interrupts the running animation.
pause_button = {
    'label': '❚❚ Pause',
    'method': 'animate',
    'args': [
        [None],
        {
            'frame': {'duration': 0, 'redraw': False},
            'mode': 'immediate',
            'transition': {'duration': 0},
        },
    ],
}

updatemenus = {
    'type': 'buttons',
    'buttons': [play_button, pause_button],
    'showactive': False,
    'direction': 'right',
    'x': 0.05,
    'y': -0.2,
}

# define a slider with one step per year
# Each step animates to the frame of that year. Frames are named with
# `str(year)`, so the step target and label use the same string instead of
# relying on an int being coerced to the frame name.
sliders = {
    "active": 0,
    "yanchor": "top",
    "xanchor": "left",
    "currentvalue": {
        "font": {"size": 12},
        "prefix": "Year:",
        "visible": True,
        "xanchor": "left"
    },
    "transition": {"duration": 300, "easing": "cubic-in-out"},
    "pad": {"b": 10, "t": 50},
    "len": 0.9,
    "x": 0.1,
    "y": 0,
    "steps": [
        {
            "args": [
                [str(year)],
                {"frame": {"duration": 300, "redraw": False},
                 "mode": "immediate",
                 "transition": {"duration": 300}}
            ],
            "label": str(year),
            "method": "animate",
        }
        for year in years
    ],
}

# add update menus and sliders to fig
fig.update_layout(updatemenus = [updatemenus],sliders = [sliders])

# add frames based on year
def _traces_for_year(year):
    """Return the [scatter, bar] trace pair holding the data of a single year."""
    points = scatter_df[scatter_df['Year'] == year]
    groups = bar_df[bar_df['Year'] == year]

    scatter_trace = go.Scatter(
        x=points['Sum'],
        y=points['Emission_percapita'],
        # customdata[0] = population, customdata[1] = income group
        customdata=[list(pair) for pair in zip(points['Population'], points['Income_clf'])],
        text=points['Entity'],
        name='Scatter Plot',
        showlegend=False,
        mode='markers',
        marker=dict(
            size=points['Population']**0.5,
            #sizeref = 12,
            color=points['Income_clf'].map(color_mapper),
        ),
        hovertemplate='<br>Country: %{text}'
                      + '<br>Emission: %{x} million tons'
                      + '<br>Emission per capita: %{y} million tons'
                      + '<br>Income group: %{customdata[1]}'
                      + '<br>Population: %{customdata[0]} million',
    )

    bar_trace = go.Bar(
        x=groups['Sum'],
        y=groups['Income_clf'],
        name='Bar Plot',
        showlegend=False,
        orientation='h',
        marker=dict(color=groups['Income_clf'].map(color_mapper)),
        hovertemplate='<br>Income group: %{y}'
                      + '<br>Total emission: %{x} million tons',
    )

    # order matters: frame data is matched to the figure's traces by position
    return [scatter_trace, bar_trace]


# one frame per year, named after the year
frames = [go.Frame(data=_traces_for_year(year), name=str(year)) for year in years]

# add frames into figure
fig.update(frames = frames)

# change figure size, template and title
# (title typo fixed: 'Nnequalities' -> 'Inequalities')
fig.update_layout(
    width=900,
    height=700,
    template='plotly_white',
    title=dict(text='Global Inequalities In CO₂ Emissions', font=dict(size=22)),
)


# add labels to each axis (xaxis/yaxis = top panel, xaxis2/yaxis2 = bottom panel)
fig['layout']['xaxis']['title']='Annual CO₂ Emissions (million t)'
fig['layout']['xaxis2']['title']='Sum of Greenhouse Gas Emission (million t)'
fig['layout']['yaxis']['title']='Emission per Capita (million t)'
fig['layout']['yaxis2']['title']='Income Groups'

# change subplot titles font size
# At this point the only annotations on the figure are the subplot titles
# created by make_subplots; this must run before any further annotations are
# added or they would be resized as well.
for annotation in fig['layout']['annotations']:
    annotation['font']['size'] = 15

# flip the bottom panel's category axis so the first income group is on top
fig.update_layout(yaxis2=dict(autorange="reversed"))

# change axis labels font size
fig.for_each_xaxis(lambda axis: axis.title.update(font=dict(size=12)))
fig.for_each_yaxis(lambda axis: axis.title.update(font=dict(size=12)))


# manually add legend for income groups
# Each legend row is two annotations in paper coordinates: the group name and,
# to its left, a blank text box whose background is the group's colour.
legend_top = 1
offset = 0
anchor = dict(xref='paper', yref='paper', showarrow=False, xanchor='left')

for income in incomes:
    row_y = legend_top + offset
    fig.add_annotation(text=income, x=0.9, y=row_y, **anchor)
    fig.add_annotation(text='    ', x=0.86, y=row_y, bgcolor=color_mapper[income], **anchor)
    # move down for the next row
    offset = offset - 0.05


fig.show()


In [6]:
# save plot to html file; auto_play=False keeps the animation from starting
# by itself when the page is opened
output_path = "./scatter_income_group.html"
fig.write_html(output_path, auto_play=False)