# Chartbook update 2021: Checking final series against sources and previous version

## Set-up

In [47]:
import pandas as pd
import ipywidgets as widgets
from plotnine import ggplot, geom_point, aes, geom_line
import plotly.graph_objects as go
#from plotly.colors import n_colors
#import numpy as np

In [3]:
# CSV export endpoint of the chartbook Google Sheet, restricted to the "All data" tab
url = (
    "https://docs.google.com/spreadsheets/d/"
    "1jLNfP3iuteUJrH0zS9qWONskyKh9pFcl1hKSlgEc-I8"
    "/gviz/tq?tqx=out:csv&sheet=All+data"
)

In [4]:
# Read the CSV served at `url` into a DataFrame (needs network access to the sheet)
all_data = pd.read_csv(url)
# Preview the first rows to check the load and the column layout
all_data.head()

Unnamed: 0,country,year,value,series_code,dimension,measure,welfare_concept,short_reference,long_reference,reference url,preferred_definition,description,source_codes_used,legend
0,Argentina,1953,40.0,S1,Overall Income Inequality,Gini coefficient,Household income,Altimir (1986),Altimir (1986) Cuadro 7,https://www.jstor.org/stable/3466844,,,,Gini coefficient - Household income (Altimir (...
1,Argentina,1959,44.7,S1,Overall Income Inequality,Gini coefficient,Household income,Altimir (1986),Altimir (1986) Cuadro 7,https://www.jstor.org/stable/3466844,,,,Gini coefficient - Household income (Altimir (...
2,Argentina,1961,41.9,S1,Overall Income Inequality,Gini coefficient,Household income,Altimir (1986),Altimir (1986) Cuadro 7,https://www.jstor.org/stable/3466844,,,,Gini coefficient - Household income (Altimir (...
3,Argentina,1972,35.3,S1,Overall Income Inequality,Gini coefficient,Household income,Altimir (1986),Altimir (1986) Cuadro 7,https://www.jstor.org/stable/3466844,,,,Gini coefficient - Household income (Altimir (...
4,Argentina,1980,37.6,S1,Overall Income Inequality,Gini coefficient,Household income,Altimir (1986),Altimir (1986) Cuadro 7,https://www.jstor.org/stable/3466844,,,,Gini coefficient - Household income (Altimir (...


In [5]:
# Legend text for the source comparison plots: "<short reference> - <welfare concept>"
all_data['source_legend'] = (
    all_data['short_reference'] + " - " + all_data['welfare_concept']
)


# Human-readable label for each series, used to populate the selection widgets:
# "<series code> - <measure> - <welfare concept>"
all_data['series_code_desc'] = (
    all_data['series_code']
    + " - " + all_data['measure']
    + " - " + all_data['welfare_concept']
)


## Compare sources and final series

Use the dropdown boxes to compare a final series with the sources used in its construction.

In [6]:

#Function to filter the data according to input widgets and print chart
def compare_source_chart(final_series):
    """Plot one final series against the source series used to construct it.

    The country is read from the global ``countryW`` dropdown at call time.
    The final series is drawn as a line; every source series listed in its
    ``source_codes_used`` field is drawn as points coloured by ``source_legend``.

    Parameters
    ----------
    final_series : str
        A value of ``all_data['series_code_desc']`` identifying the final series.

    Returns
    -------
    None
        The chart is drawn as a side effect.
    """
    Country = countryW.value

    country_data = all_data.loc[all_data['country'] == Country]

    # Build every mask from country_data itself rather than from all_data, so the
    # filter does not depend on implicit index alignment between the two frames.
    final_series_df = country_data.loc[country_data['series_code_desc'] == final_series]

    if final_series_df.empty:
        # Nothing to plot, e.g. the dropdown has no selection for this country
        print("No data found for series {!r} in {}".format(final_series, Country))
        return

    # source_codes_used holds comma-separated series codes. Tolerate missing
    # values, several distinct values and whitespace around the codes instead of
    # requiring exactly one clean string.
    sources_used = {
        code.strip()
        for code_list in final_series_df['source_codes_used'].dropna().astype(str)
        for code in code_list.split(",")
    }
    sources_used.discard("")

    source_series_df = country_data.loc[country_data['series_code'].isin(sources_used)]

    p = ggplot()
    if not source_series_df.empty:
        # Only add the points layer when there is something to draw
        p = p + geom_point(source_series_df, aes(x = "year", y = "value", color = "source_legend"))
    p = p + geom_line(final_series_df, aes(x = "year", y = "value"))

    p.draw()
   
    
#Function to refresh the 'final_series' dropdown whenever a country is selected
def select_source_code(Country):
    """Set the final-series dropdown options to the final series of ``Country``.

    Final series are the rows of ``all_data`` whose ``series_code`` starts with 'F'.
    """
    in_country = all_data['country'] == Country
    is_final = all_data['series_code'].str.startswith('F')
    descriptions = all_data.loc[in_country & is_final, 'series_code_desc']
    final_seriesW.options = descriptions.unique().tolist()

# Country dropdown listing every country present in the data
countryW = widgets.Dropdown(options=all_data.country.unique().tolist())

# Country selected when the dropdown is first created
init = countryW.value

# Final series dropdown, seeded with the final series ('F' codes) of the initial
# country; select_source_code replaces these options when the country changes
final_seriesW = widgets.Dropdown(
    options=all_data.loc[
        (all_data['country'] == init) & (all_data['series_code'].str.startswith('F'))
    ].series_code_desc.unique().tolist()
)

# Hook the callbacks up to their widgets (chart first, then the country selector)
j = widgets.interactive(compare_source_chart, final_series=final_seriesW)
i = widgets.interactive(select_source_code, Country=countryW)

# Display widgets and chart
print("Compare sources and final series (final series in black)")
display(i, j)




Compare sources and final series (final series in black)


interactive(children=(Dropdown(description='Country', options=('Argentina', 'Australia', 'Brazil', 'Canada', '…

interactive(children=(Dropdown(description='final_series', options=('F1 - Gini coefficient - Equivalised house…

## Compare old and new final series

Use the dropdown boxes below to compare new and old final series – first as a table, then as a chart.

In [7]:
# Pull in the old chartbook data ("Old final series" tab of the same Google Sheet)

url = (
    "https://docs.google.com/spreadsheets/d/"
    "1jLNfP3iuteUJrH0zS9qWONskyKh9pFcl1hKSlgEc-I8"
    "/gviz/tq?tqx=out:csv&sheet=Old+final+series"
)

old_data = pd.read_csv(url)
old_data.head()


Unnamed: 0,country,year,dimension of inequality,measure of inequality,series,description,value
0,Argentina,1900,Poverty Measures,Poverty rate,1.0,Individuals below 50% median household per cap...,
1,Argentina,1900,Dispersion of Earnings,,,,
2,Argentina,1900,Top Income Shares,Top 1%,1.0,Share of top 1 per cent in gross income (indiv...,
3,Argentina,1900,Overall Income Inequality,Gini Coefficient,1.0,Household equivalised income,
4,Argentina,1900,Wealth Inequality,,,,


In [8]:
# Collapse metadata columns to aid selection

# The series number must be text before it can be concatenated into the label
old_data = old_data.assign(series=old_data['series'].astype('str'))

# Label format: "Old series: <measure> - <description> - <series number>"
old_data['series_code_desc'] = (
    "Old series: " + old_data['measure of inequality']
    + " - " + old_data['description']
    + " - " + old_data['series']
)

old_data.head()


Unnamed: 0,country,year,dimension of inequality,measure of inequality,series,description,value,series_code_desc
0,Argentina,1900,Poverty Measures,Poverty rate,1.0,Individuals below 50% median household per cap...,,Old series: Poverty rate - Individuals below 5...
1,Argentina,1900,Dispersion of Earnings,,,,,
2,Argentina,1900,Top Income Shares,Top 1%,1.0,Share of top 1 per cent in gross income (indiv...,,Old series: Top 1% - Share of top 1 per cent i...
3,Argentina,1900,Overall Income Inequality,Gini Coefficient,1.0,Household equivalised income,,Old series: Gini Coefficient - Household equiv...
4,Argentina,1900,Wealth Inequality,,,,,


In [9]:
# Amend series desc to make it clear which is a new series.
# Only rows that do not already carry the prefix are touched, so re-running this
# cell does not produce "New series: New series: ..." labels.
NEW_SERIES_PREFIX = "New series: "
needs_prefix = ~all_data['series_code_desc'].str.startswith(NEW_SERIES_PREFIX, na=False)
all_data.loc[needs_prefix, 'series_code_desc'] = (
    NEW_SERIES_PREFIX + all_data.loc[needs_prefix, 'series_code_desc']
)

In [53]:

# Columns needed for the old-vs-new comparison. A list, not a set: pandas no
# longer accepts a set as an indexer.
COMPARE_COLUMNS = ["year", "value", "series_code_desc"]


def _series_options(data, country, final_only=False):
    """Return the distinct, non-missing series descriptions of ``country`` in ``data``.

    With ``final_only=True`` only rows whose ``series_code`` starts with 'F' are kept.
    """
    rows = data['country'] == country
    if final_only:
        rows = rows & data['series_code'].str.startswith('F', na=False)
    return data.loc[rows, 'series_code_desc'].dropna().unique().tolist()


#Function to filter the data according to input widgets and print chart
def compare_chart(new_final_series, old_series_desc):
    """Show a new and an old final series side by side as a table and a line chart.

    The country is read from the ``compare_countryW`` dropdown, which belongs to
    this comparison only. The country dropdown of the source comparison above
    keeps its own name (``countryW``), so the two charts no longer share one
    global widget.

    Parameters
    ----------
    new_final_series : str
        A value of ``all_data['series_code_desc']``.
    old_series_desc : str
        A value of ``old_data['series_code_desc']``.

    Returns
    -------
    tuple
        ``(fig.show(), p.draw())``, as before; ``None`` when nothing is selected.
    """
    Country = compare_countryW.value

    if new_final_series is None or old_series_desc is None:
        # A dropdown without options has no value to compare
        print("Select both a new and an old series to compare")
        return None

    new_df = all_data.loc[
        (all_data['country'] == Country) & (all_data['series_code_desc'] == new_final_series),
        COMPARE_COLUMNS,
    ]

    old_df = old_data.loc[
        (old_data['country'] == Country) & (old_data['series_code_desc'] == old_series_desc),
        COMPARE_COLUMNS,
    ]

    # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent
    combined_df = pd.concat([new_df, old_df])

    # Reindexing guarantees both series columns exist (all-NaN if a series has no
    # rows for this country), so the diff below cannot raise a KeyError.
    combined_wide_df = (
        combined_df
        .pivot(index='year', columns='series_code_desc', values='value')
        .reindex(columns=[new_final_series, old_series_desc])
        .reset_index()
    )

    # Difference between new and old, as a percentage of the new value
    diff_pct = (
        (combined_wide_df[new_final_series] - combined_wide_df[old_series_desc])
        / combined_wide_df[new_final_series] * 100
    )
    combined_wide_df["diff %"] = diff_pct.map("{:,.2f}".format)

    fig = go.Figure(data=[go.Table(
        header=dict(values=list(combined_wide_df.columns),
                    align='left'),
        cells=dict(values=[combined_wide_df[col] for col in combined_wide_df.columns],
                   align='left'))
    ])

    p = (ggplot(combined_df, aes(x = "year", y = "value", color = "series_code_desc"))
         + geom_line())

    return fig.show(), p.draw()


#Function to update the options shown in both series widgets based on selected country
def update_widget_options(Country):
    """Refresh the new- and old-series dropdowns for the selected country."""
    new_final_seriesW.options = _series_options(all_data, Country, final_only=True)
    # The old-series list is country-specific too, so it is refreshed as well
    old_series_descW.options = _series_options(old_data, Country)


#Country widget (own name, so the `countryW` read by the source comparison is not rebound)
compare_countryW = widgets.Dropdown(options = all_data.country.unique().tolist())

#Grab the selected country
init = compare_countryW.value

# Final series widget (options update based on selected country)
new_final_seriesW = widgets.Dropdown(options = _series_options(all_data, init, final_only=True))

# Old series widget (options update based on selected country)
old_series_descW = widgets.Dropdown(options = _series_options(old_data, init))


# Initialise widgets (and chart)
j = widgets.interactive(compare_chart, new_final_series=new_final_seriesW, old_series_desc=old_series_descW)
i = widgets.interactive(update_widget_options, Country=compare_countryW)

# Display widgets and chart
print("Compare old and new final series")
display(i)
display(j)





Compare old and new final series


interactive(children=(Dropdown(description='Country', options=('Argentina', 'Australia', 'Brazil', 'Canada', '…

interactive(children=(Dropdown(description='new_final_series', options=('New series: F1 - Gini coefficient - E…