### World Income Inequality Data Visualization - Project 3 Group 5
#### Group Members
- Deogratius Nteza
- Xuan (Sam) Chen
- Travis Jones
- Gifty Appiagyei
- Yao Xiao

# Data Sources
- UNU-WIDER, World Income Inequality Database (WIID). Version 28 November 2023. https://doi.org/10.35188/UNU-WIDER/WIID-281123
The World Income Inequality Database (WIID) presents information on income inequality for most countries and historical entities. It provides the most comprehensive set of income inequality statistics available and can be downloaded for free.

- Human Development Index (HDI) https://hdr.undp.org/data-center/human-development-index#/indicies/HDI
The Human Development Index (HDI) is a summary measure of average achievement in key dimensions of human development: a long and healthy life, being knowledgeable and having a decent standard of living. The HDI is the geometric mean of normalized indices for each of the three dimensions.

**Note:** We chose to look at data for 2018 and 2022 specifically to detect what impact COVID-19 restrictions had.

In [2]:
# Import the dependencies.
from pathlib import Path
from sqlalchemy import create_engine, MetaData, Table, select
import pandas as pd
import json
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
# Location of the SQLite database, assembled from its folder and file name.
database_path = Path("Resources") / "data.sqlite"

In [3]:
# Build the SQLAlchemy engine for the SQLite file first, then open the
# connection that the reflection and query cells below share.
engine = create_engine(f"sqlite:///{database_path}")
conn = engine.connect()

In [4]:
# Start from an empty MetaData catalogue and fill it with the table
# definitions found through the open connection.
metadata = MetaData()
metadata.reflect(conn)

In [5]:
# Show the names of the tables that reflection discovered
tables = list(metadata.tables)
print(tables)

['human_development_index', 'income_inequality_index']


In [6]:
# Table objects for the two datasets (HDI and income inequality)
hdi = Table('human_development_index', metadata, autoload_with=conn)
iii = Table('income_inequality_index', metadata, autoload_with=conn)

# Rows from the two tables are paired on their shared country code
join_condition = hdi.c.ctr_code == iii.c.ctr_code
joined_tables = hdi.join(iii, join_condition)

# Select every column of both tables from the joined source
join_stmt = select(hdi, iii).select_from(joined_tables)


In [7]:
# Run the join and load every row into a pandas DataFrame.
# `conn` is used as a context manager, so SQLAlchemy closes it when the block
# exits; re-run the connection cell before executing this cell a second time.
with conn:
    result = conn.execute(join_stmt)
    column_names = result.keys()
    df = pd.DataFrame(result.fetchall(), columns=column_names)

# Preview the first rows of the joined data
df.head()


Unnamed: 0,ctr_code,country,hdicode,hdi_rank_2022,hdi_2018,hdi_2022,country_1,ctr_code_1,region_wb,incomegroup,gdp_2018,gdp_2022,gini_std_2018,gini_std_2022,population_2018,population_2022
0,ALB,Albania,High,74.0,0.797,0.789,Albania,ALB,Europe and Central Asia,Upper middle income,13317.11914,13278.37012,36.869999,35.220001,2877013.0,2866849.0
1,ARG,Argentina,Very High,48.0,0.852,0.849,Argentina,ARG,Latin America and the Caribbean,Upper middle income,22747.24219,22447.08789,39.099998,37.799999,44413596.0,45276780.0
2,ARM,Armenia,High,76.0,0.781,0.786,Armenia,ARM,Europe and Central Asia,Upper middle income,13231.43066,14193.11719,36.18,31.32,2836557.0,2790974.0
3,AUS,Australia,Very High,10.0,0.941,0.946,Australia,AUS,East Asia and the Pacific,High income,49052.81641,,34.41,,24979230.0,
4,AUT,Austria,Very High,22.0,0.917,0.926,Austria,AUT,Europe and Central Asia,High income,55217.28516,56280.50781,30.01,30.700001,8840513.0,8922082.0


In [8]:
# Drop the duplicated country_1 and ctr_code_1 columns produced by the join.
# A new DataFrame is assigned instead of mutating in place, and
# errors='ignore' lets the cell be re-run after the columns are already gone
# (the previous in-place drop raised KeyError on a second run).
df = df.drop(columns=['country_1', 'ctr_code_1'], errors='ignore')
df.head()

Unnamed: 0,ctr_code,country,hdicode,hdi_rank_2022,hdi_2018,hdi_2022,region_wb,incomegroup,gdp_2018,gdp_2022,gini_std_2018,gini_std_2022,population_2018,population_2022
0,ALB,Albania,High,74.0,0.797,0.789,Europe and Central Asia,Upper middle income,13317.11914,13278.37012,36.869999,35.220001,2877013.0,2866849.0
1,ARG,Argentina,Very High,48.0,0.852,0.849,Latin America and the Caribbean,Upper middle income,22747.24219,22447.08789,39.099998,37.799999,44413596.0,45276780.0
2,ARM,Armenia,High,76.0,0.781,0.786,Europe and Central Asia,Upper middle income,13231.43066,14193.11719,36.18,31.32,2836557.0,2790974.0
3,AUS,Australia,Very High,10.0,0.941,0.946,East Asia and the Pacific,High income,49052.81641,,34.41,,24979230.0,
4,AUT,Austria,Very High,22.0,0.917,0.926,Europe and Central Asia,High income,55217.28516,56280.50781,30.01,30.700001,8840513.0,8922082.0


In [9]:
# Write the joined data to disk as JSON records for the Leaflet map.
# lines=True emits one JSON object per line (newline-delimited JSON) rather
# than a single JSON array.
# NOTE(review): confirm the Leaflet loader expects that line-delimited layout.
json_file_path = Path("Resources") / "data.json"

df.to_json(path_or_buf=json_file_path, orient="records", lines=True)

Choropleth Map using Plotly

In [10]:
# Load the GeoJSON country boundaries.
# The context manager closes the file handle as soon as parsing finishes, and
# the explicit UTF-8 encoding keeps non-ASCII country names intact regardless
# of the platform's default encoding.
with open("Resources/world_countries.geojson", encoding="utf-8") as geojson_file:
    countries_geojson = json.load(geojson_file)
# Example for inspecting a single feature while debugging:
# countries_geojson['features'][1]['properties']

# Work on a copy so the joined DataFrame itself stays unchanged
df_copy = df.copy()

In [11]:
# Give every GeoJSON feature a top-level 'id' copied from its cartodb_id
# property; the map traces later pass these ids as `locations`.
features = countries_geojson['features']
for feature in features:
    feature['id'] = feature['properties']['cartodb_id']

# Lookup table: country code (adm0_a3_us property) -> feature id
country_id_map = {
    feature['properties']['adm0_a3_us']: feature['id'] for feature in features
}

# Uncomment to inspect the lookup table
# country_id_map


In [12]:
# Check up front that every country code in the data has a GeoJSON feature.
# All unmatched codes are reported together instead of failing on the first
# one with a bare KeyError.
unmatched_codes = (
    df_copy.loc[~df_copy['ctr_code'].isin(list(country_id_map)), 'ctr_code']
    .unique()
    .tolist()
)
if unmatched_codes:
    raise KeyError(f"No GeoJSON feature found for country codes: {unmatched_codes}")

# Add the GeoJSON feature id for each country (vectorised dictionary lookup)
df_copy['id'] = df_copy['ctr_code'].map(country_id_map)

# Keep a second, still-numeric copy; it is used later to compute differences
df_copy2 = df_copy.copy()

In [13]:
def format_number(value, decimals):
    """Format a number for hover text with thousands separators.

    Parameters
    ----------
    value : float, str or missing
        The cell value to format.
    decimals : int
        Number of digits to keep after the decimal point.

    Returns
    -------
    str
        The formatted number; "N/A" for missing values (instead of the
        literal "nan"); the input unchanged when it is already a string, so
        re-running this cell does not fail on already-formatted columns.
    """
    if isinstance(value, str):
        return value
    if pd.isna(value):
        return "N/A"
    return f"{value:,.{decimals}f}"


# Columns shown in the map hover text, with the decimals each one keeps
display_decimals = {
    'population_2018': 0,
    'population_2022': 0,
    'gdp_2018': 2,
    'gdp_2022': 2,
    'gini_std_2018': 2,
    'gini_std_2022': 2,
}

# Change the formatting of the data for the plotly map
for column, decimals in display_decimals.items():
    df_copy[column] = df_copy[column].apply(format_number, args=(decimals,))
# df_copy.head()

In [14]:
# Use the 2nd (still numeric) copy of the dataframe made earlier for the work

def format_difference(value):
    """Format a numeric difference with thousands separators and two decimals.

    Returns "N/A" when the value is missing, i.e. when one of the two years
    has no data, instead of the literal string "nan".
    """
    if pd.isna(value):
        return "N/A"
    return "{:,.2f}".format(value)


# Differences are computed as 2022 minus 2018.
# The HDI inputs carry three decimals, so rounding the difference to three
# decimals removes floating-point noise such as -0.008000000000000007 without
# losing information.
df_copy2['hdi_difference'] = (df_copy2['hdi_2022'] - df_copy2['hdi_2018']).round(3)
gdp_difference = (df_copy2['gdp_2022'] - df_copy2['gdp_2018']).round(2)
gini_std_difference = (df_copy2['gini_std_2022'] - df_copy2['gini_std_2018']).round(2)

# Store the GDP and Gini differences as display strings for the hover text
df_copy2['gdp_difference'] = gdp_difference.apply(format_difference)
df_copy2['gini_std_difference'] = gini_std_difference.apply(format_difference)
# df_copy2.head()

In [15]:
# Semi-transparent colorscale: each stop pairs a position on the 0-1 scale
# with an RGB colour, rendered below as an rgba() string with alpha 0.5.
_colorscale_stops = [
    (0, (68, 1, 84)),
    (0.25, (59, 82, 139)),
    (0.5, (33, 145, 140)),
    (0.75, (94, 201, 98)),
    (1, (253, 231, 37)),
]
custom_colorscale = [
    [position, f'rgba({red}, {green}, {blue}, 0.5)']
    for position, (red, green, blue) in _colorscale_stops
]

def add_hdi_trace(figure, frame, z_column, hovertemplate, customdata_columns,
                  colorbar_title, visible):
    """Add one choropleth layer to ``figure``.

    Parameters
    ----------
    figure : go.Figure
        Figure that receives the new trace.
    frame : pandas.DataFrame
        Data for the layer; must contain 'id', 'country', ``z_column`` and
        every column named in ``customdata_columns``.
    z_column : str
        Column whose values colour the countries.
    hovertemplate : str
        Plotly hover template; ``%{customdata[i]}`` refers to the i-th entry
        of ``customdata_columns``.
    customdata_columns : list of str
        Columns exposed to the hover template, in order.
    colorbar_title : str
        Title shown on the layer's colorbar.
    visible : bool
        Whether the layer is shown when the figure first renders.
    """
    figure.add_trace(
        go.Choroplethmapbox(
            geojson=countries_geojson,
            locations=frame['id'],
            z=frame[z_column],
            colorscale=custom_colorscale,
            text=frame['country'],
            # hovertemplate overrides hoverinfo, so hoverinfo is not set
            hovertemplate=hovertemplate,
            customdata=np.stack(
                [frame[column] for column in customdata_columns], axis=-1
            ),
            visible=visible,
            colorbar=dict(title=colorbar_title),
        )
    )


# Create the choropleth maps as traces
fig = go.Figure()

# Trace 1: Choropleth map for 2018 data (the only trace visible by default).
# %{z:.3f} prints the HDI value with three decimals.
add_hdi_trace(
    fig,
    df_copy,
    z_column='hdi_2018',
    hovertemplate="<b>%{text}</b><br>HDI 2018: %{z:.3f}<br>Gini std: %{customdata[0]}<br>Population: %{customdata[1]}<br>GDP: %{customdata[2]}<extra></extra>",
    customdata_columns=['gini_std_2018', 'population_2018', 'gdp_2018'],
    colorbar_title="HDI 2018",
    visible=True,
)

# Trace 2: Choropleth map for 2022 data (hidden until selected)
add_hdi_trace(
    fig,
    df_copy,
    z_column='hdi_2022',
    hovertemplate="<b>%{text}</b><br>HDI 2022: %{z:.3f}<br>Gini std: %{customdata[0]}<br>Population: %{customdata[1]}<br>GDP: %{customdata[2]}<extra></extra>",
    customdata_columns=['gini_std_2022', 'population_2022', 'gdp_2022'],
    colorbar_title="HDI 2022",
    visible=False,
)

# Trace 3: Choropleth map for the 2018-to-2022 difference data (hidden until
# selected). The three-decimal format keeps floating-point noise out of the
# hover text.
add_hdi_trace(
    fig,
    df_copy2,
    z_column='hdi_difference',
    hovertemplate="<b>%{text}</b><br>HDI Difference: %{z:.3f}<br>GDP Difference: %{customdata[0]}<br>Gini std Difference: %{customdata[1]}<extra></extra>",
    customdata_columns=['gdp_difference', 'gini_std_difference'],
    colorbar_title="HDI Difference (2018-2022)",
    visible=False,
)

# Base-map settings, figure margins and title
fig.update_layout(
    mapbox=dict(
        style="open-street-map",
        center=dict(lat=40.237552, lon=-23.923692),
        zoom=1.5,
    ),
    # No margins on any side
    margin=dict(r=0, t=0, l=0, b=0),
    title=dict(
        text="Global HDI and Changes (2018-2022)",
        # Centered horizontally, anchored by its top edge slightly below the top
        x=0.5,
        y=0.95,
        xanchor='center',
        yanchor='top',
    ),
)

# One dropdown entry per trace, in the order the traces were added.
# Each button shows exactly one trace and hides the other two.
view_labels = ["HDI 2018", "HDI 2022", "HDI Difference (2018-2022)"]
view_buttons = [
    dict(
        args=[{"visible": [position == shown for position in range(len(view_labels))]}],
        label=label,
        method="update",
    )
    for shown, label in enumerate(view_labels)
]

fig.update_layout(
    updatemenus=[
        dict(
            buttons=view_buttons,
            direction="down",
            pad={"r": 10, "t": 10},
            showactive=True,
            # Horizontal position, measured from the dropdown's left edge
            x=0.05,
            xanchor="left",
            # Vertical position, measured from the dropdown's bottom edge
            y=1.05,
            yanchor="bottom",
        ),
    ]
)

# Uncomment to render the figure inline instead of only exporting it
# fig.show()
fig.write_html("hdi_choropleth_map.html")

Sources:

Geojson data from: https://rtr.carto.com/tables/world_countries_geojson/public/map

Mapbox Layers from: https://plotly.com/python/mapbox-layers/