In [None]:
!pip install wbdata pandas
# Install necessary packages
!pip install wbdata pandas dash plotly

Collecting wbdata
  Downloading wbdata-1.0.0-py3-none-any.whl.metadata (2.6 kB)
Collecting appdirs<2.0,>=1.4 (from wbdata)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting backoff<3.0.0,>=2.2.1 (from wbdata)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting dateparser<2.0.0,>=1.2.0 (from wbdata)
  Downloading dateparser-1.2.0-py2.py3-none-any.whl.metadata (28 kB)
Collecting decorator<6.0.0,>=5.1.1 (from wbdata)
  Downloading decorator-5.1.1-py3-none-any.whl.metadata (4.0 kB)
Collecting shelved-cache<0.4.0,>=0.3.1 (from wbdata)
  Downloading shelved_cache-0.3.1-py3-none-any.whl.metadata (4.7 kB)
Collecting tabulate<0.9.0,>=0.8.5 (from wbdata)
  Downloading tabulate-0.8.10-py3-none-any.whl.metadata (25 kB)
Downloading wbdata-1.0.0-py3-none-any.whl (18 kB)
Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Downloading backoff-2.2.1-py3-none-any.whl (15 kB)
Downloading dateparser-1.2.0-py2.py3-none-any.whl (294 kB)
[2K   [90m━━━━━━

Collecting dash
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.2-py3-none-any.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m60.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Downloadi

Pulling data from the World Bank API. The key indicators include:

1. **Gini Index** (Income Inequality): Measures income inequality within countries.
2. **Health Expenditure per Capita**: Indicates health-related spending.
3. **Literacy Rate**: Shows the percentage of literate individuals in the population.
4. **GDP per Capita**: A measure of a country's economic performance.

Based on the indicators and the assignment's focus on storytelling and advocacy, here are potential questions:

In [None]:
import wbdata
import pandas as pd
import datetime

start_date = datetime.datetime(1960, 1, 1)
end_date = datetime.datetime(2024, 12, 31)

indicators = {
    "SI.POV.GINI": "Gini Index",               # Gini Index (Income Inequality)
    "SH.XPD.CHEX.PC.CD": "Health Expenditure", # Health Expenditure per Capita
    "SE.ADT.LITR.ZS": "Literacy Rate",         # Literacy Rate
    "NY.GDP.PCAP.CD": "GDP per Capita"         # GDP per Capita
}

data = wbdata.get_dataframe(indicators, date=(start_date, end_date))
data.reset_index(inplace=True)

print(data.count())


country               17024
date                  17024
Gini Index             2111
Health Expenditure     5166
Literacy Rate          2852
GDP per Capita        13979
dtype: int64


In [None]:
data_cleaned = data.dropna()

data_filled = data.fillna(method='ffill')

data_cleaned.to_csv("world_bank_data_cleaned.csv", index=False)
print("Data cleaned and saved successfully.")


Data cleaned and saved successfully.


  data_filled = data.fillna(method='ffill')


In [None]:
countries_of_interest = ["United States", "India", "China"]
data_filtered = data_cleaned[data_cleaned['country'].isin(countries_of_interest)]

print(data_filtered.head())

     country  date  Gini Index  Health Expenditure  Literacy Rate  \
5763   China  2020        37.1          583.432190           97.0   
5773   China  2010        43.7          189.343704           95.0   
8844   India  2011        35.4           48.000000           69.0   

      GDP per Capita  
5763    10408.719554  
5773     4550.473944  
8844     1449.603301  


In [7]:
# Install dependencies if not already installed
# pip install wbdata pandas plotly dash

import wbdata
import pandas as pd
import plotly.express as px
from datetime import datetime
from dash import Dash, dcc, html, Input, Output

# Fetch data from World Bank
indicators = {
    "SI.POV.GINI": "Gini_Index",        # Gini Index for income inequality
    "SH.XPD.CHEX.PC.CD": "Health_Expenditure"  # Current health expenditure per capita (USD)
}

# Set date range for data
start_date = datetime(1990, 1, 1)
end_date = datetime(2020, 12, 31)

# Retrieve data
data = wbdata.get_dataframe(indicators, date=(start_date, end_date))
data = data.reset_index()
data['Gini_Index'] = pd.to_numeric(data['Gini_Index'], errors='coerce')
data['Health_Expenditure'] = pd.to_numeric(data['Health_Expenditure'], errors='coerce')
data = data.dropna()
data = data.groupby('country')[['Gini_Index', 'Health_Expenditure']].mean().reset_index()

# Create Dash app
app = Dash(__name__)

# Layout of the app
app.layout = html.Div([
    html.H1("Interactive Treemap: Income Inequality and Health Metrics", style={'text-align': 'center'}),

    html.Div([
        html.Label("Filter by Gini Index Range:", style={'font-weight': 'bold'}),
        dcc.RangeSlider(
            id='gini-slider',
            min=data['Gini_Index'].min(),
            max=data['Gini_Index'].max(),
            step=1,
            marks={int(i): str(int(i)) for i in range(int(data['Gini_Index'].min()), int(data['Gini_Index'].max()) + 1, 5)},
            value=[data['Gini_Index'].min(), data['Gini_Index'].max()],
        )
    ], style={'width': '80%', 'margin': 'auto'}),

    html.Div([
        html.Label("Select Number of Countries to Display:", style={'font-weight': 'bold'}),
        dcc.Slider(
            id='country-slider',
            min=5,
            max=50,
            step=5,
            marks={i: str(i) for i in range(5, 55, 5)},
            value=20,  # Default value
        )
    ], style={'width': '50%', 'margin': 'auto', 'margin-top': '20px'}),

    dcc.Graph(id='treemap', style={'margin-top': '20px'})
])

# Callback to update treemap based on filters
@app.callback(
    Output('treemap', 'figure'),
    [Input('gini-slider', 'value'), Input('country-slider', 'value')]
)
def update_treemap(gini_range, top_n):
    # Filter data based on Gini Index range
    filtered_data = data[(data['Gini_Index'] >= gini_range[0]) & (data['Gini_Index'] <= gini_range[1])]

    # Select top N countries with highest Gini Index
    filtered_data = filtered_data.nlargest(top_n, 'Gini_Index')

    # Create the treemap
    fig = px.treemap(
        filtered_data,
        path=['country'],
        values='Gini_Index',
        color='Health_Expenditure',
        color_continuous_scale='RdYlGn',  # Red for low health expenditure, green for high
        title=f"Interactive Treemap: Top {top_n} Countries by Income Inequality and Health Metrics"
    )

    # Customize hover data
    fig.update_traces(
        hovertemplate="<b>%{label}</b><br>Income Inequality (Gini Index): %{value}<br>Health Expenditure: %{color:.2f} USD"
    )

    return fig

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True, use_reloader=False)


<IPython.core.display.Javascript object>