In [None]:
!pip install wbdata pandas
# Install necessary packages
!pip install wbdata pandas dash plotly

Collecting wbdata
  Using cached wbdata-1.0.0-py3-none-any.whl.metadata (2.6 kB)
Collecting backoff<3.0.0,>=2.2.1 (from wbdata)
  Using cached backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting dateparser<2.0.0,>=1.2.0 (from wbdata)
  Using cached dateparser-1.2.0-py2.py3-none-any.whl.metadata (28 kB)
Collecting decorator<6.0.0,>=5.1.1 (from wbdata)
  Using cached decorator-5.1.1-py3-none-any.whl.metadata (4.0 kB)
Using cached wbdata-1.0.0-py3-none-any.whl (18 kB)
Using cached backoff-2.2.1-py3-none-any.whl (15 kB)
Using cached dateparser-1.2.0-py2.py3-none-any.whl (294 kB)
Using cached decorator-5.1.1-py3-none-any.whl (9.1 kB)
Installing collected packages: decorator, backoff, dateparser, wbdata
  Attempting uninstall: decorator
    Found existing installation: decorator 4.4.2
    Uninstalling decorator-4.4.2:
      Successfully uninstalled decorator-4.4.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behav



This notebook pulls data from the World Bank API. The key indicators are:

1. **Gini Index** (Income Inequality): Measures income inequality within countries.
2. **Health Expenditure per Capita**: Indicates health-related spending.
3. **Literacy Rate**: Shows the percentage of literate individuals in the population.
4. **GDP per Capita**: A measure of a country's economic performance.

Based on the indicators and the assignment's focus on storytelling and advocacy, here are potential questions:


In [None]:
import wbdata
import pandas as pd
import datetime

# Query window passed to the World Bank API.
start_date = datetime.datetime(1960, 1, 1)
end_date = datetime.datetime(2024, 12, 31)

# World Bank indicator code -> column label in the resulting frame.
indicators = {
    # Gini Index (Income Inequality)
    "SI.POV.GINI": "Gini Index",
    # Health Expenditure per Capita
    "SH.XPD.CHEX.PC.CD": "Health Expenditure",
    # Literacy Rate
    "SE.ADT.LITR.ZS": "Literacy Rate",
    # GDP per Capita
    "NY.GDP.PCAP.CD": "GDP per Capita",
}

# Download the indicators and turn the index levels into ordinary columns.
data = wbdata.get_dataframe(indicators, date=(start_date, end_date)).reset_index()

# Non-null observations per column, to show how sparse each indicator is.
non_null_counts = data.count()
print(non_null_counts)


country               17024
date                  17024
Gini Index             2111
Health Expenditure     5166
Literacy Rate          2852
GDP per Capita        13979
dtype: int64


In [None]:
# Complete cases only: keep the rows where every indicator is present.
data_cleaned = data.dropna()

# Alternative to dropping rows: carry the previous row's value into each gap.
# DataFrame.ffill() replaces fillna(method='ffill'), which pandas has deprecated.
# NOTE(review): the fill runs straight down the frame without grouping by
# country, so a value may carry over from one country's rows into the next.
# Confirm this is intended before relying on data_filled.
data_filled = data.ffill()

# Persist the complete-case table for reuse outside the notebook.
data_cleaned.to_csv("world_bank_data_cleaned.csv", index=False)
print("Data cleaned and saved successfully.")


Data cleaned and saved successfully.


  data_filled = data.fillna(method='ffill')


In [None]:
# Narrow the complete-case rows down to the countries being compared.
countries_of_interest = ["United States", "India", "China"]
is_of_interest = data_cleaned['country'].isin(countries_of_interest)
data_filtered = data_cleaned[is_of_interest]

print(data_filtered.head())

     country  date  Gini Index  Health Expenditure  Literacy Rate  \
5763   China  2020        37.1          583.432190           97.0   
5773   China  2010        43.7          189.343704           95.0   
8844   India  2011        35.4           48.000000           69.0   

      GDP per Capita  
5763    10408.719554  
5773     4550.473944  
8844     1449.603301  


1.) Choropleth Map for Global Income Inequality:
This map shows the Gini Index by country, animated by year, highlighting regions with high and low income inequality.

In [None]:
import wbdata
import pandas as pd
import plotly.express as px

# Download every available Gini Index observation.
indicators = {"SI.POV.GINI": "Gini Index"}
data = wbdata.get_dataframe(indicators)

# Flatten the index into columns, keep only rows that have a Gini value,
# and reduce the date to an integer year.
data = data.reset_index()
data.columns = ["country", "date", "Gini Index"]
data = data.dropna()
data["date"] = pd.to_datetime(data["date"]).dt.year  # Convert date to year
data = data.sort_values(by="date")  # Oldest year first

# Coarse region label for a few highlighted countries. The keys must match the
# country names as they appear in the data ("United States", not "USA");
# a key that does not match silently falls through to "Other".
regions = {"Brazil": "South America", "India": "Asia", "United States": "North America"}
data["region"] = data["country"].map(regions).fillna("Other")

# World map coloured by Gini Index, with one animation frame per year.
fig = px.choropleth(
    data,
    locations="country",
    locationmode="country names",
    color="Gini Index",
    hover_name="country",
    animation_frame="date",
    color_continuous_scale="Reds",
    title="Global Income Inequality (Gini Index) Over Time"
)
fig.update_geos(showcoastlines=True)
fig.show()


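2.) Interactive Scatter Plots: Income Inequality vs Health Expenditure and Literacy Rate
These scatter plots show how income inequality (Gini Index) relates to health expenditure per capita and to literacy rate, using each country's average over the available years. A dropdown filters which countries are displayed.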

In [None]:
!pip install dash

In [10]:
import wbdata
import pandas as pd
import datetime
from dash import Dash, dcc, html, Input, Output
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import threading
from google.colab import output



# World Bank indicator codes mapped to the column labels used by the dashboard
indicators = {
    "SI.POV.GINI": "Gini Index",
    "SH.XPD.CHEX.PC.CD": "Health Expenditure",
    "SE.ADT.LITR.ZS": "Literacy Rate",
}

# Query window passed to the World Bank API
start_date = datetime.datetime(1963, 1, 1)
end_date = datetime.datetime(2023, 12, 31)

# Download, flatten the index into columns, keep only rows with no missing
# values, and rename the country column to "Country"
data = (
    wbdata.get_dataframe(indicators, date=(start_date, end_date))
    .reset_index()
    .dropna()
    .rename(columns={"country": "Country"})
)

# One row per country: the mean of each numeric column
numeric_columns = data.select_dtypes(include=['number']).columns
per_country = data.groupby("Country", as_index=False)
aggregated_data = per_country[numeric_columns].mean()

# Create the Dash application
app = Dash(__name__)

# Countries offered in the dropdown; all of them start out selected
_country_names = aggregated_data['Country'].unique().tolist()

# Page structure: heading, country selector, then the figure the callback fills in
app.layout = html.Div(children=[
    html.H1(
        "Income Inequality and Health Metrics and Literacy Rate Dashboard",
        style={'text-align': 'center'},
    ),

    # Multi-select country filter
    dcc.Dropdown(
        id='country-filter',
        options=[{'label': name, 'value': name} for name in _country_names],
        multi=True,
        value=_country_names,
        placeholder="Select countries to display",
    ),

    # Target of the scatter-plot callback
    dcc.Graph(id='scatter-plots'),
])

def _gini_scatter(frame, column, colorscale, colorbar_title, colorbar_x):
    """Build a marker-only scatter of Gini Index (x) against `column` (y).

    Markers are coloured by the same `column` values; the colour bar gets
    `colorbar_title` and is placed at horizontal position `colorbar_x`.
    """
    return go.Scatter(
        x=frame["Gini Index"],
        y=frame[column],
        mode="markers",
        marker=dict(
            size=12,
            color=frame[column],
            colorscale=colorscale,
            colorbar=dict(
                title=colorbar_title,
                thickness=15,  # Narrow width
                len=0.8,  # Shorten color bar height
                x=colorbar_x,  # Adjust position of color bar
            ),
            showscale=True,
        ),
        name=column,
        customdata=frame["Country"],
        # %{...} placeholders are filled in by plotly; only the metric label varies
        hovertemplate=(
            "<b>Country:</b> %{customdata}<br>"
            "<b>Gini Index:</b> %{x}<br>"
            f"<b>{column}:</b> %{{y}}<extra></extra>"
        ),
    )


# Callback for updating scatter plots
@app.callback(
    Output('scatter-plots', 'figure'),
    Input('country-filter', 'value')
)
def update_graph(selected_countries):
    """Redraw both scatter plots for the countries chosen in the dropdown.

    Args:
        selected_countries: Country names from the 'country-filter' dropdown.
            None (or an empty list) is treated as "no countries selected".

    Returns:
        A two-panel figure: Gini Index vs Health Expenditure on the left and
        Gini Index vs Literacy Rate on the right.
    """
    # isin() rejects None, so fall back to an empty selection and draw empty plots
    selected_countries = selected_countries or []
    filtered_data = aggregated_data[aggregated_data['Country'].isin(selected_countries)]

    # Create subplots
    fig = make_subplots(
        rows=1, cols=2,
        column_widths=[0.65, 0.65],  # Equal relative widths for the two panels
        horizontal_spacing=0.25,  # Increased horizontal spacing
        subplot_titles=(
            "Income Inequality vs Health Expenditure",
            "Income Inequality vs Literacy Rate"
        )
    )

    # Left panel: Health Expenditure
    fig.add_trace(
        _gini_scatter(
            filtered_data,
            "Health Expenditure",
            colorscale='Blues',
            colorbar_title="Health Expenditure (per Capita)",
            colorbar_x=0.4,
        ),
        row=1, col=1,
    )

    # Right panel: Literacy Rate
    fig.add_trace(
        _gini_scatter(
            filtered_data,
            "Literacy Rate",
            colorscale='Greens',
            colorbar_title="Literacy Rate (%)",
            colorbar_x=1.1,
        ),
        row=1, col=2,
    )

    # Update layout to provide better spacing
    fig.update_layout(
        title="Income Inequality , Literacy Rate and Health Metrics",
        height=600,  # Optimized height
        width=1400,  # Increased width for better spacing
        margin=dict(t=50, b=50, l=50, r=50),
        showlegend=False
    )

    # Update axis titles
    fig.update_xaxes(title_text="Gini Index (Income Inequality)", row=1, col=1)
    fig.update_yaxes(title_text="Health Expenditure (per Capita)", row=1, col=1)
    fig.update_xaxes(title_text="Gini Index (Income Inequality)", row=1, col=2)
    fig.update_yaxes(title_text="Literacy Rate (%)", row=1, col=2)

    return fig

# Start the Dash server. app.run() is the current entry point; run_server is
# the legacy name. The reloader is disabled so the server is not restarted
# from inside the notebook kernel.
if __name__ == '__main__':
    app.run(debug=True, use_reloader=False)





<IPython.core.display.Javascript object>