In [None]:
# ===============================================================
# Figure 4 — Deprivation mix across city-size classes by country
#
# Inputs (relative to repo root):
#   - 2_modelling/02_application/summary_statistics/
#       └─ city_deprivation_with_sizeclass_80pct.csv
#   - 4_Figures_Tables/
#       └─ country_ISOcodes_new.csv
#
# Output:
#   - 4_Figures_Tables/Figures/Figure4_Deprivation_by_CitySizeMix.png
# ===============================================================

In [None]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# ------------------------------------------------------------
# 1️⃣ Paths
# ------------------------------------------------------------
def _find_repo_root(start, markers=("2_modelling", "4_Figures_Tables")):
    """Return the nearest directory at or above `start` that contains every marker folder.

    Falls back to `start` itself when no such directory exists, which keeps the
    behaviour of running the notebook directly from the repo root.
    """
    for candidate in (start, *start.parents):
        if all((candidate / marker).is_dir() for marker in markers):
            return candidate
    return start


# Jupyter usually starts the kernel in the notebook's own folder, not the repo
# root, so walk upwards instead of trusting the working directory. Otherwise the
# mkdir below would silently create a stray "4_Figures_Tables/Figures" tree.
REPO_ROOT = _find_repo_root(Path.cwd())

CITY_PATH = (
    REPO_ROOT
    / "2_modelling"
    / "02_application"
    / "summary_statistics"
    / "city_deprivation_with_sizeclass_80pct.csv"
)
ISO_PATH = REPO_ROOT / "4_Figures_Tables" / "country_ISOcodes_new.csv"
FIG_DIR = REPO_ROOT / "4_Figures_Tables" / "Figures"

# Output folder is created on demand; inputs are expected to exist already.
FIG_DIR.mkdir(parents=True, exist_ok=True)

print("City file:", CITY_PATH)
print("ISO file :", ISO_PATH)
print("Figures  :", FIG_DIR)

In [None]:
# ------------------------------------------------------------
# 2️⃣ Load data
# ------------------------------------------------------------
city_df = pd.read_csv(CITY_PATH)

# Only empty cells count as missing in the ISO lookup. With pandas' default NA
# parsing a literal "NA" code would be read as NaN.
# NOTE(review): this matters if the file stores alpha-2 codes (Namibia = "NA");
# it is harmless for alpha-3 codes — confirm which the file uses.
iso_df = pd.read_csv(ISO_PATH, keep_default_na=False, na_values=[""])

# Fail early, with a readable message, if a column used downstream is absent.
for _frame_name, _frame, _required in (
    ("city file", city_df, {"Region", "Country", "CitySizeClass", "DeprivedPop"}),
    ("ISO file", iso_df, {"Country", "ISO"}),
):
    _missing = sorted(_required - set(_frame.columns))
    if _missing:
        raise KeyError(f"{_frame_name} is missing required column(s): {_missing}")

# Global figure styling used by every plot below.
plt.rcParams["font.family"] = "DejaVu Sans"
plt.rcParams["figure.dpi"] = 300

In [None]:
# ------------------------------------------------------------
# 3️⃣ Data preparation
# ------------------------------------------------------------
def prepare_mix_data(df, region_name, iso_df):
    """
    Prepare both % and absolute deprived-population data for one region.

    Parameters:
      df: city-level table with columns "Region", "Country", "CitySizeClass"
          and "DeprivedPop".
      region_name: value of the "Region" column to keep.
      iso_df: lookup table with columns "Country" and "ISO".

    Returns:
      mix_pct: DataFrame indexed by ISO, columns = city size classes, values = % share
      mix_abs: DataFrame indexed by ISO, columns = city size classes, values = absolute deprived pop

      Both frames share the same row order (descending "Small" share) and the
      same index, named "ISO". A country with no ISO code in `iso_df` is
      labelled with its country name instead, so labels are never missing.
      Returns (None, None) when the region has no rows.
    """
    order = ["Small", "Medium", "Large", "Very large", "Megacity"]

    region_cities = df[df["Region"] == region_name]
    if region_cities.empty:
        return None, None

    # Coerce *before* aggregating: summing a text column would concatenate
    # strings, and coercing afterwards could no longer repair that.
    deprived = pd.to_numeric(region_cities["DeprivedPop"], errors="coerce")

    # Country × city-size table of absolute deprived population.
    mix_abs = (
        region_cities.assign(DeprivedPop=deprived)
        .groupby(["Country", "CitySizeClass"])["DeprivedPop"]
        .sum()
        .unstack(fill_value=0)
    )

    # Row-wise % share. A zero total would give 0/0 = NaN; mask it and report
    # 0 % for every class so the country shows as an empty bar.
    totals = mix_abs.sum(axis=1)
    mix_pct = mix_abs.div(totals.where(totals != 0), axis=0).mul(100).fillna(0.0)

    # Consistent column (city size) order; classes absent from the region are 0.
    mix_abs = mix_abs.reindex(columns=order, fill_value=0)
    mix_pct = mix_pct.reindex(columns=order, fill_value=0)

    # Sort by % share from "Small" (could also use another size class)
    mix_pct = mix_pct.sort_values("Small", ascending=False)
    mix_abs = mix_abs.loc[mix_pct.index]

    # Attach ISO codes through a de-duplicated lookup: a repeated country in
    # iso_df must not duplicate bars, and a country missing from iso_df keeps
    # its name rather than receiving a NaN label.
    iso_lookup = iso_df.drop_duplicates(subset="Country").set_index("Country")["ISO"]
    countries = mix_pct.index.to_series()
    iso_codes = countries.map(iso_lookup)
    row_labels = pd.Index(
        iso_codes.where(iso_codes.notna(), countries).to_numpy(),
        name="ISO",
    )

    mix_pct.index = row_labels
    mix_abs.index = row_labels

    return mix_pct, mix_abs

In [None]:
# ------------------------------------------------------------
# 4️⃣ Plotting function
# ------------------------------------------------------------
def _format_population_label(value):
    """Return a compact "1.2 M" / "340 K" label, or None for values below 1,000 (or NaN)."""
    if not value >= 1e3:  # written this way so NaN is rejected as well
        return None
    # Anything that would round up to "1000 K" is shown in millions instead.
    if value >= 1e6 or round(value / 1e3) >= 1000:
        return f"{value / 1e6:.1f} M"
    return f"{value / 1e3:.0f} K"


def _contrast_text_color(color_hex):
    """Pick "black" or "white" text for a "#rrggbb" fill, based on its weighted RGB brightness."""
    digits = color_hex.lstrip("#")
    red, green, blue = (int(digits[k:k + 2], 16) for k in (0, 2, 4))
    brightness = 0.299 * red + 0.587 * green + 0.114 * blue
    return "black" if brightness > 150 else "white"


def plot_region_mix(ax, mix_pct, mix_abs, region_name):
    """
    Stacked horizontal bar chart of deprived-population mix by city size,
    with labels for the top 2 size classes (by absolute deprived pop) per country.

    Parameters:
      ax: matplotlib Axes to draw on; hidden when there is no data.
      mix_pct: DataFrame of % shares (rows = countries, columns = size classes), or None.
      mix_abs: DataFrame of absolute deprived population with the same columns, or None.
      region_name: text used at the start of the panel title.

    Only segments with a share of at least 5 % are eligible for a label, and
    absolute values below 1,000 are never labelled.
    """
    if mix_pct is None or mix_abs is None:
        ax.set_visible(False)
        return

    colors = ["#fcae91", "#fb6a4a", "#ef3b2c", "#cb181d", "#99000d"]

    # --- Bars ---
    mix_pct.plot(
        kind="barh",
        stacked=True,
        color=colors,
        ax=ax,
        width=0.9,
        legend=False,
    )

    ax.set_xlim(0, 100)
    ax.set_xlabel("Share of deprived population (%)", fontsize=10)
    ax.set_ylabel("")
    ax.set_title(
        f"{region_name} — Mix of population in deprived segments by city size",
        fontsize=11,
        weight="bold",
    )
    ax.grid(axis="x", linestyle=":", alpha=0.4)

    # --- Labels: top 2 visible size classes per country ---
    size_classes = list(mix_pct.columns)
    # Text colour depends only on the segment colour, so compute it once.
    text_colors = [_contrast_text_color(color_hex) for color_hex in colors]

    # When both frames carry the same index, pair rows by position. A label
    # lookup would raise on duplicated labels (e.g. several countries sharing
    # a missing ISO code) or pick the wrong row.
    rows_aligned = mix_abs.index.equals(mix_pct.index)

    for i, (iso, row) in enumerate(mix_pct.iterrows()):
        if rows_aligned:
            abs_row = mix_abs.iloc[i]
        else:
            try:
                abs_row = mix_abs.loc[[iso]].iloc[0]
            except KeyError:
                continue  # no absolute figures for this country
        if abs_row.isna().all():
            continue

        shares = row.reindex(size_classes, fill_value=0)
        abs_vals = abs_row.reindex(size_classes, fill_value=0)

        # Only bars with at least 5% share are considered "visible"
        visible_abs = abs_vals[shares >= 5]
        if visible_abs.empty:
            continue

        # Top 2 by absolute value among visible size classes
        top_cols = set(visible_abs.sort_values(ascending=False).head(2).index)

        x_left = 0.0  # running left edge of the current stacked segment
        for col, text_color in zip(size_classes, text_colors):
            share = float(shares[col])
            abs_val = float(abs_vals[col])
            x_center = x_left + share / 2
            x_left += share

            if col not in top_cols:
                continue

            label = _format_population_label(abs_val)
            if label is None:
                continue

            ax.text(
                x_center,
                i,
                label,
                ha="center",
                va="center",
                fontsize=9.5,
                color=text_color,
                weight="bold",
                clip_on=True,
            )


In [None]:
# ------------------------------------------------------------
# 5️⃣ Build multi-panel figure (Africa full height, Asia & LAC stacked)
# ------------------------------------------------------------
fig = plt.figure(figsize=(14, 14))
gs = fig.add_gridspec(
    2,
    2,
    width_ratios=[1, 1],
    height_ratios=[1.2, 0.8],
)

# Africa (left column, spans both rows)
ax_africa = fig.add_subplot(gs[:, 0])
mix_africa_pct, mix_africa_abs = prepare_mix_data(city_df, "Africa", iso_df)
plot_region_mix(ax_africa, mix_africa_pct, mix_africa_abs, "Africa")

# Asia (top-right)
ax_asia = fig.add_subplot(gs[0, 1])
mix_asia_pct, mix_asia_abs = prepare_mix_data(city_df, "Asia", iso_df)
plot_region_mix(ax_asia, mix_asia_pct, mix_asia_abs, "Asia")

# LAC (bottom-right); the data uses the long region name, the panel the short one
ax_lac = fig.add_subplot(gs[1, 1])
mix_lac_pct, mix_lac_abs = prepare_mix_data(
    city_df,
    "Latin America and the Caribbean",
    iso_df,
)
plot_region_mix(ax_lac, mix_lac_pct, mix_lac_abs, "LAC")

# Shared legend (city-size classes). Take the handles from the first panel
# that actually drew bars, so a region without data does not empty the legend.
handles, labels = [], []
for panel_ax in (ax_africa, ax_asia, ax_lac):
    handles, labels = panel_ax.get_legend_handles_labels()
    if handles:
        break

size_class_labels = [
    "Small (< 500k)",
    "Medium (500k–1M)",
    "Large (1–5M)",
    "Very large (5–10M)",
    "Megacity (> 10M)",
]

if handles:
    fig.legend(
        handles,
        size_class_labels,
        title="City size",
        loc="lower center",
        ncol=5,
        frameon=True,
        edgecolor="black",
    )
else:
    print("No region produced any bars; legend omitted.")

# Leave a strip at the bottom of the figure for the shared legend.
fig.tight_layout(rect=[0, 0.06, 1, 1])

outfile = FIG_DIR / "Figure4_Deprivation_by_CitySizeMix.png"
fig.savefig(outfile, dpi=500, bbox_inches="tight")
print(f"✅ Saved Figure 4 to: {outfile}")

plt.show()