In [13]:
import os
from pathlib import Path
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import folium
from unidecode import unidecode

# ---- Paths (works in notebooks & scripts) ----
# A script defines __file__, so ROOT is taken two levels up from the resolved
# script path; a notebook kernel does not, so the working directory is used.
ROOT = Path(__file__).resolve().parents[1] if "__file__" in globals() else Path.cwd()

# All generated artefacts live under <ROOT>/outputs.
OUT_DIR = ROOT / "outputs"
FIG_DIR, MAP_DIR = OUT_DIR / "figures", OUT_DIR / "maps"

# parents=True also creates OUT_DIR itself; exist_ok=True keeps re-runs quiet.
for out_subdir in (FIG_DIR, MAP_DIR):
    out_subdir.mkdir(parents=True, exist_ok=True)

# ---- Load sample data ----
# The bundled `gpd.datasets` samples were deprecated in GeoPandas 0.14 and
# removed in 1.0, where `get_path` raises AttributeError. Prefer the bundled
# copy when it still exists (older installs keep working offline); otherwise
# read the upstream Natural Earth 1:110m countries archive (needs network).
NATURAL_EARTH_URL = (
    "https://naciscdn.org/naturalearth/110m/cultural/"
    "ne_110m_admin_0_countries.zip"
)

# NOTE(review): assumes the upstream shapefile exposes these upper-case
# columns; confirm against the downloaded file. The rename is a no-op for the
# bundled dataset, which already uses the lower-case names.
NATURAL_EARTH_COLUMNS = {
    "NAME": "name",
    "CONTINENT": "continent",
    "POP_EST": "pop_est",
    "ISO_A3": "iso_a3",
}

try:
    world_fp = gpd.datasets.get_path("naturalearth_lowres")
except AttributeError:
    # GeoPandas >= 1.0: the bundled sample data is gone.
    world_fp = NATURAL_EARTH_URL

world = gpd.read_file(world_fp).rename(columns=NATURAL_EARTH_COLUMNS)

  world_fp = gpd.datasets.get_path("naturalearth_lowres")


In [14]:
# Clean a bit (optional): keep every country except Antarctica, and only rows
# that actually carry a population estimate.
is_not_antarctica = world["name"] != "Antarctica"
has_population = world["pop_est"].notna()
world = world[is_not_antarctica & has_population].copy()

# ---- English/Latin labels via transliteration ----
def _to_latin(value):
    """Transliterate a string to plain ASCII; pass non-strings through unchanged."""
    if isinstance(value, str):
        return unidecode(value)
    return value


# Optional tidy-ups (translation, not just transliteration): exact-match
# replacements applied to the transliterated labels.
fixes = {
    "Cote d'Ivoire": "Ivory Coast",
    "Turkiye": "Turkey",
}

world["label_en"] = world["name"].map(_to_latin).replace(fixes)

# ---- Compute area in an equal-area CRS ----
# Reproject to EPSG:6933 (equal-area) so polygon areas come out in m²,
# then scale by 1e6 to store km².
world_eq = world.to_crs(epsg=6933)
area_m2 = world_eq.geometry.area
world_eq["area_km2"] = area_m2 / 1e6

# ---- Export Top 20 largest countries ----
# Pick the 20 rows with the greatest area, keep only the reporting columns,
# order them largest-first and renumber the rows from zero.
largest_rows = world_eq.nlargest(20, "area_km2")
top20 = largest_rows[["name", "continent", "area_km2"]]
top20 = top20.sort_values("area_km2", ascending=False)
top20 = top20.reset_index(drop=True)

# Write the table next to the other outputs, without the index column.
csv_path = OUT_DIR / "world_area_top20.csv"
top20.to_csv(csv_path, index=False)

# ---- Static choropleth (matplotlib) ----
fig, ax = plt.subplots(figsize=(12, 6))

# Arguments shared by both rendering modes.
plot_kwargs = dict(
    column="area_km2",
    legend=True,
    ax=ax,
    edgecolor="black",
    linewidth=0.2,
)

# Use quantile classes only when the optional mapclassify package imports.
# Only the import is guarded: a genuine plotting error must surface instead
# of silently triggering a second draw on an already partly drawn Axes.
try:
    import mapclassify  # noqa: F401
except ImportError:
    pass  # fall back to the plain, unclassified plot
else:
    plot_kwargs.update(scheme="quantiles", k=5)

world_eq.plot(**plot_kwargs)

ax.set_title("Country Area (km²) — Equal-Area Projection", pad=10)
ax.set_axis_off()
fig.tight_layout()

# Save to disk and close the figure so it does not stay open in pyplot.
png_path = FIG_DIR / "world_area.png"
fig.savefig(png_path, dpi=180)
plt.close(fig)

In [15]:
# ---- Simple interactive map (Folium) ----
# Build safe label points: representative_point() always falls inside its
# polygon. It is computed in a projected CRS (EPSG:3857) and then converted
# back to EPSG:4326 so x/y can be passed to Folium as lon/lat.
label_gdf = world.to_crs(3857).copy()
label_gdf["geometry"] = label_gdf.geometry.representative_point()
label_gdf = label_gdf.to_crs(4326)

# Base map with a single tile layer that is hidden from any layer control.
m = folium.Map(location=[20, 0], zoom_start=2, tiles=None)
base_tiles = folium.TileLayer("CartoDB positron", control=False)
base_tiles.add_to(m)

# One small circle per country; the popup shows "<label> — <continent>".
label_points = label_gdf.geometry
marker_rows = zip(
    label_gdf["label_en"],
    label_gdf["continent"],
    label_points.y,
    label_points.x,
)
for country, region, lat, lon in marker_rows:
    marker = folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        fill=True,
        popup=f"{country} — {region}",
    )
    marker.add_to(m)

m.save(str(MAP_DIR / "world_map.html"))