# Top 5 slum statistics for key urban change categories
This notebook identifies, for each of four key urban change categories, the top 5 slums ranked by the area of that change falling inside each slum, and exports the corresponding slum polygons and grid cells as KML and shapefile layers for validation purposes.

**Author:** Sai Ganesh Veeravalli  
**Datasets Used:** Google 2.5D Open Buildings (Temporal), Nairobi IDEAtlas Slum reference data  

# 📦 Section 1: Import Required Libraries

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
import os

## Section 1.1 Define your file paths

In [None]:
# Input locations: classified urban change grid (2016–2023) and slum polygons
grid_path = r"F:\DEPRIMAP\EARSEL2025\March2025\Data\csv_experiment\urban_change_updated_23_16.gpkg"
slum_path = r"F:\DEPRIMAP\EARSEL2025\DATASETS\Reference Slum Parcels\nairobi_reference_slums.shp"

# Read both layers with GeoPandas
grid = gpd.read_file(grid_path)
slums = gpd.read_file(slum_path)

In [None]:
# The overlay further down needs both layers in one CRS; the grid's CRS is
# taken as the reference and the slum polygons are reprojected only if needed.
crs_matches = grid.crs == slums.crs
if not crs_matches:
    slums = slums.to_crs(grid.crs)

# 📊 Section 2: Compute top 5 slums

## Section 2.1 Filter for 4 focus categories

In [None]:
# The four urban change classes this notebook reports on; the order here is
# also the order of the map panels drawn later.
target_categories = [
    "Vertical Densification",
    "High Densification",
    "Horizontal Densification",
    "Decline",
]

In [None]:
# Keep only the grid cells whose 'urban_change' class is one of the targets
is_target_category = grid['urban_change'].isin(target_categories)
filtered_grid = grid.loc[is_target_category]

## Section 2.2 Get grids within slums

In [None]:
# Intersect the filtered grid cells with the slum polygons, then renumber the
# resulting rows from zero.
grids_in_slums = (
    gpd.overlay(filtered_grid, slums, how='intersection')
    .reset_index(drop=True)
)

## Section 2.3 Calculate area and top 5 stats

In [None]:
# Calculate area of each intersected grid in square meters.
# `.area` is expressed in the units of the layer's CRS, so a geographic
# (lon/lat) CRS would yield square degrees. In that case the geometries are
# measured in an estimated UTM CRS instead; the stored geometries are left
# untouched. A projected CRS is used as-is.
# NOTE(review): a projected CRS is assumed to use metres — confirm for the grid file.
area_geoms = grids_in_slums.geometry
if grids_in_slums.crs is not None and grids_in_slums.crs.is_geographic:
    area_geoms = grids_in_slums.to_crs(grids_in_slums.estimate_utm_crs()).geometry
grids_in_slums['area_sqm'] = area_geoms.area

In [None]:
# Column that identifies a slum in the intersected layer
slum_id_col = 'Org_FID'

# Total intersected area for every (category, slum) pair
area_by_pair = grids_in_slums.groupby(['urban_change', slum_id_col])['area_sqm'].sum()
grouped = area_by_pair.reset_index()

# Within each category, order slums from largest to smallest area and keep
# the first five rows.
ranked = grouped.sort_values(['urban_change', 'area_sqm'], ascending=[True, False])
top5_per_category = ranked.groupby('urban_change').head(5).reset_index(drop=True)

# Same area expressed in hectares (1 ha = 10,000 m²)
top5_per_category['area_ha'] = top5_per_category['area_sqm'] / 10_000

## Section 2.4 Plot top 5 slums map

In [None]:
from matplotlib.patches import FancyArrow

def add_north_arrow(ax, size=1000):
    """Draw an upward-pointing arrow labelled 'N' on *ax*.

    The arrow base is placed 2000 data units left of ``ax.get_xlim()[1]`` and
    1500 data units above ``ax.get_ylim()[0]``, so the axis limits should be
    set before calling this.

    Args:
        ax: Axes to draw on.
        size: Arrow length in data units; shaft width and head dimensions
            are derived from it.
    """
    base_x = ax.get_xlim()[1] - 2000
    base_y = ax.get_ylim()[0] + 1500

    arrow = FancyArrow(
        base_x, base_y, 0, size,
        width=size/5,
        head_width=size/2,
        head_length=size/2,
        length_includes_head=True,
        color='black',
        zorder=5,
    )
    ax.add_patch(arrow)

    # The letter is top-aligned 300 units under the arrow base
    ax.text(base_x, base_y - 300, 'N',
            ha='center', va='top', fontsize=12, fontweight='bold')

def add_scalebar(ax, length=5000):
    """Add a horizontal scale bar of the given length (in meters) to *ax*.

    The bar starts 1000 data units to the right of ``ax.get_xlim()[0]`` and
    1000 data units above ``ax.get_ylim()[0]``, so the axis limits should be
    set before calling this. A label in kilometres is centred 500 units above
    the bar.

    Args:
        ax: Axes to draw on.
        length: Bar length in meters (data units of a metre-based CRS).
    """
    x_start = ax.get_xlim()[0] + 1000
    y_start = ax.get_ylim()[0] + 1000
    ax.plot([x_start, x_start + length], [y_start, y_start], color='k', lw=2)

    # 'g' keeps fractional kilometres (500 m -> "0.5 km", 1500 m -> "1.5 km")
    # instead of rounding them to a misleading whole number, while whole
    # values still print without decimals (5000 m -> "5 km").
    label = f'{length / 1000:g} km'
    ax.text(x_start + length / 2, y_start + 500, label,
            ha='center', va='bottom', fontsize=9)

In [None]:
# Web Mercator (EPSG:3857) copy of the slums, used for plotting over a basemap
slums_3857 = slums.to_crs(epsg=3857)

# Every slum that made the top 5 in at least one category
top_ids_all = top5_per_category['Org_FID'].unique()
all_top_slums = slums_3857.loc[slums_3857['Org_FID'].isin(top_ids_all)]

# Shared map window: bounding box of those slums, padded by `buffer` on each side
bounds = all_top_slums.total_bounds
buffer = 500
min_x, min_y, max_x, max_y = bounds
xlim = (min_x - buffer, max_x + buffer)
ylim = (min_y - buffer, max_y + buffer)

In [None]:
# 2x2 figure: one panel per target category
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
axes = axes.flatten()

for panel_idx, category in enumerate(target_categories):
    ax = axes[panel_idx]

    # Ranking rows for this category, and the slum IDs they refer to
    top5_info = top5_per_category.loc[top5_per_category['urban_change'] == category]
    top_ids = top5_info['Org_FID']

    # Outline only the slums ranked in this category
    subset = slums_3857.loc[slums_3857['Org_FID'].isin(top_ids)]
    subset.plot(ax=ax, facecolor='none', edgecolor='black', linewidth=1.5)

    # Small fixed-width table (slum ID, area in ha) shown in the top-left corner
    label_lines = ["Org_FID   Area (ha)"]
    for _, row in top5_info.iterrows():
        label_lines.append(f"{int(row['Org_FID']):<9} {row['area_ha']:.1f}")
    label_box = {"boxstyle": "round,pad=0.4", "fc": "white", "ec": "gray", "alpha": 0.9}
    ax.text(
        0.02, 0.98, "\n".join(label_lines),
        transform=ax.transAxes,
        fontsize=12,
        va='top',
        ha='left',
        bbox=label_box,
        fontfamily='monospace',
    )

    # Same window on every panel; limits are set before the basemap is added
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, crs=subset.crs.to_string())

    # Panel cosmetics
    ax.set_title(category, fontsize=14)
    ax.set_xlabel("Easting (m)", fontsize=10)
    ax.set_ylabel("Northing (m)", fontsize=10)
    ax.tick_params(labelsize=9)
    ax.set_aspect("equal")

    # Map furniture
    add_north_arrow(ax)
    add_scalebar(ax)

# Figure-level title, spacing and footnote
fig.suptitle("Top 5 Slums by Urban Change Category (Area in Hectares)", fontsize=18)
plt.tight_layout(rect=[0, 0, 1, 0.99])
plt.subplots_adjust(hspace=0.01)
fig.text(
    0.5, 0.01,
    "Note: Area refers to the urban change extent intersecting each slum polygon.",
    ha='center',
    fontsize=10,
    style='italic',
)
plt.show()

## Section 2.5 Export top 5 slums and grid KMLs and shapefiles for validation purposes

In [None]:
# Define output directory for the exported KML and shapefile layers
output_dir = r"F:\DEPRIMAP\EARSEL2025\March2025\Data\csv_experiment\top5_slums"
# Create it (including missing parents); exist_ok=True makes re-running the
# cell safe when the directory is already there.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# The export layers are written in geographic coordinates (EPSG:4326)
kml_epsg = 4326
slums_4326 = slums.to_crs(epsg=kml_epsg)
grids_4326 = grids_in_slums.to_crs(epsg=kml_epsg)

In [None]:
# For every category present in the ranking, write the ranked slum polygons
# and their intersected grid cells, each as a KML and as a shapefile.
# NOTE(review): the "KML" driver may not be enabled in every GDAL/Fiona
# installation — confirm it is available in the target environment.
for category in top5_per_category['urban_change'].unique():
    # File-name-friendly version of the category label
    safe_name = category.lower().replace(" ", "_").replace("/", "_")

    in_category = top5_per_category['urban_change'] == category
    top_ids = top5_per_category.loc[in_category, 'Org_FID']

    # Slum polygons ranked for this category
    slum_subset = slums_4326.loc[slums_4326['Org_FID'].isin(top_ids)]
    slum_kml_path = os.path.join(output_dir, f"top5_slums_{safe_name}.kml")
    slum_shp_path = os.path.join(output_dir, f"top5_slums_{safe_name}.shp")
    slum_subset.to_file(slum_kml_path, driver="KML")
    slum_subset.to_file(slum_shp_path)

    # Grid cells of this category that fall inside those slums
    cell_in_category = grids_4326['urban_change'] == category
    cell_in_top_slum = grids_4326['Org_FID'].isin(top_ids)
    grid_subset = grids_4326.loc[cell_in_category & cell_in_top_slum]
    grid_kml_path = os.path.join(output_dir, f"top5_grids_{safe_name}.kml")
    grid_shp_path = os.path.join(output_dir, f"top5_grids_{safe_name}.shp")
    grid_subset.to_file(grid_kml_path, driver="KML")
    grid_subset.to_file(grid_shp_path)

    print(f"✔ Exported files for {category}")