# Step 3: Add Population Feature Using WorldPop

This notebook uses population raster data from WorldPop to estimate the number of people in each grid cell within Kawempe. It adds a `pop_est` feature to the ML-ready grid for risk prioritization.

In [None]:
import geopandas as gpd
import rasterio
from rasterstats import zonal_stats
import matplotlib.pyplot as plt
import pandas as pd

## Step 1: Load Grid and WorldPop Population Raster

In [None]:
# Inputs: the WorldPop raster (adjust the path if the file lives elsewhere)
# and the feature grid written by the previous step
pop_raster_path = '../data/raw/uga_ppp_2020.tif'
grid = gpd.read_file('../data/processed/kawempe_grid_features.geojson')

# Quick visual check of the grid: cell outlines only, no fill
fig, ax = plt.subplots(figsize=(8, 8))
grid.plot(ax=ax, edgecolor='gray', facecolor='none')
ax.set_title('Kawempe Grid')
plt.show()

## Step 2: Compute Population per Grid Cell

In [None]:
# Estimate population per grid cell by summing the raster pixel values that
# fall inside each cell (rasterstats 'sum' statistic).

# Keep the grid's own CRS on the output. Only fall back to EPSG:4326 when the
# loaded grid carries no CRS at all (previously this was stamped unconditionally).
zones = grid if grid.crs is not None else grid.set_crs('EPSG:4326')

# zonal_stats samples the raster using the zone coordinates as-is, so the zones
# must be expressed in the raster's CRS or the sums come from the wrong pixels.
with rasterio.open(pop_raster_path) as pop_src:
    raster_crs = pop_src.crs
zones_for_stats = zones.to_crs(raster_crs.to_wkt()) if raster_crs else zones

# One result dict per feature, in the same order as the rows of the grid
pop_stats = zonal_stats(
    zones_for_stats.to_json(),
    pop_raster_path,
    stats=['sum'],
)

# 'sum' is None for cells with no valid pixels; force a float Series so those
# become NaN and can be filled with 0.
pop_sums = pd.Series(
    [cell_stats['sum'] for cell_stats in pop_stats],
    index=zones.index,
    dtype='float64',
)

# Add pop_est to a copy of the grid (original geometry, attributes and CRS kept).
# Round to the nearest whole person instead of truncating, which would bias
# every cell's estimate downward.
pop_grid = zones.copy()
pop_grid['pop_est'] = pop_sums.fillna(0).round().astype(int)
pop_grid.head()

## Step 3: Visualize Estimated Population

In [None]:
# Choropleth of the per-cell population estimate
fig, ax = plt.subplots(figsize=(10, 10))
pop_grid.plot(
    ax=ax,
    column='pop_est',
    cmap='OrRd',
    edgecolor='gray',
    legend=True,
)
ax.set_title('Estimated Population per Grid Cell')
ax.set_axis_off()  # hide ticks and frame; coordinates add nothing here
plt.show()

## Step 4: Save Updated Grid with Population

In [None]:
# Write the grid (including the new pop_est column) to GeoJSON.
# Note: no merge happens here; pop_grid is saved exactly as built above.
output_path = '../data/processed/kawempe_grid_with_population.geojson'
pop_grid.to_file(output_path, driver='GeoJSON')