# Kawempe Grid-Based Risk Features and ML Exploration

This notebook creates a uniform 250 m × 250 m grid over Kawempe, computes per-cell urban risk indicators (mean NDVI, building count, service count, road length), and groups the cells with K-means clustering.

In [None]:
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from rasterstats import zonal_stats
from shapely.geometry import box
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

## Step 1: Load NDVI Raster and Define Grid Area

In [None]:
# Only the raster's footprint and CRS are needed here; pixel values are
# sampled later, straight from the file path.
ndvi_path = '../data/raw/Kawempe_NDVI_2023.tif'
with rasterio.open(ndvi_path) as src:
    bounds, crs = src.bounds, src.crs

# Rectangle covering the raster extent, reprojected to EPSG:32636 for gridding.
bbox = box(bounds.left, bounds.bottom, bounds.right, bounds.top)
bbox_gdf = gpd.GeoDataFrame(geometry=[bbox], crs=crs).to_crs('EPSG:32636')
bbox_gdf.plot(figsize=(8, 8), facecolor='none', edgecolor='black')

## Step 2: Create Uniform Grid (250m × 250m)

In [None]:
# Cell edge length, in the units of the projected bbox CRS (EPSG:32636).
grid_size = 250
xmin, ymin, xmax, ymax = bbox_gdf.total_bounds

# Round UP so the last row/column still reaches the top and right edges of the
# extent. Truncating here would leave a strip of up to one cell width with no
# grid cells at all; with ceil the outermost cells may overhang the extent by
# less than one cell instead.
rows = int(np.ceil((ymax - ymin) / grid_size))
cols = int(np.ceil((xmax - xmin) / grid_size))

# Cells are generated column by column (all rows of column 0, then column 1, ...).
grid_cells = [
    box(xmin + i * grid_size, ymin + j * grid_size,
        xmin + (i + 1) * grid_size, ymin + (j + 1) * grid_size)
    for i in range(cols)
    for j in range(rows)
]

# Build in the projected CRS, then convert to EPSG:4326 for the later steps.
grid = gpd.GeoDataFrame({'geometry': grid_cells}, crs='EPSG:32636').to_crs('EPSG:4326')
grid.plot(edgecolor='gray', facecolor='none', figsize=(10, 10))

## Step 3: Compute NDVI per Grid Cell

In [None]:
# rasterstats does not reproject: the zone geometries must be expressed in the
# raster's own CRS, otherwise cells are sampled at the wrong pixels (or none).
# `crs` is the raster CRS read when the NDVI file was opened.
zones = grid.to_crs(crs)
stats = zonal_stats(zones.to_json(), ndvi_path, stats=['mean'], geojson_out=True)

# geojson_out=True returns the input features with the statistic added as a
# property; rebuild a GeoDataFrame from them and return to EPSG:4326.
ndvi_grid = gpd.GeoDataFrame.from_features(stats, crs=crs).to_crs('EPSG:4326')
ndvi_grid = ndvi_grid.rename(columns={'mean': 'mean_ndvi'})
ndvi_grid.head()

## Step 4: Load Infrastructure Layers

In [None]:
# Vector layers, all brought into EPSG:4326 so they line up with the grid.
layer_paths = (
    '../data/raw/kawempe_buildings.geojson',
    '../data/raw/kawempe_roads.geojson',
    '../data/raw/kawempe_services.geojson',
)
buildings, roads, services = (
    gpd.read_file(layer_path).to_crs('EPSG:4326') for layer_path in layer_paths
)

## Step 5: Count Features per Grid Cell

In [None]:
def _count_per_cell(features, cells):
    """Count features per grid cell, assigning each feature to at most one cell.

    Each feature is reduced to a representative point before the spatial join.
    Testing the full geometry with ``within`` would silently drop every feature
    that straddles a cell edge; a single point per feature keeps such features
    in the count without double counting them.

    Parameters
    ----------
    features : GeoDataFrame
        Layer to count. Must be in the same CRS as ``cells``.
    cells : GeoDataFrame
        Grid polygons. The result is indexed like ``cells``.

    Returns
    -------
    pandas.Series
        Integer count per cell, 0 for cells containing no feature.
    """
    points = gpd.GeoDataFrame(geometry=features.geometry.representative_point())
    # Join against geometry only so existing attribute columns cannot collide.
    joined = gpd.sjoin(points, cells[['geometry']], predicate='within')
    counts = joined.groupby('index_right').size()
    return counts.reindex(cells.index, fill_value=0).astype(int)


# Building count
bldg_count = _count_per_cell(buildings, ndvi_grid)
ndvi_grid['building_count'] = bldg_count

# Service count
svc_count = _count_per_cell(services, ndvi_grid)
ndvi_grid['service_count'] = svc_count

# Road length
# Lengths are measured in the projected CRS (EPSG:32636), not in degrees.
roads_proj = roads.to_crs('EPSG:32636')
# Keep line geometries only: overlay rejects layers with mixed geometry types.
roads_proj = roads_proj[roads_proj.geom_type.isin(['LineString', 'MultiLineString'])]
grid_proj = ndvi_grid.to_crs('EPSG:32636')

# Cut every road at the cell edges so each cell is credited only with the
# portion of road inside it. A `within` join would discard every road that
# crosses a cell boundary and credit the rest with their full length.
cells_proj = grid_proj[['geometry']].rename_axis('cell_id').reset_index()
road_pieces = gpd.overlay(roads_proj[['geometry']], cells_proj,
                          how='intersection', keep_geom_type=True)
road_pieces['length_km'] = road_pieces.length / 1000
road_length = road_pieces.groupby('cell_id')['length_km'].sum()
ndvi_grid['road_km'] = road_length.reindex(ndvi_grid.index, fill_value=0.0)

## Step 6: Normalize Features and Cluster

In [None]:
feature_cols = ['mean_ndvi', 'building_count', 'service_count', 'road_km']

# NOTE(review): fillna(0) makes any cell without an NDVI value look like
# NDVI = 0 to the clustering — confirm this is the intended imputation.
X = ndvi_grid[feature_cols].fillna(0)

# Put all features on a comparable scale before clustering.
X_scaled = StandardScaler().fit_transform(X)

# n_init is pinned explicitly: its default has changed between scikit-learn
# releases, so leaving it implicit makes the cluster labels depend on the
# installed version (and emits a FutureWarning on some versions).
kmeans = KMeans(n_clusters=4, n_init=10, random_state=0)
ndvi_grid['cluster'] = kmeans.fit_predict(X_scaled)

## Step 7: Visualize Clustering Results

In [None]:
# Cluster ids are labels, not quantities: categorical=True draws one legend
# entry per cluster instead of a continuous colour bar.
ax = ndvi_grid.plot(column='cluster', categorical=True, cmap='tab10',
                    legend=True, legend_kwds={'title': 'Cluster'},
                    figsize=(10, 10), edgecolor='black')
ax.set_title('KMeans Clustering of Grid Cells')
ax.set_axis_off()
plt.show()

## Step 8: Save Results

In [None]:
# Create the output folder first so the write does not fail on a fresh
# checkout where ../data/processed does not exist yet.
output_path = Path('../data/processed/kawempe_grid_features.geojson')
output_path.parent.mkdir(parents=True, exist_ok=True)
ndvi_grid.to_file(str(output_path), driver='GeoJSON')