In [1]:
import rasterio
from rasterio.mask import mask
import geopandas as gpd
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

# --- Cell purpose -----------------------------------------------------------
# Set up data paths, open the ESA WorldCover raster, load the ward polygons,
# and print an inventory of the land cover classes present in the raster.
# Names defined here (worldcover_path, worldcover_data, worldcover_meta,
# wards, landcover_classes, unique_classes) are used by later cells.
print("=" * 60)
print("ESA WORLDCOVER - LAND COVER FEATURES")
print("=" * 60)

# Paths (relative to the notebook's working directory)
DATA_DIR = Path('../data')
LANDCOVER_DIR = DATA_DIR / 'land_cover'
WARDS_DIR = DATA_DIR / 'wards'
LANDCOVER_DIR.mkdir(parents=True, exist_ok=True)

# Load WorldCover raster
print("\nüìÇ Loading ESA WorldCover...")
worldcover_path = LANDCOVER_DIR / 'KMC_WorldCover_2021.tif'

with rasterio.open(worldcover_path) as src:
    print(f"‚úì Loaded WorldCover raster")
    # The metre figure multiplies the pixel size by 111000, i.e. it assumes the
    # raster CRS is in degrees (roughly 111 km per degree).
    print(f"  Resolution: {src.res[0]:.6f}¬∞ (~{src.res[0] * 111000:.0f}m)")
    print(f"  Shape: {src.shape}")
    print(f"  CRS: {src.crs}")
    print(f"  Bounds: {src.bounds}")

    # Read band 1 fully into memory, and keep the metadata for later use
    worldcover_data = src.read(1)
    worldcover_meta = src.meta

# Load wards
wards = gpd.read_file(WARDS_DIR / 'kmc_wards_gee_ready.geojson')
print(f"‚úì Loaded {len(wards)} wards")

# Land cover class definitions (raster code -> human-readable label)
landcover_classes = {
    10: 'Tree cover',
    20: 'Shrubland',
    30: 'Grassland',
    40: 'Cropland',
    50: 'Built-up',
    60: 'Bare/sparse vegetation',
    70: 'Snow and ice',
    80: 'Permanent water bodies',
    90: 'Herbaceous wetland',
    95: 'Mangroves',
    100: 'Moss and lichen'
}

# Check what classes are present.
# A single pass with return_counts=True yields every code and its pixel count,
# instead of re-scanning the whole raster once per class.
unique_classes, class_pixel_counts = np.unique(worldcover_data, return_counts=True)
print(f"\nüìä Land cover classes found in KMC:")
for code, pixel_count in zip(unique_classes, class_pixel_counts):
    # Codes without a label in landcover_classes are not listed.
    if code in landcover_classes:
        print(f"   {code}: {landcover_classes[code]} ({pixel_count:,} pixels)")

print("\n‚öôÔ∏è  Calculating land cover per ward...")
print("   (This may take 2-3 minutes)...")

ESA WORLDCOVER - LAND COVER FEATURES

üìÇ Loading ESA WorldCover...
‚úì Loaded WorldCover raster
  Resolution: 0.000090¬∞ (~10m)
  Shape: (2116, 2450)
  CRS: EPSG:4326
  Bounds: BoundingBox(left=88.23998322307759, bottom=22.449976928487782, right=88.46007046768688, top=22.640060442607474)
‚úì Loaded 141 wards

üìä Land cover classes found in KMC:
   10: Tree cover (1,767,432 pixels)
   20: Shrubland (2 pixels)
   30: Grassland (111,131 pixels)
   40: Cropland (280,298 pixels)
   50: Built-up (2,610,704 pixels)
   60: Bare/sparse vegetation (27,900 pixels)
   80: Permanent water bodies (358,745 pixels)
   90: Herbaceous wetland (27,988 pixels)

‚öôÔ∏è  Calculating land cover per ward...
   (This may take 2-3 minutes)...


In [2]:
# Calculate land cover percentages per ward
print("\n‚öôÔ∏è  Extracting land cover statistics per ward...")


def _ward_landcover_features(valid_pixels, ward_id, ward_name):
    """Build the land cover feature record for one ward.

    Parameters
    ----------
    valid_pixels : 1-D array of land cover codes inside the ward, with
        nodata already removed. May be empty.
    ward_id : identifier stored under 'ward_id'.
    ward_name : label stored under 'ward_name'.

    Returns
    -------
    dict with 'ward_id', 'ward_name', 'total_pixels', one
    'lc_<code>_pct' / 'lc_<code>_count' pair per entry of
    ``landcover_classes``, and the derived features (built_up_pct,
    vegetation_pct, water_pct, wetland_pct, cropland_pct,
    imperviousness_worldcover, runoff_coefficient).

    For a ward with no valid pixels every percentage, count and derived
    feature is 0, so all records share the same keys.
    """
    total_pixels = len(valid_pixels)
    features = {
        'ward_id': ward_id,
        'ward_name': ward_name,
        'total_pixels': total_pixels
    }

    # Share of each land cover class (0 when the ward has no valid pixels)
    for code in landcover_classes:
        count = int((valid_pixels == code).sum())
        features[f'lc_{code}_pct'] = (count / total_pixels) * 100 if total_pixels else 0.0
        features[f'lc_{code}_count'] = count

    # Derived features (important for model!)
    features['built_up_pct'] = features['lc_50_pct']  # Class 50
    features['vegetation_pct'] = (features['lc_10_pct'] +   # Trees
                                  features['lc_20_pct'] +   # Shrubs
                                  features['lc_30_pct'])    # Grass
    features['water_pct'] = features['lc_80_pct']  # Permanent water
    features['wetland_pct'] = features['lc_90_pct']  # Wetlands
    features['cropland_pct'] = features['lc_40_pct']  # Cropland

    # IMPERVIOUSNESS (key flood predictor!)
    # Built-up + Bare areas have high runoff.
    # NOTE(review): built-up is counted at full weight here, while the runoff
    # coefficient below uses 0.90 for the same class - confirm this is intended.
    features['imperviousness_worldcover'] = (
        features['lc_50_pct'] +      # Built-up (weight 1.0)
        features['lc_60_pct'] * 0.8  # Bare areas (weight 0.8)
    )

    # Runoff coefficient (0-1 scale): area-weighted sum of per-class weights.
    # NOTE(review): classes 20 (shrubland), 70, 95 and 100 have no weight and
    # therefore contribute 0 - confirm that is acceptable.
    features['runoff_coefficient'] = (
        features['lc_50_pct'] * 0.90 +      # Built-up: 90% runoff
        features['lc_60_pct'] * 0.80 +      # Bare: 80% runoff
        features['lc_40_pct'] * 0.40 +      # Cropland: 40% runoff
        features['lc_30_pct'] * 0.25 +      # Grassland: 25% runoff
        features['lc_10_pct'] * 0.15 +      # Trees: 15% runoff
        features['lc_80_pct'] * 1.00 +      # Water: 100% runoff
        features['lc_90_pct'] * 0.10        # Wetland: 10% runoff (absorbs!)
    ) / 100  # Convert to 0-1 scale

    return features


ward_landcover_features = []
failed_wards = []  # ids of wards whose extraction raised, reported after the loop

with rasterio.open(worldcover_path) as src:
    # Count by position, not by index label, so the progress messages stay
    # correct even if `wards` does not have a default 0..n-1 index.
    for position, (idx, ward) in enumerate(wards.iterrows()):
        if position % 20 == 0:
            print(f"   Ward {position+1}/{len(wards)}...")

        # Strip surrounding whitespace: the ward fields can carry a trailing
        # newline, which would otherwise end up in the CSV and break joins.
        ward_id = str(ward.get('WARD', idx)).strip()
        ward_name = ward.get('ward_name', ward.get('WARD_NAME', f'Ward_{ward_id}'))
        if isinstance(ward_name, str):
            ward_name = ward_name.strip()

        try:
            # Mask raster to ward boundary; pixels outside the polygon become 0
            ward_geom = [ward.geometry.__geo_interface__]
            out_image, out_transform = mask(src, ward_geom, crop=True, nodata=0)

            # Get pixel values (remove nodata): every class code is > 0
            pixels = out_image[0]
            valid_pixels = pixels[pixels > 0]

            ward_landcover_features.append(
                _ward_landcover_features(valid_pixels, ward_id, ward_name)
            )

        except Exception as e:
            # Best effort: keep going, but remember which wards were dropped.
            # NOTE(review): with crop=True rasterio is expected to raise for a
            # geometry that does not overlap the raster - confirm against docs.
            print(f"      Warning: Ward {ward_id} failed - {e}")
            failed_wards.append(ward_id)
            continue

if failed_wards:
    print(f"      Warning: {len(failed_wards)} ward(s) skipped: {', '.join(failed_wards)}")

# Create DataFrame (one row per successfully processed ward)
landcover_df = pd.DataFrame(ward_landcover_features)

print(f"\n‚úì Land cover features calculated for {len(landcover_df)} wards")

# Summary
print(f"\nüìä KMC LAND COVER SUMMARY (Mean % per ward):")
print(f"   Built-up: {landcover_df['built_up_pct'].mean():.1f}%")
print(f"   Vegetation (trees+grass+shrubs): {landcover_df['vegetation_pct'].mean():.1f}%")
print(f"   Cropland: {landcover_df['cropland_pct'].mean():.1f}%")
print(f"   Permanent water: {landcover_df['water_pct'].mean():.1f}%")
print(f"   Wetlands: {landcover_df['wetland_pct'].mean():.1f}%")

print(f"\nüåä FLOOD-RELEVANT METRICS:")
print(f"   Mean imperviousness: {landcover_df['imperviousness_worldcover'].mean():.1f}%")
print(f"   Mean runoff coefficient: {landcover_df['runoff_coefficient'].mean():.3f}")

# NOTE(review): the buildings figure below is a hard-coded literal, not a value
# computed in this notebook - it will go stale if the buildings analysis changes.
print(f"\nüèóÔ∏è  Compare Buildings vs WorldCover:")
print(f"   Buildings imperviousness estimate: 53.4% (from earlier)")
print(f"   WorldCover built-up: {landcover_df['built_up_pct'].mean():.1f}%")
print(f"   Difference: WorldCover captures roads/pavements too!")

# Save
landcover_df.to_csv(LANDCOVER_DIR / 'ward_landcover_features.csv', index=False)
print(f"\n‚úì Saved: {LANDCOVER_DIR / 'ward_landcover_features.csv'}")

print("\n‚úÖ ESA WORLDCOVER EXTRACTION COMPLETE!")


‚öôÔ∏è  Extracting land cover statistics per ward...
   Ward 1/141...
   Ward 21/141...
   Ward 41/141...
   Ward 61/141...
   Ward 81/141...
   Ward 101/141...
   Ward 121/141...
   Ward 141/141...

‚úì Land cover features calculated for 141 wards

üìä KMC LAND COVER SUMMARY (Mean % per ward):
   Built-up: 82.5%
   Vegetation (trees+grass+shrubs): 15.3%
   Cropland: 0.6%
   Permanent water: 1.2%
   Wetlands: 0.1%

üåä FLOOD-RELEVANT METRICS:
   Mean imperviousness: 82.8%
   Mean runoff coefficient: 0.783

üèóÔ∏è  Compare Buildings vs WorldCover:
   Buildings imperviousness estimate: 53.4% (from earlier)
   WorldCover built-up: 82.5%
   Difference: WorldCover captures roads/pavements too!

‚úì Saved: ../data/land_cover/ward_landcover_features.csv

‚úÖ ESA WORLDCOVER EXTRACTION COMPLETE!


In [3]:
# --- Rankings: print the ten highest wards for three land cover metrics ----
# Each ranking selects its display columns from landcover_df and prints one
# line per ward.

# Most impervious wards
impervious_columns = [
    'ward_name', 'built_up_pct', 'imperviousness_worldcover',
    'runoff_coefficient', 'vegetation_pct',
]
print("\nüèÜ TOP 10 MOST IMPERVIOUS WARDS:")
top_impervious = landcover_df.nlargest(10, 'imperviousness_worldcover')[impervious_columns]
for ward_row in top_impervious.itertuples(index=False):
    line = (
        f"   {ward_row.ward_name}: {ward_row.built_up_pct:.1f}% built, "
        f"{ward_row.imperviousness_worldcover:.1f}% impervious, "
        f"runoff={ward_row.runoff_coefficient:.2f}, "
        f"veg={ward_row.vegetation_pct:.1f}%"
    )
    print(line)

# Most vegetated wards
vegetation_columns = ['ward_name', 'vegetation_pct', 'built_up_pct', 'runoff_coefficient']
print("\nüå≥ TOP 10 MOST VEGETATED WARDS:")
top_veg = landcover_df.nlargest(10, 'vegetation_pct')[vegetation_columns]
for ward_row in top_veg.itertuples(index=False):
    line = (
        f"   {ward_row.ward_name}: {ward_row.vegetation_pct:.1f}% vegetation, "
        f"{ward_row.built_up_pct:.1f}% built, "
        f"runoff={ward_row.runoff_coefficient:.2f}"
    )
    print(line)

# Wards with the largest share of permanent water
water_columns = ['ward_name', 'water_pct', 'wetland_pct']
print("\nüíß WARDS WITH MOST PERMANENT WATER:")
water_wards = landcover_df.nlargest(10, 'water_pct')[water_columns]
for ward_row in water_wards.itertuples(index=False):
    line = (
        f"   {ward_row.ward_name}: {ward_row.water_pct:.1f}% water, "
        f"{ward_row.wetland_pct:.1f}% wetland"
    )
    print(line)

# Closing recap of the columns intended as model inputs
print("\n‚úÖ All land cover features extracted!")
print("\nüìã Features for model:")
for feature_note in (
    "   ‚Ä¢ built_up_pct (WorldCover class 50)",
    "   ‚Ä¢ vegetation_pct (trees + grass + shrubs)",
    "   ‚Ä¢ water_pct (permanent water bodies)",
    "   ‚Ä¢ imperviousness_worldcover (built + bare areas)",
    "   ‚Ä¢ runoff_coefficient (weighted by land cover)",
):
    print(feature_note)


üèÜ TOP 10 MOST IMPERVIOUS WARDS:
   Ward_42
: 100.0% built, 100.0% impervious, runoff=0.90, veg=0.0%
   Ward_43
: 100.0% built, 100.0% impervious, runoff=0.90, veg=0.0%
   Ward_17
: 99.8% built, 99.9% impervious, runoff=0.90, veg=0.1%
   Ward_48
: 99.8% built, 99.8% impervious, runoff=0.90, veg=0.2%
   Ward_25
: 99.6% built, 99.7% impervious, runoff=0.90, veg=0.2%
   Ward_23
: 99.6% built, 99.6% impervious, runoff=0.90, veg=0.4%
   Ward_18
: 99.1% built, 99.6% impervious, runoff=0.90, veg=0.3%
   Ward_24
: 99.5% built, 99.5% impervious, runoff=0.90, veg=0.5%
   Ward_16
: 99.3% built, 99.4% impervious, runoff=0.90, veg=0.6%
   Ward_53
: 99.1% built, 99.1% impervious, runoff=0.89, veg=0.9%

üå≥ TOP 10 MOST VEGETATED WARDS:
   Ward_94
: 54.8% vegetation, 43.9% built, runoff=0.50
   Ward_141
: 54.5% vegetation, 37.9% built, runoff=0.48
   Ward_74
: 49.4% vegetation, 50.1% built, runoff=0.53
   Ward_140
: 48.4% vegetation, 50.3% built, runoff=0.53
   Ward_63
: 45.9% vegetation, 47.2% bu