In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np
from pathlib import Path

print("=" * 60)
print("NEIGHBOR SPATIAL FEATURES - WARD CONNECTIVITY")
print("=" * 60)

# Paths
DATA_DIR = Path('../data')
WARDS_DIR = DATA_DIR / 'wards'
FEATURES_DIR = DATA_DIR / 'processed'
FEATURES_DIR.mkdir(parents=True, exist_ok=True)

# Load wards
print("\nüìÇ Loading ward data...")
wards = gpd.read_file(WARDS_DIR / 'kmc_wards_gee_ready.geojson')

# Convert ward ID to string for consistent merging
wards['ward_id_str'] = wards['WARD'].astype(str)

# Load existing features
canals = pd.read_csv(DATA_DIR / 'canals/ward_canal_features.csv')
buildings = pd.read_csv(DATA_DIR / 'buildings/ward_building_features.csv')
landcover = pd.read_csv(DATA_DIR / 'land_cover/ward_landcover_features.csv')
soil = pd.read_csv(DATA_DIR / 'soil/ward_soil_features.csv')

# Ensure all ward_ids are strings
canals['ward_id'] = canals['ward_id'].astype(str)
buildings['ward_id'] = buildings['ward_id'].astype(str)
landcover['ward_id'] = landcover['ward_id'].astype(str)
soil['ward_id'] = soil['ward_id'].astype(str)

print(f"‚úì Loaded {len(wards)} wards and all feature files")

# Merge features
wards = wards.merge(canals[['ward_id', 'canal_length_km', 'canal_density', 'primary_drainage_type']],
                    left_on='ward_id_str', right_on='ward_id', how='left')
wards = wards.merge(buildings[['ward_id', 'building_coverage_pct', 'imperviousness_estimate']],
                    left_on='ward_id_str', right_on='ward_id', how='left', suffixes=('', '_bldg'))
wards = wards.merge(landcover[['ward_id', 'imperviousness_worldcover', 'runoff_coefficient', 'built_up_pct']],
                    left_on='ward_id_str', right_on='ward_id', how='left', suffixes=('', '_lc'))
wards = wards.merge(soil[['ward_id', 'infiltration_index']],
                    left_on='ward_id_str', right_on='ward_id', how='left', suffixes=('', '_soil'))

print(f"‚úì Merged all feature datasets")

# Calculate spatial neighbors
print(f"\n‚öôÔ∏è  Finding neighboring wards (adjacency)...")

ward_neighbors = {}

for idx, ward in wards.iterrows():
    if idx % 20 == 0:
        print(f"   Ward {idx+1}/{len(wards)}...")

    ward_id = str(ward['WARD'])
    ward_geom = ward.geometry

    # Find touching/intersecting wards
    neighbors = wards[
        (wards.geometry.touches(ward_geom)) |
        (wards.geometry.intersects(ward_geom))
    ]

    # Exclude self
    neighbors = neighbors[neighbors['WARD'].astype(str) != ward_id]

    neighbor_ids = neighbors['WARD'].astype(str).tolist()
    ward_neighbors[ward_id] = neighbor_ids

print(f"‚úì Neighbor relationships calculated for {len(ward_neighbors)} wards")

# Statistics
neighbor_counts = [len(v) for v in ward_neighbors.values()]
print(f"\nüìä NEIGHBOR STATISTICS:")
print(f"   Mean neighbors per ward: {np.mean(neighbor_counts):.1f}")
print(f"   Median neighbors: {np.median(neighbor_counts):.0f}")
print(f"   Max neighbors: {max(neighbor_counts)}")
print(f"   Min neighbors: {min(neighbor_counts)}")

# Find wards with few neighbors (boundary/isolated)
isolated = [k for k, v in ward_neighbors.items() if len(v) <= 2]
print(f"   Isolated wards (‚â§2 neighbors): {len(isolated)}")
if len(isolated) > 0:
    print(f"      Wards: {isolated[:10]}")  # Show first 10

NEIGHBOR SPATIAL FEATURES - WARD CONNECTIVITY

üìÇ Loading ward data...
‚úì Loaded 141 wards and all feature files
‚úì Merged all feature datasets

‚öôÔ∏è  Finding neighboring wards (adjacency)...
   Ward 1/141...
   Ward 21/141...
   Ward 41/141...
   Ward 61/141...
   Ward 81/141...
   Ward 101/141...
   Ward 121/141...
   Ward 141/141...
‚úì Neighbor relationships calculated for 141 wards

üìä NEIGHBOR STATISTICS:
   Mean neighbors per ward: 5.2
   Median neighbors: 5
   Max neighbors: 10
   Min neighbors: 2
   Isolated wards (‚â§2 neighbors): 4
      Wards: ['125\n', '127\n', '124\n', '1\n']


In [3]:
print("\n‚öôÔ∏è  Calculating neighbor-based features...")

neighbor_features_list = []

for idx, ward in wards.iterrows():
    if idx % 20 == 0:
        print(f"   Ward {idx+1}/{len(wards)}...")

    ward_id = str(ward['WARD'])
    neighbor_ids = ward_neighbors.get(ward_id, [])

    # Get neighbor data
    neighbors = wards[wards['WARD'].astype(str).isin(neighbor_ids)]

    if len(neighbors) > 0:
        # Calculate neighbor aggregates
        features = {
            'ward_id': ward_id,
            'neighbor_count': len(neighbors),

            # Drainage features
            'neighbor_avg_canal_density': neighbors['canal_density'].mean(),
            'neighbor_max_canal_density': neighbors['canal_density'].max(),
            'neighbor_canal_wards': (neighbors['canal_length_km'] > 0).sum(),

            # Urban intensity
            'neighbor_avg_imperviousness': neighbors['imperviousness_worldcover'].mean(),
            'neighbor_max_imperviousness': neighbors['imperviousness_worldcover'].max(),
            'neighbor_avg_runoff': neighbors['runoff_coefficient'].mean(),

            # Building density
            'neighbor_avg_building_coverage': neighbors['building_coverage_pct'].mean(),

            # Drainage type distribution
            'neighbor_canal_fraction': (neighbors['primary_drainage_type'] == 'canal').sum() / len(neighbors),
            'neighbor_pipe_fraction': (neighbors['primary_drainage_type'] == 'pipe').sum() / len(neighbors),

            # Spatial heterogeneity
            'neighbor_imperviousness_std': neighbors['imperviousness_worldcover'].std(),
            'neighbor_runoff_std': neighbors['runoff_coefficient'].std(),

            # Self vs neighbors comparison
            'imperviousness_vs_neighbors': ward['imperviousness_worldcover'] - neighbors['imperviousness_worldcover'].mean(),
            'runoff_vs_neighbors': ward['runoff_coefficient'] - neighbors['runoff_coefficient'].mean(),

            # Downstream indicator (receives flow from neighbors)
            # Simple proxy: If I have higher avg imperviousness, I might receive runoff
            'likely_receives_flow': 1 if ward['runoff_coefficient'] < neighbors['runoff_coefficient'].mean() else 0,
        }
    else:
        # Isolated ward or boundary ward
        features = {
            'ward_id': ward_id,
            'neighbor_count': 0,
            'neighbor_avg_canal_density': 0,
            'neighbor_max_canal_density': 0,
            'neighbor_canal_wards': 0,
            'neighbor_avg_imperviousness': 0,
            'neighbor_max_imperviousness': 0,
            'neighbor_avg_runoff': 0,
            'neighbor_avg_building_coverage': 0,
            'neighbor_canal_fraction': 0,
            'neighbor_pipe_fraction': 0,
            'neighbor_imperviousness_std': 0,
            'neighbor_runoff_std': 0,
            'imperviousness_vs_neighbors': 0,
            'runoff_vs_neighbors': 0,
            'likely_receives_flow': 0,
        }

    neighbor_features_list.append(features)

# Create DataFrame
neighbor_features_df = pd.DataFrame(neighbor_features_list)

print(f"\n‚úì Neighbor features calculated for {len(neighbor_features_df)} wards")

# Summary
print(f"\nüìä NEIGHBOR FEATURE SUMMARY:")
print(f"   Mean neighbor imperviousness: {neighbor_features_df['neighbor_avg_imperviousness'].mean():.1f}%")
print(f"   Mean neighbor runoff coeff: {neighbor_features_df['neighbor_avg_runoff'].mean():.3f}")
print(f"   Wards receiving flow from neighbors: {neighbor_features_df['likely_receives_flow'].sum()}")

print(f"\nüèÜ WARDS WITH HIGHEST-RUNOFF NEIGHBORS:")
high_neighbor_runoff = neighbor_features_df.nlargest(10, 'neighbor_avg_runoff')[
    ['ward_id', 'neighbor_count', 'neighbor_avg_imperviousness', 'neighbor_avg_runoff']
]
for idx, row in high_neighbor_runoff.iterrows():
    print(f"   Ward {row['ward_id']}: {row['neighbor_count']} neighbors, "
          f"avg {row['neighbor_avg_imperviousness']:.1f}% impervious, "
          f"runoff={row['neighbor_avg_runoff']:.3f}")

print(f"\nüåä WARDS LIKELY RECEIVING FLOW:")
receivers = neighbor_features_df[neighbor_features_df['likely_receives_flow'] == 1]
print(f"   Count: {len(receivers)} wards")
print(f"   These wards have lower runoff than neighbors (water flows TO them)")

# Save
neighbor_features_df.to_csv(FEATURES_DIR / 'ward_neighbor_features.csv', index=False)
print(f"\n‚úì Saved: {FEATURES_DIR / 'ward_neighbor_features.csv'}")

print("\n‚úÖ NEIGHBOR FEATURES EXTRACTION COMPLETE!")


‚öôÔ∏è  Calculating neighbor-based features...
   Ward 1/141...
   Ward 21/141...
   Ward 41/141...
   Ward 61/141...
   Ward 81/141...
   Ward 101/141...
   Ward 121/141...
   Ward 141/141...

‚úì Neighbor features calculated for 141 wards

üìä NEIGHBOR FEATURE SUMMARY:
   Mean neighbor imperviousness: nan%
   Mean neighbor runoff coeff: nan
   Wards receiving flow from neighbors: 0

üèÜ WARDS WITH HIGHEST-RUNOFF NEIGHBORS:
   Ward 93
: 6 neighbors, avg nan% impervious, runoff=nan
   Ward 61
: 4 neighbors, avg nan% impervious, runoff=nan
   Ward 86
: 4 neighbors, avg nan% impervious, runoff=nan
   Ward 90
: 8 neighbors, avg nan% impervious, runoff=nan
   Ward 26
: 6 neighbors, avg nan% impervious, runoff=nan
   Ward 72
: 6 neighbors, avg nan% impervious, runoff=nan
   Ward 134
: 3 neighbors, avg nan% impervious, runoff=nan
   Ward 99
: 5 neighbors, avg nan% impervious, runoff=nan
   Ward 125
: 2 neighbors, avg nan% impervious, runoff=nan
   Ward 118
: 5 neighbors, avg nan% impervio

In [4]:
import geopandas as gpd
import pandas as pd
import numpy as np
from pathlib import Path

print("=" * 60)
print("NEIGHBOR SPATIAL FEATURES - DEBUG & FIX")
print("=" * 60)

# Paths
DATA_DIR = Path('../data')
WARDS_DIR = DATA_DIR / 'wards'
FEATURES_DIR = DATA_DIR / 'processed'
FEATURES_DIR.mkdir(parents=True, exist_ok=True)

# Load wards (geometry only)
print("\nüìÇ Loading data...")
wards_geom = gpd.read_file(WARDS_DIR / 'kmc_wards_gee_ready.geojson')
wards_geom['ward_id'] = wards_geom['WARD'].astype(str)

# Load feature CSVs (don't merge yet - keep separate)
canals = pd.read_csv(DATA_DIR / 'canals/ward_canal_features.csv')
buildings = pd.read_csv(DATA_DIR / 'buildings/ward_building_features.csv')
landcover = pd.read_csv(DATA_DIR / 'land_cover/ward_landcover_features.csv')

# Ensure string IDs
canals['ward_id'] = canals['ward_id'].astype(str).str.strip()
buildings['ward_id'] = buildings['ward_id'].astype(str).str.strip()
landcover['ward_id'] = landcover['ward_id'].astype(str).str.strip()
wards_geom['ward_id'] = wards_geom['ward_id'].str.strip()

print(f"‚úì Loaded geometries and features separately")
print(f"  Wards: {len(wards_geom)}")
print(f"  Canals: {len(canals)} rows")
print(f"  Buildings: {len(buildings)} rows")
print(f"  Landcover: {len(landcover)} rows")

# Create lookup dictionaries for fast access
print(f"\n‚öôÔ∏è  Creating feature lookup dictionaries...")

# Canal features
canal_dict = canals.set_index('ward_id')['canal_density'].to_dict()
drainage_type_dict = canals.set_index('ward_id')['primary_drainage_type'].to_dict()

# Building features
building_coverage_dict = buildings.set_index('ward_id')['building_coverage_pct'].to_dict()

# Landcover features
imperviousness_dict = landcover.set_index('ward_id')['imperviousness_worldcover'].to_dict()
runoff_dict = landcover.set_index('ward_id')['runoff_coefficient'].to_dict()
builtup_dict = landcover.set_index('ward_id')['built_up_pct'].to_dict()

print(f"‚úì Feature dictionaries created")

# Calculate neighbors
print(f"\n‚öôÔ∏è  Finding neighbors and calculating features...")

ward_neighbors = {}
neighbor_features_list = []

for idx, ward in wards_geom.iterrows():
    if idx % 20 == 0:
        print(f"   Ward {idx+1}/{len(wards_geom)}...")

    ward_id = ward['ward_id']
    ward_geom = ward.geometry

    # Find neighbors
    neighbors = wards_geom[
        (wards_geom.geometry.touches(ward_geom)) |
        (wards_geom.geometry.intersects(ward_geom))
    ]
    neighbors = neighbors[neighbors['ward_id'] != ward_id]

    neighbor_ids = neighbors['ward_id'].tolist()
    ward_neighbors[ward_id] = neighbor_ids

    if len(neighbor_ids) > 0:
        # Get feature values for neighbors (using dictionaries)
        neighbor_canal = [canal_dict.get(nid, 0) for nid in neighbor_ids]
        neighbor_imperv = [imperviousness_dict.get(nid, 0) for nid in neighbor_ids]
        neighbor_runoff = [runoff_dict.get(nid, 0) for nid in neighbor_ids]
        neighbor_building = [building_coverage_dict.get(nid, 0) for nid in neighbor_ids]
        neighbor_drainage = [drainage_type_dict.get(nid, 'unknown') for nid in neighbor_ids]

        # Get current ward values
        ward_imperv = imperviousness_dict.get(ward_id, 0)
        ward_runoff = runoff_dict.get(ward_id, 0)

        features = {
            'ward_id': ward_id,
            'neighbor_count': len(neighbor_ids),

            # Canal features
            'neighbor_avg_canal_density': np.mean(neighbor_canal),
            'neighbor_max_canal_density': np.max(neighbor_canal),
            'neighbor_canal_wards': sum(1 for c in neighbor_canal if c > 0),

            # Imperviousness
            'neighbor_avg_imperviousness': np.mean(neighbor_imperv),
            'neighbor_max_imperviousness': np.max(neighbor_imperv),
            'neighbor_min_imperviousness': np.min(neighbor_imperv),

            # Runoff
            'neighbor_avg_runoff': np.mean(neighbor_runoff),
            'neighbor_max_runoff': np.max(neighbor_runoff),

            # Building coverage
            'neighbor_avg_building_coverage': np.mean(neighbor_building),

            # Drainage type
            'neighbor_canal_fraction': sum(1 for d in neighbor_drainage if d == 'canal') / len(neighbor_drainage),
            'neighbor_pipe_fraction': sum(1 for d in neighbor_drainage if d == 'pipe') / len(neighbor_drainage),

            # Variability
            'neighbor_imperviousness_std': np.std(neighbor_imperv),
            'neighbor_runoff_std': np.std(neighbor_runoff),

            # Self vs neighbors
            'imperviousness_vs_neighbors': ward_imperv - np.mean(neighbor_imperv),
            'runoff_vs_neighbors': ward_runoff - np.mean(neighbor_runoff),

            # Flow direction indicators
            'lower_runoff_than_neighbors': 1 if ward_runoff < np.mean(neighbor_runoff) else 0,
            'higher_imperv_than_neighbors': 1 if ward_imperv > np.mean(neighbor_imperv) else 0,

            # Receives flow proxy (lower runoff = sink for neighbor water)
            'likely_receives_flow': 1 if ward_runoff < np.percentile(neighbor_runoff, 25) else 0,
        }
    else:
        # No neighbors (boundary ward)
        features = {
            'ward_id': ward_id,
            'neighbor_count': 0,
            'neighbor_avg_canal_density': 0,
            'neighbor_max_canal_density': 0,
            'neighbor_canal_wards': 0,
            'neighbor_avg_imperviousness': 0,
            'neighbor_max_imperviousness': 0,
            'neighbor_min_imperviousness': 0,
            'neighbor_avg_runoff': 0,
            'neighbor_max_runoff': 0,
            'neighbor_avg_building_coverage': 0,
            'neighbor_canal_fraction': 0,
            'neighbor_pipe_fraction': 0,
            'neighbor_imperviousness_std': 0,
            'neighbor_runoff_std': 0,
            'imperviousness_vs_neighbors': 0,
            'runoff_vs_neighbors': 0,
            'lower_runoff_than_neighbors': 0,
            'higher_imperv_than_neighbors': 0,
            'likely_receives_flow': 0,
        }

    neighbor_features_list.append(features)

# Create DataFrame
neighbor_features_df = pd.DataFrame(neighbor_features_list)

print(f"\n‚úì Neighbor features calculated for {len(neighbor_features_df)} wards")

# Summary
print(f"\nüìä NEIGHBOR FEATURE SUMMARY:")
print(f"   Mean neighbor imperviousness: {neighbor_features_df['neighbor_avg_imperviousness'].mean():.1f}%")
print(f"   Mean neighbor runoff: {neighbor_features_df['neighbor_avg_runoff'].mean():.3f}")
print(f"   Wards receiving flow: {neighbor_features_df['likely_receives_flow'].sum()}")

print(f"\nüåä SPATIAL PATTERNS:")
print(f"   Wards with higher imperv than neighbors: {neighbor_features_df['higher_imperv_than_neighbors'].sum()}")
print(f"   Wards with lower runoff than neighbors: {neighbor_features_df['lower_runoff_than_neighbors'].sum()}")

# Top wards receiving flow
print(f"\nüèÜ WARDS LIKELY RECEIVING FLOW FROM NEIGHBORS:")
flow_receivers = neighbor_features_df[neighbor_features_df['likely_receives_flow'] == 1].nlargest(10, 'neighbor_avg_runoff')[
    ['ward_id', 'neighbor_count', 'neighbor_avg_runoff', 'runoff_vs_neighbors']
]
for idx, row in flow_receivers.iterrows():
    print(f"   Ward {row['ward_id']}: {row['neighbor_count']} neighbors, "
          f"neighbor runoff={row['neighbor_avg_runoff']:.3f}, "
          f"difference={row['runoff_vs_neighbors']:.3f} (sink)")

# Save
neighbor_features_df.to_csv(FEATURES_DIR / 'ward_neighbor_features.csv', index=False)
print(f"\n‚úì Saved: {FEATURES_DIR / 'ward_neighbor_features.csv'}")

print("\n‚úÖ NEIGHBOR SPATIAL FEATURES COMPLETE!")
print("\nüéØ Features capture:")
print("   ‚Ä¢ Spatial context beyond flow accumulation")
print("   ‚Ä¢ Neighbor imperviousness and runoff characteristics")
print("   ‚Ä¢ Flow direction proxies (who receives water from whom)")
print("   ‚Ä¢ Spatial heterogeneity (urban-green interface)")
print("\nExpected contribution: +1-2% F1-score")

NEIGHBOR SPATIAL FEATURES - DEBUG & FIX

üìÇ Loading data...
‚úì Loaded geometries and features separately
  Wards: 141
  Canals: 141 rows
  Buildings: 141 rows
  Landcover: 141 rows

‚öôÔ∏è  Creating feature lookup dictionaries...
‚úì Feature dictionaries created

‚öôÔ∏è  Finding neighbors and calculating features...
   Ward 1/141...
   Ward 21/141...
   Ward 41/141...
   Ward 61/141...
   Ward 81/141...
   Ward 101/141...
   Ward 121/141...
   Ward 141/141...

‚úì Neighbor features calculated for 141 wards

üìä NEIGHBOR FEATURE SUMMARY:
   Mean neighbor imperviousness: 81.9%
   Mean neighbor runoff: 0.778
   Wards receiving flow: 38

üåä SPATIAL PATTERNS:
   Wards with higher imperv than neighbors: 90
   Wards with lower runoff than neighbors: 51

üèÜ WARDS LIKELY RECEIVING FLOW FROM NEIGHBORS:
   Ward 41: 5 neighbors, neighbor runoff=0.897, difference=-0.050 (sink)
   Ward 51: 3 neighbors, neighbor runoff=0.894, difference=-0.062 (sink)
   Ward 26: 6 neighbors, neighbor runoff=0

In [5]:
print("\n‚öôÔ∏è  Adding boundary ward indicators...")

# Add boundary features to neighbor_features_df
neighbor_features_df['is_boundary_ward'] = (
    neighbor_features_df['neighbor_count'] <= 3
).astype(int)

# Identify which edge (for directional external flow)
# Need to check ward position relative to KMC bounds

# Reload wards for geometry
wards_geom = gpd.read_file(WARDS_DIR / 'kmc_wards_gee_ready.geojson')
wards_geom['ward_id'] = wards_geom['WARD'].astype(str).str.strip()

# Get KMC bounding box
kmc_bounds = wards_geom.total_bounds  # [minx, miny, maxx, maxy]

# For each ward, determine which edge it's on
boundary_positions = []

for idx, row in neighbor_features_df.iterrows():
    ward_id = row['ward_id']

    if row['is_boundary_ward'] == 1:
        # Get ward geometry
        ward_geom = wards_geom[wards_geom['ward_id'] == ward_id].geometry.iloc[0]
        centroid = ward_geom.centroid

        # Determine position relative to KMC bounds
        # Use 5% margin to identify edge wards
        margin = 0.02  # degrees (~2km)

        positions = []
        if centroid.x < kmc_bounds[0] + margin:
            positions.append('west')
        if centroid.x > kmc_bounds[2] - margin:
            positions.append('east')
        if centroid.y < kmc_bounds[1] + margin:
            positions.append('south')
        if centroid.y > kmc_bounds[3] - margin:
            positions.append('north')

        position = '_'.join(positions) if positions else 'interior'
    else:
        position = 'interior'

    boundary_positions.append(position)

neighbor_features_df['boundary_position'] = boundary_positions

# Create directional external flow indicators
neighbor_features_df['external_flow_east'] = (
    (neighbor_features_df['boundary_position'].str.contains('east', na=False)) &
    (neighbor_features_df['is_boundary_ward'] == 1)
).astype(int)

neighbor_features_df['external_flow_west'] = (
    (neighbor_features_df['boundary_position'].str.contains('west', na=False)) &
    (neighbor_features_df['is_boundary_ward'] == 1)
).astype(int)

neighbor_features_df['external_flow_north'] = (
    (neighbor_features_df['boundary_position'].str.contains('north', na=False)) &
    (neighbor_features_df['is_boundary_ward'] == 1)
).astype(int)

neighbor_features_df['external_flow_south'] = (
    (neighbor_features_df['boundary_position'].str.contains('south', na=False)) &
    (neighbor_features_df['is_boundary_ward'] == 1)
).astype(int)

print(f"‚úì Boundary indicators added")

# Summary
print(f"\nüö© BOUNDARY WARD ANALYSIS:")
boundary_wards = neighbor_features_df[neighbor_features_df['is_boundary_ward'] == 1]
print(f"   Total boundary wards: {len(boundary_wards)}")

boundary_dist = neighbor_features_df['boundary_position'].value_counts()
print(f"\n   By position:")
for pos, count in boundary_dist.items():
    if pos != 'interior':
        print(f"      {pos}: {count} wards")

print(f"\n   External flow directions:")
print(f"      East (Salt Lake, New Town): {neighbor_features_df['external_flow_east'].sum()} wards")
print(f"      West (Howrah, across river): {neighbor_features_df['external_flow_west'].sum()} wards")
print(f"      North (Dum Dum, Baranagar): {neighbor_features_df['external_flow_north'].sum()} wards")
print(f"      South (Jadavpur, suburbs): {neighbor_features_df['external_flow_south'].sum()} wards")

# Re-save with boundary features
neighbor_features_df.to_csv(FEATURES_DIR / 'ward_neighbor_features.csv', index=False)
print(f"\n‚úì Updated: {FEATURES_DIR / 'ward_neighbor_features.csv'}")

print("\n‚úÖ BOUNDARY FEATURES ADDED!")
print("\nüí° Model will learn:")
print("   ‚Ä¢ Boundary wards may have different flood patterns")
print("   ‚Ä¢ Eastern boundary receives flow from Salt Lake/New Town")
print("   ‚Ä¢ Western boundary affected by Howrah/river dynamics")
print("\nFuture work: Explicit buffer zone ward modeling")



‚öôÔ∏è  Adding boundary ward indicators...
‚úì Boundary indicators added

üö© BOUNDARY WARD ANALYSIS:
   Total boundary wards: 20

   By position:
      south: 3 wards
      west: 2 wards
      north: 2 wards

   External flow directions:
      East (Salt Lake, New Town): 0 wards
      West (Howrah, across river): 2 wards
      North (Dum Dum, Baranagar): 2 wards
      South (Jadavpur, suburbs): 3 wards

‚úì Updated: ../data/processed/ward_neighbor_features.csv

‚úÖ BOUNDARY FEATURES ADDED!

üí° Model will learn:
   ‚Ä¢ Boundary wards may have different flood patterns
   ‚Ä¢ Eastern boundary receives flow from Salt Lake/New Town
   ‚Ä¢ Western boundary affected by Howrah/river dynamics

Future work: Explicit buffer zone ward modeling


In [6]:
# Rename/reinterpret western flag
print("\nüîß Correcting western boundary interpretation...")

neighbor_features_df['river_proximity_high'] = neighbor_features_df['external_flow_west']
neighbor_features_df['tidal_influence_ward'] = neighbor_features_df['external_flow_west']

# Drop misleading column name
neighbor_features_df = neighbor_features_df.drop('external_flow_west', axis=1)

# Re-save
neighbor_features_df.to_csv(FEATURES_DIR / 'ward_neighbor_features.csv', index=False)

print("‚úì Corrected western boundary interpretation")
print("\nüìã Updated features:")
print("   ‚Ä¢ is_boundary_ward (20 wards)")
print("   ‚Ä¢ external_flow_east (receives from Salt Lake)")
print("   ‚Ä¢ external_flow_north (receives from Dum Dum)")
print("   ‚Ä¢ external_flow_south (mixed)")
print("   ‚Ä¢ river_proximity_high (western wards near Hooghly)")
print("   ‚Ä¢ tidal_influence_ward (affected by river tides)")

print("\n‚úÖ BOUNDARY FEATURES FINALIZED!")


üîß Correcting western boundary interpretation...
‚úì Corrected western boundary interpretation

üìã Updated features:
   ‚Ä¢ is_boundary_ward (20 wards)
   ‚Ä¢ external_flow_east (receives from Salt Lake)
   ‚Ä¢ external_flow_north (receives from Dum Dum)
   ‚Ä¢ external_flow_south (mixed)
   ‚Ä¢ river_proximity_high (western wards near Hooghly)
   ‚Ä¢ tidal_influence_ward (affected by river tides)

‚úÖ BOUNDARY FEATURES FINALIZED!
