# Process Point Data

In [None]:
from pathlib import Path

import geopandas as gpd
import numpy as np
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import BallTree

# Prepare Dataset

Add the columns `burned_proportion_10m`, `burned_proportion_20m`, `burned_proportion_50m`, and `burned_proportion_100m`, along with the matching `total_neighbors_*` and `burned_neighbors_*` counts for each radius.

In [None]:
def _as_epsg3857(gdf):
    """Return *gdf* in EPSG:3857, reprojecting when it already declares a CRS."""
    if gdf.crs is None:
        # No CRS metadata: keep the original assumption that the coordinates
        # are already expressed in EPSG:3857 and only attach the label.
        return gdf.set_crs(epsg=3857)
    # set_crs() never transforms coordinates (and refuses to relabel a
    # different CRS), so a layer stored in another CRS must be reprojected.
    return gdf.to_crs(epsg=3857)


def preprocess_inputs(polygon_shapefile, point_gpkg_path, point_layer):
    """Load the polygon and point layers and link them through a shared ``uid``.

    Args:
        polygon_shapefile: Path of the polygon layer, read with
            ``gpd.read_file``.
        point_gpkg_path: Path of the GeoPackage holding the point layer.
        point_layer: Name of the layer to read from ``point_gpkg_path``.
            It must provide a ``DAMAGE`` column.

    Returns:
        A ``(polys_with_burned, joined)`` tuple:

        * ``joined`` -- the points that fall within a polygon, with a 0/1
          ``burned`` column and a ``uid`` column equal to the row position.
        * ``polys_with_burned`` -- one row per (polygon, contained point)
          pair, carrying that point's ``burned`` flag and ``uid``.

    Raises:
        ValueError: If a layer does not end up in a projected CRS.

    Note:
        ``uid`` identifies a point and the polygon row that contains it.
        A point lying in several polygons, or a polygon containing several
        points, yields duplicated rows in the outputs.
    """
    # Load input layers and bring both into the same metric CRS
    polys = _as_epsg3857(gpd.read_file(polygon_shapefile))
    points = _as_epsg3857(gpd.read_file(point_gpkg_path, layer=point_layer))

    for gdf, name in zip([polys, points], ['Polygon layer', 'Point layer']):
        if not gdf.crs.is_projected:
            raise ValueError(f"{name} must be projected in meters (e.g., EPSG:3857).")

    # Create burned field from DAMAGE: only the 'destroyed (>50%)' class
    # (case-insensitive) counts as burned.
    points['burned'] = points['DAMAGE'].apply(
        lambda x: 1 if str(x).lower() == 'destroyed (>50%)' else 0
    )
    print("Unique DAMAGE values in points:", points['DAMAGE'].unique())

    # Spatial join: keep only points that fall inside a polygon
    joined = gpd.sjoin(points, polys[['geometry']], how='inner', predicate='within')
    joined = joined.drop(columns=['index_right'])

    # Assign the tracking ID on the points first ...
    joined = joined.reset_index(drop=True)
    joined['uid'] = joined.index

    # ... then carry it onto the polygons through the join, so that a polygon
    # row and the point it contains share the same uid. Numbering both layers
    # independently would pair rows purely by position.
    point_keys = joined[['geometry', 'burned', 'uid']].rename(
        columns={'uid': '_point_uid'}  # temporary name avoids clashing with polygon columns
    )

    # Spatial join: keep only polygons that contain at least one point
    polys_with_burned = gpd.sjoin(polys, point_keys, how='inner', predicate='contains')
    polys_with_burned = polys_with_burned.drop(columns=['index_right'], errors='ignore')
    polys_with_burned['burned'] = polys_with_burned['burned'].astype(int)

    polys_with_burned = polys_with_burned.reset_index(drop=True)
    polys_with_burned['uid'] = polys_with_burned['_point_uid'].astype(int)
    polys_with_burned = polys_with_burned.drop(columns=['_point_uid'])

    return polys_with_burned, joined
def calculate_neighbor_stats(polys, radii_meters=(10, 20, 50, 100)):
    """Count neighboring polygons, and how many of them burned, per radius.

    For every radius each polygon is buffered by that distance and
    intersected with the unbuffered polygons; matches on the polygon's own
    ``uid`` are discarded.

    Args:
        polys: GeoDataFrame with ``uid`` and ``burned`` columns. Distances
            are in the units of its CRS.
        radii_meters: Iterable of buffer distances.

    Returns:
        A copy of ``polys`` with three extra columns per radius ``r``:
        ``total_neighbors_{r}m``, ``burned_neighbors_{r}m`` and
        ``burned_proportion_{r}m`` (0 when a polygon has no neighbors).

    Raises:
        KeyError: If ``polys`` lacks the ``burned`` or ``uid`` column.
    """
    if 'burned' not in polys.columns:
        raise KeyError("Missing 'burned' column in polygon input — check preprocessing.")
    if 'uid' not in polys.columns:
        raise KeyError("Missing 'uid' column in polygon input — check preprocessing.")

    enriched_polys = polys.copy()
    geom_col = enriched_polys.geometry.name

    # Right-hand side of every join: the unbuffered footprints, restricted to
    # the columns that are aggregated and given explicit names so the result
    # does not depend on sjoin's automatic _left/_right suffixes.
    neighbors = enriched_polys[['uid', 'burned', geom_col]].rename(
        columns={'uid': 'neighbor_uid', 'burned': 'neighbor_burned'}
    )

    for radius in radii_meters:
        print(f"Calculating neighbors within {radius}m...")

        # Buffer geometries (only uid + geometry are needed on the left side,
        # so stats columns from earlier radii are not dragged through the join)
        buffered = enriched_polys[['uid', geom_col]].copy()
        buffered[geom_col] = buffered.geometry.buffer(radius)

        pairs = gpd.sjoin(buffered, neighbors, how='inner', predicate='intersects')

        # Drop self-matches
        pairs = pairs[pairs['uid'] != pairs['neighbor_uid']]

        # Aggregate neighbor stats; polygons without any neighbor are absent
        # from the groupby result and are filled in with zeros by reindex.
        stats = pairs.groupby('uid').agg(
            total_neighbors=('neighbor_uid', 'count'),
            burned_neighbors=('neighbor_burned', 'sum'),
        ).reindex(enriched_polys['uid'], fill_value=0)

        # Replace 0 by NaN before dividing to avoid 0/0, then report 0.
        proportion = (
            stats['burned_neighbors'] / stats['total_neighbors'].replace(0, np.nan)
        ).fillna(0)

        # Add to output (.values: stats is ordered like enriched_polys['uid'])
        enriched_polys[f'total_neighbors_{radius}m'] = stats['total_neighbors'].values
        enriched_polys[f'burned_neighbors_{radius}m'] = stats['burned_neighbors'].values
        enriched_polys[f'burned_proportion_{radius}m'] = proportion.values

    return enriched_polys


def merge_stats_back_to_points(points, enriched_polys):
    """Attach the neighbor statistics of ``enriched_polys`` to ``points``.

    Columns of ``enriched_polys`` whose name contains ``'neighbors_'`` or
    ``'proportion'`` are left-joined onto ``points`` using the ``uid``
    column, so every point row is kept even without a matching polygon.

    Args:
        points: Frame with a ``uid`` column.
        enriched_polys: Frame with a ``uid`` column and the stats columns.

    Returns:
        The merged frame.
    """
    def _is_stat_column(name):
        return 'neighbors_' in name or 'proportion' in name

    selected = ['uid']
    selected.extend(name for name in enriched_polys.columns if _is_stat_column(name))

    return points.merge(enriched_polys[selected], how='left', on='uid')

# Input paths
polygon_shapefile = "data/clipped_structure_polygons_altadena/clipped_structure_polygons_altadena.shp"
point_gpkg_path = "data/structures.gpkg"
point_layer = "POSTFIRE"

# Output paths
polygon_output_gpkg = "outputs/structures_polygons_with_neighbors_POLY-METHOD.gpkg"
point_output_gpkg = "outputs/structures_points_with_neighbors_POLY-METHOD.gpkg"

# Main Execution: preprocess -> neighbor statistics -> merge onto points -> save
print("Loading and preprocessing input data...")
polys, points = preprocess_inputs(polygon_shapefile, point_gpkg_path, point_layer)

print("Computing neighbor statistics...")
enriched_polys = calculate_neighbor_stats(polys, radii_meters=[10, 20, 50, 100])

print("Merging statistics back to centroid points...")
enriched_points = merge_stats_back_to_points(points, enriched_polys)

print("Saving output GeoPackages...")
# Make sure the target directories exist before writing, so a fresh checkout
# without an outputs/ folder does not fail at the very last step.
for output_path in (polygon_output_gpkg, point_output_gpkg):
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
enriched_polys.to_file(polygon_output_gpkg, layer="polygons", driver="GPKG")
enriched_points.to_file(point_output_gpkg, layer="points", driver="GPKG")

print("Done.")
print(f"Polygon output saved to: {polygon_output_gpkg}")
print(f"Point output saved to:   {point_output_gpkg}")