# New concept

2. Network Graph Model

    Analyze:
    - How burning "propagated" through the graph.
    - How graph centrality, degree (number of neighbors), and clustering coefficient relate to burn probability.
    - Create an animation showing how the spread occurred between nodes.

In [1]:
import geopandas as gpd
import numpy as np
from sklearn.neighbors import BallTree
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import statsmodels.api as sm

# Prepare Dataset

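Add columns `burned_proportion_20m`, `burned_proportion_50m`, and `burned_proportion_100m`: the share of each structure's neighbors within 20 m, 50 m, and 100 m that burned (0 when a structure has no neighbors in range).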

In [None]:
# def preprocess_structures(gpkg_path, layer_name):
#     # Load the structures layer
#     gdf = gpd.read_file(gpkg_path, layer=layer_name)

#     gdf.set_crs(epsg=3857, inplace=True)

#     # Reproject to meters if needed
#     if not gdf.crs.is_projected:
#         raise ValueError("CRS must be projected (meters). Please reproject your data first.")

#     # Create a binary "burned" column: 1 = Destroyed, 0 = Otherwise
#     gdf['burned'] = gdf['DAMAGE'].apply(lambda x: 1 if str(x).lower() == 'destroyed (>50%)' else 0)
#     print("Unique values:")
#     print(gdf['DAMAGE'].unique())
    
#     return gdf

# def calculate_neighbor_stats(gdf, radii_meters=[20, 50, 100]):
#     coords = np.array(list(zip(gdf.geometry.x, gdf.geometry.y)))
#     tree = BallTree(coords, metric='euclidean')

#     for radius in radii_meters:
#         indices = tree.query_radius(coords, r=radius)
        
#         total_neighbors = []
#         burned_neighbors = []
#         burned_proportion = []

#         burned_array = gdf['burned'].to_numpy()

#         for i, neighbors in enumerate(indices):
#             neighbors = neighbors[neighbors != i]  # exclude self
#             total = len(neighbors)
#             burned_count = burned_array[neighbors].sum() if total > 0 else 0

#             total_neighbors.append(total)
#             burned_neighbors.append(burned_count)
#             burned_proportion.append(burned_count / total if total > 0 else 0)

#         # Save to GeoDataFrame
#         gdf[f'total_neighbors_{radius}m'] = total_neighbors
#         gdf[f'burned_neighbors_{radius}m'] = burned_neighbors
#         gdf[f'burned_proportion_{radius}m'] = burned_proportion

#     return gdf

In [None]:
# gpkg_file = "data/structures.gpkg"
# layer_name = "postfire"
# output_file = "data/structures_with_neighbors.gpkg"

# # Step 1: Load and preprocess
# gdf = preprocess_structures(gpkg_file, layer_name)

# # Step 2: Calculate neighbor stats
# gdf = calculate_neighbor_stats(gdf, radii_meters=[20, 50, 100])

# # Step 3: Save enriched data
# gdf.to_file(output_file, driver="GPKG")
# print(f"Saved enriched structures to {output_file}")

# # Step 4: Train a simple logistic regression model
# feature_columns = [
#     'burned_proportion_20m',
#     'burned_proportion_50m',
#     'burned_proportion_100m'
# ]

Unique values:
['No Damage' 'Destroyed (>50%)' 'Affected (1-9%)' 'Minor (10-25%)'
 'Inaccessible' 'Major (26-50%)']
Saved enriched structures to data/structures_with_neighbors.gpkg


# NEW 5/6/2025

In [15]:
import os

import geopandas as gpd
import numpy as np

def _as_web_mercator(gdf):
    """Return ``gdf`` in EPSG:3857, labelling it when it has no CRS and reprojecting otherwise."""
    if gdf.crs is None:
        # No CRS metadata: keep this notebook's working assumption that the
        # coordinates are already EPSG:3857 and only attach the label.
        return gdf.set_crs(epsg=3857)
    # A declared CRS must be transformed, not overwritten: set_crs() raises on
    # a mismatch, and relabelling would silently misplace every geometry.
    return gdf.to_crs(epsg=3857)


def preprocess_inputs(polygon_shapefile, point_gpkg_path, point_layer):
    """Load structure footprints and damage points and link them by a shared ``uid``.

    Steps:
      1. Read the polygon file and the point layer and bring both to EPSG:3857.
      2. Derive ``burned`` on the points: 1 when ``DAMAGE`` equals
         ``'destroyed (>50%)'`` (case-insensitive), otherwise 0.
      3. Give every polygon a ``uid`` (its row position) and keep only the
         points that fall within a polygon, tagging each point with the
         ``uid`` of the polygon that contains it.
      4. Mark a polygon ``burned`` = 1 when at least one point inside it is
         burned; polygons without any point get 0.

    Args:
        polygon_shapefile: Path of the polygon dataset passed to ``gpd.read_file``.
        point_gpkg_path: Path of the GeoPackage holding the point layer.
        point_layer: Name of the layer to read from ``point_gpkg_path``; it
            must provide a ``DAMAGE`` column.

    Returns:
        ``(polys_with_burned, joined)``: the polygons (one row per polygon,
        unique ``uid``, integer ``burned``) and the points inside polygons,
        whose ``uid`` refers to the containing polygon. A point lying in
        several overlapping polygons appears once per polygon.

    NOTE(review): neighbor radii used downstream are interpreted in EPSG:3857
    units; confirm that Web Mercator distances are accurate enough for the
    study area or switch to a local metric CRS.
    """
    # === Load input layers ===
    polys = gpd.read_file(polygon_shapefile)
    points = gpd.read_file(point_gpkg_path, layer=point_layer)

    # === Set and check projection ===
    polys = _as_web_mercator(polys)
    points = _as_web_mercator(points)

    # === Create burned field from DAMAGE ===
    points['burned'] = points['DAMAGE'].apply(lambda x: 1 if str(x).lower() == 'destroyed (>50%)' else 0)
    print("Unique DAMAGE values in points:", points['DAMAGE'].unique())

    # === Add unique ID for cross-layer tracking ===
    # The id is created on the polygons *before* joining so that it can be
    # carried onto the points; numbering the two layers independently would
    # make a later merge on 'uid' pair unrelated rows.
    polys = polys.reset_index(drop=True)
    polys['uid'] = polys.index
    # A pre-existing point-side 'uid' would collide with the joined column.
    points = points.drop(columns=['uid'], errors='ignore')

    # === Spatial join: keep only points that fall inside a polygon ===
    joined = gpd.sjoin(points, polys[['uid', 'geometry']], how='inner', predicate='within')
    joined = joined.drop(columns=['index_right']).reset_index(drop=True)

    # === Assign burned values to polygons ===
    print("Assigning burned status to polygons...")
    # Aggregate instead of joining back: a polygon containing several points
    # must stay a single row, otherwise its copies count as each other's
    # neighbors in the neighbor statistics.
    burned_by_uid = joined.groupby('uid')['burned'].max()
    polys_with_burned = polys.copy()
    polys_with_burned['burned'] = (
        polys_with_burned['uid'].map(burned_by_uid).fillna(0).astype(int)
    )

    return polys_with_burned, joined
def calculate_neighbor_stats(polys, radii_meters=[10, 20, 50, 100]):
    """Count neighboring polygons, and burned neighbors, within each buffer radius.

    For every radius each polygon is buffered by that distance; any polygon
    with a different ``uid`` that intersects the buffer is a neighbor. Three
    columns are added per radius ``r``: ``total_neighbors_{r}m``,
    ``burned_neighbors_{r}m`` and ``burned_proportion_{r}m`` (0 when a polygon
    has no neighbors).

    Args:
        polys: GeoDataFrame with ``uid`` and ``burned`` columns. Buffer
            distances are applied in the units of its CRS.
        radii_meters: Iterable of buffer distances.

    Returns:
        A copy of ``polys`` carrying the added columns; the input frame is
        left unmodified.

    Raises:
        KeyError: if the ``burned`` or ``uid`` column is missing.
    """
    enriched_polys = polys.copy()

    if 'burned' not in enriched_polys.columns:
        raise KeyError("Missing 'burned' column in polygon input — check preprocessing.")
    if 'uid' not in enriched_polys.columns:
        raise KeyError("Missing 'uid' column in polygon input — check preprocessing.")

    # Slim, loop-invariant join inputs. Joining the full frame would drag the
    # stat columns added for earlier radii through every later join and make
    # the result depend on sjoin's _left/_right suffixing.
    geom_col = enriched_polys.geometry.name
    sources = enriched_polys[['uid', geom_col]]
    neighbors = enriched_polys[['uid', 'burned', geom_col]].rename(
        columns={'uid': 'neighbor_uid', 'burned': 'neighbor_burned'}
    )

    for radius in radii_meters:
        print(f"Calculating neighbors within {radius}m...")

        # Buffer geometries
        buffered = sources.copy()
        buffered[geom_col] = buffered.geometry.buffer(radius)

        # One row per (polygon, polygon-intersecting-its-buffer) pair
        pairs = gpd.sjoin(buffered, neighbors, how='left', predicate='intersects')

        # Drop self-matches
        pairs = pairs[pairs['uid'] != pairs['neighbor_uid']]

        # Aggregate neighbor stats; polygons left without any pair are
        # restored with zeros, in the same row order as enriched_polys.
        stats = pairs.groupby('uid').agg(
            total_neighbors=('neighbor_uid', 'count'),
            burned_neighbors=('neighbor_burned', 'sum')
        ).reindex(enriched_polys['uid'], fill_value=0)

        # Divide by NaN instead of 0 so isolated polygons end up with 0, not inf/NaN.
        proportion = stats['burned_neighbors'] / stats['total_neighbors'].replace(0, np.nan)

        # Add to output (positional: stats rows follow enriched_polys row order)
        enriched_polys[f'total_neighbors_{radius}m'] = stats['total_neighbors'].astype(int).values
        enriched_polys[f'burned_neighbors_{radius}m'] = stats['burned_neighbors'].astype(int).values
        enriched_polys[f'burned_proportion_{radius}m'] = proportion.fillna(0).values

    return enriched_polys


def merge_stats_back_to_points(points, enriched_polys):
    """Attach the polygon neighbor statistics to the points via ``uid``.

    Every column of ``enriched_polys`` whose name contains ``'neighbors_'`` or
    ``'proportion'`` is copied onto ``points`` with a left merge on ``uid``,
    so all point rows are kept and unmatched ones get missing values.

    Args:
        points: Frame with a ``uid`` column.
        enriched_polys: Frame with a ``uid`` column plus the statistic columns.

    Returns:
        The merged frame.
    """
    wanted = []
    for name in enriched_polys.columns:
        if 'neighbors_' in name or 'proportion' in name:
            wanted.append(name)

    lookup = enriched_polys[['uid', *wanted]]
    return points.merge(lookup, how='left', on='uid')

# === Input paths ===
polygon_shapefile = "data/clipped_structure_polygons_altadena/clipped_structure_polygons_altadena.shp"
point_gpkg_path = "data/structures.gpkg"
point_layer = "POSTFIRE"

# === Output paths ===
polygon_output_gpkg = "outputs/structures_polygons_with_neighbors_POLY-METHOD.gpkg"
point_output_gpkg = "outputs/structures_points_with_neighbors_POLY-METHOD.gpkg"

# Create the output folder(s) before any work starts, so a fresh checkout does
# not fail at the save step after the neighbor computation has already run.
for _output_path in (polygon_output_gpkg, point_output_gpkg):
    os.makedirs(os.path.dirname(_output_path) or ".", exist_ok=True)

# === Main Execution ===
# Pipeline: load + link layers -> per-radius neighbor stats on the polygons
# -> copy the stats onto the points via 'uid' -> write both layers.
print("Loading and preprocessing input data...")
polys, points = preprocess_inputs(polygon_shapefile, point_gpkg_path, point_layer)

print("Computing neighbor statistics...")
enriched_polys = calculate_neighbor_stats(polys, radii_meters=[10, 20, 50, 100])

print("Merging statistics back to centroid points...")
enriched_points = merge_stats_back_to_points(points, enriched_polys)

print("Saving output GeoPackages...")
enriched_polys.to_file(polygon_output_gpkg, layer="polygons", driver="GPKG")
enriched_points.to_file(point_output_gpkg, layer="points", driver="GPKG")

print("✅ Done.")
print(f"Polygon output saved to: {polygon_output_gpkg}")
print(f"Point output saved to:   {point_output_gpkg}")

Loading and preprocessing input data...
Unique DAMAGE values in points: ['No Damage' 'Destroyed (>50%)' 'Affected (1-9%)' 'Minor (10-25%)'
 'Inaccessible' 'Major (26-50%)']
Assigning burned status to polygons...
Computing neighbor statistics...
Calculating neighbors within 10m...
Calculating neighbors within 20m...
Calculating neighbors within 50m...
Calculating neighbors within 100m...
Merging statistics back to centroid points...
Saving output GeoPackages...
✅ Done.
Polygon output saved to: outputs/structures_polygons_with_neighbors_POLY-METHOD.gpkg
Point output saved to:   outputs/structures_points_with_neighbors_POLY-METHOD.gpkg
