# New concept

2. Network Graph Model

    Analyze:
    - How burning "propagated" through the graph.
    - How graph centrality, degree (number of neighbors), and clustering coefficient relate to burn probability.
    - Create an animation showing how the spread occurred between nodes.

In [1]:
import geopandas as gpd
import numpy as np
from sklearn.neighbors import BallTree
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import statsmodels.api as sm

# Prepare Dataset

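For each radius (20 m, 50 m, 100 m), add the columns `total_neighbors_{r}m`, `burned_neighbors_{r}m`, and `burned_proportion_{r}m` (i.e. `burned_proportion_20m`, `burned_proportion_50m`, `burned_proportion_100m`).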

In [2]:
def preprocess_structures(gpkg_path, layer_name):
    """
    Load a structures layer and add a binary ``burned`` column.

    Parameters
    ----------
    gpkg_path : str or path-like
        Path handed to ``gpd.read_file``.
    layer_name : str
        Name of the layer to read from ``gpkg_path``.

    Returns
    -------
    GeoDataFrame
        The loaded layer with an extra integer column ``burned``:
        1 where ``DAMAGE`` equals ``'Destroyed (>50%)'`` (case-insensitive),
        0 for every other value (including missing values).

    Raises
    ------
    ValueError
        If the layer's CRS is not a projected CRS.

    Notes
    -----
    Also prints the distinct ``DAMAGE`` values as a quick sanity check.
    """
    # Load the structures layer
    gdf = gpd.read_file(gpkg_path, layer=layer_name)

    # Only assign EPSG:3857 when the file carries no CRS at all. Calling
    # set_crs on a frame that already has a *different* CRS raises inside
    # GeoPandas, and forcing 3857 unconditionally would make the projected-CRS
    # check below impossible to fail.
    if gdf.crs is None:
        gdf = gdf.set_crs(epsg=3857)

    # Distances are computed on raw coordinates downstream, so a geographic
    # (degree-based) CRS is rejected. Nothing is reprojected here.
    # NOTE(review): is_projected does not verify that the linear unit is
    # metres -- confirm the unit if the layer uses an unusual projected CRS.
    if not gdf.crs.is_projected:
        raise ValueError("CRS must be projected (meters). Please reproject your data first.")

    # Create a binary "burned" column: 1 = Destroyed, 0 = Otherwise
    damage_lower = gdf['DAMAGE'].astype(str).str.lower()
    gdf['burned'] = damage_lower.eq('destroyed (>50%)').astype('int64')

    print("Unique values:")
    print(gdf['DAMAGE'].unique())

    return gdf

def calculate_neighbor_stats(gdf, radii_meters=(20, 50, 100)):
    """
    Add per-radius neighbor burn statistics to ``gdf``.

    For every radius ``r`` in ``radii_meters`` three columns are written:

    - ``total_neighbors_{r}m``: number of *other* rows whose point lies within
      Euclidean distance ``r`` of the row's point.
    - ``burned_neighbors_{r}m``: sum of the ``burned`` column over those rows.
    - ``burned_proportion_{r}m``: burned / total as a float, ``0.0`` when the
      row has no neighbors.

    Parameters
    ----------
    gdf : GeoDataFrame
        Must expose point coordinates via ``gdf.geometry.x`` / ``.y`` and have
        a ``burned`` column.
    radii_meters : iterable of numbers, default (20, 50, 100)
        Search radii, expressed in the units of the geometry coordinates
        (assumed to be metres -- this function does not check the CRS).

    Returns
    -------
    GeoDataFrame
        The same ``gdf`` object; the new columns are added in place.
    """
    coords = np.column_stack((gdf.geometry.x.to_numpy(), gdf.geometry.y.to_numpy()))
    # The burned flags do not depend on the radius, so fetch them once.
    burned_array = gdf['burned'].to_numpy()
    n_rows = len(coords)

    # BallTree cannot be built from zero samples; an empty frame simply gets
    # empty statistic columns instead of an error.
    tree = BallTree(coords, metric='euclidean') if n_rows else None

    for radius in radii_meters:
        if tree is None:
            totals = np.zeros(0, dtype=np.int64)
            burned_counts = np.zeros(0, dtype=np.int64)
        else:
            indices = tree.query_radius(coords, r=radius)
            # Each query result contains the query point itself; drop it.
            neighbor_sets = [hits[hits != i] for i, hits in enumerate(indices)]
            totals = np.array([hits.size for hits in neighbor_sets], dtype=np.int64)
            burned_counts = np.array([burned_array[hits].sum() for hits in neighbor_sets])

        # Divide only where there is at least one neighbor; everything else
        # keeps the 0.0 from `out`, so the column is always float.
        proportions = np.divide(
            burned_counts,
            totals,
            out=np.zeros(n_rows, dtype=float),
            where=totals > 0,
        )

        # Save to GeoDataFrame
        gdf[f'total_neighbors_{radius}m'] = totals
        gdf[f'burned_neighbors_{radius}m'] = burned_counts
        gdf[f'burned_proportion_{radius}m'] = proportions

    return gdf

In [3]:
gpkg_file = "data/structures.gpkg"
layer_name = "postfire"
output_file = "data/structures_with_neighbors.gpkg"

# Single source of truth for the neighborhood radii, so the computed columns
# and the model feature names below cannot drift apart.
neighbor_radii_m = [20, 50, 100]

# Step 1: Load and preprocess
gdf = preprocess_structures(gpkg_file, layer_name)

# Step 2: Calculate neighbor stats
gdf = calculate_neighbor_stats(gdf, radii_meters=neighbor_radii_m)

# Step 3: Save enriched data
gdf.to_file(output_file, driver="GPKG")
print(f"Saved enriched structures to {output_file}")

# Step 4: Select the features for a simple logistic regression model
# (this cell only defines the feature list; no model is trained here)
feature_columns = [f'burned_proportion_{radius}m' for radius in neighbor_radii_m]

Unique values:
['No Damage' 'Destroyed (>50%)' 'Affected (1-9%)' 'Minor (10-25%)'
 'Inaccessible' 'Major (26-50%)']
Saved enriched structures to data/structures_with_neighbors.gpkg
