# New concept

1. Append fields to the structures geopackage that describe how many structures burned at different radius distances. Then implement a regression model that determines wheether a house burned based on the number of nearby structures. And test for statistical significance. 

2. Network Graph Model

    Create a graph where:

        Each node = a structure.

        Edges = structures within X meters of each other.

    Assign "burned" or "not burned" to each node.

    Analyze:

        How burning "propagated" through the graph.

        How graph centrality, degree (number of neighbors), clustering coefficient relate to burn probability.

✅ Super cool for visual people — you can even animate it.

In [1]:
import geopandas as gpd
import numpy as np
from sklearn.neighbors import BallTree
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [14]:
def preprocess_structures(gpkg_path, layer_name):
    # Load the structures layer
    gdf = gpd.read_file(gpkg_path, layer=layer_name)

    gdf.set_crs(epsg=3857, inplace=True)

    # Reproject to meters if needed
    if not gdf.crs.is_projected:
        raise ValueError("CRS must be projected (meters). Please reproject your data first.")

    # Create a binary "burned" column: 1 = Destroyed, 0 = Otherwise
    gdf['burned'] = gdf['DAMAGE'].apply(lambda x: 1 if str(x).lower() == 'Destroyed (>50%)' else 0)

    return gdf

def calculate_neighbor_stats(gdf, radii_meters=[20, 50, 100]):
    coords = np.array(list(zip(gdf.geometry.x, gdf.geometry.y)))
    tree = BallTree(coords, metric='euclidean')

    for radius in radii_meters:
        indices = tree.query_radius(coords, r=radius)
        
        total_neighbors = []
        burned_neighbors = []
        burned_proportion = []

        burned_array = gdf['burned'].to_numpy()

        for i, neighbors in enumerate(indices):
            neighbors = neighbors[neighbors != i]  # exclude self
            total = len(neighbors)
            burned_count = burned_array[neighbors].sum() if total > 0 else 0

            total_neighbors.append(total)
            burned_neighbors.append(burned_count)
            burned_proportion.append(burned_count / total if total > 0 else 0)

        # Save to GeoDataFrame
        gdf[f'total_neighbors_{radius}m'] = total_neighbors
        gdf[f'burned_neighbors_{radius}m'] = burned_neighbors
        gdf[f'burned_proportion_{radius}m'] = burned_proportion

    return gdf

def train_logistic_model(gdf, features, target='burned'):
    # Drop any NaN values if they exist
    data = gdf.dropna(subset=features + [target])

    X = data[features]
    y = data[target]

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

    model = LogisticRegression(max_iter=500)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    print("=== Logistic Regression Report ===")
    print(classification_report(y_test, y_pred))

    return model

In [15]:
gpkg_file = "I:/terrain_generation_project/structures.gpkg"
layer_name = "postfire"
output_file = "I:/terrain_generation_project/structures_with_neighbors.gpkg"

# Step 1: Load and preprocess
gdf = preprocess_structures(gpkg_file, layer_name)

# Step 2: Calculate neighbor stats
gdf = calculate_neighbor_stats(gdf, radii_meters=[20, 50, 100])

# Step 3: Save enriched data
gdf.to_file(output_file, driver="GPKG")
print(f"Saved enriched structures to {output_file}")

# Step 4: Train a simple logistic regression model
feature_columns = [
    'burned_proportion_20m',
    'burned_proportion_50m',
    'burned_proportion_100m'
]

model = train_logistic_model(gdf, features=feature_columns)

Saved enriched structures to I:/terrain_generation_project/structures_with_neighbors.gpkg


ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: np.int64(0)