Combining CSV data from 4 different regions downloaded with Google Earth Engine (GEE)

In [None]:
import pandas as pd
import os

# Per-region export files; each key becomes the value of the "Region" column
# for every row read from the corresponding CSV.
csv_paths = {
    "Region1": "FireRisk_Region1_NDVI_NBR_Burned.csv",
    "Region2": "FireRisk_Region2_NDVI_NBR_Burned.csv",
    "Region3": "FireRisk_Region3_NDVI_NBR_Burned.csv",
    "Region4": "FireRisk_Region4_NDVI_NBR_Burned.csv"
}

# Read every export that exists on disk and tag its rows with the region label.
df_list = []
for region, path in csv_paths.items():
    if not os.path.exists(path):
        # Still best-effort (the region is skipped), but make the gap visible
        # so a missing export is not silently absent from the combined file.
        print(f"Skipping {region}: file not found ({path})")
        continue
    df = pd.read_csv(path)
    df["Region"] = region
    df_list.append(df)

# pd.concat on an empty list fails with "No objects to concatenate";
# report the actual cause instead.
if not df_list:
    raise FileNotFoundError(
        "None of the region CSV files were found: " + ", ".join(csv_paths.values())
    )

# Stack all regions into one table with a fresh 0..n-1 index and save it.
df_all = pd.concat(df_list, ignore_index=True)
df_all.to_csv("FireRisk_Combined_AllRegions.csv", index=False)


 Demonstrates how MODIS NDVI/NBR values were matched to image coordinates and dates

In [None]:
def match_dates(target_path, output_path):
    """Attach the date of the spatially nearest NDVI record to every target row.

    Reads the CSV at ``target_path``, queries the notebook-level ``nn_model``
    with each row's ``(lon, lat)`` pair, copies the ``date`` of the matched row
    of the notebook-level ``df_ndvi`` into a ``date`` column, and writes the
    result to ``output_path``.

    Args:
        target_path: CSV containing at least ``lon`` and ``lat`` columns.
        output_path: Destination CSV for the target rows plus ``date``.

    NOTE(review): ``nn_model`` and ``df_ndvi`` are defined outside this cell.
    This presumably expects ``nn_model`` to be a fitted scikit-learn
    ``NearestNeighbors`` built from ``df_ndvi``'s coordinates in the same
    (lon, lat) column order and row order -- confirm.
    """
    df_target = pd.read_csv(target_path)
    coords = df_target[["lon", "lat"]].values
    _, indices = nn_model.kneighbors(coords)

    # kneighbors returns one column per neighbour, closest first. Keep only
    # column 0 so there is exactly one match per target row; flattening the
    # whole array would yield n*k rows and misalign the dates whenever the
    # model was built with n_neighbors > 1.
    nearest_rows = indices[:, 0]

    # Reset the index so the assignment lines up with df_target's 0..n-1 index.
    matched_dates = df_ndvi.iloc[nearest_rows].reset_index(drop=True)
    df_target["date"] = matched_dates["date"]
    df_target.to_csv(output_path, index=False)


Sample Snippet: Matching FIRMS Fire Data

In [None]:
import pandas as pd
from datetime import timedelta
from sklearn.neighbors import NearestNeighbors

# FIRMS fire data: read the CSV, discard rows lacking a position or a
# timestamp, then parse the timestamp column into pandas datetimes.
df_firms = (
    pd.read_csv("fire_data_final.csv")
    .dropna(subset=["latitude", "longitude", "datetime"])
    .assign(datetime=lambda firms: pd.to_datetime(firms["datetime"]))
)

# Matching function
def match_firms(input_path, output_path, tolerance=3):
    """Append the nearest FIRMS record's attributes to every target row.

    For each row of the CSV at ``input_path``, the notebook-level ``df_firms``
    frame is filtered to records whose ``datetime`` lies within ``tolerance``
    days of the row's ``date``. Among those, the record closest to the row's
    ``(lon, lat)`` is selected (NearestNeighbors with its default metric on the
    raw coordinate values) and its ``frp``, ``brightness`` and ``confidence``
    are copied over. Rows with no record in the window, or with a missing
    date or coordinate, get ``None`` in all three columns.

    Args:
        input_path: CSV with at least ``date``, ``lon`` and ``lat`` columns.
        output_path: Destination CSV (input columns plus the three FIRMS ones).
        tolerance: Half-width of the date window, in days.

    NOTE(review): no upper bound is placed on the spatial distance, so the
    selected record can be arbitrarily far from the target point.
    """
    firms_cols = ["frp", "brightness", "confidence"]

    df_target = pd.read_csv(input_path)
    df_target["date"] = pd.to_datetime(df_target["date"])

    window = timedelta(days=tolerance)  # loop-invariant, computed once
    matched_records = []

    for _, row in df_target.iterrows():
        t_date = row["date"]
        t_lon, t_lat = row["lon"], row["lat"]

        # A missing date can never fall inside a window, and NaN coordinates
        # would make kneighbors raise and abort the whole run: treat both as
        # "no match" for this row.
        if pd.isna(t_date) or pd.isna(t_lon) or pd.isna(t_lat):
            matched_records.append(dict.fromkeys(firms_cols))
            continue

        mask = (
            (df_firms["datetime"] >= t_date - window) &
            (df_firms["datetime"] <= t_date + window)
        )
        df_sub = df_firms[mask]

        if df_sub.empty:
            matched_records.append(dict.fromkeys(firms_cols))
            continue

        # Fit on a bare array: fitting on a DataFrame and then querying with a
        # plain list makes scikit-learn emit a "X does not have valid feature
        # names" warning for every single row.
        model = NearestNeighbors(n_neighbors=1)
        model.fit(df_sub[["longitude", "latitude"]].to_numpy())
        _, idx = model.kneighbors([[t_lon, t_lat]])
        nearest = df_sub.iloc[idx[0][0]]

        matched_records.append({col: nearest[col] for col in firms_cols})

    # Passing the column list keeps the three FIRMS columns in the output even
    # when the target file has no rows.
    df_result = pd.concat(
        [
            df_target.reset_index(drop=True),
            pd.DataFrame(matched_records, columns=firms_cols),
        ],
        axis=1
    )
    df_result.to_csv(output_path, index=False)
    print(f"Saved with FIRMS: {output_path}")


Sample Snippet of Climate Processing

In [None]:
import xarray as xr
import pandas as pd
import numpy as np

# Open the two ERA5 NetCDF files (instantaneous and accumulated fields).
ds_instant = xr.open_dataset("instant.nc")
ds_accum = xr.open_dataset("accum.nc")

# Pull the raw arrays out of the datasets.
t2m = ds_instant["t2m"].values - 273.15  # offset applied for the temp_c column
u10 = ds_instant["u10"].values
v10 = ds_instant["v10"].values
wind_mps = np.sqrt(np.square(u10) + np.square(v10))  # magnitude of the (u10, v10) vector
tp = ds_accum["tp"].values

# Coordinate axes; timestamps are normalized to midnight so "date" carries no
# time-of-day component.
times = pd.to_datetime(ds_instant["valid_time"].values).normalize()
lats = ds_instant["latitude"].values
lons = ds_instant["longitude"].values

# Expand the three axes into full (time, lat, lon) grids ("ij" indexing keeps
# that axis order).
time_grid, lat_grid, lon_grid = np.meshgrid(times, lats, lons, indexing="ij")

# One row per grid cell: flatten every array the same way, then discard rows
# with any missing value.
era_columns = {
    "date": time_grid,
    "lat": lat_grid,
    "lon": lon_grid,
    "temp_c": t2m,
    "wind_mps": wind_mps,
    "precip_m": tp,
}
df_era = pd.DataFrame(
    {name: values.ravel() for name, values in era_columns.items()}
).dropna()


Domain-Inspired Features

In [None]:
import pandas as pd

# Training table (merged MODIS, FIRMS, and ERA5 features)
train_path = "Matched_Train_withFIRMS_ERAS_fixed_final.csv"
df = pd.read_csv(train_path)

# Engineered columns, added in one chain. Later entries may read columns
# created by earlier ones (risk_persistence uses burn_count and fire_potential).
df = df.assign(
    burn_count=lambda d: d.groupby(["lat", "lon"])["Burned"].transform("sum"),
    fire_potential=lambda d: d["temp_c"] * d["precip_m"],
    frp_wind_conf=lambda d: d["frp"] * d["wind_mps"] * d["confidence"],
    burned_veg_loss=lambda d: d["Burned"] * (d["NDVI"] - d["NBR"]),
    high_heat_stress=lambda d: d["temp_c"] * (1 - d["NDVI"]),
    risk_persistence=lambda d: d["burn_count"] * d["fire_potential"],
)

# Summary statistics of the new columns
derived_cols = [
    "burn_count", "fire_potential", "frp_wind_conf",
    "risk_persistence", "burned_veg_loss", "high_heat_stress",
]
print(df[derived_cols].describe())


Creating final CSVs by bringing all the features together

In [None]:
def create_and_save_final_features(train_path, val_path=None, test_path=None, save=True):
    """Build the final feature table for each split and optionally save it.

    Each given CSV is read, derived columns are added (``burn_count``,
    ``wind_conf``, ``fire_potential``, ``risk_persistence``, ``risk_factor``,
    ``classname``) and the table is reduced to a fixed column list.

    Args:
        train_path: CSV of the training split; a falsy value skips the split.
        val_path: CSV of the validation split, or ``None`` to skip it.
        test_path: CSV of the test split, or ``None`` to skip it.
        save: When true, write each processed split to ``train_Final.csv`` /
            ``val_Final.csv`` / ``test_Final.csv`` in the working directory.

    Returns:
        Tuple ``(train, val, test)`` of processed DataFrames. The entry for a
        skipped split is ``None``.

    Raises:
        KeyError: If an input CSV lacks one of the columns in the final list.
    """
    nan = float("nan")

    def process(df):
        """Add the derived columns to ``df`` in place and return the final subset."""
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
        # Total of "Burned" over all rows sharing the same (lat, lon).
        df["burn_count"] = df.groupby(["lat", "lon"])["Burned"].transform("sum")

        # When an input column is absent the fallback is NaN rather than None:
        # the column stays numeric and the risk_factor sum below propagates
        # NaN instead of raising TypeError on an object column.
        if "wind_mps" in df.columns and "confidence" in df.columns:
            df["wind_conf"] = df["wind_mps"] * df["confidence"]
        else:
            df["wind_conf"] = nan

        if "frp" in df.columns and "Burned" in df.columns:
            df["fire_potential"] = df["frp"] * df["Burned"]
        else:
            df["fire_potential"] = nan

        # Guarded like fire_potential above, which already treats frp as optional.
        if "frp" in df.columns:
            df["risk_persistence"] = 1 - df["frp"]
        else:
            df["risk_persistence"] = nan

        df["risk_factor"] = df["burn_count"] + df["fire_potential"] + df["risk_persistence"]

        df["classname"] = df["class"]

        final_cols = [
            "filename", "classname", "class", "lat", "lon", "date",
            "NDVI", "NBR", "Burned", "frp", "brightness", "confidence",
            "temp_c", "wind_mps", "precip_m",
            "burn_count", "fire_potential", "wind_conf",
            "risk_factor", "risk_persistence"
        ]

        # Strict selection: a column missing from the input raises KeyError.
        return df[final_cols]

    def load_and_process(path, out_name):
        """Process one split, or return None when no path was given for it."""
        if not path:
            return None
        df_final = process(pd.read_csv(path))
        if save:
            df_final.to_csv(out_name, index=False)
        return df_final

    # Splits are handled in train -> val -> test order. Skipped splits come
    # back as None instead of leaving the returned name unbound.
    df_train_final = load_and_process(train_path, "train_Final.csv")
    df_val_final = load_and_process(val_path, "val_Final.csv")
    df_test_final = load_and_process(test_path, "test_Final.csv")

    return df_train_final, df_val_final, df_test_final

# Input CSVs for the three splits.
train_path, val_path, test_path = (
    "Matched_Train_withFIRMS_ERAS_fixed_final.csv",
    "Matched_Val_withFIRMS_ERAS_fixed_final.csv",
    "Matched_Test_withFIRMS_ERAS_fixed_final.csv",
)

# Build the final feature tables for all three splits and write them to disk.
df_train, df_val, df_test = create_and_save_final_features(
    train_path=train_path,
    val_path=val_path,
    test_path=test_path,
    save=True,
)

import shutil
# Destination folder for the exported CSVs ("..." looks like a placeholder --
# replace it with the real directory before running).
output_path = "..."

# The copies previously referenced `drive_dir`, which is not defined in this
# notebook, while `output_path` was never used. Use `output_path`, and let
# os.path.join supply the path separator that plain string concatenation
# would omit.
for final_name in ("train_Final.csv", "val_Final.csv", "test_Final.csv"):
    shutil.copy(final_name, os.path.join(output_path, final_name))