In [45]:
import pandas as pd
import numpy as np
import os
from glob import glob
import warnings
# Suppress every warning category for the rest of the session
warnings.simplefilter('ignore')

# Seed NumPy's global random generator so random draws are repeatable
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [46]:
# Build ../data/calfire_zenodo.csv: CalFire incidents joined to the Zenodo
# table on the calendar date, trimmed to a fixed set of renamed columns.

# Input locations
file_zenodo = "../data/zenodo.csv"    # source: https://zenodo.org/records/14712845 (thanks to Andranique)
file_calfire = "../data/calfire.csv"  # source: https://www.fire.ca.gov/incidents (thanks to Matt)

# Read both raw tables before touching either of them
df_zenodo = pd.read_csv(file_zenodo)
df_calfire = pd.read_csv(file_calfire)

# Parse the date column on each side and give the Zenodo date the CalFire
# column name, so both frames expose the same datetime join key
join_key = 'incident_dateonly_created'
df_zenodo = (
    df_zenodo
    .assign(DATE=pd.to_datetime(df_zenodo['DATE']))
    .rename(columns={'DATE': join_key})
)
df_calfire = df_calfire.assign(**{join_key: pd.to_datetime(df_calfire[join_key])})

# Inner join: only dates that appear in both tables are kept
df_merged = df_calfire.merge(df_zenodo, how='inner', on=join_key)

# Columns to keep (keys, in output order) and the names they are given (values)
selected_columns = {
    'incident_name': 'fire_name',
    'incident_dateonly_created': 'fire_dateonly_created',
    'PRECIPITATION': 'precipitation',
    'MAX_TEMP': 'max_temp',
    'MIN_TEMP': 'min_temp',
    'AVG_WIND_SPEED': 'avg_wind_speed',
    'TEMP_RANGE': 'temp_range',
    'WIND_TEMP_RATIO': 'wind_temp_ratio',
    'LAGGED_PRECIPITATION': 'lagged_precipitation',
    'LAGGED_AVG_WIND_SPEED': 'lagged_avg_wind_speed',
    'MONTH': 'month',
    'SEASON': 'season',
    'incident_longitude': 'fire_longitude',
    'incident_latitude': 'fire_latitude',
    'incident_acres_burned': 'fire_acres_burned',
    'incident_county': 'fire_county'
}

# Project onto the wanted columns and apply the new names in one step
df_cleaned = df_merged[list(selected_columns)].rename(columns=selected_columns)

# Persist the merged table without the positional index
output_path_cleaned = "../data/calfire_zenodo.csv"
df_cleaned.to_csv(output_path_cleaned, index=False)

In [47]:
# Attach MODIS satellite readings (bright_t31, frp, brightness) to every fire
# and write the result to ../data/combined.csv.
#
# Matching rule, applied per MODIS file:
#   1. Take the detections acquired on the fire's creation date.
#   2. If any of them share the fire's latitude/longitude rounded to 2 decimals,
#      use the mean of those detections.
#   3. Otherwise fall back to the mean of ALL detections in the file on that date.
# Fires whose date appears in no file keep NaN in the three satellite columns.
#
# NOTE(review): the file name pattern suggests each file covers the whole
# United States, so the fallback in step 3 is presumably a country-wide daily
# average rather than a fire-specific reading — confirm this is intended.

SATELLITE_COLUMNS = ['bright_t31', 'frp', 'brightness']
MATCH_KEYS = ['acq_date', 'lat_rounded', 'lon_rounded']
MODIS_PATTERN = "../data/modis_satellite_data/modis_*_United_States.csv"  # https://firms.modaps.eosdis.nasa.gov/country/

# Load CalFire-Zenodo Merged Data
df_cal_zen = pd.read_csv("../data/calfire_zenodo.csv")
df_cal_zen['fire_dateonly_created'] = pd.to_datetime(df_cal_zen['fire_dateonly_created'])

# Lookup keys for every fire, computed once instead of once per fire per file.
# Coordinates are rounded with the same pandas rounding that is applied to the
# MODIS rows below, so the exact-equality match compares like with like.
fire_keys = (
    df_cal_zen
    .assign(
        acq_date=df_cal_zen['fire_dateonly_created'],
        lat_rounded=df_cal_zen['fire_latitude'].round(2),
        lon_rounded=df_cal_zen['fire_longitude'].round(2),
    )
    .rename_axis('fire_index')
    .reset_index()
    [['fire_index', *MATCH_KEYS]]
)

# One record per (file, fire) match: the three mean readings plus 'fire_index'
satellite = []

# glob() returns files in arbitrary order; sort so that the "first record wins"
# step further down is deterministic when a date appears in more than one file.
modis_files = sorted(glob(MODIS_PATTERN))
if not modis_files:
    raise FileNotFoundError(f"No MODIS files match {MODIS_PATTERN}")

for file in modis_files:
    df_modis = pd.read_csv(file)
    df_modis['acq_date'] = pd.to_datetime(df_modis['acq_date'])
    df_modis['lat_rounded'] = df_modis['latitude'].round(2)
    df_modis['lon_rounded'] = df_modis['longitude'].round(2)

    # Only fires whose date occurs in this file can match anything here.
    # dropna() keeps a missing fire date from "matching" a missing MODIS date.
    fires_in_file = fire_keys[fire_keys['acq_date'].isin(df_modis['acq_date'].dropna())]
    if fires_in_file.empty:
        continue

    # Restrict the detections to those dates before aggregating
    detections = df_modis[df_modis['acq_date'].isin(fires_in_file['acq_date'])]

    # groupby drops rows with a missing key, so detections without coordinates
    # still count towards the per-date mean but can never match by location.
    mean_by_pixel = detections.groupby(MATCH_KEYS)[SATELLITE_COLUMNS].mean().reset_index()
    mean_by_date = detections.groupby('acq_date')[SATELLITE_COLUMNS].mean().reset_index()

    # Preferred match: same date and same rounded coordinates
    located = fires_in_file.merge(mean_by_pixel, on=MATCH_KEYS, how='inner')

    # Fallback for the remaining fires: same date only
    unlocated = fires_in_file[~fires_in_file['fire_index'].isin(located['fire_index'])]
    dated = unlocated.merge(mean_by_date, on='acq_date', how='inner')

    # Keep records in fire order within each file
    matched = pd.concat([located, dated]).sort_values('fire_index')
    satellite.extend(matched[[*SATELLITE_COLUMNS, 'fire_index']].to_dict('records'))

# Convert the list of records to DataFrame. Columns and dtypes are given
# explicitly so an empty match list yields an empty, well-formed frame instead
# of a KeyError on 'fire_index'.
df_satellite_summary = pd.DataFrame(
    satellite, columns=[*SATELLITE_COLUMNS, 'fire_index']
).astype({**{col: 'float64' for col in SATELLITE_COLUMNS}, 'fire_index': 'int64'})

# If a fire matched in several files, keep the first non-null value per column
df_satellite_summary = df_satellite_summary.groupby('fire_index').first().reset_index()

# Merge with the CalFire-Zenodo data; the left join keeps fires without a match
df_cal_zen_with_satellite = df_cal_zen.reset_index().merge(
    df_satellite_summary,
    left_on='index',
    right_on='fire_index',
    how='left'
).drop(columns=['index', 'fire_index'])

# Save the final result
df_cal_zen_with_satellite.to_csv("../data/combined.csv", index=False)