# WatchDuty EDA: Evacuation Alert Delays

Goal: benchmark timing from first credible signal to first protective action (evac zone activity) and surface “dangerous delay” patterns by region, source, and incident type.

Primary proxy metrics:
- First signal time: earliest `date_created` in `geo_events_externalgeoevent` (external messages) per `geo_event_id`
- First protective action time: earliest `date_created` in `evac_zone_status_geo_event_map` per `geo_event_id`
- Delay (minutes): protective action minus first signal


In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

# Folder that holds the CSV exports. Defaults to the notebook's own directory;
# point it elsewhere if the files live somewhere else.
DATA_DIR = Path(".")

# Short key -> CSV filename, resolved relative to DATA_DIR.
FILES = dict(
    ev_map="evac_zone_status_geo_event_map.csv",
    evac_zones="evac_zones_gis_evaczone.csv",
    evac_zones_changelog="evac_zones_gis_evaczonechangelog.csv",
    perimeters="fire_perimeters_gis_fireperimeter.csv",
    perimeters_changelog="fire_perimeters_gis_fireperimeterchangelog.csv",
    external_msgs="geo_events_externalgeoevent.csv",
    external_msgs_changelog="geo_events_externalgeoeventchangelog.csv",
    geoevents="geo_events_geoevent.csv",
    geoevents_changelog="geo_events_geoeventchangelog.csv",
)

def read_csv_safely(name, nrows=None):
    """Read ``DATA_DIR / name`` into a DataFrame, tolerating a missing file.

    Parameters
    ----------
    name : filename relative to ``DATA_DIR``.
    nrows : optional row cap forwarded to ``pd.read_csv``; ``None`` reads everything.

    Returns
    -------
    The loaded DataFrame, or ``None`` (after printing a "Missing: ..." line)
    when the file does not exist. Callers must handle the ``None`` case.
    """
    csv_path = DATA_DIR / name
    if csv_path.exists():
        return pd.read_csv(csv_path, nrows=nrows, low_memory=False)
    print(f"Missing: {csv_path}")
    return None

# Report which of the expected CSV exports are actually present in DATA_DIR.
print(
    "Files found:",
    [key for key, filename in FILES.items() if (DATA_DIR / filename).exists()],
)


Files found: ['ev_map', 'evac_zones', 'evac_zones_changelog', 'perimeters', 'perimeters_changelog', 'external_msgs', 'external_msgs_changelog', 'geoevents', 'geoevents_changelog']


In [None]:
# Load the three tables the delay metric is built from, in the same order as
# before, then display (rows, columns) for each.
ev_map, external, geoevents = (
    read_csv_safely(FILES[key]) for key in ("ev_map", "external_msgs", "geoevents")
)

tuple(table.shape for table in (ev_map, external, geoevents))


((4429, 3), (1502495, 14), (62696, 17))

In [3]:
def to_dt(s):
    """Parse ``s`` into UTC-aware datetimes; values that cannot be parsed become NaT."""
    parsed = pd.to_datetime(s, utc=True, errors="coerce")
    return parsed

# Parse creation timestamps in place on every loaded table.
for _table in (ev_map, external, geoevents):
    _table["date_created"] = to_dt(_table["date_created"])
# .get() returns None when the column is absent, so a missing date_modified
# column does not raise here.
geoevents["date_modified"] = to_dt(geoevents.get("date_modified"))

# Time span covered by each table.
for _label, _table in (
    ("ev_map range:", ev_map),
    ("external range:", external),
    ("geoevents range:", geoevents),
):
    _created = _table["date_created"]
    print(_label, _created.min(), "to", _created.max())

# uniqueness checks
for _label, _table, _column in (
    ("ev_map unique uid_v2:", ev_map, "uid_v2"),
    ("ev_map unique geo_event_id:", ev_map, "geo_event_id"),
    ("external unique geo_event_id:", external, "geo_event_id"),
):
    print(_label, _table[_column].nunique())


ev_map range: 2023-10-25 11:28:15.254517+00:00 to 2025-09-20 22:29:54.827623+00:00
external range: 2023-02-21 21:34:31.052942+00:00 to 2025-09-05 20:35:26.465983+00:00
geoevents range: 2021-08-11 00:09:56.481066+00:00 to 2025-09-05 20:25:28.007096+00:00
ev_map unique uid_v2: 3909
ev_map unique geo_event_id: 483
external unique geo_event_id: 41906


In [4]:
def _earliest_per_event(frame, out_col):
    """Return one row per geo_event_id with its earliest date_created, named ``out_col``.

    Rows missing either geo_event_id or date_created are ignored.
    """
    complete = frame.dropna(subset=["geo_event_id", "date_created"])
    earliest = complete.groupby("geo_event_id", as_index=False)["date_created"].min()
    return earliest.rename(columns={"date_created": out_col})


# First external message per incident (the "signal") and first evac-zone
# mapping per incident (the "protective action").
first_signal = _earliest_per_event(external, "first_signal_time")
first_action = _earliest_per_event(ev_map, "first_action_time")

# Left join keeps every incident that has a signal; incidents with no evac
# record get NaT for first_action_time and NaN for the delay.
delays = pd.merge(first_signal, first_action, on="geo_event_id", how="left")
delays["mins_signal_to_action"] = (
    delays["first_action_time"].sub(delays["first_signal_time"]).dt.total_seconds() / 60.0
)

delays.describe(include="all")


Unnamed: 0,geo_event_id,first_signal_time,first_action_time,mins_signal_to_action
count,41906.0,41906,108,108.0
mean,32298.550685,2024-08-27 09:36:14.215993856+00:00,2025-04-10 15:50:42.317532672+00:00,2697.107601
min,179.0,2023-04-28 22:12:22.767797+00:00,2024-06-25 20:50:53.404446+00:00,-570.402931
25%,16579.25,2024-02-27 21:59:40.964591360+00:00,2024-09-06 11:46:41.976812800+00:00,70.887725
50%,32576.5,2024-08-29 19:06:04.453777408+00:00,2025-07-14 02:35:28.833531904+00:00,211.488664
75%,46744.75,2025-04-22 22:33:16.376892416+00:00,2025-08-23 01:01:51.767871488+00:00,2228.183869
max,62822.0,2025-09-05 20:16:43.241686+00:00,2025-09-11 00:04:48.611875+00:00,34654.248365
std,17799.62424,,,5847.959622


In [5]:
# Attach incident metadata from geoevents to each delay row.
# validate="many_to_one" makes pandas raise a MergeError if geoevents.id is not
# unique; without it a duplicated id would silently duplicate incident rows and
# distort the shares and medians computed from delays2.
delays2 = delays.merge(
    geoevents[["id", "name", "geo_event_type", "notification_type", "external_source", "is_active", "is_visible"]],
    left_on="geo_event_id",
    right_on="id",
    how="left",
    validate="many_to_one",
).drop(columns=["id"])

# Define "dangerous delay" threshold (tune this)
THRESH_MIN = 60  # 60 minutes as a starting point
# NaN delays (no protective action recorded) compare as False here, so this flag
# only marks incidents that have an action AND took at least THRESH_MIN minutes.
delays2["dangerous_delay"] = delays2["mins_signal_to_action"].ge(THRESH_MIN)

# Top delayed incidents
delays2.sort_values("mins_signal_to_action", ascending=False).head(25)


Unnamed: 0,geo_event_id,first_signal_time,first_action_time,mins_signal_to_action,name,geo_event_type,notification_type,external_source,is_active,is_visible,dangerous_delay
37612,57385.0,2025-07-28 18:45:17.244457+00:00,2025-08-21 20:19:32.146348+00:00,34654.248365,Stoner Mesa Fire,wildfire,normal,wildcad,1.0,1.0,True
16465,26342.0,2024-07-17 02:41:36.766166+00:00,2024-08-02 11:28:33.765191+00:00,23566.949984,Trail Fire (Diamond Complex),wildfire,normal,wildcad,0.0,0.0,True
16693,26643.0,2024-07-18 17:55:33.791581+00:00,2024-08-03 17:48:19.579860+00:00,23032.763138,Double Snag Fire,wildfire,normal,wildcad,0.0,0.0,True
40843,61614.0,2025-08-26 01:28:15.553805+00:00,2025-09-11 00:04:48.611875+00:00,22956.550968,Wildcat Fire,wildfire,normal,nifc,1.0,1.0,True
16552,26458.0,2024-07-17 21:50:46.150833+00:00,2024-08-02 11:23:53.964174+00:00,22413.130222,Lemolo Fire (Diamond Complex),wildfire,normal,wildcad,0.0,0.0,True
16501,26392.0,2024-07-17 17:35:39.903290+00:00,2024-07-30 00:22:20.249384+00:00,17686.672435,Red Fire,wildfire,normal,wildcad,0.0,0.0,True
16188,25865.0,2024-07-14 00:30:42.820710+00:00,2024-07-21 20:09:31.416057+00:00,11258.809922,Lone Rock Fire,wildfire,normal,nifc,0.0,0.0,True
22655,35116.0,2024-09-28 21:30:56.402600+00:00,2024-10-06 15:38:11.093628+00:00,11167.24485,Yellow Lake Fire,wildfire,normal,wildcad,0.0,0.0,True
36350,55331.0,2025-07-10 18:23:01.272446+00:00,2025-07-18 01:50:14.616964+00:00,10527.222409,Turner Gulch Fire,wildfire,normal,wildcad,0.0,0.0,True
32807,48507.0,2025-05-13 17:50:06.711036+00:00,2025-05-19 20:54:33.368183+00:00,8824.444286,Greer Fire,wildfire,normal,wildcad,0.0,0.0,True


In [6]:
def _share_without_action(action_times):
    """Fraction (0-1) of the group's rows whose first_action_time is missing."""
    return action_times.isna().mean()


def _p90_minutes(minutes):
    """90th percentile of the group's non-null delays, or NaN when there are none."""
    observed = minutes.dropna()
    if observed.empty:
        return np.nan
    return np.nanpercentile(observed, 90)


# Per (source, incident type): incident count, share with no recorded action,
# and median / p90 delay among incidents that do have one. dropna=False keeps
# groups whose source or type is missing.
summary_by_source = (
    delays2
    .groupby(["external_source", "geo_event_type"], dropna=False)
    .agg(
        incidents=("geo_event_id", "nunique"),
        pct_missing_action=("first_action_time", _share_without_action),
        median_mins=("mins_signal_to_action", "median"),
        p90_mins=("mins_signal_to_action", _p90_minutes),
    )
    .reset_index()
    .sort_values(by=["pct_missing_action", "p90_mins"], ascending=[False, False])
)

summary_by_source.head(30)



Unnamed: 0,external_source,geo_event_type,incidents,pct_missing_action,median_mins,p90_mins
0,adeq,wildfire,86,1.0,,
1,dnr,wildfire,163,1.0,,
3,odf,wildfire,700,1.0,,
4,pfirs,wildfire,2713,1.0,,
5,pulsepoint,wildfire,1,1.0,,
6,texas_am,wildfire,157,1.0,,
9,,,1,1.0,,
2,nifc,wildfire,14321,0.997975,1127.512987,5984.501505
7,wildcad,wildfire,22988,0.997825,620.929991,11819.187609
8,,wildfire,776,0.962629,72.357276,793.638585


In [7]:
# An incident is a candidate when it has a signal and either no protective
# action was ever recorded or the action came at least THRESH_MIN minutes later.
_has_signal = delays2["first_signal_time"].notna()
_no_action = delays2["first_action_time"].isna()
_slow_action = delays2["mins_signal_to_action"] >= THRESH_MIN

# Earliest actions first; within the same action time, longest delay first.
danger_candidates = (
    delays2.loc[_has_signal & (_no_action | _slow_action)]
    .copy()
    .sort_values(by=["first_action_time", "mins_signal_to_action"], ascending=[True, False])
)

danger_candidates.head(50)


Unnamed: 0,geo_event_id,first_signal_time,first_action_time,mins_signal_to_action,name,geo_event_type,notification_type,external_source,is_active,is_visible,dangerous_delay
15864,25339.0,2024-07-10 16:25:50.415096+00:00,2024-07-14 19:03:42.071178+00:00,5917.860935,Falls Fire,wildfire,normal,nifc,0.0,0.0,True
16529,26430.0,2024-07-17 21:01:17.010341+00:00,2024-07-18 00:30:01.380195+00:00,208.739498,Round Mountain Fire,wildfire,normal,nifc,0.0,0.0,True
16528,26429.0,2024-07-17 23:16:18.166596+00:00,2024-07-18 00:32:12.111168+00:00,75.899076,Wickiup Fire,wildfire,normal,nifc,0.0,0.0,True
16817,26813.0,2024-07-19 21:00:45.956045+00:00,2024-07-19 22:23:38.270918+00:00,82.871915,Jackpine Fire,wildfire,normal,nifc,0.0,0.0,True
16550,26456.0,2024-07-17 22:31:36.766465+00:00,2024-07-21 19:57:18.436105+00:00,5605.694494,Pilot Rock Fire,wildfire,normal,wildcad,0.0,0.0,True
16556,26464.0,2024-07-17 23:05:17.167663+00:00,2024-07-21 19:58:56.385578+00:00,5573.653632,North Fork Owens Fire,wildfire,normal,nifc,0.0,0.0,True
16585,26505.0,2024-07-17 23:02:39.523630+00:00,2024-07-21 20:03:02.317811+00:00,5580.379903,Snake Fire,wildfire,normal,wildcad,0.0,0.0,True
16188,25865.0,2024-07-14 00:30:42.820710+00:00,2024-07-21 20:09:31.416057+00:00,11258.809922,Lone Rock Fire,wildfire,normal,nifc,0.0,0.0,True
16738,26700.0,2024-07-18 22:14:39.563845+00:00,2024-07-21 20:27:12.455439+00:00,4212.548193,Monkey Creek Fire (Battle Mountain Complex),wildfire,normal,wildcad,0.0,0.0,True
17155,27325.0,2024-07-22 20:41:15.584368+00:00,2024-07-23 05:13:39.062092+00:00,512.391295,Telephone Fire,wildfire,normal,nifc,0.0,0.0,True


In [8]:
# Zone table and its changelog are read in full (no row cap).
evac_zones = read_csv_safely(FILES["evac_zones"])
evac_zones_ch = read_csv_safely(FILES["evac_zones_changelog"])

# Parse timestamp columns in place.
for _column in ("date_created", "date_modified"):
    evac_zones[_column] = to_dt(evac_zones[_column])
evac_zones_ch["date_created"] = to_dt(evac_zones_ch["date_created"])

# Most common (is_active, status, external_status) combinations, NaN included.
_status_columns = ["is_active", "status", "external_status"]
evac_zones[_status_columns].value_counts(dropna=False).head(20)


is_active  status      external_status             
True       NaN         Normal                          30236
False      NaN         NaN                              3872
True       NaN         NaN                               866
                       No Evacuation Order               559
                       Lifted                            370
                       INACTIVE                          327
                       0                                 270
                       Liftd                              98
                       NO EVACUATION                      85
           orders      Evacuation Order                   16
           advisories  Monitor                            13
                       Ready                              13
           NaN         00                                 12
           advisories  Evacuation Level 1: Be Ready       11
           NaN         No Status                          11
           orders      Order     