# NASA FIRMS Dataset

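Download the archive CSV files from https://firms.modaps.eosdis.nasa.gov/download/ and place them in the `wild_fire_nasa/` directory; the cell below reads every file matching `fire_archive*.csv` from there.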

In [2]:
import glob
import os
from collections import defaultdict

import pandas as pd

# Count fire detections per (year, type) across all FIRMS archive CSVs and
# save the result as a long-format table with columns: year, type, count.
data_dir = os.path.abspath("wild_fire_nasa")
pattern = os.path.join(data_dir, "fire_archive*.csv")
csv_files = sorted(glob.glob(pattern))

if not csv_files:
    # Not fatal: the summary below is simply empty (header-only CSV).
    print(f"No fire_archive*.csv files found in {data_dir}")

print(f"Found {len(csv_files)} file(s): {[os.path.basename(f) for f in csv_files]}")

# (year, type) -> count, process in chunks to save memory
count_by_year_type = defaultdict(int)

for filepath in csv_files:
    for chunk in pd.read_csv(
        filepath,
        chunksize=100_000,
        usecols=["acq_date", "type"],
    ):
        # Unparseable dates become NaT (errors="coerce") and are dropped.
        # assign/dropna/astype each return a new frame, so nothing is written
        # into a slice of another frame (avoids SettingWithCopyWarning).
        chunk = (
            chunk.assign(
                year=pd.to_datetime(chunk["acq_date"], errors="coerce").dt.year
            )
            .dropna(subset=["year"])
            .astype({"year": int})
        )
        # NOTE: groupby drops rows whose "type" is missing (pandas default
        # dropna=True), so such rows are not counted.
        for (year, typ), cnt in chunk.groupby(["year", "type"]).size().items():
            count_by_year_type[(int(year), typ)] += int(cnt)

# Build summary DataFrame; explicit columns keep the header even when empty.
rows = [
    {"year": year, "type": typ, "count": count}
    for (year, typ), count in sorted(count_by_year_type.items())
]
summary = pd.DataFrame(rows, columns=["year", "type", "count"])

# Save long format (year, type, count)
output_path = os.path.join("preprocessed", "wildfire_count_by_year_type.csv")
# The output directory may not exist yet on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# index=False: the positional index is not part of the (year, type, count) table.
summary.to_csv(output_path, index=False)
print(f"Saved summary to: {output_path}")
print(summary.to_string(index=False))

Found 14 file(s): ['fire_archive_SV-C2_2012.csv', 'fire_archive_SV-C2_2013.csv', 'fire_archive_SV-C2_2014.csv', 'fire_archive_SV-C2_2015.csv', 'fire_archive_SV-C2_2016.csv', 'fire_archive_SV-C2_2017.csv', 'fire_archive_SV-C2_2018.csv', 'fire_archive_SV-C2_2019.csv', 'fire_archive_SV-C2_2020.csv', 'fire_archive_SV-C2_2021.csv', 'fire_archive_SV-C2_2022.csv', 'fire_archive_SV-C2_2023.csv', 'fire_archive_SV-C2_2024.csv', 'fire_archive_SV-C2_2025.csv']
Saved summary to: preprocessed/wildfire_count_by_year_type.csv
 year  type    count
 2012     0 19503040
 2012     1    20953
 2012     2  1300566
 2012     3   143892
 2013     0 17875225
 2013     1    23090
 2013     2  1348256
 2013     3   158075
 2014     0 18847252
 2014     1    27375
 2014     2  1245267
 2014     3   194652
 2015     0 20026487
 2015     1    30366
 2015     2  1237209
 2015     3   157695
 2016     0 18899034
 2016     1    28752
 2016     2  1284168
 2016     3   146176
 2017     0 18583192
 2017     1    28279
 