In [None]:
import geopandas as gpd
import os
import pandas as pd

# Filter a VIIRS point shapefile down to nominal/high-confidence detections
# that fall inside an area-of-interest (AOI) polygon layer, derive the year and
# day-of-year of each detection, and write the result to one Parquet file.
print("Starting GeoPandas script...")

# --- 1. DEFINE FILE PATHS ---
viirs_path = '/explore/nobackup/people/spotter5/cnn_mapping/active_fire/DL_FIRE_SV-C2_681282/fire_archive_SV-C2_681282.shp'
aoi_path = '/explore/nobackup/people/spotter5/arctic_report_card/shapes/tundra_and_boreal.shp'
output_dir = '/explore/nobackup/people/spotter5/cnn_mapping/VIIRS/DL_FIRE_SV-C2_681282/geopandas/'
output_file = os.path.join(output_dir, 'fire_archive_filter.parquet')

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# --- 2. LOAD DATA WITH GEOPANDAS ---
# Load the VIIRS shapefile into a GeoDataFrame. This loads the entire file into memory.
print(f"Loading VIIRS data from: {viirs_path}")
viirs = gpd.read_file(viirs_path)

# Load the AOI shapefile and reproject it to the VIIRS CRS so both layers
# share one coordinate system before they are overlaid.
print(f"Loading AOI data from: {aoi_path}")
aoi = gpd.read_file(aoi_path).to_crs(viirs.crs)

# --- 3. PROCESS DATA ---
# These operations are performed immediately (not lazily as in Dask).

# Convert ACQ_DATE to datetime objects; unparseable values become NaT.
viirs["ACQ_DATE"] = pd.to_datetime(viirs["ACQ_DATE"], errors="coerce")

# Filter out rows with invalid dates
viirs = viirs.dropna(subset=["ACQ_DATE"])

# Filter for nominal ('n') or high ('h') confidence
print("Filtering for nominal and high confidence points...")
viirs = viirs[viirs['CONFIDENCE'].isin(['n', 'h'])]

# Extract year and day of year
viirs = viirs.assign(
    Year=viirs["ACQ_DATE"].dt.year,
    dob=viirs["ACQ_DATE"].dt.dayofyear
)

# --- 4. PERFORM INTERSECTION ---
# This uses the standard, single-threaded overlay function from GeoPandas.
#
# Only the columns that end up in the output are passed into the overlay, and
# the AOI is reduced to its geometry column. overlay() merges the attribute
# columns of both inputs and renames clashing names with "_1"/"_2" suffixes,
# so an AOI field called "Year", "dob" or "CONFIDENCE" would otherwise make
# the column selection below fail with a KeyError. Dropping the unused
# attributes also keeps the most expensive step as small as possible.
print("Starting intersection with AOI...")
viirs_clipped = gpd.overlay(
    viirs[["Year", "dob", "CONFIDENCE", viirs.geometry.name]],
    aoi[[aoi.geometry.name]],
    how="intersection",
)

# --- 5. SELECT FINAL COLUMNS ---
# Select the desired columns from the clipped result (fixes the column order).
viirs_final = viirs_clipped[["Year", "dob", "CONFIDENCE", "geometry"]]

# --- 6. SAVE TO A SINGLE PARQUET FILE ---
print(f"Saving results to single Parquet file: {output_file}")
viirs_final.to_parquet(output_file)

print("Processing complete.")


Starting GeoPandas script...
Loading VIIRS data from: /explore/nobackup/people/spotter5/cnn_mapping/active_fire/DL_FIRE_SV-C2_681282/fire_archive_SV-C2_681282.shp
Loading AOI data from: /explore/nobackup/people/spotter5/arctic_report_card/shapes/tundra_and_boreal.shp
Filtering for nominal and high confidence points...
Starting intersection with AOI...
