In [None]:
import geopandas as gpd
import os
import pandas as pd
import numpy as np
print("Starting GeoPandas script...")

# --- 1. DEFINE FILE PATHS ---
# Inputs: the fire-detection point archive and the area-of-interest (AOI) polygons.
viirs_path = '/explore/nobackup/people/spotter5/cnn_mapping/active_fire/DL_FIRE_M-C61_681279/fire_archive_M-C61_681279.shp'
aoi_path = '/explore/nobackup/people/spotter5/arctic_report_card/shapes/tundra_and_boreal.shp'

# Output: a single Parquet file inside a dedicated results folder.
output_dir = '/explore/nobackup/people/spotter5/cnn_mapping/VIIRS/DL_FIRE_M-C61_681279/geopandas/'

# Make sure the results folder is present before anything is written to it;
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, 'fire_archive_filter.parquet')

# --- 3. LOAD DATA ---
# Load the smaller AOI shapefile first so its extent can limit how much of the
# much larger fire archive has to be read into memory.
print("Loading AOI data...")
aoi = gpd.read_file(aoi_path)

# Load the fire shapefile with standard GeoPandas, restricted to the AOI extent.
# Passing the AOI GeoDataFrame as `bbox` makes read_file skip features outside
# the AOI's bounding box (GeoPandas resolves any CRS mismatch between the AOI
# and the file). Detections outside that box cannot intersect the AOI, so the
# result of the later overlay is unchanged while far fewer rows are loaded.
# This may still be memory-intensive for large files.
print("Loading VIIRS data into memory...")
viirs = gpd.read_file(viirs_path, bbox=aoi)

# Ensure the Coordinate Reference System (CRS) matches for the overlay operation.
aoi = aoi.to_crs(viirs.crs)

# Pre-emptively fix any invalid geometries in the AOI (zero-width buffer).
aoi.geometry = aoi.geometry.buffer(0)
print("Data loading complete.")

# --- 4. TRANSFORM DATA ---
# Each operation is executed immediately.

# Convert CONFIDENCE to the categorical codes 'l', 'n', 'h'.
# Numeric values are binned with right-inclusive intervals:
#   (-inf, 29] -> 'l', (29, 79] -> 'n', (79, 100] -> 'h'; values above 100 -> NaN.
confidence_labels = ['l', 'n', 'h']
confidence_raw = viirs['CONFIDENCE']
viirs['CONFIDENCE_numeric'] = pd.to_numeric(confidence_raw, errors='coerce')
confidence_binned = pd.cut(
    viirs['CONFIDENCE_numeric'],
    bins=[-np.inf, 29, 79, 100],
    labels=confidence_labels
)

# Values that are not numeric but already are one of the letter codes are kept
# as-is. Without this, an archive whose CONFIDENCE column is already coded as
# l/n/h would be coerced to NaN above and every row would be silently dropped
# by the filter below.
# NOTE(review): assumes letter-coded archives use exactly 'l'/'n'/'h'
# (case-insensitive) — confirm against the data provider's attribute table.
confidence_text = confidence_raw.astype(str).str.strip().str.lower()
already_coded = viirs['CONFIDENCE_numeric'].isna() & confidence_text.isin(confidence_labels)
viirs['CONFIDENCE'] = pd.Categorical(
    confidence_binned.astype(object).where(~already_coded, confidence_text),
    categories=confidence_labels,
    ordered=True  # pd.cut produces an ordered categorical; keep that dtype
)

# Filter for nominal ('n') and high ('h') confidence levels.
# Rows whose CONFIDENCE could not be classified (NaN) are dropped here too.
print("Filtering for nominal and high confidence points...")
viirs = viirs[viirs['CONFIDENCE'].isin(['n', 'h'])].copy()

# Convert ACQ_DATE to datetime objects; unparseable values become NaT.
viirs["ACQ_DATE"] = pd.to_datetime(viirs["ACQ_DATE"], errors="coerce")

# Filter out any rows where date conversion failed.
viirs = viirs.dropna(subset=['ACQ_DATE'])

# Extract Year and Day of Year (dob).
viirs['Year'] = viirs["ACQ_DATE"].dt.year
viirs['dob'] = viirs["ACQ_DATE"].dt.dayofyear

# --- 5. SPATIAL INTERSECTION (OVERLAY) ---
# This is the most computationally intensive step.
print("Starting intersection with AOI...")

# Columns that make up the final output, in their final order.
final_columns = ['SATELLITE', 'Year', 'dob', 'CONFIDENCE', 'geometry']

# Pass only what the output needs into the overlay:
#  * the fire points are trimmed to the final columns, so helper columns are
#    not copied through the intersection;
#  * the AOI is reduced to its geometry, so none of its attributes are
#    duplicated onto every fire point, and an AOI attribute that shares a name
#    with a fire column cannot trigger overlay's column suffixing (which would
#    break the column selection below with a KeyError).
viirs_clipped = gpd.overlay(viirs[final_columns], aoi[['geometry']], how="intersection")

# Select and arrange the final columns.
viirs_final = viirs_clipped[final_columns]

# --- 6. SAVE RESULTS ---
# Save the processed data to a Parquet file.
print(f"Saving results to Parquet file: {output_file}")
viirs_final.to_parquet(output_file)

print("\nProcessing complete.")


Starting GeoPandas script...
Loading VIIRS data into memory...


In [2]:
# Scratch cell: a bare string expression, so the notebook simply echoes 't'.
# It does not read or modify anything produced by the processing cell above.
't'

't'