In [None]:
import geopandas as gpd
import pandas as pd
import dask.dataframe as dd
from dask import delayed
import os

# Function to process shapefiles
def process_shapefile(file_path):
    """Read a shapefile and add year / day-of-year columns from ``ACQ_DATE``.

    Args:
        file_path: Path of a vector file readable by ``geopandas.read_file``.
            The file must provide ``ACQ_DATE``, ``CONFIDENCE`` and
            ``geometry`` columns.

    Returns:
        The rows of the file restricted to the columns ``Year`` (calendar
        year of ``ACQ_DATE``), ``dob`` (day of year of ``ACQ_DATE``),
        ``CONFIDENCE`` and ``geometry``.
    """
    df = gpd.read_file(file_path)
    # Parse the acquisition date once; both derived columns come from it.
    acq_date = pd.to_datetime(df['ACQ_DATE'])
    df['Year'] = acq_date.dt.year
    df['dob'] = acq_date.dt.dayofyear
    return df[['Year', 'dob', 'CONFIDENCE', 'geometry']]

# Paths to shapefiles
# NOTE(review): the directory id (351277) and the file id (365468) of
# file_one differ -- confirm this path is the intended one.
file_one = '/explore/nobackup/people/spotter5/cnn_mapping/VIIRS/DL_FIRE_SV-C2_351277/fire_archive_SV-C2_365468_clip.shp'
file_two = '/explore/nobackup/people/spotter5/cnn_mapping/VIIRS/DL_FIRE_SV-C2_517701/fire_archive_SV-C2_517701_clip.shp'
aoi_path = '/explore/nobackup/people/spotter5/Moore/raw/ArcticMaps_studydomain.shp'

# Read the AOI (Area of Interest) shapefile
aoi = gpd.read_file(aoi_path)

# Ensure the AOI is in the same CRS as the VIIRS data.
# Only the CRS is needed here, so read a single row instead of loading the
# whole point archive (it is read in full later by process_shapefile).
aoi = aoi.to_crs(crs=gpd.read_file(file_one, rows=1).crs)

# Process the files in parallel (lazily; nothing is read until compute time)
one = delayed(process_shapefile)(file_one)
two = delayed(process_shapefile)(file_two)

# Merge the dataframes
viirs = dd.from_delayed([one, two])

# Years to export. The range is fixed on purpose: deriving it from the data
# (viirs['Year'].unique().compute()) forces a full read of both shapefiles,
# and its result was previously discarded in favour of this range anyway.
unique_years = range(2012, 2024)

# Output path
out_path = '/explore/nobackup/people/spotter5/cnn_mapping/VIIRS/pts_by_year/all_conf'
os.makedirs(out_path, exist_ok=True)

# Function to save and clip shapefiles by year
def save_by_year(year, viirs, aoi, out_dir=None):
    """Write the points of one year, clipped to the AOI, to ``<year>.shp``.

    Args:
        year: Value matched against the ``Year`` column of ``viirs``.
        viirs: Table with at least ``Year`` and ``geometry`` columns. Rows
            containing a missing value in any column are dropped before
            clipping.
        aoi: Geometry (or GeoDataFrame) used as the clip mask; passed
            straight to ``geopandas.clip``.
        out_dir: Directory that receives the shapefile. Defaults to the
            module-level ``out_path`` when omitted.

    Returns:
        None. Prints a one-line status message.
    """
    if out_dir is None:
        # Fall back to the script-wide output directory.
        out_dir = out_path

    sub = viirs[viirs['Year'] == year].dropna()
    sub_gdf = gpd.GeoDataFrame(sub, geometry='geometry')

    # Clip the sub_gdf by the AOI
    clipped_gdf = gpd.clip(sub_gdf, aoi)

    # The list of years is fixed, so a year may have no points inside the
    # AOI. Writing an empty layer is useless and, depending on the
    # geopandas version, may warn or raise -- skip it instead.
    if clipped_gdf.empty:
        print(f'No points for {year}; nothing saved')
        return

    output_file = os.path.join(out_dir, f'{year}.shp')
    clipped_gdf.to_file(output_file)
    print(f'Saved {year}.shp')

# Build one lazy save_by_year task per requested year, then run them all in
# a single compute call so the scheduler can execute them in parallel.
delayed_tasks = [
    delayed(save_by_year)(yr, viirs, aoi)
    for yr in unique_years
]
dd.compute(*delayed_tasks)




In [None]:
't'