In [1]:
# Block 0: Imports
import os, sys
import glob
import time
import yaml
import requests
import time
import zipfile
import shutil
import csv
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
import random
import backoff

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..", "Scripts")))

from clip_ALPOD_to_SR_extent import clip_vector_with_geometry, extract_geospatial_info_from_xml
from mask_clouds_and_classify_ice import calculate_output_rasters
from calculate_ice_cover_statistics_per_lake import calculate_lake_statistics

In [None]:
# Block 1: Config for lake classification

# Processing options, filled in one key at a time so each option carries its own note.
config = {}

# UDM mask bands whose flagged pixels are removed from the data:
# 3 - Shadow, 4 - Light Haze, 5 - Heavy Haze (deprecated, but kept in case any files are UDM 2.0), 6 - Cloud
config['udm_mask_bands'] = [3, 4, 5, 6]

# SR image bands to keep in the final TIF files: 3 - Red
config['sr_keep_bands'] = [3]

# Reflectance range per class as a (lower, upper) pair.
# NOTE(review): whether the bounds are inclusive is decided by the classification script -- confirm there.
config['pixel_reflectance_thresholds'] = {
    'Ice': (950, 3800),  # lower bound could change to 1100 -- silty-water problem persists
    'Snow': (3800, float('inf')),
    'Water': (float('-inf'), 950),
}

# Input season folders to process, grouped by study-site key.
study_sites_to_process = {}
study_sites_to_process['YKD'] = [
    r"D:\planetscope_lake_ice\Data\Input\YKD 5x5 km\Breakup_2022",
]

# Root folder under which all outputs are written.
output_path = r"D:\planetscope_lake_ice\Data\Output"

# Lake shapefile for each study-site key used in study_sites_to_process.
alpod_vector_shapefiles = {}
alpod_vector_shapefiles['YKD'] = r"D:\planetscope_lake_ice\Data\Input\alpod5x5ykd.shp"

In [None]:
# Block 2: Image Processing Function 

def process_planetscope_image(sr_image_path, study_site, lake_vector_shapefile):
    """
    Run the lake-ice workflow for one PlanetScope SR TIFF.

    Steps:
      0) build the output folders and locate the UDM2 raster and XML metadata that
         share the image's core name and sit in the same folder
      1) clip the study-site lake shapefile to the image footprint read from the XML
      2) call calculate_output_rasters with output paths for the reduced SR raster,
         the cloud mask and the classified ice/snow/water mask
      3) call calculate_lake_statistics on the classified mask

    Args:
        sr_image_path: Path to the SR TIFF, e.g.
            ".../Breakup_2022/20200415_222212_87_1060_ortho_analytic_4b_sr.tif".
            The name of its parent folder is reused as the season folder in the outputs.
        study_site: Study-site key (e.g. 'YKD'); forwarded to calculate_lake_statistics.
        lake_vector_shapefile: Path to the lake shapefile of the study site.

    Raises:
        FileNotFoundError: If no "*udm2.tif" or no "*.xml" file starting with the
            image's core name exists next to the SR image.

    Notes:
        Reads the notebook-level globals `output_path` and `config`.
    """

    # ────────────────────────────────────────────────────────────────────────────────
    # 0: Create output folders // find correlated XML & UDM for this SR image
    # ────────────────────────────────────────────────────────────────────────────────
    sr_filename = os.path.basename(sr_image_path)  # E.g. 20200415_222212_87_1060_ortho_analytic_4b_sr.tif
    sr_filename_no_ext = os.path.splitext(sr_filename)[0]  # E.g. 20200415_222212_87_1060_ortho_analytic_4b_sr
    image_core_name = sr_filename_no_ext.split('_ortho', 1)[0]  # E.g. 20200415_222212_87_1060

    # Folder holding the SR image; its name (e.g. "Breakup_2019") is the season folder
    # that the output tree mirrors.
    sr_image_directory = os.path.dirname(sr_image_path)
    input_season_folder = os.path.basename(sr_image_directory)

    output_rasters_dir = os.path.join(output_path, "Rasters", input_season_folder)
    output_shapefile_dir = os.path.join(output_path, "Shapefiles", input_season_folder, sr_filename_no_ext)
    for d in (output_rasters_dir, output_shapefile_dir):
        os.makedirs(d, exist_ok=True)

    # Companion files live next to the SR image and start with the same core name.
    udm_pattern, udm_path = _first_glob_match(sr_image_directory, image_core_name, "*udm2.tif")
    xml_pattern, xml_path = _first_glob_match(sr_image_directory, image_core_name, "*.xml")

    if udm_path is None or xml_path is None:
        raise FileNotFoundError(f"\n######################\nERROR: UDM or XML files not found for {sr_filename}\nLooked for UDM: {udm_pattern}\nLooked for XML: {xml_pattern}\n######################\n")

    print(f"All pre-processing complete. Beginning classification for image {sr_filename}.\n")

    # ────────────────────────────────────────────────────────────────────────────────
    # 1: Clip study site lake mask to the image footprint (sourced from the XML file)
    # ────────────────────────────────────────────────────────────────────────────────
    output_vector_path = os.path.join(output_shapefile_dir, f"{image_core_name}_lakes.shp")  # lakes within the image's extent

    print("Clipping geometry...")
    geo_info = extract_geospatial_info_from_xml(xml_path)

    clip_vector_with_geometry(
        lake_vector_shapefile,
        geo_info['geometry'],
        output_vector_path
    )

    print("Geometry clipped successfully.\n")

    # ────────────────────────────────────────────────────────────────────────────────
    # 2: Write the reduced SR raster, the cloud mask and the classified mask
    # ────────────────────────────────────────────────────────────────────────────────
    output_sr_path = os.path.join(output_rasters_dir, "Single_Band_Rasters_Uint16", sr_filename)
    output_udm_path = os.path.join(output_rasters_dir, "Cloud_Masks_Uint8", f"{image_core_name}_cloud_mask.tif")
    output_classified_path = os.path.join(output_rasters_dir, "Ice_Snow_Water_Classified_Masks_Uint8", f"{image_core_name}_classified_ice_snow.tif")

    # Each raster product gets its own subfolder; create them before writing.
    for raster_path in (output_sr_path, output_udm_path, output_classified_path):
        os.makedirs(os.path.dirname(raster_path), exist_ok=True)

    print("Creating cloud mask, clipping rasters...")
    calculate_output_rasters(
        sr_image_path,
        udm_path,
        config['udm_mask_bands'],
        config['sr_keep_bands'],
        output_sr_path,
        output_udm_path,
        output_classified_path
    )

    print("     Clouds masked successfully.")

    # ────────────────────────────────────────────────────────────────────────────────
    # 3: Calculate statistics for each lake
    # ────────────────────────────────────────────────────────────────────────────────
    # NOTE(review): the clipped shapefile written in step 1 (output_vector_path) is not
    # passed on here; the full study-site shapefile is. Confirm that
    # calculate_lake_statistics is meant to receive the unclipped file.
    print("Calculating lake statistics...")
    calculate_lake_statistics(
        output_classified_path,
        image_core_name,
        output_path,
        study_site,
        lake_vector_shapefile
    )

    print(f"Finished processing {image_core_name}\n# ──────────────────────────────────────────────────────────────────────────────── #\n")


def _first_glob_match(directory, name_prefix, wildcard_suffix):
    """Return (pattern, path) for the first file in `directory` named `name_prefix` + `wildcard_suffix`.

    `directory` and `name_prefix` are escaped so glob-special characters in them
    ('[', ']', '*', '?') are matched literally; only `wildcard_suffix` is a pattern.
    Matches are sorted so the pick is reproducible when several files match.
    `path` is None when nothing matches.
    """
    pattern = os.path.join(glob.escape(directory), glob.escape(name_prefix) + wildcard_suffix)
    matches = sorted(glob.glob(pattern))
    return pattern, (matches[0] if matches else None)

In [None]:
# Block 3: Loop through all images to clip, clean, and classify lake ice cover

# (path, "ErrorType: message") for every image that could not be processed.
# Only the text is kept so failed calls' frames are not held alive.
failed_images = []

for study_site, paths_list in study_sites_to_process.items():
    lake_vector_shapefile = alpod_vector_shapefiles[study_site]  # Get corresponding shapefile

    for site_folder in paths_list:
        # Escape the folder so glob-special characters in it ('[', ']', '*', '?') are matched literally.
        pattern = os.path.join(glob.escape(site_folder), "*.tif")  # Match all .tif files
        # Sorted so images are processed in a reproducible order.
        for sr_tif_path in sorted(glob.glob(pattern)):
            # Skip files with 'udm' in the name so only SR images go through.
            if 'udm' in os.path.basename(sr_tif_path).lower():
                continue
            try:
                process_planetscope_image(sr_tif_path, study_site, lake_vector_shapefile)
            except Exception as e:
                # Best-effort batch: report the failure and continue with the next image.
                print(f"Error processing {sr_tif_path}: {e}")
                failed_images.append((sr_tif_path, f"{type(e).__name__}: {e}"))

# Repeat the failures at the end so they are not lost in the per-image log output.
if failed_images:
    print(f"\n{len(failed_images)} image(s) failed to process:")
    for failed_path, failure in failed_images:
        print(f"  {failed_path}: {failure}")

All pre-processing complete. Beginning classification for image 20220419_211221_04_2440_ortho_analytic_4b_sr.tif.

Clipping geometry...
     Extracted polygon with 5 vertices
     Output shapefile contains 1 lakes
Geometry clipped successfully.

Creating cloud mask, clipping rasters...
Loading raster data with Dask...
Creating single red band raster...
