In [1]:
# Block 0: Imports
import os, sys
import glob
import time

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..", "Scripts")))

from clip_ALPOD_to_SR_extent import clip_vector_with_geometry, extract_geospatial_info_from_json
from mask_clouds_and_classify_ice import create_mask_rasters, classify_ice_cover
from calculate_ice_cover_statistics_per_lake import calculate_lake_statistics, add_observation

In [2]:
# Block 1: Initialize Data directory & instructions for project setup / organization
# !git clone https://github.com/nj142/planetscope_lake_ice
# Create unclassified Data folder, with the option to retrain a model with new parameters?
# List your batches and make an input folder and an output folder for each.
# The user drops their downloads into the input folder, and the code writes the NetCDF file and other outputs into the output folder.
# Create NetCDF file with unlimited lakes dimension, etc?  For now don't need to do this, can just update.

In [3]:
# Block 2 (ONLY IF RETRAINING): Convert downloaded PlanetScope RGB directories to RGB JPEGs to be labeled
# Run tif-to-jpeg.py, then upload the resulting JPEGs to Labelbox.
# Once the images are labeled on Labelbox, save the downloaded masks to "3-Download Labelbox masks here", then run clip_PNG_label_data_to_tif.py

In [4]:
# Block 3: Config for lake classification

# Settings consumed by process_planetscope_image (mask creation, classification, statistics).
config = dict(
    # UDM mask bands
    mask_bands=[3, 4, 6],
    # SR image bands to keep in the final TIF files
    keep_bands=[3],
    # (lower, upper) value range per class on the kept band.  Key order matters:
    # classes are numbered 1..N in this order when the results are reported.
    thresholds=dict(
        Ice=(950, 3800),
        Snow=(3800, float('inf')),
        Water=(float('-inf'), 950),
    ),
    # NOTE(review): presumably the minimum percentage of clear pixels a lake needs
    # to be counted -- confirm against calculate_lake_statistics.
    min_clear_percent=30,
)

# Study site name -> list of PlanetScope SR images to process for that site
study_sites_to_process = dict(
    YKD=[
        r"D:\planetscope_lake_ice\Data\Input\YKD\20190429_215112_103c_3B_AnalyticMS_SR.tif",
    ],
)

# Output locations (a per-site subfolder is created under each raster/shapefile directory)
output_rasters_dir = r"D:\planetscope_lake_ice\Data\Output\Rasters"
output_shapefiles_dir = r"D:\planetscope_lake_ice\Data\Output\Shapefiles"
netcdf_path = r"D:\planetscope_lake_ice\Data\Output\TEST_CDF.nc"

# Input lake polygons (ALPOD) that get clipped to each image
vector_path = r"D:\planetscope_lake_ice\Data\Input\ALPOD\ALPODlakes.shp"

In [5]:
# Block 4: Function for classifying ice cover in a PlanetScope image (separated for code logic readability)

def _find_udm_path(image_dir, planetscope_image_name, planetscope_image_basename):
    """Return the path of the Usable Data Mask (UDM) raster belonging to an SR image.

    Looks in image_dir for "<planetscope_image_name>*udm*.tif".

    Raises:
        FileNotFoundError: if no file matches.  Failing here avoids running the
            clipping step only to crash later on an undefined UDM path.
    """
    # Escape the literal parts so characters such as "[" in folder names are not read as wildcards
    udm_pattern = os.path.join(
        glob.escape(image_dir),
        f"{glob.escape(planetscope_image_name)}*udm*.tif"
    )

    # Sorted so the choice is deterministic when several files match
    matching_files = sorted(glob.glob(udm_pattern))
    if not matching_files:
        raise FileNotFoundError(f"UDM file not found for {planetscope_image_basename}")

    udm_path = matching_files[0]
    print(f"Found UDM file: {udm_path}")
    return udm_path


def _find_json_path(image_dir, planetscope_image_name, planetscope_image_basename):
    """Return the path of the metadata JSON describing an SR image.

    Tries "<planetscope_image_name>.json" first.  If that is missing, searches
    image_dir for other ".json" files (ignoring names containing "_metadata"),
    preferring one whose name is a prefix of the image name so that another
    scene's footprint is not picked up by accident.

    Raises:
        ValueError: if the directory contains no candidate JSON file at all.
    """
    json_path = os.path.join(image_dir, f"{planetscope_image_name}.json")
    if os.path.exists(json_path):
        return json_path

    print(f"Could not find JSON file: {json_path}")
    print("Checking directory contents for a valid JSON file...")

    # An empty dirname means "current directory"; os.listdir("") would raise
    possible_jsons = sorted(
        f for f in os.listdir(image_dir or os.curdir)
        if f.endswith('.json') and '_metadata' not in f
    )
    if not possible_jsons:
        raise ValueError(f"JSON file not found for {planetscope_image_basename}")

    # Prefer JSONs named after a leading part of the image name,
    # e.g. "20190429_215112_103c.json" for "20190429_215112_103c_3B_AnalyticMS_SR.tif"
    image_stem = os.path.splitext(planetscope_image_basename)[0]
    same_scene_jsons = [
        f for f in possible_jsons
        if image_stem == f[:-len('.json')] or image_stem.startswith(f[:-len('.json')] + '_')
    ]

    if same_scene_jsons:
        # The longest matching name is the most specific one
        chosen_json = max(same_scene_jsons, key=len)
    else:
        # Best-effort fallback kept from the original behaviour, but made visible
        print(f"Warning: no JSON file name matches {planetscope_image_basename}; "
              f"the geometry used may belong to a different image")
        chosen_json = possible_jsons[0]

    json_path = os.path.join(image_dir, chosen_json)
    print(f"Using fallback JSON file: {json_path}")
    return json_path


def process_planetscope_image(planetscope_image_path, study_site):
    """Clip, mask, classify, and summarise lake ice cover for one PlanetScope SR image.

    Steps:
        0. Locate the UDM raster and metadata JSON that sit next to the image.
        1. Clip the lake vector dataset (vector_path) to the image geometry from the JSON.
        2. Write a masked raster of the kept band(s) and a lake ID raster.
        3. Classify the masked raster using config['thresholds'].
        4. Calculate per-lake statistics and write them to the NetCDF file.

    Reads the module-level settings config, output_rasters_dir, output_shapefiles_dir,
    netcdf_path and vector_path, and creates per-site output folders as needed.

    Args:
        planetscope_image_path: Path to the surface reflectance (SR) GeoTIFF.
        study_site: Study site name; used as the output subfolder name and passed
            on to the statistics step.

    Raises:
        FileNotFoundError: if no UDM file is found for the image.
        ValueError: if no metadata JSON is found for the image.
    """

    # "planetscope_image_basename" is the file name of the image, including its extension
    planetscope_image_basename = os.path.basename(planetscope_image_path)
    # "planetscope_image_name" is the first four underscore-separated parts of the basename.
    # It is used to find the UDM mask and JSON that correspond to the SR image.
    image_name_parts = planetscope_image_basename.split('_')
    planetscope_image_name = '_'.join(image_name_parts[:4])  # e.g., "20240614_213553_65_242d"

    print(f"Processing {planetscope_image_basename} from {study_site}")

    # -------------------------------------------------------------------------
    # 0. Find corresponding Planet's Usable Data Mask (UDM) and JSON for given SR image.
    #     Done before any output folder is created so missing inputs fail fast.
    # -------------------------------------------------------------------------

    image_dir = os.path.dirname(planetscope_image_path)
    udm_path = _find_udm_path(image_dir, planetscope_image_name, planetscope_image_basename)
    json_path = _find_json_path(image_dir, planetscope_image_name, planetscope_image_basename)

    # Find relevant output folders for the study site
    site_rasters_dir = os.path.join(output_rasters_dir, study_site)
    site_shapefiles_dir = os.path.join(output_shapefiles_dir, study_site)

    # -------------------------------------------------------------------------
    # 1. Clip the given large ALPOD vector dataset to just the SR image's extent.
    #     Save the clipped vector to the clipped_vector_path.
    # -------------------------------------------------------------------------

    start_time = time.time()
    print("Clipping lakes to valid UDM data extent (excluding padded areas)...")

    # Make an output subfolder for each image (there are 4 files per shapefile, this keeps it organized)
    img_subfolder = os.path.join(site_shapefiles_dir, os.path.splitext(planetscope_image_basename)[0])
    os.makedirs(img_subfolder, exist_ok=True)
    clipped_vector_path = os.path.join(img_subfolder, "clipped.shp")

    # Get Planet geometry and projection metadata for quick calculations
    geospatial_info = extract_geospatial_info_from_json(json_path)
    geometry = geospatial_info['geometry']
    print(f"{geometry} is geometry")

    # Clip the lake vector to the image geometry read from the JSON
    features_kept = clip_vector_with_geometry(
        vector_path,
        geometry,
        clipped_vector_path
    )

    end_time = time.time()
    print(f"Lakes kept: {features_kept} in {end_time - start_time} seconds")

    # -------------------------------------------------------------------------
    # 2. Mask out the haze/cloud/shadow layers (or whichever "mask_bands" are selected)
    #      and save just the pixels from the red band (or whichever "keep_bands" are selected)
    #      which are contained within the vector outlines from step 1, used like a cookie cutter.
    # -------------------------------------------------------------------------

    # TODO: change this so it works on a lake-by-lake basis.  Fill each rasterized
    # pixel from the vector with its lake ID.  Then draw a minimum bounding box around all the pixels
    # with that lake id.  Save that as a new TIF.

    start_time = time.time()
    print("Removing unusable data...")

    # Create subdirectories for the cloud- and land-free satellite images and the lake ID rasters
    site_masked_dir = os.path.join(site_rasters_dir, 'Masked Rasters')
    os.makedirs(site_masked_dir, exist_ok=True)
    masked_path = os.path.join(site_masked_dir, planetscope_image_basename)

    site_lake_ids_dir = os.path.join(site_rasters_dir, 'Lake ID Rasters')
    os.makedirs(site_lake_ids_dir, exist_ok=True)
    lake_ids_path = os.path.join(site_lake_ids_dir, planetscope_image_basename)

    # FIXME: THIS IS CURRENTLY BROKEN BECAUSE THE POLYGON IS WRONGLY PLACED!!  NEED TO FIX
    create_mask_rasters(
        planetscope_image_path,
        udm_path,
        clipped_vector_path,
        config['mask_bands'],
        config['keep_bands'],
        masked_path,
        lake_ids_path
    )

    end_time = time.time()
    kept_band_labels = ', '.join(['band ' + str(b) for b in config['keep_bands']])
    print(f"Lake IDs raster mask saved to {lake_ids_path}")
    print(f"Masked {kept_band_labels} saved to {masked_path} in {end_time - start_time} seconds")

    # -------------------------------------------------------------------------
    # 3. Classify Ice, Snow, and Water (or whatever given classes are) using band
    #     thresholding on the "keep" band.  (For our cases, this is red band thresholding.)
    # -------------------------------------------------------------------------

    start_time = time.time()
    print("Classifying pixels...")

    # Create classified raster subdirectory to save the ice/snow/water categorically classified rasters
    site_classified_dir = os.path.join(site_rasters_dir, 'Classified Rasters')
    os.makedirs(site_classified_dir, exist_ok=True)
    classified_path = os.path.join(site_classified_dir, planetscope_image_basename)

    classify_ice_cover(
        masked_path,
        config['thresholds'],
        classified_path
    )

    end_time = time.time()
    # Classes are numbered from 1 in the order they appear in config['thresholds']
    class_labels = [f"{i+1} = {name}" for i, name in enumerate(config['thresholds'].keys())]
    print(f"Categorical classified ice mask saved with {', '.join(class_labels)} in {end_time - start_time} seconds")

    # -------------------------------------------------------------------------
    # 4. Calculate lake statistics for the lake, and save to the final NetCDF file.
    # -------------------------------------------------------------------------

    print(f"\nProcessing {planetscope_image_basename} for lake statistics...")

    # Calculate lake statistics using the updated function signature
    calculate_lake_statistics(
        lake_ids_path,
        classified_path,
        planetscope_image_name,
        netcdf_path,
        study_site,
        vector_path,
        config
    )

    print(f"Lake statistics calculated and saved to netCDF file for {planetscope_image_basename}")

In [6]:
# Block 5: Loop through all images to clip, clean, and classify lake ice cover (details in script above)

# Walk every (study site, image) pair in the order the sites and their images are listed.
# A failure on one image is reported and the run continues with the next image.
for study_site, planetscope_image_path in (
    (site_name, image_path)
    for site_name, site_image_paths in study_sites_to_process.items()
    for image_path in site_image_paths
):
    try:
        process_planetscope_image(planetscope_image_path, study_site)
    except Exception as e:
        print(f"Error processing {planetscope_image_path}: {e!s}")

Processing 20190429_215112_103c_3B_AnalyticMS_SR.tif from YKD
Found UDM file: D:\planetscope_lake_ice\Data\Input\YKD\20190429_215112_103c_3B_udm2.tif
Could not find JSON file: D:\planetscope_lake_ice\Data\Input\YKD\20190429_215112_103c_3B.json
Checking directory contents for a valid JSON file...
Using fallback JSON file: D:\planetscope_lake_ice\Data\Input\YKD\20190429_215112_103c.json
Clipping lakes to valid UDM data extent (excluding padded areas)...
Reading JSON file: D:\planetscope_lake_ice\Data\Input\YKD\20190429_215112_103c.json
Raw geometry from JSON: {"coordinates": [[[-163.5078022789682, 62.90750460876561], [-163.55876157160918, 62.835407254229935], [-163.07086430047738, 62.76315468832276], [-163.01962861270457, 62.83521893825884], [-163.5078022789682, 62.90750460876561]]], "type": "Polygon"}
Number of coordinates in polygon: 5
{'coordinates': [[[-163.5078022789682, 62.90750460876561], [-163.55876157160918, 62.835407254229935], [-163.07086430047738, 62.76315468832276], [-163.01



Reading cloud mask bands [3, 4, 6] from: 20190429_215112_103c_3B_udm2.tif
Saved lake ID raster to: D:\planetscope_lake_ice\Data\Output\Rasters\YKD\Lake ID Rasters\20190429_215112_103c_3B_AnalyticMS_SR.tif
Applying combined lake and cloud mask to red band
Saved masked band to: D:\planetscope_lake_ice\Data\Output\Rasters\YKD\Masked Rasters\20190429_215112_103c_3B_AnalyticMS_SR.tif
Lake IDs raster mask saved to D:\planetscope_lake_ice\Data\Output\Rasters\YKD\Lake ID Rasters\20190429_215112_103c_3B_AnalyticMS_SR.tif
Masked band 3 saved to D:\planetscope_lake_ice\Data\Output\Rasters\YKD\Masked Rasters\20190429_215112_103c_3B_AnalyticMS_SR.tif in 64.24384045600891 seconds
Classifying pixels...

Classifying ice cover in: 20190429_215112_103c_3B_AnalyticMS_SR.tif
Using classification scheme:
  Ice: values from 950 to 3800
  Snow: values from 3800 to inf
  Water: values from -inf to 950
Applying classification to valid pixels...
Output classes:
  Class 1: Ice
  Class 2: Snow
  Class 3: Water
Sa