Apply Hedley sunglint correction to the visible bands, then calculate the Modified Normalized Difference Water Index (MNDWI) to mask land pixels and keep only water.

In [None]:
import os
import glob
import numpy as np
import rasterio
from tqdm import tqdm

from preprocessing.preprocessing import sunglint_correction, calculate_mndwi


In [None]:
# Configuration
# Paths are built with os.path.join so the notebook is portable: a raw
# 'data\corrected' literal is only a nested folder on Windows; on POSIX it
# would create a single directory whose name contains a backslash.
input_path = 'data'
output_path = os.path.join(input_path, 'corrected')  # Output folder for corrected rasters
plot = os.path.join(output_path, 'plot')  # Folder for QC plots

# Parameters
mndwi_threshold = 0.0  # Pixels with MNDWI above this value are kept as water

# Create directories
os.makedirs(output_path, exist_ok=True)
os.makedirs(plot, exist_ok=True)

# The glob is non-recursive, so files already written to output_path are not
# picked up again. Sorted so the processing order is reproducible across runs.
tif_files = sorted(glob.glob(os.path.join(input_path, "*.tif")))
print(f"Found {len(tif_files)} images.")

# Sentinel-2 band names, written as band descriptions in the output metadata.
# The list length also defines how many bands each image is expected to have.
s2_band = [
    'B1', 'B2', 'B3', 'B4', 'B5', 'B6',
    'B7', 'B8', 'B8A', 'B9', 'B11', 'B12'
]


Found 8 images.


In [None]:
# Batch processing: Hedley sunglint correction + MNDWI water masking.
# For every input GeoTIFF: deglint the visible bands, build a water mask from
# MNDWI, zero out all non-water pixels, and write the result to output_path.
n_bands = len(s2_band)  # Bands read, masked and written per image

for filepath in tqdm(tif_files, desc="Hedley + MNDWI Processing"):
    filename = os.path.basename(filepath)

    # Output file lives inside the output folder under a prefixed name,
    # so the source raster is never overwritten.
    current_output_path = os.path.join(output_path, f"corrected_{filename}")

    try:
        with rasterio.open(filepath) as src:
            # Check the band count before reading, so files that will be
            # skipped do not cost a full raster read.
            if src.count < n_bands:
                print(f"[SKIP] {filename}: Not enough bands ({src.count}). Need {n_bands}.")
                continue

            # 1. Read data
            # Only the first n_bands bands are read so the array shape always
            # matches the band count declared in the output profile, even if
            # the file carries extra bands.
            # NOTE(review): assumes the first 12 bands follow the s2_band order - confirm.
            data = src.read(indexes=list(range(1, n_bands + 1))).astype('float32')
            profile = src.profile.copy()

            # 2. Band positions (0-based indexes into `data`, matching s2_band)
            idx_blue, idx_green, idx_red = 1, 2, 3  # B2, B3, B4
            idx_nir = 7    # B8  (used for the Hedley glint calculation)
            idx_swir = 10  # B11 (used for MNDWI masking)

            raw_blue = data[idx_blue]
            raw_green = data[idx_green]
            raw_red = data[idx_red]
            raw_nir = data[idx_nir]
            raw_swir = data[idx_swir]

            # 3. Hedley sunglint correction (visible bands only)
            # QC plots are written to `plot`; check them to inspect the regression.
            corrected_bands = sunglint_correction(
                visible_bands=[raw_blue, raw_green, raw_red],
                nir_band=raw_nir,
                output_dir=plot,
                image_id=filename,
                plot_graphs=True
            )
            clean_blue, clean_green, clean_red = corrected_bands[:3]

            # 4. MNDWI water masking from corrected green + raw SWIR
            mndwi = calculate_mndwi(clean_green, raw_swir)
            water_mask = mndwi > mndwi_threshold

            # 5. Reconstruct the final stack: original bands with deglinted RGB
            final_stack = data.copy()
            final_stack[idx_blue] = clean_blue
            final_stack[idx_green] = clean_green
            final_stack[idx_red] = clean_red

            # Zero every non-water pixel in all bands at once; the 2-D mask is
            # broadcast across the band axis and the write happens in place.
            np.copyto(final_stack, 0, where=~water_mask)

            # 6. Save
            # NOTE(review): with nodata=0, masked pixels and genuine zero-valued
            # water pixels are indistinguishable in the output.
            profile.update(
                dtype='float32',
                nodata=0,
                count=n_bands
            )

            with rasterio.open(current_output_path, 'w', **profile) as dst:
                dst.descriptions = tuple(s2_band)
                dst.write(final_stack)

    except Exception as e:
        # Best-effort batch: report the failing file and continue with the rest.
        print(f"[ERROR] {filename}: {e}")

print("Processing complete. Check 'Plots' folder for Hedley regression graphs.")


Hedley + MNDWI Processing: 100%|██████████| 8/8 [00:09<00:00,  1.13s/it]

Processing complete. Check 'Plots' folder for Hedley regression graphs.



