In [1]:
!pip install pystac_client
!pip install stackstac
!pip install planetary_computer
!pip install xarray

Collecting pystac_client
  Downloading pystac_client-0.9.0-py3-none-any.whl.metadata (3.1 kB)
Collecting pystac>=1.10.0 (from pystac[validation]>=1.10.0->pystac_client)
  Downloading pystac-1.14.1-py3-none-any.whl.metadata (4.7 kB)
Downloading pystac_client-0.9.0-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pystac-1.14.1-py3-none-any.whl (207 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.7/207.7 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pystac, pystac_client
Successfully installed pystac-1.14.1 pystac_client-0.9.0
Collecting stackstac
  Downloading stackstac-0.5.1-py3-none-any.whl.metadata (8.1 kB)
Collecting rasterio<2.0.0,>=1.3.0 (from stackstac)
  Downloading rasterio-1.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio<2.0.0,>=1.3.0->stacksta

In [2]:
# =========================
# INSTALLATION & IMPORTS
# =========================

import os
import ee
import glob
import geemap
import pprint
import numpy as np
import pandas as pd
import seaborn as sns
import geopandas as gpd
import matplotlib.pyplot as plt
import tensorflow as tf

from tqdm import tqdm
from datetime import datetime
from rasterio.features import rasterize
from rasterio.transform import from_bounds

import torch
import torch.nn as nn

In [3]:
# Initialize Earth Engine
ee.Authenticate()

ee.Initialize(project='ee-chriscandido93')
print("GEE initialized successfully")

GEE initialized successfully


In [4]:
# Check GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

if torch.cuda.is_available():
  print(f"GPU: {torch.cuda.get_device_name(0)}")

Using device: cpu


In [5]:
gpkg_dir = '/content/drive/MyDrive/Bathymetry/GPKG'

bounds_list = []

for file in os.listdir(gpkg_dir):
    if file.endswith('.gpkg'):
        file_path = os.path.join(gpkg_dir, file)
        gdf = gpd.read_file(file_path)

        min_lon, min_lat, max_lon, max_lat = gdf.total_bounds
        bounds_list.append({
            "filename": file,
            "min_lon": min_lon,
            "min_lat": min_lat,
            "max_lon": max_lon,
            "max_lat": max_lat
        })

# convert to DataFrame
bounds_df = pd.DataFrame(bounds_list)
print(bounds_df)

               filename     min_lon    min_lat     max_lon    max_lat
0    Mindanao_tile.gpkg  121.736044   5.351779  126.673972   9.374745
1  NorthLuzon_tile.gpkg  119.506900  14.352658  122.620625  18.775376
2        Sulu_tile.gpkg  118.787441   4.172512  122.789678   7.051896
3     Visayas_tile.gpkg  120.531048   8.836129  126.601240  12.329150
4  SouthLuzon_tile.gpkg  119.328018  11.706214  124.973611  14.706591
5     Palawan_tile.gpkg  115.964644   7.699117  121.547333  12.113016


In [6]:
area = 'Visayas_tile.gpkg'

min_lon = bounds_df[bounds_df['filename'] == area]['min_lon']
min_lat = bounds_df[bounds_df['filename'] == area]['min_lat']
max_lon = bounds_df[bounds_df['filename'] == area]['max_lon']
max_lat = bounds_df[bounds_df['filename'] == area]['max_lat']

min_lon = min_lon.values[0]
min_lat = min_lat.values[0]
max_lon = max_lon.values[0]
max_lat = max_lat.values[0]

### Download Input Dataset (S2)

In [7]:
# =========================
# 1. DEFINE AOI AND TIME RANGE
# =========================

# min_lon, min_lat, max_lon, max_lat = 119.735, 12.516, 121.735, 15.516
aoi = ee.Geometry.Rectangle([min_lon, min_lat, max_lon, max_lat])

start_date = '2025-01-01'
end_date = '2025-05-31'

print(f"\nAOI Bounds: ({min_lon}, {min_lat}) to ({max_lon}, {max_lat})")
print(f"Time Range: {start_date} to {end_date}\n")



AOI Bounds: (120.53104804612197, 8.836129414910001) to (126.601239780195, 12.329149950839279)
Time Range: 2025-01-01 to 2025-05-31



In [8]:
# =========================
# 2. CREATE TILES WITH OVERLAP
# =========================

def create_tiles_with_overlap(min_lon, min_lat, max_lon, max_lat, tile_size, overlap=0.1):
    """
    Create tiles with overlap between adjacent tiles

    Parameters:
    - min_lon, min_lat, max_lon, max_lat: Bounding box coordinates
    - tile_size: Size of each tile in degrees
    - overlap: Overlap percentage between tiles (0.1 = 10% overlap)
    """
    tiles = []
    overlap_distance = tile_size * overlap

    lon = min_lon
    while lon < max_lon:
        lat = min_lat
        while lat < max_lat:
            tile_min_lon = lon
            tile_min_lat = lat
            tile_max_lon = min(lon + tile_size + overlap_distance, max_lon)
            tile_max_lat = min(lat + tile_size + overlap_distance, max_lat)

            tile = ee.Geometry.Rectangle([
                tile_min_lon,
                tile_min_lat,
                tile_max_lon,
                tile_max_lat
            ])
            tiles.append(tile)
            lat += tile_size
        lon += tile_size

    print(f"Generated {len(tiles)} tiles with {overlap*100}% overlap")
    return tiles

# Create tiles with 10% overlap
tiles = create_tiles_with_overlap(min_lon, min_lat, max_lon, max_lat, 0.5, overlap=0.1)

Generated 91 tiles with 10.0% overlap


In [9]:
# =========================
# 3. CLOUD MASKING AND COMPOSITE FUNCTIONS
# =========================

def mask_clouds_and_quality(image):
    """
    Advanced cloud and quality masking for Sentinel-2
    """
    # QA60 cloud masking
    qa = image.select('QA60')
    cloud_bit_mask = 1 << 10
    cirrus_bit_mask = 1 << 11
    qa_mask = (qa.bitwiseAnd(cloud_bit_mask).eq(0)
               .And(qa.bitwiseAnd(cirrus_bit_mask).eq(0)))

    # Additional spectral-based cloud/haze removal
    blue = image.select('B2')
    nir = image.select('B8')
    swir1 = image.select('B11')

    # Remove bright pixels (clouds/haze)
    not_bright = blue.lt(2000)

    # Remove pixels with cloud-like spectral signature
    cloud_score = blue.add(nir).divide(2).subtract(swir1)
    not_cloud = cloud_score.lt(500)

    # Shadow removal (very dark pixels)
    not_shadow = nir.gt(300)

    # Combine all masks
    combined_mask = qa_mask

    # Scale to reflectance
    return (image.updateMask(combined_mask)
            .divide(10000)
            .copyProperties(image, ["system:time_start", "CLOUDY_PIXEL_PERCENTAGE"]))


def add_cloud_score(image):
    """
    Add a custom cloud score to each image
    """
    blue = image.select('B2')
    nir = image.select('B8')
    swir1 = image.select('B11')

    cloud_score = (blue.add(nir).divide(2).subtract(swir1)
                   .divide(blue.add(nir).divide(2))
                   .multiply(100)
                   .rename('cloud_score'))

    return image.addBands(cloud_score)


def mask_to_reef(image):
    """
    Mask image to reef areas using Allen Coral Atlas
    """
    aca = ee.Image("ACA/reef_habitat/v2_0").select("reef_mask")
    reef_mask = aca.eq(1)

    return image.updateMask(reef_mask).copyProperties(image, ["system:time_start"])


def create_composite(aoi, start_date, end_date, max_cloud_cover=30):
    """
    Create improved Sentinel-2 composite with multiple quality filters

    Args:
        aoi: Area of interest
        start_date: Start date (YYYY-MM-DD)
        end_date: End date (YYYY-MM-DD)
        max_cloud_cover: Maximum cloud cover percentage (default: 20)
    """

    print(f"\n{'='*60}")
    print("Creating Improved Sentinel-2 Composite")
    print(f"{'='*60}")

    # Load Sentinel-2 Surface Reflectance
    s2 = (ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
          .filterBounds(aoi)
          .filterDate(start_date, end_date)
          .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', max_cloud_cover))
          .map(mask_to_reef))

    print(f"Initial images: {s2.size().getInfo()}")

    # Apply cloud masking
    s2_masked = s2.map(mask_clouds_and_quality)

    # Add cloud score
    s2_scored = s2_masked.map(add_cloud_score)

    # Percentile composite (reduces outliers)
    composite_percentile = (s2_masked.reduce(ee.Reducer.percentile([75]))
                           .select(['B1_p75','B2_p75','B3_p75','B4_p75',
                                   'B8_p75','B8A_p75','B11_p75','B12_p75'],
                                  ['B1','B2','B3','B4','B8','B8A','B11','B12'])
                           .clip(aoi))

    return {'percentile': composite_percentile}


def enhance_water_clarity(composite):
    """
    Enhance water clarity for reef/coastal imaging
    Apply atmospheric correction and water enhancement
    """

    # Sun glint removal using SWIR
    nir = composite.select('B8')
    swir1 = composite.select('B11')

    # Estimate glint in visible bands
    blue = composite.select('B2')
    green = composite.select('B3')
    red = composite.select('B4')

    # Simple deglinting (for better water penetration)
    slope = 0.9

    blue_deglint = blue.subtract(nir.multiply(slope))
    green_deglint = green.subtract(nir.multiply(slope))
    red_deglint = red.subtract(nir.multiply(slope))

    # Clip to valid range
    blue_deglint = blue_deglint.clamp(0, 1)
    green_deglint = green_deglint.clamp(0, 1)
    red_deglint = red_deglint.clamp(0, 1)

    # Replace bands
    enhanced = composite.addBands(blue_deglint, overwrite=True) \
                        .addBands(green_deglint, ['B3'], True) \
                        .addBands(red_deglint, ['B4'], True)

    return enhanced


def create_best_composite(aoi, start_date, end_date, max_cloud_cover=20):
    """
    Create the best possible composite for coastal/reef areas
    Combines multiple techniques
    """

    print(f"\n{'='*70}")
    print("CREATING OPTIMIZED COMPOSITE FOR COASTAL/REEF AREAS")
    print(f"{'='*70}")
    print(f"Date range: {start_date} to {end_date}")
    print(f"Max cloud cover: {max_cloud_cover}%")

    # Step 1: Get composites
    composites = create_composite(aoi, start_date, end_date, max_cloud_cover)

    # Step 2: Use quality mosaic as base
    base_composite = composites['percentile']

    # Step 3: Enhance water clarity
    print("\nEnhancing water clarity...")
    final_composite = enhance_water_clarity(base_composite)

    print("\n✓ Optimized composite created")

    return final_composite, composites


def visualize_comparison(composites, aoi):

    Map = geemap.Map()
    Map.centerObject(aoi, zoom=12)

    # Visualization parameters for coastal/reef
    water_vis = {
        'bands': ['B4', 'B3', 'B2'],
        'min': 0,
        'max': 0.2,
        'gamma': 1.5
    }

    # Add different composites
    Map.addLayer(composites['percentile'], water_vis, 'Percentile', False)

    # Add enhanced indices for reef analysis
    reef_index = composites['percentile'].normalizedDifference(['B3', 'B4'])
    reef_vis = {'min': -0.5, 'max': 0.5, 'palette': ['brown', 'yellow', 'cyan', 'blue']}
    Map.addLayer(reef_index, reef_vis, 'Reef Index', False)

    # Water depth proxy
    depth_proxy = composites['percentile'].select('B2').divide(composites['percentile'].select('B3'))
    depth_vis = {'min': 0.5, 'max': 2, 'palette': ['darkblue', 'blue', 'cyan', 'yellow']}
    Map.addLayer(depth_proxy, depth_vis, 'Depth Proxy (B2/B3)', False)

    Map.addLayer(ee.Image().paint(aoi, 0, 2), {'palette': 'yellow'}, 'AOI')

    return Map


def complete_reef_composite_workflow(aoi, start_date, end_date):
    """
    Complete workflow for creating best reef/coastal composite

    Args:
        aoi: Area of interest
        start_date: Start date
        end_date: End date
    """

    # Create optimized composite
    final_composite, all_composites = create_best_composite(
        aoi, start_date, end_date, max_cloud_cover=15
    )

    # Get projection info
    projection = final_composite.projection()
    crs = projection.crs().getInfo()
    scale = projection.nominalScale().getInfo()

    print(f"\n{'='*70}")
    print("FINAL COMPOSITE READY")
    print(f"{'='*70}")
    print(f"Projection: {crs}")
    print(f"Scale: {scale}m")
    print(f"Bands: {final_composite.bandNames().getInfo()}")

    # Create comparison map
    Map = visualize_comparison(all_composites, aoi)

    # Add final enhanced version
    final_vis = {
        'bands': ['B4', 'B3', 'B2'],
        'min': 0,
        'max': 0.2,
        'gamma': 1.5
    }
    Map.addLayer(final_composite, final_vis, '★ FINAL ENHANCED', True)

    print("\n✓ Visualization ready")
    print("\nRECOMMENDATIONS:")
    print("  - Use 'FINAL ENHANCED' layer for best results")
    print("  - Adjust gamma (1.2-2.0) for shallow water detail")
    print("  - Lower max value (0.15-0.25) for clearer water")
    print("  - Check 'Reef Index' for substrate classification")
    print(f"{'='*70}\n")

    return final_composite, Map

In [10]:
# =========================
# 4. CREATE COMPOSITE
# =========================

sentinel_composite, Map = complete_reef_composite_workflow(
    aoi=aoi,
    start_date=start_date,
    end_date=end_date
)

# Display map
Map


CREATING OPTIMIZED COMPOSITE FOR COASTAL/REEF AREAS
Date range: 2025-01-01 to 2025-05-31
Max cloud cover: 15%

Creating Improved Sentinel-2 Composite
Initial images: 163

Enhancing water clarity...

✓ Optimized composite created

FINAL COMPOSITE READY
Projection: EPSG:4326
Scale: 111319.49079327357m
Bands: ['B1', 'B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']

✓ Visualization ready

RECOMMENDATIONS:
  - Use 'FINAL ENHANCED' layer for best results
  - Adjust gamma (1.2-2.0) for shallow water detail
  - Lower max value (0.15-0.25) for clearer water
  - Check 'Reef Index' for substrate classification



Map(center=[10.593824156634662, 123.56614391315843], controls=(WidgetControl(options=['position', 'transparent…

In [11]:
# =========================
# 5. TFRECORD EXPORT FUNCTIONS
# =========================

def export_composite_as_tfrecord(composite, tiles, output_folder, prefix='sentinel2',
                                  patch_size=256, scale=10):
    """
    Export composite to TFRecord format with tiling to avoid memory limits

    Args:
        composite: ee.Image - The composite image to export
        tiles: list - List of tile geometries
        output_folder: str - Google Drive folder name for output
        prefix: str - Prefix for output files
        patch_size: int - Size of each patch in pixels (default: 256)
        scale: int - Export scale in meters (default: 10m for S2)

    Returns:
        list: Export tasks
    """

    print(f"\n{'='*70}")
    print("EXPORTING COMPOSITE AS TFRECORD")
    print(f"{'='*70}")
    print(f"Number of tiles: {len(tiles)}")
    print(f"Patch size: {patch_size}x{patch_size} pixels")
    print(f"Scale: {scale}m")
    print(f"Output folder: {output_folder}")

    # Get band names
    bands = composite.bandNames().getInfo()
    print(f"Bands: {bands}")

    # Prepare composite for export
    composite_export = composite.select(bands).float()

    # Create export tasks for each tile
    tasks = []

    for i, tile in enumerate(tiles):
        print(f"\n[Tile {i+1}/{len(tiles)}] Preparing export...")

        # Get tile bounds
        coords = tile.bounds().coordinates().getInfo()[0]
        tile_bounds = tile

        # Create export description
        description = f'{prefix}_tile_{i+1:03d}'

        # Configure export parameters
        export_params = {
            'image': composite_export.clip(tile_bounds),
            'description': description,
            'folder': output_folder,
            'fileNamePrefix': description,
            'scale': scale,
            'region': tile_bounds,
            'fileFormat': 'TFRecord',
            'maxPixels': 1e13,
            'formatOptions': {
                'patchDimensions': [patch_size, patch_size],
                'compressed': True,
                'maxFileSize': 104857600  # 100MB per file
            }
        }

        # Create export task
        task = ee.batch.Export.image.toDrive(**export_params)

        # Start the task
        task.start()
        tasks.append(task)

        print(f"   ✓ Task started: {description}")
        print(f"   Status: {task.status()['state']}")

    print(f"\n{'='*70}")
    print(f"✓ All {len(tasks)} export tasks started successfully!")
    print(f"{'='*70}")
    print("\nMONITORING:")
    print("  - Check task status in the next cell")
    print("  - Files will be saved to Google Drive")
    print(f"  - Folder: {output_folder}")
    print("\nNOTE:")
    print("  - Large exports may take several hours")
    print("  - You can close this notebook - tasks run on GEE servers")
    print("  - Monitor progress at: https://code.earthengine.google.com/tasks")

    return tasks


def monitor_export_tasks(tasks):
    """
    Monitor the status of export tasks

    Args:
        tasks: list - List of ee.batch.Task objects
    """
    import time

    print(f"\n{'='*70}")
    print("MONITORING EXPORT TASKS")
    print(f"{'='*70}\n")

    while True:
        states = {}
        for task in tasks:
            state = task.status()['state']
            states[state] = states.get(state, 0) + 1

        print(f"[{datetime.now().strftime('%H:%M:%S')}] Task Status:")
        for state, count in states.items():
            print(f"  {state}: {count}")

        # Check if all tasks are completed or failed
        if all(task.status()['state'] in ['COMPLETED', 'FAILED', 'CANCELLED']
               for task in tasks):
            break

        print("\nRefreshing in 60 seconds...")
        time.sleep(60)
        print("\n" + "-"*70)

    print(f"\n{'='*70}")
    print("✓ ALL TASKS FINISHED")
    print(f"{'='*70}\n")

    # Print final summary
    final_states = {}
    for task in tasks:
        state = task.status()['state']
        final_states[state] = final_states.get(state, 0) + 1

    print("FINAL SUMMARY:")
    for state, count in final_states.items():
        emoji = "✓" if state == "COMPLETED" else "✗"
        print(f"  {emoji} {state}: {count}")


def export_single_tile_tfrecord(composite, tile_index, tile, output_folder,
                                  prefix='sentinel2', patch_size=256, scale=10):
    """
    Export a single tile as TFRecord (useful for testing or re-exporting failed tiles)

    Args:
        composite: ee.Image - The composite image
        tile_index: int - Tile number (for naming)
        tile: ee.Geometry - Tile geometry
        output_folder: str - Google Drive folder
        prefix: str - Prefix for filename
        patch_size: int - Patch size in pixels
        scale: int - Export scale in meters

    Returns:
        ee.batch.Task - Export task
    """

    bands = composite.bandNames().getInfo()
    composite_export = composite.select(bands).float()

    coords = tile.bounds().coordinates().getInfo()[0]
    tile_bounds = tile

    description = f'{prefix}_tile_{tile_index:03d}'

    export_params = {
        'image': composite_export.clip(tile_bounds),
        'description': description,
        'folder': output_folder,
        'fileNamePrefix': description,
        'scale': scale,
        'region': tile_bounds,
        'fileFormat': 'TFRecord',
        'maxPixels': 1e13,
        'formatOptions': {
            'patchDimensions': [patch_size, patch_size],
            'compressed': True,
            'maxFileSize': 104857600
        }
    }

    task = ee.batch.Export.image.toDrive(**export_params)
    task.start()

    print(f"✓ Task started: {description}")
    return task

In [12]:
# =========================
# 6. RUN EXPORT
# =========================

# Configure export parameters
OUTPUT_FOLDER = 'COASTALTFRecord'  # Google Drive folder name
PREFIX = 'S2_composite_2025'
PATCH_SIZE = 256  # 256x256 pixel patches
SCALE = 10  # 10m resolution for Sentinel-2

# Start export
export_tasks = export_composite_as_tfrecord(
    composite=sentinel_composite,
    tiles=tiles,
    output_folder=OUTPUT_FOLDER,
    prefix=PREFIX,
    patch_size=PATCH_SIZE,
    scale=SCALE
)

print(f"\n{'='*70}")
print(f"Total tasks created: {len(export_tasks)}")
print(f"{'='*70}")


# =========================
# 7. MONITOR EXPORT PROGRESS
# =========================

# Option 1: Monitor all tasks (will refresh every 60 seconds)
# Uncomment to use:
# monitor_export_tasks(export_tasks)

# Option 2: Check current status once
print("\nCURRENT TASK STATUS:")
print(f"{'='*70}\n")

states = {}
for i, task in enumerate(export_tasks):
    status = task.status()
    state = status['state']
    states[state] = states.get(state, 0) + 1

    if state == 'FAILED':
        print(f"Task {i+1}: {state} - {status.get('error_message', 'Unknown error')}")
    else:
        print(f"Task {i+1}: {state}")

print(f"\n{'='*70}")
print("SUMMARY:")
for state, count in states.items():
    print(f"  {state}: {count}")
print(f"{'='*70}\n")

print("TIP: Run this cell again to check updated status")
print("Or visit: https://code.earthengine.google.com/tasks")


# =========================
# 8. EXPORT SINGLE TILE (if needed)
# =========================

# Use this if you need to re-export a specific tile that failed
# Example: Export tile 5

# TILE_TO_EXPORT = 5  # Change this to the tile number you want
#
# single_task = export_single_tile_tfrecord(
#     composite=sentinel_composite,
#     tile_index=TILE_TO_EXPORT,
#     tile=tiles[TILE_TO_EXPORT-1],  # -1 because list is 0-indexed
#     output_folder=OUTPUT_FOLDER,
#     prefix=PREFIX,
#     patch_size=PATCH_SIZE,
#     scale=SCALE
# )
#
# print(f"\nCheck status: {single_task.status()}")



EXPORTING COMPOSITE AS TFRECORD
Number of tiles: 91
Patch size: 256x256 pixels
Scale: 10m
Output folder: COASTALTFRecord
Bands: ['B1', 'B2', 'B3', 'B4', 'B8', 'B8A', 'B11', 'B12']

[Tile 1/91] Preparing export...
   ✓ Task started: S2_composite_2025_tile_001
   Status: READY

[Tile 2/91] Preparing export...
   ✓ Task started: S2_composite_2025_tile_002
   Status: READY

[Tile 3/91] Preparing export...
   ✓ Task started: S2_composite_2025_tile_003
   Status: READY

[Tile 4/91] Preparing export...
   ✓ Task started: S2_composite_2025_tile_004
   Status: READY

[Tile 5/91] Preparing export...
   ✓ Task started: S2_composite_2025_tile_005
   Status: READY

[Tile 6/91] Preparing export...
   ✓ Task started: S2_composite_2025_tile_006
   Status: READY

[Tile 7/91] Preparing export...
   ✓ Task started: S2_composite_2025_tile_007
   Status: READY

[Tile 8/91] Preparing export...
   ✓ Task started: S2_composite_2025_tile_008
   Status: READY

[Tile 9/91] Preparing export...
   ✓ Task started: