# Mangrove Biomass Workflow - CWL Compatible

Simplified version for CWL generation using ipython2cwl.

**Inputs:**
- Bounding box coordinates (west, south, east, north)
- Maximum cloud cover threshold (percent)
- Number of days to search backwards from the run date
- Output directory

**Outputs:**
- Biomass summary CSV
- Carbon summary CSV
- Mangrove mask GeoTIFF

In [None]:
# Core imports
import numpy as np
import pandas as pd
import xarray as xr
from datetime import datetime, timedelta
from pystac_client import Client
import stackstac
import rioxarray
import warnings
warnings.filterwarnings('ignore')

In [None]:
# CWL Type annotations
from ipython2cwl.iotypes import (
    CWLFilePathInput,
    CWLFilePathOutput,
    CWLFloatInput,
    CWLIntInput,
    CWLStringInput
)

In [None]:
# Input parameters with CWL type annotations
# The quoted annotations name the ipython2cwl types imported in the previous cell.
# NOTE(review): presumably ipython2cwl turns each annotated assignment into a CWL
# input and the assigned value is only the notebook default — confirm in its docs.
# Bounding box edges; later combined as [west, south, east, north] for the STAC search.
west: 'CWLFloatInput' = 95.15
south: 'CWLFloatInput' = 15.9
east: 'CWLFloatInput' = 95.35
north: 'CWLFloatInput' = 16.1
# Upper bound (exclusive, in percent) applied to each scene's 'eo:cloud_cover'.
cloud_cover_max: 'CWLIntInput' = 20
# Length of the search window in days, counted back from the time of the run.
days_back: 'CWLIntInput' = 90
# Directory that receives the CSV and GeoTIFF outputs; created if missing.
output_dir: 'CWLStringInput' = 'outputs'

In [None]:
# Ensure the output directory exists before any result file is written.
# `os` is imported here and reused by the export cells further down.
import os

os.makedirs(
    output_dir,
    exist_ok=True,  # re-running the notebook must not fail on an existing directory
)
print(f"Output directory: {output_dir}")

In [None]:
# Step 1: Search STAC catalog for Sentinel-2 imagery
# Queries the Earth Search STAC API for Sentinel-2 L2A scenes that intersect the
# bounding box, fall inside the last `days_back` days, and have cloud cover
# strictly below `cloud_cover_max`. Raises ValueError when nothing matches.
print("Searching for Sentinel-2 scenes...")
print(f"Bounds: ({west}, {south}) to ({east}, {north})")
print(f"Max cloud cover: {cloud_cover_max}%")

catalog = Client.open('https://earth-search.aws.element84.com/v1')
bbox = [west, south, east, north]

# Search window ends at the moment this cell runs and reaches back `days_back` days.
end_date = datetime.now()
start_date = end_date - timedelta(days=days_back)
date_range = '/'.join((start_date.isoformat(), end_date.isoformat()))

search = catalog.search(
    collections=['sentinel-2-l2a'],
    bbox=bbox,
    datetime=date_range,
    query={'eo:cloud_cover': {'lt': cloud_cover_max}},
)

# Materialise the results so they can be counted and reused by later steps.
items = list(search.items())
print(f"Found {len(items)} scenes")

if not items:
    raise ValueError(f"No scenes found with <{cloud_cover_max}% cloud cover")

In [None]:
# Step 2: Download best scene (lowest cloud cover)
# Picks the least cloudy scene from `items`, loads its red/green/nir bands on a
# 0.0001-degree EPSG:4326 grid restricted to the bounding box, and clips the
# result to `bbox`.

# Scenes that lack an 'eo:cloud_cover' property are ranked as if 100% cloudy.
best_item = min(items, key=lambda x: x.properties.get('eo:cloud_cover', 100))
print(f"Selected scene: {best_item.datetime.strftime('%Y-%m-%d')}")

# Only apply the numeric format when a value exists: formatting the 'N/A'
# fallback string with ':.1f' raises ValueError.
best_cloud_cover = best_item.properties.get('eo:cloud_cover')
best_cloud_cover_text = (
    f"{best_cloud_cover:.1f}%" if best_cloud_cover is not None else "N/A"
)
print(f"Cloud cover: {best_cloud_cover_text}")

print("Downloading bands (red, green, nir)...")
sentinel2_lazy = stackstac.stack(
    [best_item],
    assets=['red', 'green', 'nir'],
    epsg=4326,
    resolution=0.0001,  # ~10m
    # Limit the output grid to the study area so that compute() does not
    # materialise the entire scene footprint at 0.0001-degree resolution.
    bounds_latlon=tuple(bbox),
    chunksize=(1, 1, 2048, 2048)
)

# Compute and clip to bounds
sentinel2_full = sentinel2_lazy.compute()
# y is sliced north -> south (bbox[3] to bbox[1]); this assumes the y coordinate
# is stored in descending order, as is usual for north-up rasters.
sentinel2_data = sentinel2_full.sel(
    x=slice(bbox[0], bbox[2]),
    y=slice(bbox[3], bbox[1])
)

print(f"Data shape: {sentinel2_data.shape}")

In [None]:
# Step 3: Extract bands and calculate vegetation indices
# Produces the `red`, `green` and `nir` arrays plus the NDVI, NDWI and SAVI
# index arrays used by the classification and biomass steps.
print("Calculating vegetation indices...")

# Use the first timestep when the data carries a time dimension.
scene = sentinel2_data.isel(time=0) if 'time' in sentinel2_data.dims else sentinel2_data
red, green, nir = (
    scene.sel(band=band_name).values for band_name in ('red', 'green', 'nir')
)

# The 1e-8 term keeps the NDVI/NDWI denominators away from an exact zero.
nir_minus_red = nir - red
ndvi = nir_minus_red / (nir + red + 1e-8)
ndwi = (green - nir) / (green + nir + 1e-8)
# SAVI with soil adjustment factor L = 0.5, hence the (1 + L) = 1.5 multiplier.
savi = (nir_minus_red / (nir + red + 0.5)) * 1.5

ndvi_low, ndvi_high = np.nanmin(ndvi), np.nanmax(ndvi)
print(f"NDVI range: {ndvi_low:.3f} to {ndvi_high:.3f}")

In [None]:
# Step 4: Detect mangroves using threshold classification
# Builds a 0/1 float mask from fixed NDVI/NDWI/SAVI thresholds and converts the
# number of flagged pixels into an area in hectares.
print("Detecting mangroves...")

# Comparisons against NaN are False, so pixels with NaN indices are never flagged.
mangrove_mask = (
    (ndvi > 0.3) &       # Vegetated
    (ndvi < 0.9) &       # Not upland forest
    (ndwi > -0.3) &      # Near water
    (savi > 0.2)         # Adjusted vegetation
).astype(float)

# The raster grid is 0.0001 degrees per pixel in EPSG:4326 (Step 2), so a pixel
# is not exactly 10 m x 10 m: its east-west extent shrinks with cos(latitude).
# Derive the ground area from the study-area latitude instead of assuming 100 m2.
pixel_size_deg = 0.0001       # must match the `resolution` passed to stackstac in Step 2
metres_per_degree = 111_320   # approximate ground length of one degree of latitude
mean_latitude = (south + north) / 2
pixel_height_m = pixel_size_deg * metres_per_degree
pixel_width_m = pixel_height_m * np.cos(np.radians(mean_latitude))
pixel_area_m2 = pixel_width_m * pixel_height_m

mangrove_pixels = np.sum(mangrove_mask)
mangrove_area_ha = (mangrove_pixels * pixel_area_m2) / 10000  # 10,000 m2 per hectare

print(f"Detected mangrove area: {mangrove_area_ha:.1f} hectares")
print(f"Coverage: {(mangrove_pixels / mangrove_mask.size * 100):.1f}% of study area")

In [None]:
# Step 5: Estimate biomass using allometric equation
# Applies a linear NDVI model inside the mangrove mask and derives summary
# statistics over the masked pixels (all zero when no pixel qualifies).
print("Estimating biomass...")

# Allometric model: AGB = 250.5 × NDVI - 75.2
biomass = 250.5 * ndvi - 75.2
# Negative estimates are floored at zero; pixels outside the mask become NaN.
biomass_masked = np.where(mangrove_mask > 0, np.maximum(biomass, 0), np.nan)

valid_biomass = biomass_masked[np.logical_not(np.isnan(biomass_masked))]

if valid_biomass.size == 0:
    print("Warning: No valid biomass estimates")
    mean_biomass = 0
    median_biomass = 0
    max_biomass = 0
    std_biomass = 0
else:
    mean_biomass = np.mean(valid_biomass)
    median_biomass = np.median(valid_biomass)
    max_biomass = np.max(valid_biomass)
    std_biomass = np.std(valid_biomass)

    print(f"Mean biomass: {mean_biomass:.1f} Mg/ha")
    print(f"Median biomass: {median_biomass:.1f} Mg/ha")
    print(f"Max biomass: {max_biomass:.1f} Mg/ha")

In [None]:
# Step 6: Calculate carbon stocks
# Converts per-pixel biomass density (Mg/ha) into total biomass, carbon and
# CO2-equivalent masses for the detected mangrove area.
print("Calculating carbon stocks...")

# Reuse the pixel area from Step 4 rather than repeating the constant, so the
# reported mangrove area and the biomass totals always share one pixel size.
pixel_area_ha = pixel_area_m2 / 10000
total_biomass_mg = np.sum(valid_biomass) * pixel_area_ha if len(valid_biomass) > 0 else 0
carbon_stock_mg = total_biomass_mg * 0.47  # 47% carbon content
co2_equivalent_mg = carbon_stock_mg * 3.67  # CO2 to C ratio

print(f"Total biomass: {total_biomass_mg:,.0f} Mg")
print(f"Carbon stock: {carbon_stock_mg:,.0f} Mg C")
print(f"CO2 equivalent: {co2_equivalent_mg:,.0f} Mg CO2")

In [None]:
# Step 7: Export biomass summary CSV
# Writes a two-column (Metric, Value) table of the area and biomass statistics.
biomass_summary_path: 'CWLFilePathOutput' = os.path.join(output_dir, 'biomass_summary.csv')

# (label, value) pairs in output row order; every value is written with one decimal.
biomass_metrics = [
    ('Mangrove Area (ha)', mangrove_area_ha),
    ('Mean Biomass (Mg/ha)', mean_biomass),
    ('Median Biomass (Mg/ha)', median_biomass),
    ('Max Biomass (Mg/ha)', max_biomass),
    ('Std Deviation (Mg/ha)', std_biomass),
]
biomass_df = pd.DataFrame(
    [{'Metric': label, 'Value': f'{value:.1f}'} for label, value in biomass_metrics]
)

biomass_df.to_csv(biomass_summary_path, index=False)
print(f"Saved: {biomass_summary_path}")

In [None]:
# Step 8: Export carbon summary CSV
# Writes a two-column (Metric, Value) table with the carbon totals and the
# provenance of the scene they were derived from.
carbon_summary_path: 'CWLFilePathOutput' = os.path.join(output_dir, 'carbon_summary.csv')

# Format the cloud cover only when the scene reports one: applying ':.1f' to the
# 'N/A' fallback string raises ValueError and would abort the export.
scene_cloud_cover = best_item.properties.get('eo:cloud_cover')
scene_cloud_cover_text = (
    f'{scene_cloud_cover:.1f}' if scene_cloud_cover is not None else 'N/A'
)

carbon_df = pd.DataFrame([
    {'Metric': 'Total Biomass (Mg)', 'Value': f'{total_biomass_mg:,.0f}'},
    {'Metric': 'Carbon Stock (Mg C)', 'Value': f'{carbon_stock_mg:,.0f}'},
    {'Metric': 'CO2 Equivalent (Mg CO2)', 'Value': f'{co2_equivalent_mg:,.0f}'},
    {'Metric': 'Analysis Date', 'Value': datetime.now().strftime('%Y-%m-%d')},
    {'Metric': 'Scene Date', 'Value': best_item.datetime.strftime('%Y-%m-%d')},
    {'Metric': 'Cloud Cover (%)', 'Value': scene_cloud_cover_text},
    {'Metric': 'Uncertainty', 'Value': '±30%'},
])

carbon_df.to_csv(carbon_summary_path, index=False)
print(f"Saved: {carbon_summary_path}")

In [None]:
# Step 9: Export mangrove mask as GeoTIFF
# Writes the mask as an LZW-compressed EPSG:4326 GeoTIFF when the source data
# has x/y coordinates; otherwise it falls back to a plain CSV next to the
# intended .tif path.
mangrove_mask_path: 'CWLFilePathOutput' = os.path.join(output_dir, 'mangrove_mask.tif')

has_xy_coords = all(axis in sentinel2_data.coords for axis in ('x', 'y'))

if not has_xy_coords:
    print("Warning: Could not export GeoTIFF (missing coordinates)")
    # Without coordinates the mask cannot be georeferenced; dump the raw grid.
    fallback_csv_path = mangrove_mask_path.replace('.tif', '.csv')
    np.savetxt(fallback_csv_path, mangrove_mask, delimiter=',', fmt='%.0f')
    print(f"Saved as CSV instead: {fallback_csv_path}")
else:
    # Reattach the clipped scene's coordinates so the raster is georeferenced.
    mask_coords = {
        'x': sentinel2_data.coords['x'].values,
        'y': sentinel2_data.coords['y'].values,
    }
    mask_xr = xr.DataArray(mangrove_mask, dims=['y', 'x'], coords=mask_coords)
    mask_xr = mask_xr.rio.write_crs('EPSG:4326')
    mask_xr.rio.to_raster(mangrove_mask_path, compress='lzw')
    print(f"Saved: {mangrove_mask_path}")

In [None]:
# Final summary
# Recaps the headline results and where the output files were written.
summary_lines = [
    "\n=== WORKFLOW COMPLETE ===",
    f"Study area: ({west}, {south}) to ({east}, {north})",
    f"Mangrove area: {mangrove_area_ha:.1f} ha",
    f"Mean biomass: {mean_biomass:.1f} Mg/ha",
    f"Total carbon: {carbon_stock_mg:,.0f} Mg C",
    f"\nOutputs saved to: {output_dir}/",
]
for summary_line in summary_lines:
    print(summary_line)