In [1]:
# ============================================================================
# STEP 1: ENVIRONMENT SETUP
# ============================================================================
# Load environment variables and initialize Google Earth Engine

from dotenv import load_dotenv
load_dotenv()

import ee, eemont
from forestry_carbon_arr.core import ForestryCarbonARR
from forestry_carbon_arr.utils.zarr_utils import save_dataset_efficient_zarr, load_dataset_zarr

import gcsfs
import os

fs = gcsfs.GCSFileSystem(project=os.getenv("GOOGLE_CLOUD_PROJECT"), token='/usr/src/app/user_id.json')

forestry = ForestryCarbonARR(config_path='./00_input/korindo.json')
forestry.initialize_gee()

âœ“ GEE Initialized successfully
  Credentials Path: /usr/src/app/user_id.json - loaded successfully


In [7]:
# ============================================================================
# STEP 2: CREATE SPECTRAL INDICES TABLE WITH LATEX FORMULAS
# ============================================================================
# eemont uses the Awesome Spectral Indices list: https://github.com/awesome-spectral-indices

import pandas as pd
import re

# Band name mapping: short -> descriptive (for readable formulas)
# NOTE: For LaTeX symbols, we use placeholders first, then replace after regex processing
BAND_MAPPING = {
    'N': 'nir',
    'R': 'red',
    'G': 'green', 
    'B': 'blue',
    'S1': 'swir1',
    'S2': 'swir2',
    'RE1': 'redE1',
    'RE2': 'redE2',
    'RE3': 'redE3',
    'RE4': 'redE4',
    'A': 'aerosol',
    'T1': 'thermal1',
    'T2': 'thermal2',
    'L': 'L',           # soil brightness correction factor
    'C1': 'C1',         # atmospheric resistance coefficients
    'C2': 'C2',
    'g': 'g',           # gain factor
    'gamma': 'GAMMA_PLACEHOLDER',      # will be replaced with \gamma later
    'alpha': 'ALPHA_PLACEHOLDER',      # will be replaced with \alpha later
    'sla': 'sla',
    'slb': 'slb',
    'PAR': 'PAR',
    'lambdaN': 'LAMBDA_NIR_PLACEHOLDER',
    'lambdaR': 'LAMBDA_RED_PLACEHOLDER',
    'lambdaG': 'LAMBDA_GREEN_PLACEHOLDER',
}

# LaTeX replacements (applied after regex processing)
LATEX_SYMBOLS = {
    'GAMMA_PLACEHOLDER': r'\gamma',
    'ALPHA_PLACEHOLDER': r'\alpha',
    'LAMBDA_NIR_PLACEHOLDER': r'\lambda_{nir}',
    'LAMBDA_RED_PLACEHOLDER': r'\lambda_{red}',
    'LAMBDA_GREEN_PLACEHOLDER': r'\lambda_{green}',
}

def convert_formula_to_latex(formula: str) -> str:
    """
    Convert eemont formula to LaTeX format with readable band names.
    Output is ready for Google Docs equation rendering.
    """
    result = formula
    
    # Sort by length descending to replace longer names first (e.g., RE1 before R)
    for short, replacement in sorted(BAND_MAPPING.items(), key=lambda x: -len(x[0])):
        # Use word boundaries to avoid partial replacements
        # Use lambda to return the replacement string (avoids regex escape issues)
        result = re.sub(rf'\b{re.escape(short)}\b', lambda m: replacement, result)
    
    # Convert Python math syntax to LaTeX
    # Handle ** exponents -> ^{}
    result = re.sub(r'\*\*\s*(\d+\.?\d*)', r'^{\1}', result)
    result = re.sub(r'\*\*\s*\(([^)]+)\)', r'^{(\1)}', result)
    
    # Handle sqrt: x ** 0.5 -> \sqrt{x}
    result = re.sub(r'(\([^)]+\))\s*\^{0\.5}', r'\\sqrt{\1}', result)
    result = re.sub(r'(\w+)\s*\^{0\.5}', r'\\sqrt{\1}', result)
    
    # Replace * with \cdot for multiplication (but be careful with **)
    result = result.replace(' * ', ' \\cdot ')
    result = re.sub(r'(\d)\*([a-zA-Z])', r'\1 \\cdot \2', result)
    result = re.sub(r'([a-zA-Z])\*(\d)', r'\1 \\cdot \2', result)
    result = re.sub(r'([a-zA-Z0-9])\*([a-zA-Z])', r'\1 \\cdot \2', result)
    
    # Now replace the placeholders with actual LaTeX symbols
    for placeholder, latex_symbol in LATEX_SYMBOLS.items():
        result = result.replace(placeholder, latex_symbol)
    
    return f"$${result}$$"

# Target indices to check
target_indices = [
    'ARVI', 'BAI', 'EVI', 'ExG', 'ExGR', 'CRI700', 'GEMI', 'GLI', 'GNDVI', 
    'MSR', 'NGRDI', 'NDREI', 'NLI', 'OSAVI', 'RDVI', 'SAVI', 'TVI', 'VIG', 
    'WDRVI', 'AVI', 'SI'
]

# Get all indices from eemont
all_indices = eemont.indices()
available_indices = eemont.listIndices()

# Check which indices exist
print("=== Checking Index Availability ===")
found_indices = []
not_found = []
for idx in target_indices:
    if idx in available_indices:
        found_indices.append(idx)
        print(f"âœ“ {idx}: Found")
    else:
        not_found.append(idx)
        print(f"âœ— {idx}: NOT FOUND")

if not_found:
    print(f"\nâš  Missing indices: {not_found}")
    print("Searching for similar names...")


=== Checking Index Availability ===
âœ“ ARVI: Found
âœ“ BAI: Found
âœ“ EVI: Found
âœ“ ExG: Found
âœ“ ExGR: Found
âœ“ CRI700: Found
âœ“ GEMI: Found
âœ“ GLI: Found
âœ“ GNDVI: Found
âœ“ MSR: Found
âœ“ NGRDI: Found
âœ“ NDREI: Found
âœ“ NLI: Found
âœ“ OSAVI: Found
âœ“ RDVI: Found
âœ“ SAVI: Found
âœ“ TVI: Found
âœ“ VIG: Found
âœ“ WDRVI: Found
âœ“ AVI: Found
âœ“ SI: Found


In [8]:
# # ============================================================================
# # STEP 3: SEARCH FOR SIMILAR INDEX NAMES (for missing ones)
# # ============================================================================

# # Search for similar names for missing indices
# if not_found:
#     for missing in not_found:
#         similar = [idx for idx in available_indices if missing.upper() in idx.upper() or idx.upper() in missing.upper()]
#         if similar:
#             print(f"  '{missing}' -> Similar found: {similar}")
#         else:
#             # Try partial match
#             partial = [idx for idx in available_indices if missing[:3].upper() in idx.upper()]
#             if partial:
#                 print(f"  '{missing}' -> Partial matches: {partial[:5]}")

# # Manual mapping for indices with different names in eemont
# INDEX_ALTERNATIVES = {
#     'ExG': 'ExG',       # Excess Green Index
#     'ExGR': 'ExGR',     # Excess Green minus Excess Red
#     'CRI700': 'CRI1',   # Carotenoid Reflectance Index (might be CRI1 or CRI2)
#     'NDREI': 'NDREI',   # Normalized Difference Red Edge Index (check variations)
#     'TVI': 'TVI',       # Triangular Vegetation Index
#     'VIG': 'VIG',       # Vegetation Index Green
#     'SI': 'SI',         # Shadow Index (might have different name)
# }

# print("\n=== Searching for alternative names ===")
# for orig, alt in INDEX_ALTERNATIVES.items():
#     if orig in not_found:
#         # Search for variations
#         variations = [idx for idx in available_indices if alt in idx or orig[:3] in idx]
#         print(f"  {orig}: Possible alternatives = {variations[:8]}")


In [10]:
# ============================================================================
# STEP 4: CREATE THE FINAL TABLE WITH LATEX FORMULAS
# ============================================================================

# Final list of indices to include (found in eemont + alternatives)
# Some indices may have different names - we'll use what's available
final_indices = [
    'ARVI',    # Atmospherically Resistant Vegetation Index
    'BAI',     # Burned Area Index
    'EVI',     # Enhanced Vegetation Index
    'ExG',     # Excess Green Index
    'ExGR',    # Excess Green minus Excess Red
    'CRI700',    # Carotenoid Reflectance Index 1 (alternative for CRI700)
    'GEMI',    # Global Environment Monitoring Index
    'GLI',     # Green Leaf Index
    'GNDVI',   # Green Normalized Difference Vegetation Index
    'MSR',     # Modified Simple Ratio
    'NGRDI',   # Normalized Green Red Difference Index
    'NDREI',   # Normalized Difference Red Edge Index
    'NLI',     # Non-Linear Vegetation Index
    'OSAVI',   # Optimized Soil Adjusted Vegetation Index
    'RDVI',    # Renormalized Difference Vegetation Index
    'SAVI',    # Soil Adjusted Vegetation Index
    'TVI',     # Triangular Vegetation Index
    'VIG',     # Vegetation Index Green
    'WDRVI',   # Wide Dynamic Range Vegetation Index
    'AVI',     # Ashburn Vegetation Index
    'SI',      # Shadow Index
]

# Build the table data
table_data = []
indices = eemont.indices()

for idx_name in final_indices:
    if idx_name in indices:
        idx = indices[idx_name]
        original_formula = idx.formula
        latex_formula = convert_formula_to_latex(original_formula)
        
        table_data.append({
            'Spectral Index': idx_name,
            'Description': idx.long_name,
            'Formula (LaTeX)': latex_formula,
            'Reference': idx.reference
        })
    else:
        table_data.append({
            'Spectral Index': idx_name,
            'Description': f'NOT FOUND in eemont',
            'Formula (LaTeX)': 'N/A',
            'Reference': 'N/A'
        })

# Create DataFrame
df_table = pd.DataFrame(table_data)
print(f"=== Spectral Indices Table ({len(df_table)} indices) ===\n")
df_table


=== Spectral Indices Table (21 indices) ===



Unnamed: 0,Spectral Index,Description,Formula (LaTeX),Reference
0,ARVI,Atmospherically Resistant Vegetation Index,$$(nir - (red - \gamma \cdot (red - blue))) / ...,https://doi.org/10.1109/36.134076
1,BAI,Burned Area Index,$$1.0 / ((0.1 - red) ^{2.0} + (0.06 - nir) ^{2...,https://digital.csic.es/bitstream/10261/6426/1...
2,EVI,Enhanced Vegetation Index,$$g \cdot (nir - red) / (nir + C1 \cdot red - ...,https://doi.org/10.1016/S0034-4257(96)00112-5
3,ExG,Excess Green Index,$$2 \cdot green - red - blue$$,https://doi.org/10.13031/2013.27838
4,ExGR,ExG - ExR Vegetation Index,$$(2.0 \cdot green - red - blue) - (1.3 \cdot ...,https://doi.org/10.1016/j.compag.2008.03.009
5,CRI700,Carotenoid Reflectance Index using 700 nm,$$(1.0 / blue) - (1.0 / redE1)$$,https://doi.org/10.1562/0031-8655(2002)0750272...
6,GEMI,Global Environment Monitoring Index,$$((2.0*((nir ^{2.0})-(red ^{2.0})) + 1.5 \cdo...,http://dx.doi.org/10.1007/bf00031911
7,GLI,Green Leaf Index,$$(2.0 \cdot green - red - blue) / (2.0 \cdot ...,http://dx.doi.org/10.1080/10106040108542184
8,GNDVI,Green Normalized Difference Vegetation Index,$$(nir - green)/(nir + green)$$,https://doi.org/10.1016/S0034-4257(96)00072-7
9,MSR,Modified Simple Ratio,$$(nir / red - 1) / \sqrt{((nir / red + 1)})$$,https://doi.org/10.1080/07038992.1996.10855178


In [12]:
df_table.to_csv('./01_output/spectral_indices_latex.csv', index=False)

In [11]:
# ============================================================================
# STEP 5: PRINT LATEX FORMULAS FOR GOOGLE DOCS (COPY-PASTE READY)
# ============================================================================

print("=" * 90)
print("SPECTRAL INDICES - LATEX FORMULAS FOR GOOGLE DOCS")
print("Copy the formula between $$ $$ to Google Docs equation editor")
print("=" * 90)

for _, row in df_table.iterrows():
    if row['Formula (LaTeX)'] != 'N/A':
        print(f"\n{'â”€' * 90}")
        print(f"ðŸ“Š {row['Spectral Index']} - {row['Description']}")
        print(f"{'â”€' * 90}")
        print(f"Formula: {row['Formula (LaTeX)']}")
        print(f"Reference: {row['Reference']}")


SPECTRAL INDICES - LATEX FORMULAS FOR GOOGLE DOCS
Copy the formula between $$ $$ to Google Docs equation editor

â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€
ðŸ“Š ARVI - Atmospherically Resistant Vegetation Index
â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€
Formula: $$(nir - (red - \gamma \cdot (red - blue))) / (nir + (red - \gamma \cdot (red - blue)))$$
Reference: https://doi.org/10.1109/36.134076

â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â

In [13]:
# ============================================================================
# STEP 6: EXPORT TABLE TO DIFFERENT FORMATS
# ============================================================================

# Option 1: Save to CSV
df_table.to_csv('spectral_indices_latex.csv', index=False)
print("âœ“ Saved to spectral_indices_latex.csv")

# Option 2: Display as markdown table (for documentation)
print("\n" + "=" * 90)
print("MARKDOWN TABLE FORMAT:")
print("=" * 90)
print(df_table.to_markdown(index=False))

# Option 3: Just the LaTeX formulas (for quick copy-paste)
print("\n" + "=" * 90)
print("JUST THE LATEX FORMULAS (for Google Docs):")
print("=" * 90)
for _, row in df_table.iterrows():
    if row['Formula (LaTeX)'] != 'N/A':
        # Remove the $$ delimiters for easier editing in Google Docs
        formula_clean = row['Formula (LaTeX)'].replace('$$', '')
        print(f"{row['Spectral Index']}: {formula_clean}")


âœ“ Saved to spectral_indices_latex.csv

MARKDOWN TABLE FORMAT:
| Spectral Index   | Description                                  | Formula (LaTeX)                                                                                                                                                                                                              | Reference                                                                           |
|:-----------------|:---------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
| ARVI             | Atmospherically Resistant Vegetation Index   | $$(nir - (red - \gamma \cdot (red - blue))) / (nir + (red - \gamma \cdot (red - blue)))$$                         