In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import ee, eemont
from forestry_carbon_arr.core import ForestryCarbonARR
from forestry_carbon_arr.utils.zarr_utils import save_dataset_efficient_zarr, load_dataset_zarr

import gcsfs
import os

fs = gcsfs.GCSFileSystem(project=os.getenv("GOOGLE_CLOUD_PROJECT"), token='/usr/src/app/user_id.json')


forestry = ForestryCarbonARR(config_path='./00_input/korindo.json')
forestry.initialize_gee()

‚úì GEE Initialized successfully
  Credentials Path: /usr/src/app/user_id.json - loaded successfully


In [3]:
# aoi
from forestry_carbon_arr.core.utils import DataUtils
import geopandas as gpd
import geemap

data_utils = DataUtils(forestry.config, use_gee=True)
aoi_gpd, aoi_ee = data_utils.load_geodataframe_gee(forestry.config["AOI_path"])

aoi_gpd_utm = aoi_gpd.to_crs(epsg=32749)

print(f"‚úÖ AOI loaded: {len(aoi_gpd_utm)} features")
print(f"   Area: {aoi_gpd_utm.geometry.area.sum()/10000:.2f} hectares")

  import pkg_resources


‚úÖ AOI loaded: 1 features
   Area: 144217.67 hectares


In [4]:
### load the exported gee image
asset_monthly_interpolated = 'projects/remote-sensing-476412/assets/korindo_smooth_monthly'

In [5]:
# !pip install lt-gee-py

In [6]:
import ee, eemont
from ltgee import LandTrendr

In [7]:
use_yearly_ee = True

def annual_col_median(img_col, years):
    def per_year(year):
        start = ee.Date.fromYMD(year, 1, 1)
        end = start.advance(1, 'year')
        return (
            img_col
            .filterDate(start, end)
            .median()
            .set('year', year)
            .set('system:time_start', start.millis())
        )
    return ee.ImageCollection(years.map(per_year))

monthly_agg = ee.ImageCollection(asset_monthly_interpolated)

# Server-side list of unique years derived from system:time_start
year_list = (
    monthly_agg
        .aggregate_array('system:time_start')         # ee.List of millis
        .map(lambda ts: ee.Date(ts).get('year'))      # convert to year
        .distinct()                                   # keep unique values
        .sort()                                       # optional
)

ee_col_year_median = annual_col_median(monthly_agg, year_list)

year_list_c = year_list.getInfo()
print(year_list_c)

[2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]


In [8]:
# ee_col_year_median.first().bandNames().getInfo()

In [9]:
# ee_col_year_median.first().propertyNames().getInfo()

In [10]:
# ee_col_year_median.first().get('system:time_start').getInfo()

In [11]:
# ### wmts

# ## skip this for gcp docker environment (visual is not needed yet into fastapi)

# from wmts_manager import WMTSManager

# wmts = WMTSManager(project_name=forestry.config['project_name'], aoi=aoi_ee.geometry())

# # Filter for August 2025
# aug_2025_img = ee_col_year_median.filterDate('2025-01-01', '2025-12-31').first()

# # Use a descriptive layer name (no getInfo needed)
# layer_name = 'annual_median_aug_2025'

# wmts.addLayer(
#     aug_2025_img, 
#     {
#         'bands': ['swir2', 'nir', 'red'],
#         'min': 0,
#         'max': 0.6,
#         'gamma': 1.5
#     }, 
#     layer_name
# )

# wmts.publish()

In [12]:
# ee_col_year_median

In [13]:
### look for the availability of the spectral indices in the eemont
# Dynamically get all available spectral indices from eemont-osi
# import sys
# sys.path.insert(0, '/usr/src/app/eemont-osi')
import ee_extra.Spectral.core as spec_core

# Get all available indices dynamically
indices_dict = spec_core.indices(online=False)
spectral_indices_awesome_list = sorted(list(indices_dict.keys()))

print(f"‚úÖ Loaded {len(spectral_indices_awesome_list)} spectral indices from eemont-osi")
print(f"\nFirst 10 indices: {spectral_indices_awesome_list[:10]}")
print(f"\nLast 10 indices: {spectral_indices_awesome_list[-10:]}")
print(f"\n‚úÖ List ready to use: spectral_indices_awesome_list")

# Function to get formula and metadata for any spectral index
def get_index_info(index_name):
    """
    Get formula and metadata for a spectral index.
    
    Parameters:
    -----------
    index_name : str
        Name of the spectral index (e.g., 'NDVI', 'EVI', 'SAVI')
    
    Returns:
    --------
    dict : Dictionary containing:
        - formula: Mathematical formula string
        - long_name: Full name of the index
        - bands: List of bands used (N=NIR, R=Red, G=Green, B=Blue, S1=SWIR1, S2=SWIR2, RE1-4=Red Edge)
        - application_domain: Category (vegetation, water, burn, etc.)
        - platforms: Supported satellite platforms
        - reference: Reference URL or DOI
    """
    index_name_upper = index_name.upper()
    
    if index_name_upper not in indices_dict:
        available = [idx for idx in spectral_indices_awesome_list if index_name.upper() in idx.upper()]
        raise ValueError(
            f"Index '{index_name}' not found. "
            f"Did you mean: {available[:5] if available else 'None'}?"
        )
    
    info = indices_dict[index_name_upper].copy()
    return info

def formula(index_name):
    """
    Get the formula for a spectral index.
    
    Parameters:
    -----------
    index_name : str
        Name of the spectral index (e.g., 'NDVI', 'EVI', 'SAVI')
    
    Returns:
    --------
    str : Mathematical formula string using band abbreviations
         Band abbreviations: N (NIR), R (Red), G (Green), B (Blue), 
         S1 (SWIR1), S2 (SWIR2), RE1-4 (Red Edge 1-4)
    
    Examples:
    --------
    >>> formula('NDVI')
    '(N - R)/(N + R)'
    
    >>> formula('EVI')
    'G * ((N - R) / (N + C1 * R - C2 * B + L))'
    """
    info = get_index_info(index_name)
    return info['formula']

# Function to convert formula band abbreviations to OSI band names
def formula_to_osi_bands(formula_str):
    """
    Convert eemont-osi formula band abbreviations to OSI band names.
    
    Mapping:
    - N (NIR) -> nir
    - R (Red) -> red
    - G (Green) -> green
    - B (Blue) -> blue
    - S1 (SWIR1) -> swir1
    - S2 (SWIR2) -> swir2
    - RE1 (Red Edge 1) -> redE1
    - RE2 (Red Edge 2) -> redE2
    - RE3 (Red Edge 3) -> redE3
    - RE4 (Red Edge 4) -> redE4
    - Variables (g, C1, C2, L, etc.) remain as-is
    
    Parameters:
    -----------
    formula_str : str
        Formula string from eemont-osi (e.g., "(N - R)/(N + R)")
    
    Returns:
    --------
    str : Formula with OSI band names (e.g., "(nir - red)/(nir + red)")
    """
    import re
    
    # Mapping from eemont-osi abbreviations to OSI band names
    band_mapping = {
        'N': 'nir',      # Near Infrared
        'R': 'red',      # Red
        'G': 'green',    # Green
        'B': 'blue',     # Blue
        'S1': 'swir1',   # Shortwave Infrared 1
        'S2': 'swir2',   # Shortwave Infrared 2
        'RE1': 'redE1',  # Red Edge 1
        'RE2': 'redE2',  # Red Edge 2
        'RE3': 'redE3',  # Red Edge 3
        'RE4': 'redE4',  # Red Edge 4
    }
    
    # Sort by length (longest first) to avoid partial matches (e.g., RE1 before R)
    sorted_bands = sorted(band_mapping.keys(), key=len, reverse=True)
    
    result = formula_str
    
    # Replace band abbreviations with OSI names
    # Use word boundaries to avoid replacing partial matches in variables
    for abbrev in sorted_bands:
        osi_name = band_mapping[abbrev]
        # Use regex to match whole words only (not part of other words)
        # Pattern: \b matches word boundary, but we need to handle cases like "RE1" in "RE1*RE2"
        pattern = r'\b' + re.escape(abbrev) + r'\b'
        result = re.sub(pattern, osi_name, result)
    
    return result

def formula_osi(index_name):
    """
    Get the formula for a spectral index with OSI band names.
    
    Parameters:
    -----------
    index_name : str
        Name of the spectral index (e.g., 'NDVI', 'EVI', 'NBR')
    
    Returns:
    --------
    str : Mathematical formula string using OSI band names
         (nir, red, green, blue, swir1, swir2, redE1-4)
    
    Examples:
    --------
    >>> formula_osi('NDVI')
    '(nir - red)/(nir + red)'
    
    >>> formula_osi('NBR')
    '(nir - swir2)/(nir + swir2)'
    """
    formula_orig = formula(index_name)
    return formula_to_osi_bands(formula_orig)

# Example usage
print("\n" + "="*60)
print("Example: Getting formula for NDVI and NBR")
print("="*60)
try:
    # NDVI example
    ndvi_formula = formula('NDVI')
    ndvi_formula_osi = formula_osi('NDVI')
    ndvi_info = get_index_info('NDVI')
    print(f"\nNDVI:")
    print(f"  Original Formula: {ndvi_formula}")
    print(f"  OSI Band Names: {ndvi_formula_osi}")
    print(f"  Long Name: {ndvi_info['long_name']}")
    print(f"  Bands: {ndvi_info['bands']}")
    
    # NBR example (shows S2 -> swir2)
    nbr_formula = formula('NBR')
    nbr_formula_osi = formula_osi('NBR')
    nbr_info = get_index_info('NBR')
    print(f"\nNBR (Normalized Burn Ratio):")
    print(f"  Original Formula: {nbr_formula}")
    print(f"  OSI Band Names: {nbr_formula_osi}")
    print(f"  Long Name: {nbr_info['long_name']}")
    print(f"  Bands: {nbr_info['bands']} (S2 = SWIR2)")
    print(f"  Domain: {nbr_info['application_domain']}")
    
except Exception as e:
    print(f"Error: {e}")



‚úÖ Loaded 253 spectral indices from eemont-osi

First 10 indices: ['AFRI1600', 'AFRI2100', 'ANDWI', 'ARI', 'ARI2', 'ARVI', 'ATSAVI', 'AVI', 'AWEInsh', 'AWEIsh']

Last 10 indices: ['kNDVI', 'kRVI', 'kVARI', 'mND705', 'mSR705', 'sNIRvLSWI', 'sNIRvNDPI', 'sNIRvNDVILSWIP', 'sNIRvNDVILSWIS', 'sNIRvSWIR']

‚úÖ List ready to use: spectral_indices_awesome_list

Example: Getting formula for NDVI and NBR

NDVI:
  Original Formula: (N - R)/(N + R)
  OSI Band Names: (nir - red)/(nir + red)
  Long Name: Normalized Difference Vegetation Index
  Bands: ['N', 'R']

NBR (Normalized Burn Ratio):
  Original Formula: (N - S2) / (N + S2)
  OSI Band Names: (nir - swir2) / (nir + swir2)
  Long Name: Normalized Burn Ratio
  Bands: ['N', 'S2'] (S2 = SWIR2)
  Domain: burn


In [14]:
# spectral_indices_awesome_list

In [15]:
ee_col_year_median_with_indices = ee_col_year_median.spectralIndices(
        index=['EVI', 'GNDVI', 'SAVI','NDVI','NBR','VARI','NDWI','MTVI2'],
        satellite_type='Sentinel',  # OSI-style satellite type
        G=2.5,  # EVI parameters
        C1=6.0,
        C2=7.5,
        L=1.0,  # SAVI parameter
        drop=False  # Keep original bands
)

In [16]:
# ee_col_year_median_with_indices.first().bandNames().getInfo()

In [17]:
# Server-side list of unique years derived from system:time_start
year_list = (
    ee_col_year_median_with_indices
        .aggregate_array('system:time_start')         # ee.List of millis
        .map(lambda ts: ee.Date(ts).get('year'))      # convert to year
        .distinct()                                   # keep unique values
        .sort()                                       # optional
)
year_list_c = year_list.getInfo()
year_list_c

[2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]

In [18]:
# year_list_c[0]

In [19]:
forestry.config

{'project': {'name': 'forestry_carbon_project',
  'region': 'asia',
  'description': 'Forestry Carbon ARR Analysis Project',
  'version': '1.0.0'},
 'gee': {'project_id': None,
  'service_account': None,
  'initialize': True,
  'max_pixels': 10000000000000.0,
  'scale': 30,
  'crs': 'EPSG:4326'},
 'satellite': {'provider': 'Custom',
  'date_range': ['2025-8-1', '2025-8-31'],
  'cloud_cover_threshold': 80,
  'bands': {'Sentinel': ['B2', 'B3', 'B4', 'B8', 'B11', 'B12'],
   'Planet': ['red', 'green', 'blue', 'nir'],
   'Landsat': ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']},
  'composite_method': 'median',
  'mask_clouds': True},
 'ml': {'algorithm': 'gbm',
  'training_samples': 1000,
  'validation_split': 0.2,
  'cross_validation': True,
  'n_folds': 5,
  'random_state': 42,
  'hyperparameter_tuning': True},
 'fcd': {'method': 'pca',
  'thresholds': {'high_forest': 65,
   'yrf_forest': 45,
   'shrub_grass': 45,
   'open_land': 30},
  'apply_smoothing': True,
  'smoothing_kernel': 3},
 'classifi

In [20]:
if use_yearly_ee != True:
    ## do the FCD every year
    from gee_lib.osi.fcd import FCDCalc
    
    list_fcd_year = []
    
    forestry.config['AOI'] = aoi_ee
    
    year_0 = year_list_c[0]
    fcd = FCDCalc(forestry.config, image_mosaick=ee_col_year_median_with_indices.filterDate(f'{year_0}-1-1',f'{year_0}-12-31').first())
    for i, year in enumerate(year_list_c):
        print(f'processing year {year}')
        if i ==0:
            fcd_2_1 = fcd.fcd_calc()['FCD2_1']
        else:
            fcd.image_mosaick = ee_col_year_median_with_indices.filterDate(f'{year}-1-1',f'{year}-12-31').first()
            fcd_2_1 = fcd.fcd_calc()['FCD2_1']
        fcd_2_1 = fcd_2_1.set('year', year).set('system:time_start', ee.Date.fromYMD(year, 1, 1).millis())
        
        list_fcd_year.append(fcd_2_1)

In [21]:
# list_fcd_year[0].bandNames().getInfo()

In [22]:
# list_fcd_year[0].propertyNames().getInfo()

In [23]:
# list_fcd_year[0].get('year')

In [24]:
from wfs_manager import WFSManager

AOI = ee.FeatureCollection('projects/remote-sensing-476412/assets/korindo_with_buffer')

wfs = WFSManager(fastapi_url="http://fastapi:8000", wfs_base_url="http://localhost:8001")
wfs.addLayer(AOI, "AOI Boundary")
wfs.publish()

{'status': 'success',
 'total_layers': 1,
 'successful_layers': 1,
 'failed_layers': 0,
 'layers': {'aoi_boundary': {'status': 'success',
   'layer_name': 'aoi_boundary',
   'fc_url': 'http://fastapi:8000/fc/aoi_boundary',
   'wfs_url': 'http://localhost:8001/wfs?service=WFS&version=1.1.0&request=GetFeature&typename=aoi_boundary',
   'feature_count': 1,
   'response': {'message': "FeatureCollection 'aoi_boundary' created successfully",
    'name': 'aoi_boundary',
    'type': 'FeatureCollection',
    'count': 1}}},
 'service_urls': {'wfs_capabilities': 'http://localhost:8001/wfs?service=WFS&version=1.1.0&request=GetCapabilities',
  'wfs_base': 'http://localhost:8001/wfs'}}

In [25]:
# fcd_2015 = list_fcd_year[0]

# # Use a descriptive layer name (no getInfo needed)
# layer_name = 'fcd_2015'

# wmts.addLayer(
#     fcd_2015, 
#     {'min':0 ,'max':80, 'palette':['ff4c16', 'ffd96c', '39a71d']}, 
#     layer_name
# )

# wmts.publish()


In [26]:
import ee

# Example: Export single image to GCS
def export_image_to_gcs(image, gcs_bucket, gcs_path, scale=10, crs='EPSG:4326', region=None, max_pixels=1e13):
    """
    Export Earth Engine Image to Google Cloud Storage
    
    Parameters:
    -----------
    image : ee.Image
        Earth Engine Image to export
    gcs_bucket : str
        GCS bucket name (e.g., 'my-bucket' or 'gs://my-bucket')
    gcs_path : str
        Path within bucket (e.g., 'exports/fcd_2020.tif')
    scale : float
        Pixel scale in meters (default: 30)
    crs : str
        Coordinate reference system (default: 'EPSG:4326')
    region : ee.Geometry, optional
        Region to export (default: None, uses image bounds)
    max_pixels : int
        Maximum pixels to export (default: 1e9)
    
    Returns:
    --------
    ee.batch.Task : Export task
    """
    # Clean bucket name (remove gs:// if present)
    if gcs_bucket.startswith('gs://'):
        gcs_bucket = gcs_bucket.replace('gs://', '').split('/')[0]
    
    # Full GCS path
    gcs_uri = f"gs://{gcs_bucket}/{gcs_path}"
    
    # Export parameters
    export_params = {
        'image': image,
        'description': gcs_path.split('/')[-1].replace('.tif', ''),  # Task name
        'bucket': gcs_bucket,
        'fileNamePrefix': gcs_path.replace('.tif', ''),  # Path without extension
        'scale': scale,
        'crs': crs,
        'maxPixels': max_pixels,
        'fileFormat': 'GeoTIFF',
        'formatOptions': {
            'cloudOptimized': True  # COG format
        }
    }
    
    # Add region if provided
    if region is not None:
        export_params['region'] = region
    
    # Create export task
    task = ee.batch.Export.image.toCloudStorage(**export_params)
    
    # Start the task
    task.start()
    
    print(f"‚úÖ Export task started: {gcs_uri}")
    print(f"   Task ID: {task.id}")
    
    return task

In [27]:
if use_yearly_ee != True:
    ## collection will be export to the gcs as geotif
    for i, img in enumerate(list_fcd_year):
        year = img.get('year').getInfo()
        
        task = export_image_to_gcs(
            image=img,
            gcs_bucket=f'remote_sensing_saas',
            gcs_path=f'01-korindo/yearly_mosaic_gee/fcd_{year}.tif',
            scale=10,
            crs='EPSG:32749',  # UTM zone for your AOI
            region=aoi_ee.geometry()  # Optional: clip to AOI
        )

In [28]:
### wait until all the fcd is processed (exported)!
# ============================================
# STEP 2: Load directly from GCS when needed
# ============================================
# Later, when you need to use the images:
def load_yearly_images_from_gcs(years, gcs_bucket='remote_sensing_saas', base_path='01-korindo/yearly_mosaic_gee'):
    """
    Load yearly images directly from GCS without ingesting to GEE.
    
    Parameters:
    -----------
    years : list[int]
        List of years to load (e.g., [2020, 2021, 2022])
    gcs_bucket : str
        GCS bucket name
    base_path : str
        Base path in bucket
    
    Returns:
    --------
    ee.ImageCollection : Collection of images loaded from GCS
    """
    images = []
    for year in years:
        gcs_path = f'gs://{gcs_bucket}/{base_path}/fcd_{year}.tif'
        img = ee.Image.loadGeoTIFF(gcs_path)
        img = img.set('year', year)
        images.append(img)
    
    return ee.ImageCollection(images)
fcd_col = load_yearly_images_from_gcs(year_list_c)
fcd_col.first().bandNames().getInfo()

['FCD']

In [29]:
year_list_c[-1]

2025

In [54]:
# ============================================================================
# STEP 3: Run LandTrendr on FCD (already 0-100 scale)
# ============================================================================
# Sort by 'year' property (FCD images have 'year' property, not necessarily system:time_start)
# Also ensure system:time_start is set for LandTrendr compatibility
def add_time_property(img):
    """Add system:time_start from year property if missing"""
    year = ee.Number(img.get('year'))
    time_start = ee.Date.fromYMD(year, 12, 31).millis()
    return img.set('system:time_start', time_start)

fcd_collection_lt = fcd_col.select('FCD').map(add_time_property).sort('system:time_start')

# LandTrendr parameters for FCD (0-100 range)
lt_params_fcd = {
    'maxSegments': 9,
    'spikeThreshold': 0.95,
    'vertexCountOvershoot': 3,
    'preventOneYearRecovery': True,
    'recoveryThreshold': 5,
    'pvalThreshold': 0.05,
    'bestModelProportion': 0.75
}

# Run LandTrendr on FCD
lt_fcd_result = ee.Algorithms.TemporalSegmentation.LandTrendr(
    timeSeries=fcd_collection_lt,
    **lt_params_fcd
)

# Extract LandTrendr band and rename
lt_fcd = lt_fcd_result.select('LandTrendr').rename('FCD_lt')

In [55]:
lt_fcd_result.bandNames().getInfo()

['LandTrendr', 'rmse']

In [56]:
lt_fcd_result.propertyNames().getInfo()

['system:bands', 'system:band_names']

In [57]:
### wmts - hansen tree cover loss ##
# Load Hansen Global Forest Change dataset
gfc = ee.Image("UMD/hansen/global_forest_change_2024_v1_12")



# Select the lossyear band (contains year of loss, 0 = no loss)
tree_loss_year = gfc.select(['lossyear'])
canopyCover = gfc.select(['treecover2000'])

# Create tree cover loss mask (all years with loss, i.e., > 14 & <24)
tree_cover_loss = tree_loss_year.gt(14).selfMask()
treeLoss = tree_cover_loss

# Clip to AOI and visualize in red
tree_cover_loss_red = tree_cover_loss.clip(aoi_ee.geometry())


# #Canopy cover percentage (e.g. 30%), for Indonesia
cc = ee.Number(30)

# #Minimum forest area in pixels (e.g. 3 pixels, ~ 0.27 ha in this example).
pixels = ee.Number(3)

# #Minimum mapping area for tree loss (usually same as the minimum forest area).
lossPixels = pixels

# canopyCover = gfc.select(['treecover2000'])
canopyCoverThreshold = canopyCover.gte(cc).selfMask()

# #Use connectedPixelCount() to get contiguous area.
contArea = canopyCoverThreshold.connectedPixelCount()
# #Apply the minimum area requirement.
minArea = contArea.gte(pixels).selfMask()

treecoverLoss = minArea.And(treeLoss).rename(f'lossfrom_0-24').selfMask()
        
# #Create connectedPixelCount() to get contiguous area.
contLoss = treecoverLoss.connectedPixelCount()
#Apply the minimum area requirement, and get the TCL data ---> minLoss - ACTUAL TCL AREA from Hansen since the year_start_loss
minLoss = contLoss.gte(lossPixels).selfMask()


In [58]:
# ============================================================================
# LANDTRENDR FCD CHANGE DETECTION VISUALIZATION
# ============================================================================
# Extract and visualize change information from FCD time series:
# - Year of Detection (YOD) - when the greatest loss occurred
# - Magnitude of change - how much FCD was lost
# - Duration of change - how long the change took
# - Pre-value and Post-value - FCD before and after change
#
# Implementation matches LT-GEE getChangeMap flow:
# Since native LandTrendr only outputs fitted values (not vertices),
# we process the original time series collection to detect changes.
# This matches how LT-GEE getChangeMap works internally.

import ee
import geemap

print("=" * 70)
print("LANDTRENDR FCD CHANGE DETECTION MAPPING")
print("=" * 70)

# Check what LandTrendr outputs
print("\n1. Checking LandTrendr output structure...")
lt_bands = lt_fcd_result.bandNames().getInfo()
print(f"   LandTrendr output bands: {lt_bands}")

# Note: Native LandTrendr only outputs 'LandTrendr' (fitted) and 'rmse'
# To get change information like LT-GEE, we process the original time series
print("   ‚ÑπÔ∏è  Native LandTrendr doesn't output vertices.")
print("   ‚ÑπÔ∏è  Processing original FCD time series to detect changes...")

# Get start and end years from year_list_c
start_year = min(year_list_c)
end_year = max(year_list_c)
print(f"   Analysis period: {start_year} - {end_year}")

# Process original FCD time series to detect changes
# This matches LT-GEE getChangeMap approach
print("\n2. Processing FCD time series to detect greatest disturbance...")

def get_change_map_from_timeseries(collection, start_year, end_year, change_params=None):
    """
    Extract change map from time series collection.
    Finds greatest loss by comparing consecutive years.
    Matches LT-GEE getChangeMap functionality.
    
    This function processes the original time series (not LandTrendr fitted values)
    to detect changes, since native LandTrendr doesn't output vertices.
    """
    # Default change parameters
    if change_params is None:
        change_params = {
            'delta': 'loss',
            'mag_threshold': 5,
            'dur_max': 10,
            'preval_min': 20
        }
    
    # Convert collection to list for processing
    img_list = collection.toList(collection.size())
    num_images = img_list.size()
    
    # Initialize accumulator with first image
    first_img = ee.Image(img_list.get(0))
    # Extract year from 'year' property (FCD images have 'year' property set)
    # If system:time_start exists, use it; otherwise use 'year' property
    first_year_prop = first_img.get('year')
    # Convert to float and create image
    first_year_num = ee.Number(first_year_prop).toFloat()
    first_year = ee.Image.constant(first_year_num).rename('yod')
    first_fcd = first_img.select('FCD')
    
    # Initialize output (will be updated as we find greater losses)
    initial = ee.Image.cat([
        first_year,
        ee.Image(0).rename('mag'),
        ee.Image(1).rename('dur'),
        first_fcd.rename('preval'),
        first_fcd.rename('postval')
    ])
    
    # Function to compare consecutive images and find max loss
    def process_pair(i, acc):
        i = ee.Number(i)
        acc = ee.Image(acc)
        
        # Get previous and current images
        prev_idx = i.subtract(1)
        prev_img = ee.Image(img_list.get(prev_idx))
        curr_img = ee.Image(img_list.get(i))
        
        # Extract years and FCD values
        # Use 'year' property (FCD images have 'year' property set by load_yearly_images_from_gcs)
        prev_year_prop = prev_img.get('year')
        curr_year_prop = curr_img.get('year')
        
        prev_year_num = ee.Number(prev_year_prop).toFloat()
        curr_year_num = ee.Number(curr_year_prop).toFloat()
        prev_year = ee.Image.constant(prev_year_num).rename('yod')  # Convert Number to Image
        curr_year = ee.Image.constant(curr_year_num)
        prev_fcd = prev_img.select('FCD')
        curr_fcd = curr_img.select('FCD')
        
        # Calculate loss (FCD decrease) and duration
        loss = prev_fcd.subtract(curr_fcd)
        duration = curr_year.subtract(prev_year).rename('dur')
        
        # Create candidate change image
        candidate = ee.Image.cat([
            prev_year,
            loss.rename('mag'),
            duration,
            prev_fcd.rename('preval'),
            curr_fcd.rename('postval')
        ])
        
        # Compare with accumulator - keep the one with greater magnitude
        acc_mag = acc.select('mag')
        cand_mag = candidate.select('mag')
        is_greater = cand_mag.gt(acc_mag)
        
        # Select the greater loss
        result = candidate.where(is_greater, acc)
        return result
    
    # Process all consecutive pairs (start from index 1)
    indices = ee.List.sequence(1, num_images.subtract(1))
    result = indices.iterate(process_pair, initial)
    result = ee.Image(result)
    
    # Apply thresholds (matching LT-GEE changeParams)
    mag_threshold = change_params.get('mag_threshold', 5)
    dur_max = change_params.get('dur_max', 10)
    preval_min = change_params.get('preval_min', 20)
    
    result_mag = result.select('mag')
    result_dur = result.select('dur')
    result_preval = result.select('preval')
    
    # Mask pixels that don't meet criteria
    valid = result_mag.gte(mag_threshold).And(
        result_dur.lte(dur_max)
    ).And(
        result_preval.gte(preval_min)
    )
    
    return result.updateMask(valid)

# Define change parameters (matching LT-GEE example)
change_params = {
    'delta': 'loss',  # Detect loss
    'mag_threshold': 5,  # Minimum 5 FCD points loss
    'dur_max': 10,  # Maximum 10 years duration
    'preval_min': 20  # Minimum 20 FCD points before change
}

print("   Change detection parameters:")
print(f"     - Delta: {change_params['delta']} (detect FCD loss)")
print(f"     - Magnitude threshold: {change_params['mag_threshold']} FCD points")
print(f"     - Duration max: {change_params['dur_max']} years")
print(f"     - Pre-value min: {change_params['preval_min']} FCD points")

# Extract change map from time series
print("\n3. Extracting change information from time series...")
change_img = get_change_map_from_timeseries(
    fcd_collection_lt,
    start_year,
    end_year,
    change_params
)

# Clip to AOI
change_img = change_img.clip(aoi_ee.geometry())

print("   ‚úÖ Change map extracted!")
print(f"   Change map bands: {change_img.bandNames().getInfo()}")

# Set visualization parameters
print("\n3. Setting up visualizations...")

# Color palette for year of detection (similar to example)
yod_palette = ['#9400D3', '#4B0082', '#0000FF', '#00FF00', '#FFFF00', '#FF7F00', '#FF0000']

# Visualization parameters
yod_viz = {
    'min': start_year,
    'max': end_year,
    'palette': yod_palette
}

mag_viz = {
    'min': 0,
    'max': 50,  # FCD range 0-100, so max loss could be up to 100
    'palette': ['white', 'yellow', 'orange', 'red', 'darkred']
}

dur_viz = {
    'min': 1,
    'max': 10,
    'palette': ['blue', 'cyan', 'green', 'yellow', 'red']
}

preval_viz = {
    'min': 0,
    'max': 100,
    'palette': ['brown', 'yellow', 'green', 'darkgreen']
}

print("   ‚úÖ Visualization parameters set!")

# Display information
print("\n4. Change map ready for visualization!")
print(f"   Year range: {start_year} - {end_year}")
print(f"   Available bands: {change_img.bandNames().getInfo()}")

from wmts_manager import WMTSManager
from wfs_manager import WFSManager

AOI = ee.FeatureCollection('projects/remote-sensing-476412/assets/korindo_with_buffer')

wfs = WFSManager(fastapi_url="http://fastapi:8000", wfs_base_url="http://localhost:8001")

wmts = WMTSManager(project_name=forestry.config['project_name'], aoi=aoi_ee.geometry(),  clear_cache_first=True)


# Add AOI boundary
# wmts.addLayer(aoi_ee, {'color': 'blue'}, 'AOI Boundary')
wfs.addLayer(AOI, "AOI Boundary")
wfs.publish()

## wmts
wmts.addLayer(minLoss, {'palette': ['red']}, 'Forest Tree Cover Loss Year')


# Add Year of Detection layer
wmts.addLayer(
    change_img.select('yod'),
    yod_viz,
    'Year of Detection (YOD) - FCD Loss'
)

# Add Magnitude layer
wmts.addLayer(
    change_img.select('mag'),
    mag_viz,
    'Magnitude of Change - FCD Loss'
)

# Add Duration layer
wmts.addLayer(
    change_img.select('dur'),
    dur_viz,
    'Duration of Change (years)'
)

# Add Pre-value layer (FCD before change)
wmts.addLayer(
    change_img.select('preval'),
    preval_viz,
    'Pre-value (FCD before change)'
)

# Add Post-value layer (FCD after change)
wmts.addLayer(
    change_img.select('postval'),
    preval_viz,
    'Post-value (FCD after change)'
)

# Display map
print("   ‚úÖ Map created with all change detection layers!")
print("\n   Map layers:")
print("   1. Year of Detection (YOD) - When FCD loss occurred")
print("   2. Magnitude - How much FCD was lost")
print("   3. Duration - How long the change took")
print("   4. Pre-value - FCD value before change")
print("   5. Post-value - FCD value after change")


wmts.publish()

print("\n" + "=" * 70)
print("‚úÖ LandTrendr FCD Change Detection Map Created and Visualized!")
print("=" * 70)


LANDTRENDR FCD CHANGE DETECTION MAPPING

1. Checking LandTrendr output structure...
   LandTrendr output bands: ['LandTrendr', 'rmse']
   ‚ÑπÔ∏è  Native LandTrendr doesn't output vertices.
   ‚ÑπÔ∏è  Processing original FCD time series to detect changes...
   Analysis period: 2015 - 2025

2. Processing FCD time series to detect greatest disturbance...
   Change detection parameters:
     - Delta: loss (detect FCD loss)
     - Magnitude threshold: 5 FCD points
     - Duration max: 10 years
     - Pre-value min: 20 FCD points

3. Extracting change information from time series...
   ‚úÖ Change map extracted!


INFO:wfs_manager:WFSManager initialized:
INFO:wfs_manager:  FastAPI URL: http://fastapi:8000
INFO:wfs_manager:  WFS Base URL: http://localhost:8001
INFO:wmts_manager:WMTSManager initialized for project: korindo
INFO:wfs_manager:Added vector layer: AOI Boundary -> aoi_boundary
INFO:wfs_manager:Publishing 1 vector layers to WFS...


   Change map bands: ['yod', 'mag', 'dur', 'preval', 'postval']

3. Setting up visualizations...
   ‚úÖ Visualization parameters set!

4. Change map ready for visualization!
   Year range: 2015 - 2025
   Available bands: ['yod', 'mag', 'dur', 'preval', 'postval']


INFO:wfs_manager:‚úÖ Published layer 'aoi_boundary': FeatureCollection 'aoi_boundary' created successfully
INFO:wfs_manager:WFS publishing completed: success
INFO:wfs_manager:  Successful: 1
INFO:wfs_manager:  Failed: 0
INFO:wmts_manager:Added layer: Forest Tree Cover Loss Year
INFO:wmts_manager:Added layer: Year of Detection (YOD) - FCD Loss
INFO:wmts_manager:Added layer: Magnitude of Change - FCD Loss
INFO:wmts_manager:Added layer: Duration of Change (years)
INFO:wmts_manager:Added layer: Pre-value (FCD before change)
INFO:wmts_manager:Added layer: Post-value (FCD after change)
INFO:wmts_manager:Generating map IDs for 6 layers...


   ‚úÖ Map created with all change detection layers!

   Map layers:
   1. Year of Detection (YOD) - When FCD loss occurred
   2. Magnitude - How much FCD was lost
   3. Duration - How long the change took
   4. Pre-value - FCD value before change
   5. Post-value - FCD value after change
Generating GEE Map IDs...


INFO:wmts_manager:AOI processed: {'minx': 111.706493374869, 'miny': -0.457839253478653, 'maxx': 112.109836017418, 'maxy': -0.167195667545911}
INFO:wmts_manager:Publishing 6 layers to WMTS...
INFO:gee_integration:GEE Integration Manager initialized:
INFO:gee_integration:  FastAPI URL: http://fastapi:8000
INFO:gee_integration:  MapStore Config: /usr/src/app/mapstore/configs/localConfig.json
INFO:gee_integration:Processing GEE analysis: korindo
INFO:gee_integration:üßπ Clearing duplicate projects before processing new analysis...
INFO:cache_manager:No existing catalog entries to check for duplicates
INFO:gee_integration:‚úÖ Duplicate clearing successful: 0 duplicates cleared, 0 unique projects kept
INFO:gee_integration:‚úÖ Cache cleared: 0 duplicate entries, kept 0 unique projects
INFO:gee_integration:Using complex layer info for 'Forest Tree Cover Loss Year': ['tile_url', 'name', 'description', 'vis_params']
INFO:gee_integration:Using complex layer info for 'Year of Detection (YOD) - FC

‚úÖ Centroid calculated successfully with error margin 1
Calculated bbox from coordinates: {'minx': 111.706493374869, 'miny': -0.457839253478653, 'maxx': 112.109836017418, 'maxy': -0.167195667545911}
‚úÖ AOI processed successfully:
   - Center: [111.90816469614442, -0.31251872622459265]
   - Area: Unknown
   - BBox: {'minx': 111.706493374869, 'miny': -0.457839253478653, 'maxx': 112.109836017418, 'maxy': -0.167195667545911}


INFO:gee_integration:‚úÖ FastAPI registration successful: MapStore catalog updated successfully
INFO:gee_integration:Creating FastAPI proxy URLs for project: korindo_20251125_082009
INFO:gee_integration:‚úÖ Created 6 FastAPI proxy URLs
INFO:gee_integration:Updating MapStore WMTS: korindo_20251125_082009
INFO:gee_integration:üîÑ Forcing comprehensive WMTS refresh...
INFO:gee_utils:Starting comprehensive WMTS refresh...
INFO:gee_utils:Clearing old WMTS services...
ERROR:gee_utils:Error clearing WMTS services: 'list' object has no attribute 'get'
INFO:gee_utils:Refreshing WMTS capabilities...
INFO:gee_utils:WMTS capabilities refreshed successfully
INFO:gee_utils:Updating MapStore WMTS service...
INFO:gee_utils:Successfully updated MapStore WMTS service: gee_analysis_wmts
INFO:gee_utils:Getting current WMTS layers...
INFO:gee_utils:Comprehensive WMTS refresh completed successfully - Found 6 layers
INFO:gee_integration:‚úÖ MapStore WMTS configuration updated
INFO:gee_integration:   New lay


‚úÖ LandTrendr FCD Change Detection Map Created and Visualized!


<!-- ## gee lib - normalization
from gee_lib.osi.spectral_indices.utils import normalization_100 # using reduce region, computation expensive -->

In [66]:
## adding the spectral indices bands
monthly_agg = ee.ImageCollection(asset_monthly_interpolated)
    
# Server-side list of unique years derived from system:time_start
year_list = (
    monthly_agg
        .aggregate_array('system:time_start')         # ee.List of millis
        .map(lambda ts: ee.Date(ts).get('year'))      # convert to year
        .distinct()                                   # keep unique values
        .sort()                                       # optional
)

ee_col_year_median = annual_col_median(monthly_agg, year_list)

ee_col_year_median_with_indices = ee_col_year_median.spectralIndices(
        index=['EVI', 'GNDVI', 'SAVI','NDVI','NBR','VARI','NDWI','MTVI2'],
        satellite_type='Sentinel',  # OSI-style satellite type
        G=2.5,  # EVI parameters
        C1=6.0,
        C2=7.5,
        L=1.0,  # SAVI parameter
        drop=False  # Keep original bands
    )

# Simple normalization function (no reduceRegion needed!)
def normalize_index_to_100(img, index_name):
    """
    Normalize spectral index to 0-100 scale using theoretical range.
    Most indices are -1 to 1, so we map: -1‚Üí0, 0‚Üí50, 1‚Üí100
    """
    # Most indices: -1 to 1 range ‚Üí 0-100
    if index_name == 'FCD':
        # FCD is already 0-100, just return as-is
        return img.select(index_name).rename(f'{index_name}_norm100')
    else:
        # Normalize from -1 to 1 range to 0-100
        # Formula: (value + 1) / 2 * 100
        return img.expression(
            '(b + 1) / 2 * 100',
            {'b': img.select(index_name)}
        ).rename(f'{index_name}_norm100')

# Apply to ImageCollection
def add_normalized_indices(collection, index_names):
    """Add normalized (0-100) versions of indices to collection"""
    def add_norm_bands(img):
        result = img
        for idx in index_names:
            norm_band = normalize_index_to_100(img, idx)
            result = result.addBands(norm_band)
        return result
    
    return collection.map(add_norm_bands)

# Usage in your notebook
indices = ['EVI', 'GNDVI', 'SAVI', 'NDVI', 'NBR', 'VARI', 'NDWI', 'MTVI2']

# Add normalized bands (0-100 scale)
ee_col_with_norm = add_normalized_indices(
    ee_col_year_median_with_indices,
    indices
)

# Now run LandTrendr on normalized bands with SAME parameters!
def run_landtrendr_normalized(collection, index_name):
    """Run LandTrendr on normalized (0-100) index"""
    norm_band_col = collection.select(f'{index_name}_norm100').sort('system:time_start')
    
    # Now ALL indices use the same parameters!
    lt_result = ee.Algorithms.TemporalSegmentation.LandTrendr(
        timeSeries=norm_band_col,
        maxSegments=6,
        spikeThreshold=0.85,
        vertexCountOvershoot=3,
        preventOneYearRecovery=True,
        recoveryThreshold=5,  # Same for all! (5 points on 0-100 scale)
        pvalThreshold=0.05,
        bestModelProportion=0.75
    )
    
    return lt_result.select('LandTrendr').rename(f'{index_name}_lt')

# Apply to all indices
lt_results_indices= [
    run_landtrendr_normalized(ee_col_with_norm, idx) 
    for idx in indices
]

In [101]:
# ============================================================================
# STEP 4: Combine all LandTrendr results into one feature image
# ============================================================================

all_lt_results = lt_results_indices + [lt_fcd]
lt_combined_all = ee.Image.cat(all_lt_results)

print("‚úÖ All LandTrendr features combined!")
band_names = lt_combined_all.bandNames()
print(f"Feature bands: {band_names.getInfo()}")

years = ee.List(year_list_c)
first_year = ee.Number(years.get(0))

def year_to_image(year):
    year = ee.Number(year)
    idx = year.subtract(first_year)
    # For each LandTrendr band, pull the (time_idx, value_idx=0) element
    per_band = band_names.map(
        lambda b: lt_combined_all
            .select([ee.String(b)])
            .arrayGet([idx, 0])          # specify both axes
            .rename([ee.String(b)])
    )
    # Convert list of single-band images ‚Üí multi-band image for this year
    img = ee.ImageCollection(per_band).toBands().rename(band_names)
    return img.set({
        'year': year,
        'system:time_start': ee.Date.fromYMD(year, 12, 31).millis()
    })

lt_yearly_collection = ee.ImageCollection.fromImages(years.map(year_to_image))
print("LT yearly collection bands:", lt_yearly_collection.first().bandNames().getInfo())

‚úÖ All LandTrendr features combined!
Feature bands: ['EVI_lt', 'GNDVI_lt', 'SAVI_lt', 'NDVI_lt', 'NBR_lt', 'VARI_lt', 'NDWI_lt', 'MTVI2_lt', 'FCD_lt']
LT yearly collection bands: ['EVI_lt', 'GNDVI_lt', 'SAVI_lt', 'NDVI_lt', 'NBR_lt', 'VARI_lt', 'NDWI_lt', 'MTVI2_lt', 'FCD_lt']


In [140]:
lt_yearly_collection.first().bandNames().getInfo()

['EVI_lt',
 'GNDVI_lt',
 'SAVI_lt',
 'NDVI_lt',
 'NBR_lt',
 'VARI_lt',
 'NDWI_lt',
 'MTVI2_lt',
 'FCD_lt']

In [141]:
# Test if yearly images can be evaluated
test_img = lt_yearly_collection.first()
print("Year:", test_img.get('year').getInfo())
print("Bands:", test_img.bandNames().getInfo())

# Try to get a simple property
try:
    test_img.select('EVI_lt').projection().getInfo()
    print("‚úÖ Projection OK")
except Exception as e:
    print(f"‚ùå Projection error: {e}")

Year: 2015
Bands: ['EVI_lt', 'GNDVI_lt', 'SAVI_lt', 'NDVI_lt', 'NBR_lt', 'VARI_lt', 'NDWI_lt', 'MTVI2_lt', 'FCD_lt']
‚úÖ Projection OK


In [159]:
# ============================================================================
# FIX: Correct yearly image extraction with proper array indexing
# ============================================================================

years = ee.List(year_list_c)
first_year = ee.Number(years.get(0))
band_names = lt_combined_all.bandNames()

def year_to_image(year):
    year = ee.Number(year)
    idx = year.subtract(first_year)
    
    # Extract each band separately and ensure it's a scalar
    band_images = []
    for band_name in band_names.getInfo():
        band_img = (
            lt_combined_all
            .select([band_name])
            .arrayGet([idx, 0])
            .rename([band_name])
        )
        band_images.append(band_img)
    
    # Combine all bands into one image
    img = ee.Image.cat(band_images)
    
    # Set projection from reference image
    reference_proj = fcd_col.first().select('FCD').projection()
    img = img.setDefaultProjection(
        crs=reference_proj.crs(),
        scale=reference_proj.nominalScale()
    )
    
    return img.set({
        'year': year,
        'system:time_start': ee.Date.fromYMD(year, 12, 31).millis()
    })

# Recreate yearly collection
print("Recreating yearly collection with corrected indexing...")
yearly_images = []
for year in year_list_c:
    yearly_images.append(year_to_image(year))

lt_yearly_collection = ee.ImageCollection(yearly_images)

# Verify the fix
print(f"\n‚úÖ Yearly collection recreated")
print(f"   Total images: {lt_yearly_collection.size().getInfo()}")
print(f"   First image year: {lt_yearly_collection.first().get('year').getInfo()}")
print(f"   Bands: {lt_yearly_collection.first().bandNames().getInfo()}")

Recreating yearly collection with corrected indexing...



‚úÖ Yearly collection recreated
   Total images: 11
   First image year: 2015
   Bands: ['EVI_lt', 'GNDVI_lt', 'SAVI_lt', 'NDVI_lt', 'NBR_lt', 'VARI_lt', 'NDWI_lt', 'MTVI2_lt', 'FCD_lt']


In [102]:
### input training - groundtruth preparation

gcs_path_df_long = f'gs://{os.getenv('GCS_BUCKET_PATH')}/01-korindo/sample_tsfresh/20251112_df_long.parquet'
training_gdf = gpd.read_parquet(gcs_path_df_long, filesystem=fs)
# Load training data (already have this)
training_gdf = training_gdf.to_crs('EPSG:4326')
training_gdf

Unnamed: 0,layer,time_period,type,date,geometry,year
18,sample_3,t_201603,1,2016-03-15,"POLYGON ((111.75070 -0.28607, 111.75084 -0.286...",2016
18,sample_3,t_201603,1,2016-03-15,"POLYGON ((111.75569 -0.28418, 111.75555 -0.284...",2016
18,sample_3,t_201603,1,2016-03-15,"POLYGON ((111.76850 -0.27782, 111.76855 -0.277...",2016
18,sample_3,t_201603,1,2016-03-15,"POLYGON ((111.76534 -0.27342, 111.76555 -0.273...",2016
18,sample_3,t_201603,1,2016-03-15,"POLYGON ((111.75775 -0.27233, 111.75739 -0.272...",2016
...,...,...,...,...,...,...
5381,sample_1,t_202509,1,2025-09-15,"POLYGON ((111.80652 -0.41045, 111.80644 -0.410...",2025
5381,sample_1,t_202509,1,2025-09-15,"POLYGON ((111.80448 -0.41012, 111.80434 -0.409...",2025
5381,sample_1,t_202509,1,2025-09-15,"POLYGON ((111.81644 -0.41920, 111.81650 -0.419...",2025
5381,sample_1,t_202509,1,2025-09-15,"POLYGON ((111.82105 -0.41238, 111.82092 -0.412...",2025


In [103]:
# training_gdf.columns

In [151]:
import pandas as pd

# ============================================================================
# Process training data: Keep max date per (year, layer, type)
# This preserves both type 0 and type 1 polygons if they exist
# ============================================================================
# Ensure date column is datetime
training_gdf['date'] = pd.to_datetime(training_gdf['date'])

# Ensure year column exists (if not, create it)
if 'year' not in training_gdf.columns:
    training_gdf['year'] = training_gdf['date'].dt.year

# Group by year, layer, AND type - keeps both 0 and 1 if they exist
training_gdf_yearly = (
    training_gdf
    .sort_values('date', ascending=False)  # Sort by date descending
    .groupby(['year', 'layer', 'type'], as_index=False)  # Group by year, location, and type
    .first()  # Take first row (which is max date due to sorting)
)

# Verify the result
print(f"Original training data: {len(training_gdf)} rows")
print(f"Yearly training data: {len(training_gdf_yearly)} rows")
print(f"\nYears covered: {sorted(training_gdf_yearly['year'].unique())}")
print(f"\nSample of yearly data:")
print(training_gdf_yearly[['layer', 'year', 'date', 'type']].head(20))

# Check: one row per (year, layer, type) combination
print(f"\nVerification:")
unique_combos = len(training_gdf_yearly.groupby(['year', 'layer', 'type']))
print(f"Unique (year, layer, type) combinations: {unique_combos}")
print(f"Total rows: {len(training_gdf_yearly)}")

# Fixed assertion: check (year, layer, type) not (year, layer, year)
assert unique_combos == len(training_gdf_yearly), \
    "Should have one row per (year, layer, type) combination"

# Check how many locations have both type 0 and 1 in same year
multi_type = training_gdf_yearly.groupby(['year', 'layer'])['type'].nunique()
multi_type_locations = multi_type[multi_type > 1]
print(f"\nLocations with both type 0 and 1 in same year: {len(multi_type_locations)}")
if len(multi_type_locations) > 0:
    print("Example locations with multiple types:")
    for (year, layer), count in multi_type_locations.head(5).items():
        types = training_gdf_yearly[(training_gdf_yearly['year'] == year) & 
                                     (training_gdf_yearly['layer'] == layer)]['type'].tolist()
        print(f"  {layer} ({year}): types {types}")

# ============================================================================
# Convert to Earth Engine FeatureCollection for sampling
# ============================================================================

# Convert to Earth Engine FeatureCollection
def gdf_to_ee_fc(gdf):
    """Convert GeoDataFrame to Earth Engine FeatureCollection"""
    features = []
    for idx, row in gdf.iterrows():
        geom = ee.Geometry.Polygon(row.geometry.__geo_interface__['coordinates'])
        feature = ee.Feature(
            geom,
            {
                'year': int(row['year']),
                'date': row['date'].strftime('%Y-%m-%d'),
                'type': int(row['type']),  # Label: 1 = forest/trees, 0 = no-trees
                'layer': str(row['layer']),
                'time_period': str(row.get('time_period', ''))
            }
        )
        features.append(feature)
    return ee.FeatureCollection(features)

training_fc = gdf_to_ee_fc(training_gdf_yearly)

print(f"\n‚úÖ Training FeatureCollection created!")
print(f"   Features: {training_fc.size().getInfo()}")
print(f"   Properties: {training_fc.first().propertyNames().getInfo()}")

Original training data: 7470 rows
Yearly training data: 60 rows

Years covered: [2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]

Sample of yearly data:
       layer  year       date  type
0   sample_1  2016 2016-12-15     0
1   sample_1  2016 2016-12-15     1
2   sample_2  2016 2016-12-15     0
3   sample_2  2016 2016-12-15     1
4   sample_3  2016 2016-12-15     0
5   sample_3  2016 2016-12-15     1
6   sample_1  2017 2017-12-15     0
7   sample_1  2017 2017-12-15     1
8   sample_2  2017 2017-12-15     0
9   sample_2  2017 2017-12-15     1
10  sample_3  2017 2017-12-15     0
11  sample_3  2017 2017-12-15     1
12  sample_1  2018 2018-12-15     0
13  sample_1  2018 2018-12-15     1
14  sample_2  2018 2018-12-15     0
15  sample_2  2018 2018-12-15     1
16  sample_3  2018 2018-12-15     0
17  sample_3  2018 2018-12-15     1
18  sample_1  2019 2019-12-15     0
19  sample_1  2019 2019-12-15     1

Verification:
Unique (year, layer, type) combinations: 60
Total rows: 60

Loca

In [153]:
training_gdf_yearly

Unnamed: 0,year,layer,type,time_period,date,geometry
0,2016,sample_1,0,t_201612,2016-12-15,"POLYGON ((111.80505 -0.41069, 111.80543 -0.410..."
1,2016,sample_1,1,t_201612,2016-12-15,"POLYGON ((111.81644 -0.41920, 111.81650 -0.419..."
2,2016,sample_2,0,t_201612,2016-12-15,"POLYGON ((111.81571 -0.44316, 111.81564 -0.443..."
3,2016,sample_2,1,t_201612,2016-12-15,"POLYGON ((111.81485 -0.44718, 111.81488 -0.447..."
4,2016,sample_3,0,t_201612,2016-12-15,"POLYGON ((111.75775 -0.27233, 111.75739 -0.272..."
5,2016,sample_3,1,t_201612,2016-12-15,"POLYGON ((111.76936 -0.27875, 111.76888 -0.278..."
6,2017,sample_1,0,t_201712,2017-12-15,"POLYGON ((111.80993 -0.40996, 111.80979 -0.409..."
7,2017,sample_1,1,t_201712,2017-12-15,"POLYGON ((111.81563 -0.41067, 111.81566 -0.410..."
8,2017,sample_2,0,t_201712,2017-12-15,"POLYGON ((111.81571 -0.44316, 111.81564 -0.443..."
9,2017,sample_2,1,t_201712,2017-12-15,"POLYGON ((111.82594 -0.44139, 111.82620 -0.441..."


In [152]:
lt_yearly_collection.first().bandNames().getInfo()

['EVI_lt',
 'GNDVI_lt',
 'SAVI_lt',
 'NDVI_lt',
 'NBR_lt',
 'VARI_lt',
 'NDWI_lt',
 'MTVI2_lt',
 'FCD_lt']

In [160]:
# ============================================================================
# DIAGNOSTIC: Verify lt_yearly_collection with random test points
# ============================================================================

print("=" * 70)
print("DIAGNOSTIC: LandTrendr Yearly Collection")
print("=" * 70)

# 1. Check collection size and years
print(f"\n1. Collection Info:")
print(f"   Total images: {lt_yearly_collection.size().getInfo()}")
print(f"   Years: {year_list_c}")

# 2. Check first image structure
print(f"\n2. First Image (Year {year_list_c[0]}):")
first_img = lt_yearly_collection.first()
print(f"   Bands: {first_img.bandNames().getInfo()}")
print(f"   Year property: {first_img.get('year').getInfo()}")

# 3. Create random test points within AOI
print(f"\n3. Random Test Points Sampling:")
n_test_points = 5
test_points = ee.FeatureCollection.randomPoints(
    region=aoi_ee.geometry(),
    points=n_test_points,
    seed=42
)

print(f"   Created {n_test_points} random test points")

# 4. Sample from each year at these test points
print(f"\n4. Sample Values per Year (at test points):")
all_bands = first_img.bandNames().getInfo()

for year in year_list_c[:3]:  # Test first 3 years
    year_img = lt_yearly_collection.filter(ee.Filter.eq('year', year)).first()
    
    # Sample at test points
    samples = year_img.select(all_bands).sampleRegions(
        collection=test_points,
        scale=10,
        geometries=False
    )
    
    # Get sample values
    sample_list = samples.getInfo()['features']
    print(f"\n   Year {year} ({len(sample_list)} points):")
    
    # Show values for first 3 bands, first 2 points
    for i, feat in enumerate(sample_list[:2]):
        props = feat['properties']
        print(f"     Point {i+1}:")
        for band in all_bands[:3]:
            val = props.get(band, 'N/A')
            print(f"       {band}: {val:.2f}" if isinstance(val, (int, float)) else f"       {band}: {val}")

# 5. Verify all years have same bands
print(f"\n5. Band Consistency Check:")
for year in year_list_c:
    year_img = lt_yearly_collection.filter(ee.Filter.eq('year', year)).first()
    year_bands = year_img.bandNames().getInfo()
    if set(year_bands) != set(all_bands):
        print(f"   ‚ö†Ô∏è  Year {year}: bands mismatch! Got {year_bands}")
    else:
        print(f"   ‚úì Year {year}: {len(year_bands)} bands match")

# 6. Quick projection check
print(f"\n6. Projection Check:")
try:
    proj = first_img.select('EVI_lt').projection()
    print(f"   CRS: {proj.crs().getInfo()}")
    print(f"   Scale: {proj.nominalScale().getInfo()}")
    print(f"   ‚úÖ Projection OK")
except Exception as e:
    print(f"   ‚ö†Ô∏è  Projection error: {e}")

print(f"\n{'=' * 70}")
print("‚úÖ Diagnostic complete!")
print("=" * 70)

DIAGNOSTIC: LandTrendr Yearly Collection

1. Collection Info:
   Total images: 11
   Years: [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]

2. First Image (Year 2015):
   Bands: ['EVI_lt', 'GNDVI_lt', 'SAVI_lt', 'NDVI_lt', 'NBR_lt', 'VARI_lt', 'NDWI_lt', 'MTVI2_lt', 'FCD_lt']
   Year property: 2015

3. Random Test Points Sampling:
   Created 5 random test points

4. Sample Values per Year (at test points):

   Year 2015 (4 points):
     Point 1:
       EVI_lt: 2014.00
       GNDVI_lt: 2014.00
       SAVI_lt: 2014.00
     Point 2:
       EVI_lt: 2014.00
       GNDVI_lt: 2014.00
       SAVI_lt: 2014.00

   Year 2016 (5 points):
     Point 1:
       EVI_lt: 69.67
       GNDVI_lt: 71.11
       SAVI_lt: 63.36
     Point 2:
       EVI_lt: 53.69
       GNDVI_lt: 61.47
       SAVI_lt: 53.58

   Year 2017 (5 points):
     Point 1:
       EVI_lt: 69.67
       GNDVI_lt: 71.11
       SAVI_lt: 60.78
     Point 2:
       EVI_lt: 53.72
       GNDVI_lt: 61.97
       SAVI_lt: 53.68


In [None]:
## year 2015 above looks weird, because I think there is no data for that, we should look into it on how the transformation of array into image
## this takes time, using ML GEE takes time to extract the data 
## smoothing is already done, it seem when we do the landtrendr, we're doing another layer of smoothing which not necessary,
## landtrendr can be applied if the data directly used with cloud masking, and not smoothing I think,
## fcd on the other hand, is already smoothed, so we don't need to do that,
## let's do tsfresh

In [156]:
## error here, and decided not continue with landtrendr ML, landtrendr is already done with param, but result is not very good
# band_list = lt_yearly_collection.first().bandNames()

# def sample_year_image(img):
#     year = ee.Number(img.get('year'))
    
#     # Reproject to EPSG:4326 for consistent sampling
#     img_reproj = img.reproject(crs='EPSG:4326', scale=10)

#     fc = img_reproj.sampleRegions(
#         collection=training_fc,
#         scale=10,
#         geometries=True,
#         tileScale=2
#     ).filter(ee.Filter.notNull(band_list)) \
#      .map(lambda feat: feat.set('year', year))

#     return fc

# samples_fc = ee.FeatureCollection(
#     lt_yearly_collection.map(sample_year_image)
# ).flatten()

# # Test
# print("Total features:", samples_fc.size().getInfo())

In [137]:
## CHECK
import geemap

# band_list = lt_yearly_collection.first().bandNames().getInfo()
# properties = band_list + ['layer', 'type', 'date', 'year']

# Convert Earth Engine FeatureCollection to GeoDataFrame
samples_gdf = geemap.ee_to_df(samples_fc,
    # crs="EPSG:4326",            # skip EE projection lookup
    # selectors=properties        # optional: only keep these fields
)

print(samples_gdf.head())
print(len(samples_gdf), "rows")

Exception: Array index 0 out of bounds. Expected value between -4 and 3, found 8.