# In [None]:
import os
import ee
import geemap
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
import xgboost as xgb

# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
# Date window handed to filterDate() for the Landsat, Sentinel-1, MODIS ET
# and CHIRPS collections below.
START_DATE = '2022-07-01'
END_DATE = '2022-10-01'
# Placeholder input paths: main() loads the study-area boundary from the
# first and the soil samples from the second.
BOUNDARY_PATH = '/path/to/bdy.shp'
SOIL_DATA_PATH = '/path/to/soil_samples.csv'
# Directory that main() creates and writes training_samples.csv into.
OUTPUT_DIR = 'outputs'

# Earth Engine asset IDs of the predictor sources used by the process_* and
# build_feature_stack functions.
LANDSAT_COLLECTION = 'LANDSAT/LC08/C02/T1_L2'
SENTINEL1_COLLECTION = 'COPERNICUS/S1_GRD'
MODIS_ET_COLLECTION = 'MODIS/061/MOD16A2'
CHIRPS_COLLECTION = 'UCSB-CHG/CHIRPS/DAILY'
SRTM = 'USGS/SRTMGL1_003'
WORLDCOVER = 'ESA/WorldCover/v200'

# NOTE(review): presumably a scene-level cloud-cover limit in percent --
# confirm the units and where it is meant to be applied.
CLOUD_THRESHOLD = 70

# ----------------------------------------------------------------------
# Helper functions
# ----------------------------------------------------------------------

def initialize_gee() -> None:
    """Authenticate and initialize Google Earth Engine.

    Tries ``ee.Initialize()`` first; if that raises anything, runs
    ``ee.Authenticate()`` and then initializes again. An exception from the
    second attempt propagates to the caller.
    """
    try:
        ee.Initialize()
    except Exception:
        # Any failure of the first attempt is treated as "needs
        # authentication"; the original error is not inspected or logged.
        ee.Authenticate()
        ee.Initialize()

def load_boundary(path):
    """Convert the shapefile at *path* to an Earth Engine object via geemap.

    Args:
        path: Location of the shapefile describing the study area.

    Returns:
        Whatever ``geemap.shp_to_ee`` produces for that file.
    """
    study_area = geemap.shp_to_ee(path)
    return study_area

def mask_clouds_l8(image):
    """Mask pixels flagged as cloud, cloud shadow or snow in ``QA_PIXEL``.

    A pixel is kept only when QA bits 3 (cloud), 4 (shadow) and 5 (snow) are
    all unset.

    Args:
        image: Image carrying a ``QA_PIXEL`` band.

    Returns:
        The input image with its mask updated.
    """
    qa_band = image.select('QA_PIXEL')

    keep = None
    for bit in (3, 4, 5):  # cloud, cloud shadow, snow
        bit_is_clear = qa_band.bitwiseAnd(1 << bit).eq(0)
        keep = bit_is_clear if keep is None else keep.And(bit_is_clear)

    return image.updateMask(keep)

def apply_scale(image):
    """Rescale the ``SR_B*`` and ``ST_B*`` bands with a linear gain and offset.

    The constants look like the published Landsat Collection 2 Level-2 scale
    factors (surface reflectance, and surface temperature in Kelvin).

    Args:
        image: Image containing ``SR_B*`` and ``ST_B*`` bands.

    Returns:
        The image with those bands overwritten by their rescaled versions;
        all other bands are left untouched.
    """
    sr_gain, sr_offset = 0.0000275, -0.2
    st_gain, st_offset = 0.00341802, 149.0

    scaled_sr = image.select('SR_B.*').multiply(sr_gain).add(sr_offset)
    scaled_st = image.select('ST_B.*').multiply(st_gain).add(st_offset)

    # overwrite=True replaces the raw bands instead of appending duplicates.
    with_sr = image.addBands(scaled_sr, None, True)
    return with_sr.addBands(scaled_st, None, True)


def process_landsat(boundary):
    """Build a cloud-masked Landsat median composite with NDVI and SI1 bands.

    Scenes inside the configured date window that intersect *boundary* and
    whose scene-level ``CLOUD_COVER`` is below ``CLOUD_THRESHOLD`` are
    rescaled, cloud/shadow/snow-masked per pixel, reduced with a median and
    clipped to *boundary*.

    Args:
        boundary: Earth Engine geometry / feature collection of the study
            area.

    Returns:
        The composite with two extra bands: ``NDVI`` (normalized difference
        of ``SR_B5`` and ``SR_B4``) and ``SI1`` (square root of
        ``SR_B2 * SR_B4``).
    """
    collection = (
        ee.ImageCollection(LANDSAT_COLLECTION)
        .filterDate(START_DATE, END_DATE)
        .filterBounds(boundary)
        # Apply the configured scene-level cloud limit, which was previously
        # defined but never used. Filtering before map() avoids scaling and
        # masking scenes that would be discarded anyway.
        .filter(ee.Filter.lt('CLOUD_COVER', CLOUD_THRESHOLD))
        .map(apply_scale)
        .map(mask_clouds_l8)
    )
    composite = collection.median().clip(boundary)
    ndvi = composite.normalizedDifference(['SR_B5', 'SR_B4']).rename('NDVI')
    si = (
        composite.select('SR_B2')
        .multiply(composite.select('SR_B4'))
        .sqrt()
        .rename('SI1')
    )
    return composite.addBands([ndvi, si])


def process_sentinel1(boundary):
    """Build median VV / VH backscatter layers plus their difference.

    Uses IW-mode Sentinel-1 scenes that carry both VV and VH polarisations,
    fall inside the configured date window and intersect *boundary*.

    Args:
        boundary: Earth Engine geometry / feature collection of the study
            area.

    Returns:
        A three-band image (``VV``, ``VH``, ``VV_VH_diff``) clipped to
        *boundary*.
    """
    def drop_low_values(scene):
        # Additionally mask every pixel whose value is below -30.
        below_floor = scene.lt(-30.0)
        return scene.updateMask(scene.mask().And(below_floor.Not()))

    def has_polarisation(band):
        return ee.Filter.listContains('transmitterReceiverPolarisation', band)

    scenes = ee.ImageCollection(SENTINEL1_COLLECTION)
    scenes = scenes.filterDate(START_DATE, END_DATE).filterBounds(boundary)
    scenes = scenes.filter(has_polarisation('VV'))
    scenes = scenes.filter(has_polarisation('VH'))
    scenes = scenes.filter(ee.Filter.eq('instrumentMode', 'IW'))
    scenes = scenes.map(drop_low_values)

    vv_median = scenes.select('VV').median()
    vh_median = scenes.select('VH').median()
    vv_minus_vh = vv_median.subtract(vh_median).rename('VV_VH_diff')

    return ee.Image.cat([vv_median, vh_median, vv_minus_vh]).clip(boundary)


def process_environment(boundary):
    """Assemble the ET, precipitation, elevation and slope predictor layers.

    Args:
        boundary: Earth Engine geometry / feature collection of the study
            area.

    Returns:
        A four-band image (``ET``, ``Precip``, ``elevation``, ``slope``)
        clipped to *boundary*. ``ET`` is the mean and ``Precip`` the sum over
        the configured date window.
    """
    def band_in_window(collection_id, band):
        # Shared filter chain for the two time-series sources.
        return (
            ee.ImageCollection(collection_id)
            .filterDate(START_DATE, END_DATE)
            .filterBounds(boundary)
            .select(band)
        )

    et_mean = band_in_window(MODIS_ET_COLLECTION, 'ET').mean().rename('ET')
    precip_total = (
        band_in_window(CHIRPS_COLLECTION, 'precipitation')
        .sum()
        .rename('Precip')
    )

    srtm = ee.Image(SRTM)
    slope_layer = ee.Terrain.slope(srtm).rename('slope')

    layers = [et_mean, precip_total, srtm.rename('elevation'), slope_layer]
    return ee.Image.cat(layers).clip(boundary)


def build_feature_stack(boundary):
    """Concatenate all predictor layers into a single multi-band image.

    Args:
        boundary: Earth Engine geometry / feature collection of the study
            area, forwarded to each ``process_*`` helper.

    Returns:
        One image holding the Landsat, Sentinel-1 and environmental bands
        followed by a ``landcover`` band taken from the ``Map`` band of the
        first WorldCover image (which is not clipped here).
    """
    landcover = (
        ee.ImageCollection(WORLDCOVER)
        .first()
        .select('Map')
        .rename('landcover')
    )
    layers = [
        process_landsat(boundary),
        process_sentinel1(boundary),
        process_environment(boundary),
        landcover,
    ]
    return ee.Image.cat(layers)


def sample_points(image, boundary, sample_path):
    """Sample *image* at the soil-sample locations and return a DataFrame.

    The sample file is loaded according to its extension: ``.csv`` files go
    through ``geemap.csv_to_ee`` and anything else through
    ``geemap.shp_to_ee``. Previously every path was read as a shapefile,
    which fails for the CSV configured in ``SOIL_DATA_PATH``.

    Args:
        image: Multi-band predictor image to sample.
        boundary: Study-area boundary; accepted for interface compatibility
            but not used here.
        sample_path: Path to the sample points (CSV or shapefile). The
            points must carry a ``salinity`` attribute.

    Returns:
        A pandas DataFrame of the sampled values with rows containing any
        missing value removed.
    """
    extension = os.path.splitext(sample_path)[1].lower()
    if extension == '.csv':
        # csv_to_ee builds point features from coordinate columns; with its
        # defaults these are expected to be named 'latitude' / 'longitude'
        # -- confirm against the CSV header.
        samples = geemap.csv_to_ee(sample_path)
    else:
        samples = geemap.shp_to_ee(sample_path)

    sample = image.sampleRegions(
        collection=samples,
        properties=['salinity'],
        scale=30,
        geometries=True
    )
    df = geemap.ee_to_pandas(sample)
    return df.dropna()


def train_models(df):
    """Tune and evaluate four regressors for predicting ``salinity``.

    All columns except ``salinity``, ``longitude`` and ``latitude`` are used
    as features. The data are split 70/30 into train/test sets; each model is
    tuned with a 5-fold grid search on the training part and scored on the
    held-out part.

    Args:
        df: DataFrame with a ``salinity`` column and the predictor columns.

    Returns:
        Dict mapping model name to ``{'model', 'r2', 'rmse'}``, where
        ``model`` is the refitted best estimator and the scores come from
        the test split.
    """
    X = df.drop(columns=['salinity', 'longitude', 'latitude'], errors='ignore')
    y = df['salinity']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    models = {
        'RandomForest': RandomForestRegressor(random_state=42),
        'SVR': SVR(),
        'Linear': LinearRegression(),
        'XGB': xgb.XGBRegressor(objective='reg:squarederror', random_state=42),
    }

    params = {
        'RandomForest': {'n_estimators': [100, 200], 'max_depth': [5, 10]},
        'SVR': {'C': [1, 10], 'gamma': ['scale', 'auto']},
        'Linear': {},
        'XGB': {'n_estimators': [100, 200], 'max_depth': [3, 6]},
    }

    # The splitter is seeded and identical for every model, so build it once.
    cv = KFold(n_splits=5, shuffle=True, random_state=42)

    results = {}
    for name, model in models.items():
        grid = GridSearchCV(model, params[name], cv=cv)
        grid.fit(X_train, y_train)
        pred = grid.predict(X_test)
        r2 = r2_score(y_test, pred)
        # mean_squared_error's `squared=False` flag is deprecated in
        # scikit-learn 1.4 and removed in 1.6; taking the root explicitly
        # works on every version.
        rmse = float(np.sqrt(mean_squared_error(y_test, pred)))
        results[name] = {'model': grid.best_estimator_, 'r2': r2, 'rmse': rmse}
        print(f'{name}: R2={r2:.3f}, RMSE={rmse:.3f}')
    return results


def main():
    """Run the workflow end to end.

    Initializes Earth Engine, builds the predictor stack for the configured
    boundary, samples it at the soil points, saves the samples to
    ``training_samples.csv`` in ``OUTPUT_DIR``, trains the candidate models,
    reports the one with the highest R2 and requests a Drive export of its
    prediction.
    """
    initialize_gee()
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    study_area = load_boundary(BOUNDARY_PATH)
    stack = build_feature_stack(study_area)

    samples_df = sample_points(stack, study_area, SOIL_DATA_PATH)
    samples_csv = os.path.join(OUTPUT_DIR, 'training_samples.csv')
    samples_df.to_csv(samples_csv, index=False)

    scores = train_models(samples_df)
    best_name = max(scores, key=lambda name: scores[name]['r2'])
    best_model = scores[best_name]['model']

    print(f'Best model: {best_name}')

    # Predict across the image using every band of the stack.
    predictors = stack.select(stack.bandNames())
    # NOTE(review): placeholder step (original note: requires geemap>=0.30);
    # confirm that geemap.sk_export_model exists in the installed version and
    # returns an exportable image.
    model = geemap.sk_export_model(best_model, predictors)
    geemap.ee_export_image_to_drive(
        model,
        description='salinity_prediction',
        folder='gee_outputs',
        region=study_area,
        scale=30
    )
    print('Export task started.')

# Run the workflow only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()