In [None]:
import pandas as pd
import numpy as np
import glob
import os
import rasterio
from rasterio.windows import Window
from scipy.spatial import distance_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input,
    Conv2D,
    MaxPooling2D,
    Flatten,
    Dense,
    Concatenate,
    Dropout,
    Layer,
    Lambda,
    GlobalAveragePooling2D,
    Reshape,
    Multiply
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import Sequence
import tensorflow as tf
import gc # Import garbage collector
import sys
from io import StringIO
import pickle

# Define the single buffer size to use
BUFFER_METERS = 500

# ==================== 1. Load Data ==================== #
# NOTE: The data loading logic remains the same.
# Replace with your actual data paths if needed
orig = pd.read_csv("../../data/RainySeason.csv")
river_100 = pd.read_csv("../data/Samples_100.csv")

drop_cols = ['Stations','River','Lat','Long','geometry']
numeric_cols = orig.drop(columns=drop_cols).columns.drop('RI')

# Train-test split
train_orig = orig.sample(10, random_state=42)
test_orig = orig.drop(train_orig.index)
train_combined = pd.concat([river_100, train_orig], ignore_index=True)

# ==================== 2. Collect ALL Rasters ==================== #
raster_paths = []
raster_paths += glob.glob("../CalIndices/*.tif")
raster_paths += glob.glob("../LULCMerged/*.tif")
raster_paths += glob.glob("../IDW/*.tif")

print(f"Using {len(raster_paths)} raster layers for CNN input.")
for r in raster_paths:
    print("  -", os.path.basename(r))

# ==================== 3. Create a Custom Data Generator ==================== #
def extract_patch_for_generator(coords, raster_files, buffer_pixels_x, buffer_pixels_y, patch_width, patch_height):
    """
    Extracts a batch of patches from rasters for a given set of coordinates.
    This function is optimized to be called by the data generator for each batch.
    """
    patches = []
    # Loop through each coordinate pair in the batch
    for lon, lat in coords:
        channels = []
        # Loop through each raster file to get a single patch for each raster
        for rfile in raster_files:
            with rasterio.open(rfile) as src:
                try:
                    row, col = src.index(lon, lat)
                    win = Window(col - buffer_pixels_x, row - buffer_pixels_y, patch_width, patch_height)
                    arr = src.read(1, window=win, boundless=True, fill_value=0)
                    arr = arr.astype(np.float32)

                    if np.nanmax(arr) != 0:
                        arr /= np.nanmax(arr)
                except Exception as e:
                    print(f"Error processing {rfile} for coordinates ({lon}, {lat}): {e}")
                    arr = np.zeros((patch_width, patch_height), dtype=np.float32)
            channels.append(arr)
        patches.append(np.stack(channels, axis=-1))
    
    return np.array(patches)

class DataGenerator(Sequence):
    def __init__(self, coords, mlp_data, gnn_data, y, raster_paths, buffer_meters, batch_size=4, shuffle=True, **kwargs):
        super().__init__(**kwargs)
        self.coords = coords
        self.mlp_data = mlp_data
        self.gnn_data = gnn_data
        self.y = y
        self.raster_paths = raster_paths
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.y))
        self.buffer_meters = buffer_meters

        # Pre-calculate patch size from the first raster
        with rasterio.open(raster_paths[0]) as src:
            res_x, res_y = src.res
            self.buffer_pixels_x = int(self.buffer_meters / res_x)
            self.buffer_pixels_y = int(self.buffer_meters / res_y)
            self.patch_width = 2 * self.buffer_pixels_x
            self.patch_height = 2 * self.buffer_pixels_y

        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.y) / self.batch_size))

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)
            
    def __getitem__(self, index):
        # Get batch indices
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]

        # Get batch data
        batch_coords = self.coords[batch_indices]
        batch_mlp = self.mlp_data[batch_indices]
        batch_gnn = self.gnn_data[batch_indices, :]
        batch_y = self.y[batch_indices]

        # Extract CNN patches for the current batch
        batch_cnn = extract_patch_for_generator(
            batch_coords,
            self.raster_paths,
            self.buffer_pixels_x,
            self.buffer_pixels_y,
            self.patch_width,
            self.patch_height
        )

        return (batch_cnn, batch_mlp, batch_gnn), batch_y

# ==================== 4. Prepare GNN & MLP Input (only once) ==================== #
coords_train = train_combined[['Long','Lat']].values
coords_test = test_orig[['Long','Lat']].values
dist_mat_train = distance_matrix(coords_train, coords_train)
gnn_train = np.exp(-dist_mat_train/10)
dist_mat_test_train = distance_matrix(coords_test, coords_train)
gnn_test = np.exp(-dist_mat_test_train/10)

scaler = StandardScaler()
mlp_train = scaler.fit_transform(train_combined[numeric_cols])
mlp_test = scaler.transform(test_orig[numeric_cols])
y_train = train_combined['RI'].values
y_test = test_orig['RI'].values

# ==================== 5. Define Base Models ==================== #
def build_cnn_mlp_model(patch_shape, mlp_dim):
    cnn_input = Input(shape=patch_shape, name="cnn_input")
    mlp_input = Input(shape=(mlp_dim,), name="mlp_input")

    # CNN branch
    cnn_branch = Conv2D(32, (3,3), activation="relu", padding="same")(cnn_input)
    cnn_branch = MaxPooling2D((2,2))(cnn_branch)
    cnn_branch = Conv2D(64, (3,3), activation="relu", padding="same")(cnn_branch)
    cnn_branch = MaxPooling2D((2,2))(cnn_branch)
    cnn_embedding = Flatten()(cnn_branch)

    # MLP branch
    mlp_embedding = Dense(64, activation="relu")(mlp_input)
    mlp_embedding = Dense(32, activation="relu")(mlp_embedding)

    # Combine
    combined = Concatenate()([cnn_embedding, mlp_embedding])
    f = Dense(128, activation="relu")(combined)
    output = Dense(1, activation="linear", name="cnn_mlp_output")(f)
    
    model = Model(inputs=[cnn_input, mlp_input], outputs=output)
    model.compile(optimizer=Adam(learning_rate=0.0005), loss="mse")
    return model

def build_gnn_mlp_model(gnn_dim, mlp_dim):
    gnn_input = Input(shape=(gnn_dim,), name="gnn_input")
    mlp_input = Input(shape=(mlp_dim,), name="mlp_input")

    # GNN branch
    gnn_embedding = Dense(64, activation="relu")(gnn_input)
    gnn_embedding = Dense(32, activation="relu")(gnn_embedding)

    # MLP branch
    mlp_embedding = Dense(64, activation="relu")(mlp_input)
    mlp_embedding = Dense(32, activation="relu")(mlp_embedding)

    # Combine
    combined = Concatenate()([gnn_embedding, mlp_embedding])
    f = Dense(64, activation="relu")(combined)
    output = Dense(1, activation="linear", name="gnn_mlp_output")(f)
    
    model = Model(inputs=[gnn_input, mlp_input], outputs=output)
    model.compile(optimizer=Adam(learning_rate=0.0005), loss="mse")
    return model

def build_cnn_gnn_model(patch_shape, gnn_dim):
    cnn_input = Input(shape=patch_shape, name="cnn_input")
    gnn_input = Input(shape=(gnn_dim,), name="gnn_input")

    # CNN branch
    cnn_branch = Conv2D(32, (3,3), activation="relu", padding="same")(cnn_input)
    cnn_branch = MaxPooling2D((2,2))(cnn_branch)
    cnn_branch = Conv2D(64, (3,3), activation="relu", padding="same")(cnn_branch)
    cnn_branch = MaxPooling2D((2,2))(cnn_branch)
    cnn_embedding = Flatten()(cnn_branch)
    
    # GNN branch
    gnn_embedding = Dense(64, activation="relu")(gnn_input)
    gnn_embedding = Dense(32, activation="relu")(gnn_embedding)

    # Combine
    combined = Concatenate()([cnn_embedding, gnn_embedding])
    f = Dense(128, activation="relu")(combined)
    output = Dense(1, activation="linear", name="cnn_gnn_output")(f)
    
    model = Model(inputs=[cnn_input, gnn_input], outputs=output)
    model.compile(optimizer=Adam(learning_rate=0.0005), loss="mse")
    return model

def build_meta_learner_model():
    # Takes predictions from the 3 base models as input
    pred1_input = Input(shape=(1,), name="pred1_input")
    pred2_input = Input(shape=(1,), name="pred2_input")
    pred3_input = Input(shape=(1,), name="pred3_input")

    # Concatenate the predictions
    combined = Concatenate()([pred1_input, pred2_input, pred3_input])
    
    # Simple MLP as the meta-learner
    f = Dense(32, activation="relu")(combined)
    f = Dense(16, activation="relu")(f)
    output = Dense(1, activation="linear", name="final_output")(f)
    
    model = Model(inputs=[pred1_input, pred2_input, pred3_input], outputs=output)
    model.compile(optimizer=Adam(learning_rate=0.0005), loss="mse")
    return model

# ==================== 6. Create Data Generators for Base Models ==================== #
# NOTE: We create generators that provide only the necessary inputs for each base model.
class CNNDropoutGenerator(DataGenerator):
    def __getitem__(self, index):
        (batch_cnn, batch_mlp, batch_gnn), batch_y = super().__getitem__(index)
        return (batch_cnn, batch_mlp), batch_y

class GNNDropoutGenerator(DataGenerator):
    def __getitem__(self, index):
        (batch_cnn, batch_mlp, batch_gnn), batch_y = super().__getitem__(index)
        return (batch_gnn, batch_mlp), batch_y

class MLPDropoutGenerator(DataGenerator):
    def __getitem__(self, index):
        (batch_cnn, batch_mlp, batch_gnn), batch_y = super().__getitem__(index)
        return (batch_cnn, batch_gnn), batch_y

def get_base_model_predictions(model, coords, mlp_data, gnn_data, y, raster_paths, buffer_meters, batch_size):
    num_samples = len(y)
    y_pred_list = []
    
    with rasterio.open(raster_paths[0]) as src:
        res_x, res_y = src.res
        buffer_pixels_x = int(buffer_meters / res_x)
        buffer_pixels_y = int(buffer_meters / res_y)
        patch_width = 2 * buffer_pixels_x
        patch_height = 2 * buffer_pixels_y

    for i in range(0, num_samples, batch_size):
        batch_coords = coords[i:i+batch_size]
        batch_mlp = mlp_data[i:i+batch_size]
        batch_gnn = gnn_data[i:i+batch_size, :]
        
        batch_cnn = extract_patch_for_generator(
            batch_coords, raster_paths, buffer_pixels_x, buffer_pixels_y, patch_width, patch_height
        )
        
        # Check which inputs the model expects and provide them
        input_names = [inp.name for inp in model.inputs]
        input_dict = {}
        if 'cnn_input' in input_names:
            input_dict['cnn_input'] = batch_cnn
        if 'mlp_input' in input_names:
            input_dict['mlp_input'] = batch_mlp
        if 'gnn_input' in input_names:
            input_dict['gnn_input'] = batch_gnn
            
        y_pred_list.append(model.predict(input_dict).flatten())
            
    return np.concatenate(y_pred_list)


# ==================== Run the Analysis ==================== #


print("\n" + "="*80)
print(f"Analyzing Stacked Deep Ensemble for BUFFER_METERS = {BUFFER_METERS}m")
print("="*80)

batch_size = 4
gnn_input_dim = len(coords_train)

# Calculate CNN patch shape based on the current buffer size
with rasterio.open(raster_paths[0]) as src:
    res_x, res_y = src.res
    buffer_pixels_x = int(BUFFER_METERS / res_x)
    patch_width = 2 * buffer_pixels_x
    cnn_patch_shape = (patch_width, patch_width, len(raster_paths))

mlp_input_dim = mlp_train.shape[1]

# --- Train Base Models ---
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True
)

print("\n--- Training CNN-MLP Base Model ---")
cnn_mlp_model = build_cnn_mlp_model(cnn_patch_shape, mlp_input_dim)
cnn_mlp_train_gen = CNNDropoutGenerator(
    coords=coords_train, mlp_data=mlp_train, gnn_data=gnn_train, y=y_train,
    raster_paths=raster_paths, buffer_meters=BUFFER_METERS, batch_size=batch_size, shuffle=True
)
cnn_mlp_model.fit(cnn_mlp_train_gen, epochs=100, verbose=1, callbacks=[early_stopping], validation_data=cnn_mlp_train_gen)

print("\n--- Training GNN-MLP Base Model ---")
gnn_mlp_model = build_gnn_mlp_model(gnn_input_dim, mlp_input_dim)
gnn_mlp_train_gen = GNNDropoutGenerator(
    coords=coords_train, mlp_data=mlp_train, gnn_data=gnn_train, y=y_train,
    raster_paths=raster_paths, buffer_meters=BUFFER_METERS, batch_size=batch_size, shuffle=True
)
gnn_mlp_model.fit(gnn_mlp_train_gen, epochs=100, verbose=1, callbacks=[early_stopping], validation_data=gnn_mlp_train_gen)

print("\n--- Training CNN-GNN Base Model ---")
cnn_gnn_model = build_cnn_gnn_model(cnn_patch_shape, gnn_input_dim)
cnn_gnn_train_gen = MLPDropoutGenerator(
    coords=coords_train, mlp_data=mlp_train, gnn_data=gnn_train, y=y_train,
    raster_paths=raster_paths, buffer_meters=BUFFER_METERS, batch_size=batch_size, shuffle=True
)
cnn_gnn_model.fit(cnn_gnn_train_gen, epochs=100, verbose=1, callbacks=[early_stopping], validation_data=cnn_gnn_train_gen)

# --- Generate predictions for meta-learner ---
# Get predictions from base models on training data
preds1_train = get_base_model_predictions(cnn_mlp_model, coords_train, mlp_train, gnn_train, y_train, raster_paths, BUFFER_METERS, batch_size)
preds2_train = get_base_model_predictions(gnn_mlp_model, coords_train, mlp_train, gnn_train, y_train, raster_paths, BUFFER_METERS, batch_size)
preds3_train = get_base_model_predictions(cnn_gnn_model, coords_train, mlp_train, gnn_train, y_train, raster_paths, BUFFER_METERS, batch_size)

meta_train_inputs = (preds1_train.reshape(-1, 1), preds2_train.reshape(-1, 1), preds3_train.reshape(-1, 1))

# --- Train Meta-Learner ---
print("\n--- Training Meta-Learner Model ---")
meta_model = build_meta_learner_model()
meta_model.fit(meta_train_inputs, y_train, epochs=100, verbose=1, callbacks=[early_stopping], validation_split=0.2)

# --- Get predictions from base models on test data ---
preds1_test = get_base_model_predictions(cnn_mlp_model, coords_test, mlp_test, gnn_test, y_test, raster_paths, BUFFER_METERS, batch_size)
preds2_test = get_base_model_predictions(gnn_mlp_model, coords_test, mlp_test, gnn_test, y_test, raster_paths, BUFFER_METERS, batch_size)
preds3_test = get_base_model_predictions(cnn_gnn_model, coords_test, mlp_test, gnn_test, y_test, raster_paths, BUFFER_METERS, batch_size)

meta_test_inputs = (preds1_test.reshape(-1, 1), preds2_test.reshape(-1, 1), preds3_test.reshape(-1, 1))

# --- Evaluate with Meta-Learner ---
y_pred = meta_model.predict(meta_test_inputs).flatten()
r2_test = r2_score(y_test, y_pred)
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"\n Stacked Deep Ensemble Model Performance ({BUFFER_METERS}m):")
print(f"R² Test: {r2_test:.4f} | RMSE Test: {rmse_test:.4f}")

# --- NEW: Feature Importance for Meta-Learner ---
print("\n" + "-"*50)
print(f"Meta-Learner Feature Importance (Permutation-based)")
print("-"*50)
baseline_r2 = r2_test

# Importance for CNN-MLP predictions
preds1_test_shuffled = np.copy(preds1_test)
np.random.shuffle(preds1_test_shuffled)
shuffled_test_inputs = (preds1_test_shuffled.reshape(-1, 1), preds2_test.reshape(-1, 1), preds3_test.reshape(-1, 1))
y_pred_shuffled = meta_model.predict(shuffled_test_inputs).flatten()
r2_shuffled = r2_score(y_test, y_pred_shuffled)
importance_cnn_mlp = baseline_r2 - r2_shuffled
print(f"Importance of CNN-MLP predictions (R² drop): {importance_cnn_mlp:.4f}")

# Importance for GNN-MLP predictions
preds2_test_shuffled = np.copy(preds2_test)
np.random.shuffle(preds2_test_shuffled)
shuffled_test_inputs = (preds1_test.reshape(-1, 1), preds2_test_shuffled.reshape(-1, 1), preds3_test.reshape(-1, 1))
y_pred_shuffled = meta_model.predict(shuffled_test_inputs).flatten()
r2_shuffled = r2_score(y_test, y_pred_shuffled)
importance_gnn_mlp = baseline_r2 - r2_shuffled
print(f"Importance of GNN-MLP predictions (R² drop): {importance_gnn_mlp:.4f}")

# Importance for CNN-GNN predictions
preds3_test_shuffled = np.copy(preds3_test)
np.random.shuffle(preds3_test_shuffled)
shuffled_test_inputs = (preds1_test.reshape(-1, 1), preds2_test.reshape(-1, 1), preds3_test_shuffled.reshape(-1, 1))
y_pred_shuffled = meta_model.predict(shuffled_test_inputs).flatten()
r2_shuffled = r2_score(y_test, y_pred_shuffled)
importance_cnn_gnn = baseline_r2 - r2_shuffled
print(f"Importance of CNN-GNN predictions (R² drop): {importance_cnn_gnn:.4f}")


# Save the feature importance results
feature_importance = {
    "CNN-MLP_importance": importance_cnn_mlp,
    "GNN-MLP_importance": importance_gnn_mlp,
    "CNN-GNN_importance": importance_cnn_gnn
}


In [6]:
import pandas as pd
import numpy as np
import glob
import os
import rasterio
from rasterio.windows import Window
from scipy.spatial import distance_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import KFold
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input,
    Conv2D,
    MaxPooling2D,
    Flatten,
    Dense,
    Concatenate,
    Layer,
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import Sequence
import tensorflow as tf
import gc
import sys
from io import StringIO
import pickle

# Define the single buffer size to use
BUFFER_METERS = 500

# ==================== 1. Load Data ==================== #
# NOTE: The data loading logic remains the same.
# Replace with your actual data paths if needed
orig = pd.read_csv("../../data/RainySeason.csv")
river_100 = pd.read_csv("../data/Samples_100.csv")

# Combine all data for cross-validation purposes
all_data = pd.concat([river_100, orig], ignore_index=True).sample(frac=1, random_state=42).reset_index(drop=True)

drop_cols = ['Stations','River','Lat','Long','geometry', "Source"]
numeric_cols = all_data.drop(columns=drop_cols).columns.drop('RI')

# Separate features and target for the full dataset
coords_all = all_data[['Long','Lat']].values
mlp_all = all_data[numeric_cols].values
y_all = all_data['RI'].values
gnn_all = distance_matrix(coords_all, coords_all)
gnn_all = np.exp(-gnn_all/10)

# ==================== 2. Collect ALL Rasters ==================== #
raster_paths = []
raster_paths += glob.glob("../CalIndices/*.tif")
raster_paths += glob.glob("../LULCMerged/*.tif")
raster_paths += glob.glob("../IDW/*.tif")

print(f"Using {len(raster_paths)} raster layers for CNN input.")
for r in raster_paths:
    print("  -", os.path.basename(r))

# ==================== 3. Define Metric Functions ==================== #
def smape(y_true, y_pred):
    """
    Calculates the Symmetric Mean Absolute Percentage Error (SMAPE).
    """
    numerator = np.abs(y_pred - y_true)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    
    # Handle the case where denominator is zero to avoid division by zero errors
    # Add a small epsilon to the denominator for robustness
    mask = denominator == 0
    denominator[mask] = 1e-8
    
    return np.mean(numerator / denominator) * 100

# ==================== 4. Create a Custom Data Generator ==================== #
def extract_patch_for_generator(coords, raster_files, buffer_pixels_x, buffer_pixels_y, patch_width, patch_height):
    """
    Extracts a batch of patches from rasters for a given set of coordinates.
    This function is optimized to be called by the data generator for each batch.
    """
    patches = []
    # Loop through each coordinate pair in the batch
    for lon, lat in coords:
        channels = []
        # Loop through each raster file to get a single patch for each raster
        for rfile in raster_files:
            with rasterio.open(rfile) as src:
                try:
                    row, col = src.index(lon, lat)
                    win = Window(col - buffer_pixels_x, row - buffer_pixels_y, patch_width, patch_height)
                    arr = src.read(1, window=win, boundless=True, fill_value=0)
                    arr = arr.astype(np.float32)

                    if np.nanmax(arr) != 0:
                        arr /= np.nanmax(arr)
                except Exception as e:
                    print(f"Error processing {rfile} for coordinates ({lon}, {lat}): {e}")
                    arr = np.zeros((patch_width, patch_height), dtype=np.float32)
            channels.append(arr)
        patches.append(np.stack(channels, axis=-1))
    
    return np.array(patches)

class DataGenerator(Sequence):
    # Added 'model_inputs' to specify which data types the generator should return
    def __init__(self, coords, mlp_data, gnn_data, y, raster_paths, buffer_meters, model_inputs, batch_size=4, shuffle=True, **kwargs):
        super().__init__(**kwargs)
        self.coords = coords
        self.mlp_data = mlp_data
        self.gnn_data = gnn_data
        self.y = y
        self.raster_paths = raster_paths
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.y))
        self.buffer_meters = buffer_meters
        # Store the list of inputs the model expects
        self.model_inputs = model_inputs

        # Pre-calculate patch size from the first raster
        with rasterio.open(raster_paths[0]) as src:
            res_x, res_y = src.res
            self.buffer_pixels_x = int(self.buffer_meters / res_x)
            self.buffer_pixels_y = int(self.buffer_meters / res_y)
            self.patch_width = 2 * self.buffer_pixels_x
            self.patch_height = 2 * self.buffer_pixels_y

        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.y) / self.batch_size))

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)
            
    def __getitem__(self, index):
        # Get batch indices
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]

        # Get batch data
        batch_coords = self.coords[batch_indices]
        batch_y = self.y[batch_indices]
        
        # New logic: Prepare the inputs as a dictionary based on 'model_inputs'
        inputs = {}
        if 'cnn' in self.model_inputs:
            batch_cnn = extract_patch_for_generator(
                batch_coords,
                self.raster_paths,
                self.buffer_pixels_x,
                self.buffer_pixels_y,
                self.patch_width,
                self.patch_height
            )
            inputs['cnn_input'] = batch_cnn
        
        if 'mlp' in self.model_inputs:
            batch_mlp = self.mlp_data[batch_indices]
            inputs['mlp_input'] = batch_mlp

        if 'gnn' in self.model_inputs:
            batch_gnn = self.gnn_data[batch_indices, :]
            inputs['gnn_input'] = batch_gnn

        return inputs, batch_y

# ==================== 5. Define Base Models ==================== #
# Reduced learning rate and added gradient clipping to prevent NaN loss
LEARNING_RATE = 0.0001
def build_cnn_mlp_model(patch_shape, mlp_dim):
    cnn_input = Input(shape=patch_shape, name="cnn_input")
    mlp_input = Input(shape=(mlp_dim,), name="mlp_input")

    # CNN branch
    cnn_branch = Conv2D(32, (3,3), activation="relu", padding="same")(cnn_input)
    cnn_branch = MaxPooling2D((2,2))(cnn_branch)
    cnn_branch = Conv2D(64, (3,3), activation="relu", padding="same")(cnn_branch)
    cnn_branch = MaxPooling2D((2,2))(cnn_branch)
    cnn_embedding = Flatten()(cnn_branch)

    # MLP branch
    mlp_embedding = Dense(64, activation="relu")(mlp_input)
    mlp_embedding = Dense(32, activation="relu")(mlp_embedding)

    # Combine
    combined = Concatenate()([cnn_embedding, mlp_embedding])
    f = Dense(128, activation="relu")(combined)
    output = Dense(1, activation="linear", name="cnn_mlp_output")(f)
    
    model = Model(inputs=[cnn_input, mlp_input], outputs=output)
    # Added clipnorm to the Adam optimizer
    model.compile(optimizer=Adam(learning_rate=LEARNING_RATE, clipnorm=1.0), loss="mse")
    return model

def build_gnn_mlp_model(gnn_dim, mlp_dim):
    gnn_input = Input(shape=(gnn_dim,), name="gnn_input")
    mlp_input = Input(shape=(mlp_dim,), name="mlp_input")

    # GNN branch
    gnn_embedding = Dense(64, activation="relu")(gnn_input)
    gnn_embedding = Dense(32, activation="relu")(gnn_embedding)

    # MLP branch
    mlp_embedding = Dense(64, activation="relu")(mlp_input)
    mlp_embedding = Dense(32, activation="relu")(mlp_embedding)

    # Combine
    combined = Concatenate()([gnn_embedding, mlp_embedding])
    f = Dense(64, activation="relu")(combined)
    output = Dense(1, activation="linear", name="gnn_mlp_output")(f)
    
    model = Model(inputs=[gnn_input, mlp_input], outputs=output)
    # Added clipnorm to the Adam optimizer
    model.compile(optimizer=Adam(learning_rate=LEARNING_RATE, clipnorm=1.0), loss="mse")
    return model

def build_cnn_gnn_model(patch_shape, gnn_dim):
    cnn_input = Input(shape=patch_shape, name="cnn_input")
    gnn_input = Input(shape=(gnn_dim,), name="gnn_input")

    # CNN branch
    cnn_branch = Conv2D(32, (3,3), activation="relu", padding="same")(cnn_input)
    cnn_branch = MaxPooling2D((2,2))(cnn_branch)
    cnn_branch = Conv2D(64, (3,3), activation="relu", padding="same")(cnn_branch)
    cnn_branch = MaxPooling2D((2,2))(cnn_branch)
    cnn_embedding = Flatten()(cnn_branch)
    
    # GNN branch
    gnn_embedding = Dense(64, activation="relu")(gnn_input)
    gnn_embedding = Dense(32, activation="relu")(gnn_input)

    # Combine
    combined = Concatenate()([cnn_embedding, gnn_embedding])
    f = Dense(128, activation="relu")(combined)
    output = Dense(1, activation="linear", name="cnn_gnn_output")(f)
    
    model = Model(inputs=[cnn_input, gnn_input], outputs=output)
    # Added clipnorm to the Adam optimizer
    model.compile(optimizer=Adam(learning_rate=LEARNING_RATE, clipnorm=1.0), loss="mse")
    return model

def build_meta_learner_model():
    # Takes predictions from the 3 base models as input
    pred1_input = Input(shape=(1,), name="pred1_input")
    pred2_input = Input(shape=(1,), name="pred2_input")
    pred3_input = Input(shape=(1,), name="pred3_input")

    # Concatenate the predictions
    combined = Concatenate()([pred1_input, pred2_input, pred3_input])
    
    # Simple MLP as the meta-learner
    f = Dense(32, activation="relu")(combined)
    f = Dense(16, activation="relu")(f)
    output = Dense(1, activation="linear", name="final_output")(f)
    
    model = Model(inputs=[pred1_input, pred2_input, pred3_input], outputs=output)
    # Added clipnorm to the Adam optimizer
    model.compile(optimizer=Adam(learning_rate=LEARNING_RATE, clipnorm=1.0), loss="mse")
    return model

# ==================== 6. Helper Function for Predictions ==================== #
def get_model_predictions(model, data_coords, data_mlp, data_gnn, raster_paths, buffer_meters, batch_size):
    num_samples = len(data_coords)
    y_pred_list = []
    
    with rasterio.open(raster_paths[0]) as src:
        res_x, res_y = src.res
        buffer_pixels_x = int(buffer_meters / res_x)
        buffer_pixels_y = int(buffer_meters / res_y)
        patch_width = 2 * buffer_pixels_x
        patch_height = 2 * buffer_pixels_y

    for i in range(0, num_samples, batch_size):
        batch_coords = data_coords[i:i+batch_size]
        batch_mlp = data_mlp[i:i+batch_size]
        batch_gnn = data_gnn[i:i+batch_size, :]
        
        batch_cnn = extract_patch_for_generator(
            batch_coords, raster_paths, buffer_pixels_x, buffer_pixels_y, patch_width, patch_height
        )
        
        # Check which inputs the model expects and provide them
        input_names = [inp.name for inp in model.inputs]
        input_dict = {}
        if 'cnn_input' in input_names:
            input_dict['cnn_input'] = batch_cnn
        if 'mlp_input' in input_names:
            input_dict['mlp_input'] = batch_mlp

        if 'gnn_input' in input_names:
            input_dict['gnn_input'] = batch_gnn
            
        y_pred_list.append(model.predict(input_dict).flatten())
            
    return np.concatenate(y_pred_list)

# ==================== 7. Run the Analysis with K-Fold Cross-Validation ==================== #
print("\n" + "="*80)
print(f"Analyzing Stacked Deep Ensemble with 5-Fold Cross-Validation for BUFFER_METERS = {BUFFER_METERS}m")
print("="*80)

# Pre-calculate patch shape based on the current buffer size
with rasterio.open(raster_paths[0]) as src:
    res_x, res_y = src.res
    buffer_pixels_x = int(BUFFER_METERS / res_x)
    patch_width = 2 * buffer_pixels_x
    cnn_patch_shape = (patch_width, patch_width, len(raster_paths))

mlp_input_dim = mlp_all.shape[1]
gnn_input_dim = gnn_all.shape[0]

kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_results = []
trained_models = {}

# Increased patience to allow for more epochs to find a good minimum
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

for fold, (train_idx, test_idx) in enumerate(kf.split(all_data)):
    print(f"\n--- Starting Fold {fold + 1}/5 ---")
    
    # Split data for this fold
    coords_train, coords_test = coords_all[train_idx], coords_all[test_idx]
    mlp_train, mlp_test = mlp_all[train_idx], mlp_all[test_idx]
    y_train, y_test = y_all[train_idx], y_all[test_idx]
    
    # Scale MLP data
    scaler = StandardScaler()
    mlp_train_scaled = scaler.fit_transform(mlp_train)
    mlp_test_scaled = scaler.transform(mlp_test)

    # Prepare GNN data for the current fold
    gnn_train = distance_matrix(coords_train, coords_train)
    gnn_train = np.exp(-gnn_train/10)
    gnn_test = distance_matrix(coords_test, coords_train)
    gnn_test = np.exp(-gnn_test/10)
    
    # --- Train Base Models ---
    batch_size = 4
    
    # CNN-MLP
    cnn_mlp_model = build_cnn_mlp_model(cnn_patch_shape, mlp_input_dim)
    cnn_mlp_train_gen = DataGenerator(
        coords=coords_train, mlp_data=mlp_train_scaled, gnn_data=gnn_train, y=y_train,
        raster_paths=raster_paths, buffer_meters=BUFFER_METERS, model_inputs=['cnn', 'mlp'], batch_size=batch_size, shuffle=True
    )
    # Increased epochs to give the model more time to train
    cnn_mlp_model.fit(cnn_mlp_train_gen, epochs=1, verbose=1, callbacks=[early_stopping], validation_data=cnn_mlp_train_gen)

    # GNN-MLP
    gnn_mlp_model = build_gnn_mlp_model(gnn_train.shape[1], mlp_input_dim)
    gnn_mlp_train_gen = DataGenerator(
        coords=coords_train, mlp_data=mlp_train_scaled, gnn_data=gnn_train, y=y_train,
        raster_paths=raster_paths, buffer_meters=BUFFER_METERS, model_inputs=['gnn', 'mlp'], batch_size=batch_size, shuffle=True
    )
    # Increased epochs to give the model more time to train
    gnn_mlp_model.fit(gnn_mlp_train_gen, epochs=1, verbose=1, callbacks=[early_stopping], validation_data=gnn_mlp_train_gen)
    
    # CNN-GNN
    cnn_gnn_model = build_cnn_gnn_model(cnn_patch_shape, gnn_train.shape[1])
    cnn_gnn_train_gen = DataGenerator(
        coords=coords_train, mlp_data=mlp_train_scaled, gnn_data=gnn_train, y=y_train,
        raster_paths=raster_paths, buffer_meters=BUFFER_METERS, model_inputs=['cnn', 'gnn'], batch_size=batch_size, shuffle=True
    )
    # Increased epochs to give the model more time to train
    cnn_gnn_model.fit(cnn_gnn_train_gen, epochs=1, verbose=1, callbacks=[early_stopping], validation_data=cnn_gnn_train_gen)

    # --- Generate predictions for meta-learner ---
    preds1_train = get_model_predictions(cnn_mlp_model, coords_train, mlp_train_scaled, gnn_train, raster_paths, BUFFER_METERS, batch_size)
    preds2_train = get_model_predictions(gnn_mlp_model, coords_train, mlp_train_scaled, gnn_train, raster_paths, BUFFER_METERS, batch_size)
    preds3_train = get_model_predictions(cnn_gnn_model, coords_train, mlp_train_scaled, gnn_train, raster_paths, BUFFER_METERS, batch_size)
    meta_train_inputs = (preds1_train.reshape(-1, 1), preds2_train.reshape(-1, 1), preds3_train.reshape(-1, 1))

    # --- Train Meta-Learner ---
    meta_model = build_meta_learner_model()
    meta_model.fit(meta_train_inputs, y_train, epochs=1, verbose=0, callbacks=[early_stopping], validation_split=0.2)

    # --- Evaluate on test data of the fold ---
    preds1_test = get_model_predictions(cnn_mlp_model, coords_test, mlp_test_scaled, gnn_test, raster_paths, BUFFER_METERS, batch_size)
    preds2_test = get_model_predictions(gnn_mlp_model, coords_test, mlp_test_scaled, gnn_test, raster_paths, BUFFER_METERS, batch_size)
    preds3_test = get_model_predictions(cnn_gnn_model, coords_test, mlp_test_scaled, gnn_test, raster_paths, BUFFER_METERS, batch_size)
    meta_test_inputs = (preds1_test.reshape(-1, 1), preds2_test.reshape(-1, 1), preds3_test.reshape(-1, 1))

    y_pred = meta_model.predict(meta_test_inputs).flatten()
    r2_test = r2_score(y_test, y_pred)
    rmse_test = np.sqrt(mean_squared_error(y_test, y_pred))
    mae_test = mean_absolute_error(y_test, y_pred)
    smape_test = smape(y_test, y_pred)
    
    print(f"Fold {fold + 1} Performance:")
    print(f"  R²: {r2_test:.4f} | RMSE: {rmse_test:.4f} | MAE: {mae_test:.4f} | SMAPE: {smape_test:.4f}%")
    
    fold_results.append({
        'r2': r2_test,
        'rmse': rmse_test,
        'mae': mae_test,
        'smape': smape_test,
    })
    
    # Store models from the final fold for the feature importance analysis
    if fold == kf.get_n_splits() - 1:
        # Create CNN patches for the last fold's test set
        cnn_test = extract_patch_for_generator(coords_test, raster_paths, buffer_pixels_x, buffer_pixels_y, patch_width, patch_height)

        trained_models = {
            'cnn_mlp': cnn_mlp_model,
            'gnn_mlp': gnn_mlp_model,
            'cnn_gnn': cnn_gnn_model,
            'meta': meta_model,
            'test_data': {
                'coords': coords_test,
                'mlp_scaled': mlp_test_scaled,
                'gnn': gnn_test,
                'y_true': y_test,
                'cnn': cnn_test
            }
        }
        
    # Free up memory
    del cnn_mlp_model, gnn_mlp_model, cnn_gnn_model, meta_model
    gc.collect()

# --- Print Final Cross-Validation Summary ---
avg_r2 = np.mean([r['r2'] for r in fold_results])
std_r2 = np.std([r['r2'] for r in fold_results])
avg_rmse = np.mean([r['rmse'] for r in fold_results])
std_rmse = np.std([r['rmse'] for r in fold_results])
avg_mae = np.mean([r['mae'] for r in fold_results])
std_mae = np.std([r['mae'] for r in fold_results])
avg_smape = np.mean([r['smape'] for r in fold_results])
std_smape = np.std([r['smape'] for r in fold_results])

print("\n" + "="*80)
print(f"Final Cross-Validation Performance (Mean ± Std Dev)")
print("="*80)
print(f"R²: {avg_r2:.4f} ± {std_r2:.4f}")
print(f"RMSE: {avg_rmse:.4f} ± {std_rmse:.4f}")
print(f"MAE: {avg_mae:.4f} ± {std_mae:.4f}")
print(f"SMAPE: {avg_smape:.4f} ± {std_smape:.4f}%")

# ==================== 8. Full Feature Importance Analysis ==================== #
print("\n" + "="*80)
print("Running Full Feature Importance (Permutation-based) on Last Fold's Test Set")
print("="*80)

def get_full_pipeline_predictions(models, coords, mlp_scaled, gnn, cnn_input, raster_paths, buffer_meters, batch_size, permute_mlp_idx=None, permute_raster_idx=None):
    """Gets predictions from the full stacked ensemble pipeline with an optional feature permutation."""
    
    # Get CNN patches, with optional channel permutation
    if permute_raster_idx is not None:
        cnn_input_permuted = np.copy(cnn_input)
        np.random.shuffle(cnn_input_permuted[:, :, :, permute_raster_idx].ravel())
    else:
        cnn_input_permuted = cnn_input

    # Get MLP data, with optional column permutation
    if permute_mlp_idx is not None:
        mlp_scaled_permuted = np.copy(mlp_scaled)
        np.random.shuffle(mlp_scaled_permuted[:, permute_mlp_idx])
    else:
        mlp_scaled_permuted = mlp_scaled

    # Run data through base models
    preds1 = models['cnn_mlp'].predict({'cnn_input': cnn_input_permuted, 'mlp_input': mlp_scaled_permuted}).flatten()
    preds2 = models['gnn_mlp'].predict({'gnn_input': gnn, 'mlp_input': mlp_scaled_permuted}).flatten()
    preds3 = models['cnn_gnn'].predict({'cnn_input': cnn_input_permuted, 'gnn_input': gnn}).flatten()
    
    # Run predictions through meta-learner
    meta_inputs = (preds1.reshape(-1, 1), preds2.reshape(-1, 1), preds3.reshape(-1, 1))
    y_pred = models['meta'].predict(meta_inputs).flatten()
    return y_pred

# Get baseline performance
models = trained_models
test_data = models['test_data']
y_test = test_data['y_true']
y_pred_baseline = get_full_pipeline_predictions(models, test_data['coords'], test_data['mlp_scaled'], test_data['gnn'], test_data['cnn'], raster_paths, BUFFER_METERS, 4)
baseline_r2 = r2_score(y_test, y_pred_baseline)

importance_scores = {}

# Importance for MLP features
print("\n--- Importance of MLP Features ---")
for i, feature_name in enumerate(numeric_cols):
    y_pred_shuffled = get_full_pipeline_predictions(models, test_data['coords'], test_data['mlp_scaled'], test_data['gnn'], test_data['cnn'], raster_paths, BUFFER_METERS, 4, permute_mlp_idx=i)
    shuffled_r2 = r2_score(y_test, y_pred_shuffled)
    importance = baseline_r2 - shuffled_r2
    importance_scores[feature_name] = importance
    print(f"  {feature_name:<20}: R² drop = {importance:.4f}")

# Importance for CNN (Raster) features
print("\n--- Importance of CNN (Raster) Features ---")
for i, raster_path in enumerate(raster_paths):
    raster_name = os.path.basename(raster_path)
    y_pred_shuffled = get_full_pipeline_predictions(models, test_data['coords'], test_data['mlp_scaled'], test_data['gnn'], test_data['cnn'], raster_paths, BUFFER_METERS, 4, permute_raster_idx=i)
    shuffled_r2 = r2_score(y_test, y_pred_shuffled)
    importance = baseline_r2 - shuffled_r2
    importance_scores[raster_name] = importance
    print(f"  {raster_name:<20}: R² drop = {importance:.4f}")

# Importance for GNN features (by shuffling GNN input)
print("\n--- Importance of GNN Features ---")
gnn_test_permuted = np.copy(test_data['gnn'])
np.random.shuffle(gnn_test_permuted.ravel())
preds1_shuffled_gnn = models['cnn_mlp'].predict({'cnn_input': test_data['cnn'], 'mlp_input': test_data['mlp_scaled']}).flatten()
preds2_shuffled_gnn = models['gnn_mlp'].predict({'gnn_input': gnn_test_permuted, 'mlp_input': test_data['mlp_scaled']}).flatten()
preds3_shuffled_gnn = models['cnn_gnn'].predict({'cnn_input': test_data['cnn'], 'gnn_input': gnn_test_permuted}).flatten()
meta_inputs_shuffled_gnn = (preds1_shuffled_gnn.reshape(-1, 1), preds2_shuffled_gnn.reshape(-1, 1), preds3_shuffled_gnn.reshape(-1, 1))
y_pred_shuffled_gnn = models['meta'].predict(meta_inputs_shuffled_gnn).flatten()
shuffled_r2_gnn = r2_score(y_test, y_pred_shuffled_gnn)
importance_gnn = baseline_r2 - shuffled_r2_gnn
importance_scores['GNN_distance_matrix'] = importance_gnn
print(f"  GNN_distance_matrix: R² drop = {importance_gnn:.4f}")




Using 26 raster layers for CNN input.
  - bui.tif
  - ndsi.tif
  - savi.tif
  - ndbsi.tif
  - ui.tif
  - ndwi.tif
  - ndbi.tif
  - awei.tif
  - evi.tif
  - mndwi.tif
  - ndvi.tif
  - LULC2020.tif
  - LULC2021.tif
  - LULC2022.tif
  - LULC2019.tif
  - LULC2018.tif
  - LULC2017.tif
  - Pb_R.tif
  - ClayR.tif
  - SandR.tif
  - CdR.tif
  - CrR.tif
  - AsR.tif
  - SiltR.tif
  - CuR.tif
  - NiR.tif

Analyzing Stacked Deep Ensemble with 5-Fold Cross-Validation for BUFFER_METERS = 500m

--- Starting Fold 1/5 ---
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 189ms/step - loss: nan - val_loss: nan
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: nan - val_loss: nan
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 188ms/step - loss: 41421.8867 - val_loss: 85178.5625
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━

KeyboardInterrupt: 

In [None]:
# ==================== Save all info to a folder ==================== #
output_folder = "stacked_ensemble_kfold"
os.makedirs(output_folder, exist_ok=True)
print(f"\nCreating folder: '{output_folder}' and saving results...")

# Save all four models from the last fold
cnn_mlp_model_path = os.path.join(output_folder, "cnn_mlp_model.keras")
models['cnn_mlp'].save(cnn_mlp_model_path)
print(f"CNN-MLP model saved to: {cnn_mlp_model_path}")

gnn_mlp_model_path = os.path.join(output_folder, "gnn_mlp_model.keras")
models['gnn_mlp'].save(gnn_mlp_model_path)
print(f"GNN-MLP model saved to: {gnn_mlp_model_path}")

cnn_gnn_model_path = os.path.join(output_folder, "cnn_gnn_model.keras")
models['cnn_gnn'].save(cnn_gnn_model_path)
print(f"CNN-GNN model saved to: {cnn_gnn_model_path}")

meta_model_path = os.path.join(output_folder, "meta_learner.keras")
models['meta'].save(meta_model_path)
print(f"Meta-learner model saved to: {meta_model_path}")

# Save the metrics and feature importance
metrics_path = os.path.join(output_folder, "kfold_metrics.pkl")
with open(metrics_path, 'wb') as f:
    pickle.dump(fold_results, f)
print(f"K-Fold performance metrics saved to: {metrics_path}")

importance_path = os.path.join(output_folder, "full_feature_importance.pkl")
with open(importance_path, 'wb') as f:
    pickle.dump(importance_scores, f)
print(f"Full feature importance scores saved to: {importance_path}")

print("All information successfully saved.")

# Garbage collect to free up memory
del models
gc.collect()

