In [7]:
import pandas as pd
import numpy as np
import glob
import os
import rasterio
from rasterio.windows import Window
from scipy.spatial import distance_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate, Dropout, Layer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import gc # Import garbage collector
import pickle # Import for saving feature importance results

# Single buffer size to use (metres, per the constant's name)
BUFFER_METERS = 500

# ==================== 1. Load Data ==================== #
# NOTE: Both CSV paths are resolved relative to the directory the notebook
# is executed from; adjust them to match your environment.
orig = pd.read_csv("../../data/RainySeason.csv")
river_100 = pd.read_csv("../data/Samples_100.csv")

# Identifier/location columns are excluded from the features; the target
# column 'RI' is removed from the feature list as well.
drop_cols = ['Stations', 'River', 'Lat', 'Long', 'geometry']
numeric_cols = orig.drop(columns=drop_cols).columns.drop('RI')

# Train-test split: 10 sampled rows of `orig` join the river samples for
# training, every remaining row of `orig` forms the test set.
train_orig = orig.sample(n=10, random_state=42)
test_orig = orig.drop(index=train_orig.index)
train_combined = pd.concat([river_100, train_orig], ignore_index=True)

# ==================== 2. Collect ALL Rasters ==================== #
# Rasters are not consumed by this GNN model; the paths are gathered only
# for consistency with previous versions.
raster_paths = [
    tif_path
    for pattern in ("../CalIndices/*.tif", "../LULCMerged/*.tif", "../IDW/*.tif")
    for tif_path in glob.glob(pattern)
]

print("Note: Raster data is not used in this Stacking GNN ensemble model.")

# ==================== 3. Prepare GNN & MLP Input (only once) ==================== #
# Hold out len(test_orig) rows of the combined pool, then split the remainder
# 80/20 into training and validation sets. Features and target are split in a
# single call so their row alignment is explicit (same rows as splitting them
# separately with the same random_state).
mlp_train_val, mlp_test, y_train_val, y_test = train_test_split(
    train_combined, train_combined['RI'], test_size=len(test_orig), random_state=42
)
mlp_train, mlp_val, y_train, y_val = train_test_split(
    mlp_train_val, y_train_val, test_size=0.2, random_state=42
)

# Node coordinates for each split; the test graph is built from `test_orig`
coords_train = mlp_train[['Long', 'Lat']].values
coords_val = mlp_val[['Long', 'Lat']].values
coords_test = test_orig[['Long', 'Lat']].values

# Create distance matrices, which serve as the adjacency matrix for the GNN
dist_mat_train = distance_matrix(coords_train, coords_train)
gnn_train_data = np.exp(-dist_mat_train/10) # Using a radial basis function kernel
dist_mat_val = distance_matrix(coords_val, coords_val)
gnn_val_data = np.exp(-dist_mat_val/10)
dist_mat_test = distance_matrix(coords_test, coords_test)
gnn_test_data = np.exp(-dist_mat_test/10)

# Scale the MLP features using StandardScaler (fitted on the training rows only)
scaler = StandardScaler()
mlp_train_scaled = scaler.fit_transform(mlp_train[numeric_cols])
mlp_val_scaled = scaler.transform(mlp_val[numeric_cols])
mlp_test_scaled = scaler.transform(test_orig[numeric_cols])

# Convert target data to numpy arrays
y_train_arr = y_train.values
y_val_arr = y_val.values
# The test features and coordinates come from `test_orig`, so the test targets
# must come from `test_orig` too. `y_test` above holds targets of rows held out
# of `train_combined`; it only matches in length, not in content.
y_test_arr = test_orig['RI'].values

# Add a batch dimension to the data since we're using full-graph training
mlp_train_data = np.expand_dims(mlp_train_scaled, axis=0)
gnn_train_data = np.expand_dims(gnn_train_data, axis=0)
mlp_val_data = np.expand_dims(mlp_val_scaled, axis=0)
gnn_val_data = np.expand_dims(gnn_val_data, axis=0)
mlp_test_data = np.expand_dims(mlp_test_scaled, axis=0)
gnn_test_data = np.expand_dims(gnn_test_data, axis=0)


# ==================== 4. Define Stacking GNN Ensemble Model ==================== #

class GCNLayer(Layer):
    """
    Custom GCN layer operating on a pre-computed dense similarity matrix.

    Called on ``[node_features, adjacency]`` it returns
    ``activation((adjacency @ node_features) @ kernel)``. The adjacency is
    used exactly as given (no normalisation) and there is no bias term.

    Args:
        units: Number of output features per node.
        activation: Activation identifier resolved with
            ``tf.keras.activations.get``.
        **kwargs: Forwarded to the base ``Layer`` (e.g. ``name``).
    """
    def __init__(self, units, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # input_shape is a list of two shapes: [(batch, nodes, features), (batch, nodes, nodes)]
        mlp_shape, gnn_shape = input_shape
        self.kernel = self.add_weight(
            shape=(mlp_shape[-1], self.units),
            initializer="glorot_uniform",
            trainable=True
        )
        super().build(input_shape)

    def call(self, inputs):
        mlp_input, gnn_input = inputs
        # Neighbourhood aggregation: (B, N, N) x (B, N, F) -> (B, N, F)
        aggregated_features = tf.matmul(gnn_input, mlp_input)
        # Linear transformation: (B, N, F) x (F, U) -> (B, N, U)
        output = tf.matmul(aggregated_features, self.kernel)
        return self.activation(output)

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer.

        Reloading a saved model still needs the class to be made known to
        Keras (e.g. through ``custom_objects``).
        """
        config = super().get_config()
        config.update({
            "units": self.units,
            "activation": tf.keras.activations.serialize(self.activation),
        })
        return config

class GATLayer(Layer):
    """
    Custom multi-head GAT layer over a dense adjacency / similarity matrix.

    Called on ``[node_features, adjacency]``. Node features are projected to
    ``num_heads * units`` channels, pairwise attention logits are computed per
    head, biased by the adjacency, softmax-normalised over neighbours and used
    to aggregate the projected features. Heads are concatenated, so the output
    has ``num_heads * units`` features per node. The two attention kernels are
    shared by all heads.

    Adjacency handling: entries ``<= 0`` are treated as missing edges (logit
    bias of -1e9); positive entries contribute ``log(weight)`` to the logit.
    A binary 0/1 adjacency therefore behaves as a plain mask, while a soft
    similarity such as an RBF kernel acts as a prior on the attention weights.

    Args:
        units: Number of features per attention head.
        num_heads: Number of attention heads.
        activation: Activation identifier resolved with
            ``tf.keras.activations.get``.
        **kwargs: Forwarded to the base ``Layer`` (e.g. ``name``).
    """
    def __init__(self, units, num_heads=4, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.num_heads = num_heads
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        mlp_shape, gnn_shape = input_shape
        # The feature transformation kernel (all heads at once)
        self.kernel_f = self.add_weight(
            shape=(mlp_shape[-1], self.units * self.num_heads),
            initializer="glorot_uniform",
            trainable=True
        )
        # The attention score kernels
        # Kernel 1 for the source node, Kernel 2 for the target node
        self.kernel_a_1 = self.add_weight(
            shape=(self.units, 1),
            initializer="glorot_uniform",
            trainable=True
        )
        self.kernel_a_2 = self.add_weight(
            shape=(self.units, 1),
            initializer="glorot_uniform",
            trainable=True
        )
        super().build(input_shape)

    def call(self, inputs):
        mlp_input, gnn_input = inputs
        num_nodes = tf.shape(mlp_input)[1]

        # Linear transformation: (B, N, F) -> (B, N, H*U)
        features = tf.matmul(mlp_input, self.kernel_f)

        # Split into heads (B, N, H, U), then move heads forward: (B, H, N, U)
        features_heads = tf.reshape(features, (-1, num_nodes, self.num_heads, self.units))
        features_heads_t = tf.transpose(features_heads, perm=[0, 2, 1, 3])

        # Per-node source scores (B, H, N, 1) and target scores (B, H, 1, N)
        e_input_1 = tf.matmul(features_heads_t, self.kernel_a_1)
        e_input_2 = tf.transpose(
            tf.matmul(features_heads_t, self.kernel_a_2), perm=[0, 1, 3, 2]
        )

        # Broadcasting the sum yields the (B, H, N, N) logit matrix of each head
        e = tf.nn.leaky_relu(e_input_1 + e_input_2, alpha=0.2)

        # Adjacency bias, broadcast over heads: (B, N, N) -> (B, 1, N, N).
        # The former `-1e9 * (1 - A)` is only a mask for binary A; for a soft
        # similarity (e.g. A = 0.99) it adds about -1e7 to every non-self edge
        # and collapses the attention onto the self-loop. Using log(A) keeps
        # binary masks unchanged (log 1 = 0, missing edge = -1e9) and turns
        # soft weights into a multiplicative prior on the attention.
        adjacency = tf.expand_dims(gnn_input, axis=1)
        edge_bias = tf.where(
            adjacency > 0,
            tf.math.log(tf.maximum(adjacency, 1e-12)),
            tf.ones_like(adjacency) * -1e9,
        )

        # Softmax normalization across neighbours (the last axis)
        attention = tf.nn.softmax(e + edge_bias, axis=-1)

        # Aggregate: (B, H, N, N) x (B, H, N, U) -> (B, H, N, U)
        aggregated_features = tf.matmul(attention, features_heads_t)

        # Bring the node axis back in front of the head axis BEFORE merging
        # heads; reshaping (B, H, N, U) directly would mix values of different
        # nodes and heads into the same output row.
        aggregated_features = tf.transpose(aggregated_features, perm=[0, 2, 1, 3])
        output = tf.reshape(
            aggregated_features, (-1, num_nodes, self.units * self.num_heads)
        )
        return self.activation(output)

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer.

        Reloading a saved model still needs the class to be made known to
        Keras (e.g. through ``custom_objects``).
        """
        config = super().get_config()
        config.update({
            "units": self.units,
            "num_heads": self.num_heads,
            "activation": tf.keras.activations.serialize(self.activation),
        })
        return config

def build_stacking_ensemble_model(mlp_dim):
    """
    Assemble and compile the GCN + GAT stacking ensemble.

    Two graph branches (GCN and GAT) read the same node features and
    adjacency; their node-level outputs are concatenated and passed through
    a small dense head that emits one value per node.

    Args:
        mlp_dim: Number of input features per node.

    Returns:
        A compiled Keras ``Model`` taking ``[mlp_input, gnn_input]`` and
        trained with Adam (learning rate 0.0005) on an MSE loss.
    """
    # `None` leaves the number of nodes per graph unspecified
    node_features = Input(shape=(None, mlp_dim), name="mlp_input")
    adjacency = Input(shape=(None, None), name="gnn_input")

    # --- GCN base learner: 128 -> dropout -> 64 node-level features ---
    gcn_hidden = GCNLayer(128, name="gcn_layer_1")([node_features, adjacency])
    gcn_hidden = Dropout(0.2)(gcn_hidden)
    gcn_features = GCNLayer(64, name="gcn_layer_2")([gcn_hidden, adjacency])

    # --- GAT base learner: two 4-head attention layers with dropout between ---
    gat_hidden = GATLayer(64, num_heads=4, name="gat_layer_1")([node_features, adjacency])
    gat_hidden = Dropout(0.2)(gat_hidden)
    gat_features = GATLayer(32, num_heads=4, name="gat_layer_2")([gat_hidden, adjacency])

    # --- Meta-learner head on the concatenated branch outputs ---
    head = Concatenate(name="meta_learner_input")([gcn_features, gat_features])
    for width, layer_name in ((16, "meta_dense_1"), (8, "meta_dense_2")):
        head = Dense(width, activation="relu", name=layer_name)(head)

    # One linear output per node
    node_predictions = Dense(1, activation="linear", name="final_output")(head)

    ensemble = Model(inputs=[node_features, adjacency], outputs=node_predictions)
    ensemble.compile(optimizer=Adam(learning_rate=0.0005), loss="mse")
    return ensemble

# Function to calculate permutation feature importance
def calculate_permutation_importance(model, mlp_data, gnn_data, y_true, feature_names,
                                     n_repeats=1, random_state=None):
    """
    Calculates permutation feature importance for each feature.

    The importance of a feature is the increase in mean squared error when
    that feature's column is shuffled across nodes, averaged over
    ``n_repeats`` shuffles.

    Args:
        model: Object with a ``predict([features, adjacency])`` method that
            accepts batched inputs and returns one prediction per node.
        mlp_data: Un-batched feature matrix of shape (nodes, features).
        gnn_data: Un-batched adjacency matrix; the batch dimension is added
            here. It is forwarded to the model unchanged, so it must be the
            same kind of matrix the model was trained on.
        y_true: True target per node.
        feature_names: Names for the columns of ``mlp_data``, in column order.
        n_repeats: Number of shuffles averaged per feature (default 1, the
            previous behaviour).
        random_state: Seed for reproducible shuffles. ``None`` (default) uses
            NumPy's global random state, as before.

    Returns:
        dict mapping feature name -> mean (permuted MSE - baseline MSE).

    Raises:
        ValueError: If ``n_repeats`` is less than 1 or the number of
            predictions does not match ``y_true``.
    """
    if n_repeats < 1:
        raise ValueError("n_repeats must be at least 1")

    print("\nCalculating permutation feature importance...")

    mlp_data = np.asarray(mlp_data)
    y_true = np.asarray(y_true).ravel()
    # Seeded generator only on request; otherwise keep using the global state
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    # The adjacency never changes, so batch it once
    gnn_batched = np.expand_dims(gnn_data, axis=0)

    def _mse(node_features):
        """Mean squared error of the model on the given feature matrix."""
        preds = np.asarray(
            model.predict([np.expand_dims(node_features, axis=0), gnn_batched])
        ).flatten()
        if preds.shape != y_true.shape:
            raise ValueError(
                f"Got {preds.shape[0]} predictions for {y_true.shape[0]} targets"
            )
        return float(np.mean((y_true - preds) ** 2))

    # 1. Baseline score on the un-permuted data
    baseline_score = _mse(mlp_data)

    importance_scores = {}

    # 2. Shuffle one feature at a time and measure the loss increase
    for i, feature in enumerate(feature_names):
        increases = []
        for _ in range(n_repeats):
            # Work on a copy so the caller's data is never modified
            X_mlp_permuted = mlp_data.copy()
            X_mlp_permuted[:, i] = rng.permutation(X_mlp_permuted[:, i])
            increases.append(_mse(X_mlp_permuted) - baseline_score)

        importance = float(np.mean(increases))
        importance_scores[feature] = importance
        print(f"  Feature '{feature}': Importance = {importance:.4f}")

    return importance_scores


# ==================== Run the Analysis ==================== #
banner = "=" * 80
print("\n" + banner)
print("Analyzing Stacking GNN Ensemble Model")
print(banner)

# The width of the scaled training matrix fixes the model's input size
mlp_input_dim = mlp_train_scaled.shape[1]

# Build the stacking ensemble model
model = build_stacking_ensemble_model(mlp_input_dim)
model.summary()

# ==================== 5. Train Model ==================== #
# Stop once the validation loss has not improved for 10 epochs and roll the
# weights back to the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Full-graph training (no generator): each split is a single batch, so the
# targets get a leading batch dimension like the inputs.
train_targets = np.expand_dims(y_train_arr, axis=0)
val_targets = np.expand_dims(y_val_arr, axis=0)

history = model.fit(
    x=[mlp_train_data, gnn_train_data],
    y=train_targets,
    validation_data=([mlp_val_data, gnn_val_data], val_targets),
    epochs=100,
    callbacks=[early_stopping],
    verbose=1,
)

# ==================== 6. Evaluate ==================== #
def _r2_and_rmse(targets, predictions):
    """Return the (R², RMSE) pair for the given targets and predictions."""
    return (
        r2_score(targets, predictions),
        np.sqrt(mean_squared_error(targets, predictions)),
    )

# The model emits one value per node; flattening yields a 1-D prediction vector.

# Training split
y_pred_train = model.predict([mlp_train_data, gnn_train_data]).flatten()
r2_train, rmse_train = _r2_and_rmse(y_train_arr, y_pred_train)

# Validation split
y_pred_val = model.predict([mlp_val_data, gnn_val_data]).flatten()
r2_val, rmse_val = _r2_and_rmse(y_val_arr, y_pred_val)

# Test split
y_pred_test = model.predict([mlp_test_data, gnn_test_data]).flatten()
r2_test, rmse_test = _r2_and_rmse(y_test_arr, y_pred_test)


print("\n Stacking GNN Ensemble Model Performance:")
print(f"R² Train: {r2_train:.4f} | RMSE Train: {rmse_train:.4f}")
print(f"R² Val: {r2_val:.4f} | RMSE Val: {rmse_val:.4f}")
print(f"R² Test: {r2_test:.4f} | RMSE Test: {rmse_test:.4f}")


# ==================== 7. Permutation Importance and Saving Results ==================== #

# Feature importance on the test data. The function expects UN-batched inputs
# and forwards the adjacency to the model as-is, so it must receive the same
# RBF adjacency used for the test predictions (`gnn_test_data`, whose batch
# axis is stripped here) and not the raw distance matrix `dist_mat_test`.
importance_results = calculate_permutation_importance(
    model=model,
    mlp_data=mlp_test_scaled,
    gnn_data=gnn_test_data[0],
    y_true=y_test_arr,
    feature_names=numeric_cols
)

# Create the output directory (no error if it already exists)
output_dir = 'gcn_gat'
os.makedirs(output_dir, exist_ok=True)

# Save the model in the .keras format
model_path = os.path.join(output_dir, 'gcn_gat.keras')
model.save(model_path)
print(f"\nModel saved to {model_path}")

# Save the feature importance results as a pickled file
importance_path = os.path.join(output_dir, 'feature_importance.pkl')
with open(importance_path, 'wb') as f:
    pickle.dump(importance_results, f)
print(f"Feature importance results saved to {importance_path}")

# Save the data splits for reproducibility in .npy format
np.save(os.path.join(output_dir, 'mlp_train_data.npy'), mlp_train_scaled)
np.save(os.path.join(output_dir, 'mlp_val_data.npy'), mlp_val_scaled)
np.save(os.path.join(output_dir, 'mlp_test_data.npy'), mlp_test_scaled)
np.save(os.path.join(output_dir, 'y_train_data.npy'), y_train_arr)
np.save(os.path.join(output_dir, 'y_val_data.npy'), y_val_arr)
np.save(os.path.join(output_dir, 'y_test_data.npy'), y_test_arr)
# Report the directory actually written to (the old message named a different folder)
print(f"Training, validation, and test data splits saved to the {output_dir} folder in .npy format.")

# Garbage collect to free up memory
del model, history
gc.collect()

print("\nAnalysis complete and files have been saved.")


Note: Raster data is not used in this Stacking GNN ensemble model.

Analyzing Stacking GNN Ensemble Model


Epoch 1/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 942ms/step - loss: 36059.6836 - val_loss: 39894.3945
Epoch 2/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step - loss: 35891.3672 - val_loss: 38857.4766
Epoch 3/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step - loss: 35661.6445 - val_loss: 37882.2578
Epoch 4/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step - loss: 35548.8203 - val_loss: 36967.8242
Epoch 5/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step - loss: 35511.7305 - val_loss: 36164.8945
Epoch 6/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step - loss: 35511.9414 - val_loss: 35462.0547
Epoch 7/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step - loss: 35511.0781 - val_loss: 34865.5430
Epoch 8/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step - loss: 35507.1914 - val_loss: 34

# AlphaEarth Integration Enabled

This notebook has been enhanced with AlphaEarth satellite embeddings.

## Integration Options:
- **Option A**: Replace indices with AlphaEarth (64 bands)
- **Option B**: Add AlphaEarth to features (RECOMMENDED)
- **Option C**: PCA-reduced AlphaEarth (20 components)
- **Option D**: MLP enhancement only

Expected improvement: +0.5% to +0.8% in R²

In [None]:
# ==================== ALPHAEARTH CONFIGURATION ====================
import pandas as pd
import numpy as np
import os

# Select which AlphaEarth option to use
ALPHA_EARTH_OPTION = 'B'  # Options: A, B (recommended), C, D
USE_ALPHA_EARTH = True

# Paths to AlphaEarth data files (created by 00_AlphaEarth_Data_Preparation.ipynb)
option_file = f'Option_{ALPHA_EARTH_OPTION}_RainyAE.csv'  # or WinterAE

# Load AlphaEarth data. `ae_data` is always bound (None when the file is
# missing) so code that references it never hits a NameError; reading
# directly and catching the error avoids a separate existence check.
ae_data = None
try:
    ae_data = pd.read_csv(option_file)
except FileNotFoundError:
    print(f'WARNING: {option_file} not found')
    print('Please run 00_AlphaEarth_Data_Preparation.ipynb first')
    USE_ALPHA_EARTH = False
else:
    print(f'Loaded AlphaEarth Option {ALPHA_EARTH_OPTION}')
    print(f'Shape: {ae_data.shape}')


In [6]:
import pandas as pd
import numpy as np
import glob
import os
import rasterio
from rasterio.windows import Window
from scipy.spatial import distance_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate, Dropout, Layer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import gc # Import garbage collector
import pickle # Import for saving feature importance results
# ==================== Define Custom Metrics ==================== #
# We define SMAPE here as it is not available in sklearn
def smape(y_true, y_pred):
    """
    Calculates the Symmetric Mean Absolute Percentage Error (SMAPE).

    Each element contributes ``|y_pred - y_true| / ((|y_true| + |y_pred|) / 2)``;
    elements where both values are zero contribute 0.

    Args:
        y_true: Array-like of true values.
        y_pred: Array-like of predicted values (broadcastable to ``y_true``).

    Returns:
        The mean of the element-wise ratios, as a percentage.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    numerator = np.abs(y_pred - y_true)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    # Divide only where the denominator is non-zero. `np.where` alone would
    # still evaluate 0/0 for the skipped elements and emit a RuntimeWarning.
    ratio = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return np.mean(ratio) * 100
# Single buffer size to use (metres, per the constant's name)
BUFFER_METERS = 500
# ==================== 1. Load Data ==================== #
# NOTE: Both CSV paths are resolved relative to the directory the notebook
# is executed from; adjust them to match your environment.
orig = pd.read_csv("../../data/RainySeason.csv")
river_100 = pd.read_csv("../data/Samples_100.csv")
# Identifier/location columns and the target 'RI' are not model features
drop_cols = ['Stations', 'River', 'Lat', 'Long', 'geometry']
numeric_cols = orig.drop(columns=drop_cols).columns.drop('RI')
# Train-test split (K-Fold is later run on `train_combined`): 10 sampled rows
# of `orig` join the river samples, the rest of `orig` is the test set.
train_orig = orig.sample(n=10, random_state=42)
test_orig = orig.drop(index=train_orig.index)
train_combined = pd.concat([river_100, train_orig], ignore_index=True)
# Features (coordinates kept), target and node coordinates for the K-Fold process
non_feature_cols = ['RI', 'Stations', 'River', 'geometry']
X_combined = train_combined.drop(columns=non_feature_cols)
y_combined = train_combined['RI']
gnn_coords = train_combined[['Long', 'Lat']].to_numpy()
# ==================== 2. Collect ALL Rasters ==================== #
# Rasters are not consumed by this GNN model; the paths are gathered only
# for consistency with previous versions.
raster_paths = [
    tif_path
    for pattern in ("../CalIndices/*.tif", "../LULCMerged/*.tif", "../IDW/*.tif")
    for tif_path in glob.glob(pattern)
]
print("Note: Raster data is not used in this Stacking GNN ensemble model.")
# ==================== 3. Define Stacking GNN & MLP Models ==================== #
class GCNLayer(Layer):
    """
    Custom GCN layer operating on a pre-computed dense similarity matrix.

    Called on ``[node_features, adjacency]`` it returns
    ``activation((adjacency @ node_features) @ kernel)``. The adjacency is
    used exactly as given (no normalisation) and there is no bias term.

    Args:
        units: Number of output features per node.
        activation: Activation identifier resolved with
            ``tf.keras.activations.get``.
        **kwargs: Forwarded to the base ``Layer`` (e.g. ``name``).
    """
    def __init__(self, units, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # input_shape is a list of two shapes: [(batch, nodes, features), (batch, nodes, nodes)]
        mlp_shape, gnn_shape = input_shape
        self.kernel = self.add_weight(
            shape=(mlp_shape[-1], self.units),
            initializer="glorot_uniform",
            trainable=True
        )
        super().build(input_shape)

    def call(self, inputs):
        mlp_input, gnn_input = inputs
        # Neighbourhood aggregation: (B, N, N) x (B, N, F) -> (B, N, F)
        aggregated_features = tf.matmul(gnn_input, mlp_input)
        # Linear transformation: (B, N, F) x (F, U) -> (B, N, U)
        output = tf.matmul(aggregated_features, self.kernel)
        return self.activation(output)

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer.

        Reloading a saved model still needs the class to be made known to
        Keras (e.g. through ``custom_objects``).
        """
        config = super().get_config()
        config.update({
            "units": self.units,
            "activation": tf.keras.activations.serialize(self.activation),
        })
        return config
class GATLayer(Layer):
    """
    Custom multi-head GAT layer over a dense adjacency / similarity matrix.

    Called on ``[node_features, adjacency]``. Node features are projected to
    ``num_heads * units`` channels, pairwise attention logits are computed per
    head, biased by the adjacency, softmax-normalised over neighbours and used
    to aggregate the projected features. Heads are concatenated, so the output
    has ``num_heads * units`` features per node. The two attention kernels are
    shared by all heads.

    Adjacency handling: entries ``<= 0`` are treated as missing edges (logit
    bias of -1e9); positive entries contribute ``log(weight)`` to the logit.
    A binary 0/1 adjacency therefore behaves as a plain mask, while a soft
    similarity such as an RBF kernel acts as a prior on the attention weights.

    Args:
        units: Number of features per attention head.
        num_heads: Number of attention heads.
        activation: Activation identifier resolved with
            ``tf.keras.activations.get``.
        **kwargs: Forwarded to the base ``Layer`` (e.g. ``name``).
    """
    def __init__(self, units, num_heads=4, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.num_heads = num_heads
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        mlp_shape, gnn_shape = input_shape
        # The feature transformation kernel (all heads at once)
        self.kernel_f = self.add_weight(
            shape=(mlp_shape[-1], self.units * self.num_heads),
            initializer="glorot_uniform",
            trainable=True
        )
        # The attention score kernels
        # Kernel 1 for the source node, Kernel 2 for the target node
        self.kernel_a_1 = self.add_weight(
            shape=(self.units, 1),
            initializer="glorot_uniform",
            trainable=True
        )
        self.kernel_a_2 = self.add_weight(
            shape=(self.units, 1),
            initializer="glorot_uniform",
            trainable=True
        )
        super().build(input_shape)

    def call(self, inputs):
        mlp_input, gnn_input = inputs
        num_nodes = tf.shape(mlp_input)[1]

        # Linear transformation: (B, N, F) -> (B, N, H*U)
        features = tf.matmul(mlp_input, self.kernel_f)

        # Split into heads (B, N, H, U), then move heads forward: (B, H, N, U)
        features_heads = tf.reshape(features, (-1, num_nodes, self.num_heads, self.units))
        features_heads_t = tf.transpose(features_heads, perm=[0, 2, 1, 3])

        # Per-node source scores (B, H, N, 1) and target scores (B, H, 1, N)
        e_input_1 = tf.matmul(features_heads_t, self.kernel_a_1)
        e_input_2 = tf.transpose(
            tf.matmul(features_heads_t, self.kernel_a_2), perm=[0, 1, 3, 2]
        )

        # Broadcasting the sum yields the (B, H, N, N) logit matrix of each head
        e = tf.nn.leaky_relu(e_input_1 + e_input_2, alpha=0.2)

        # Adjacency bias, broadcast over heads: (B, N, N) -> (B, 1, N, N).
        # The former `-1e9 * (1 - A)` is only a mask for binary A; for a soft
        # similarity (e.g. A = 0.99) it adds about -1e7 to every non-self edge
        # and collapses the attention onto the self-loop. Using log(A) keeps
        # binary masks unchanged (log 1 = 0, missing edge = -1e9) and turns
        # soft weights into a multiplicative prior on the attention.
        adjacency = tf.expand_dims(gnn_input, axis=1)
        edge_bias = tf.where(
            adjacency > 0,
            tf.math.log(tf.maximum(adjacency, 1e-12)),
            tf.ones_like(adjacency) * -1e9,
        )

        # Softmax normalization across neighbours (the last axis)
        attention = tf.nn.softmax(e + edge_bias, axis=-1)

        # Aggregate: (B, H, N, N) x (B, H, N, U) -> (B, H, N, U)
        aggregated_features = tf.matmul(attention, features_heads_t)

        # Bring the node axis back in front of the head axis BEFORE merging
        # heads; reshaping (B, H, N, U) directly would mix values of different
        # nodes and heads into the same output row.
        aggregated_features = tf.transpose(aggregated_features, perm=[0, 2, 1, 3])
        output = tf.reshape(
            aggregated_features, (-1, num_nodes, self.units * self.num_heads)
        )
        return self.activation(output)

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer.

        Reloading a saved model still needs the class to be made known to
        Keras (e.g. through ``custom_objects``).
        """
        config = super().get_config()
        config.update({
            "units": self.units,
            "num_heads": self.num_heads,
            "activation": tf.keras.activations.serialize(self.activation),
        })
        return config
def build_stacking_ensemble_model(mlp_dim):
    """
    Assemble and compile the GCN + GAT stacking ensemble.

    Two graph branches (GCN and GAT) read the same node features and
    adjacency; their node-level outputs are concatenated and passed through
    a small dense head that emits one value per node.

    Args:
        mlp_dim: Number of input features per node.

    Returns:
        A compiled Keras ``Model`` taking ``[mlp_input, gnn_input]`` and
        trained with Adam (learning rate 0.0005) on an MSE loss.
    """
    # `None` leaves the number of nodes per graph unspecified
    node_features = Input(shape=(None, mlp_dim), name="mlp_input")
    adjacency = Input(shape=(None, None), name="gnn_input")

    # --- GCN base learner: 128 -> dropout -> 64 node-level features ---
    gcn_hidden = GCNLayer(128, name="gcn_layer_1")([node_features, adjacency])
    gcn_hidden = Dropout(0.2)(gcn_hidden)
    gcn_features = GCNLayer(64, name="gcn_layer_2")([gcn_hidden, adjacency])

    # --- GAT base learner: two 4-head attention layers with dropout between ---
    gat_hidden = GATLayer(64, num_heads=4, name="gat_layer_1")([node_features, adjacency])
    gat_hidden = Dropout(0.2)(gat_hidden)
    gat_features = GATLayer(32, num_heads=4, name="gat_layer_2")([gat_hidden, adjacency])

    # --- Meta-learner head on the concatenated branch outputs ---
    head = Concatenate(name="meta_learner_input")([gcn_features, gat_features])
    for width, layer_name in ((16, "meta_dense_1"), (8, "meta_dense_2")):
        head = Dense(width, activation="relu", name=layer_name)(head)

    # One linear output per node
    node_predictions = Dense(1, activation="linear", name="final_output")(head)
    ensemble = Model(inputs=[node_features, adjacency], outputs=node_predictions)
    ensemble.compile(optimizer=Adam(learning_rate=0.0005), loss="mse")
    return ensemble
# Function to calculate permutation feature importance
def calculate_permutation_importance(model, mlp_data, gnn_data, y_true, feature_names,
                                     n_repeats=1, random_state=None):
    """
    Calculates permutation feature importance for each feature.

    The importance of a feature is the increase in mean squared error when
    that feature's column is shuffled across nodes, averaged over
    ``n_repeats`` shuffles.

    Args:
        model: Object with a ``predict([features, adjacency])`` method that
            accepts batched inputs and returns one prediction per node.
        mlp_data: Un-batched feature matrix of shape (nodes, features).
        gnn_data: Un-batched adjacency matrix; the batch dimension is added
            here. It is forwarded to the model unchanged, so it must be the
            same kind of matrix the model was trained on.
        y_true: True target per node.
        feature_names: Names for the columns of ``mlp_data``, in column order.
        n_repeats: Number of shuffles averaged per feature (default 1, the
            previous behaviour).
        random_state: Seed for reproducible shuffles. ``None`` (default) uses
            NumPy's global random state, as before.

    Returns:
        dict mapping feature name -> mean (permuted MSE - baseline MSE).

    Raises:
        ValueError: If ``n_repeats`` is less than 1 or the number of
            predictions does not match ``y_true``.
    """
    if n_repeats < 1:
        raise ValueError("n_repeats must be at least 1")

    print("\nCalculating permutation feature importance...")

    mlp_data = np.asarray(mlp_data)
    y_true = np.asarray(y_true).ravel()
    # Seeded generator only on request; otherwise keep using the global state
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    # The adjacency never changes, so batch it once
    gnn_batched = np.expand_dims(gnn_data, axis=0)

    def _mse(node_features):
        """Mean squared error of the model on the given feature matrix."""
        preds = np.asarray(
            model.predict([np.expand_dims(node_features, axis=0), gnn_batched])
        ).flatten()
        if preds.shape != y_true.shape:
            raise ValueError(
                f"Got {preds.shape[0]} predictions for {y_true.shape[0]} targets"
            )
        return float(np.mean((y_true - preds) ** 2))

    # 1. Baseline score on the un-permuted data
    baseline_score = _mse(mlp_data)

    importance_scores = {}

    # 2. Shuffle one feature at a time and measure the loss increase
    for i, feature in enumerate(feature_names):
        increases = []
        for _ in range(n_repeats):
            # Work on a copy so the caller's data is never modified
            X_mlp_permuted = mlp_data.copy()
            X_mlp_permuted[:, i] = rng.permutation(X_mlp_permuted[:, i])
            increases.append(_mse(X_mlp_permuted) - baseline_score)

        importance = float(np.mean(increases))
        importance_scores[feature] = importance
        print(f"  Feature '{feature}': Importance = {importance:.4f}")

    return importance_scores
# ==================== Run the Analysis with K-Fold Cross-Validation ==================== #
# KFold is not among this cell's top-level imports, so bring it into scope here
# (without it the KFold(...) call below raises NameError).
from sklearn.model_selection import KFold

# Initialize K-Fold
n_splits = 5
print("\n" + "="*80)
print(f"Running {n_splits}-Fold Cross-Validation for Stacking GNN Ensemble Model")
print("="*80)
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
# Lists to store metrics for each fold
r2_scores = []
mae_scores = []
rmse_scores = []
smape_scores = []
# Loop through each fold
for fold, (train_idx, val_idx) in enumerate(kf.split(X_combined)):
    # Identify the fold in the log so the per-fold metrics can be told apart
    print(f"\n--- Fold {fold + 1}/{n_splits} ---")
    # Split data for the current fold
    X_train, X_val = X_combined.iloc[train_idx], X_combined.iloc[val_idx]
    y_train, y_val = y_combined.iloc[train_idx], y_combined.iloc[val_idx]

    # Scale the MLP features using a new StandardScaler for each fold, fitted
    # on the fold's training rows only
    scaler = StandardScaler()
    mlp_train_scaled = scaler.fit_transform(X_train[numeric_cols])
    mlp_val_scaled = scaler.transform(X_val[numeric_cols])
    # Re-create distance matrices (and RBF adjacencies) for the current fold
    coords_train = X_train[['Long', 'Lat']].values
    coords_val = X_val[['Long', 'Lat']].values
    dist_mat_train = distance_matrix(coords_train, coords_train)
    gnn_train_data = np.exp(-dist_mat_train/10)
    dist_mat_val = distance_matrix(coords_val, coords_val)
    gnn_val_data = np.exp(-dist_mat_val/10)
    # Convert target data to numpy arrays
    y_train_arr = y_train.values
    y_val_arr = y_val.values
    # Add a batch dimension to the data (each graph is one batch)
    mlp_train_data_batched = np.expand_dims(mlp_train_scaled, axis=0)
    gnn_train_data_batched = np.expand_dims(gnn_train_data, axis=0)
    mlp_val_data_batched = np.expand_dims(mlp_val_scaled, axis=0)
    gnn_val_data_batched = np.expand_dims(gnn_val_data, axis=0)
    y_train_arr_batched = np.expand_dims(y_train_arr, axis=0)
    y_val_arr_batched = np.expand_dims(y_val_arr, axis=0)

    # Build and compile a fresh model for each fold
    mlp_input_dim = mlp_train_scaled.shape[1]
    model = build_stacking_ensemble_model(mlp_input_dim)

    # Train the model for the current fold
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True
    )
    history = model.fit(
        x=[mlp_train_data_batched, gnn_train_data_batched],
        y=y_train_arr_batched,
        epochs=100,
        verbose=1,
        callbacks=[early_stopping],
        validation_data=([mlp_val_data_batched, gnn_val_data_batched], y_val_arr_batched)
    )
    # Evaluate the model on the validation data for the current fold
    y_pred_val = model.predict([mlp_val_data_batched, gnn_val_data_batched]).flatten()

    r2 = r2_score(y_val_arr, y_pred_val)
    mae = mean_absolute_error(y_val_arr, y_pred_val)
    rmse = np.sqrt(mean_squared_error(y_val_arr, y_pred_val))
    smp = smape(y_val_arr, y_pred_val)

    print(f"R²: {r2:.4f} | MAE: {mae:.4f} | RMSE: {rmse:.4f} | SMAPE: {smp:.4f}")
    r2_scores.append(r2)
    mae_scores.append(mae)
    rmse_scores.append(rmse)
    smape_scores.append(smp)
    # Clean up to free memory
    del model, history
    gc.collect()


Note: Raster data is not used in this Stacking GNN ensemble model.

Running 5-Fold Cross-Validation for Stacking GNN Ensemble Model

--- Fold 1/5 ---
Epoch 1/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 1s/step - loss: 32703.6328 - val_loss: 32832.0117
Epoch 2/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step - loss: 32371.5293 - val_loss: 32439.4434
Epoch 3/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step - loss: 31885.8809 - val_loss: 31943.7070
Epoch 4/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step - loss: 31634.5703 - val_loss: 31450.5020
Epoch 5/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step - loss: 31345.5625 - val_loss: 30959.2422
Epoch 6/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step - loss: 30915.7891 - val_loss: 30397.7070
Epoch 7/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step - loss: 3072

In [None]:
# Report mean ± standard deviation of every metric collected across the folds
banner = "=" * 80
print("\n" + banner)
print("Average K-Fold Cross-Validation Metrics")
print(banner)
for metric_label, fold_values in (
    ("R²", r2_scores),
    ("MAE", mae_scores),
    ("RMSE", rmse_scores),
    ("SMAPE", smape_scores),
):
    print(f"Average {metric_label}: {np.mean(fold_values):.4f} ± {np.std(fold_values):.4f}")

# ==================== Final Model Training and Evaluation ==================== #
separator = "=" * 80
print("\n" + separator)
print("Training final model on all combined data and evaluating on test set")
print(separator)

# Final training uses every row of `train_combined`: fit a fresh scaler on
# it and build the RBF adjacency from all of its coordinates.
scaler_final = StandardScaler()
X_combined_scaled = scaler_final.fit_transform(X_combined[numeric_cols])
y_combined_arr = y_combined.values

coords_combined = train_combined[['Long', 'Lat']].values
dist_mat_combined = distance_matrix(coords_combined, coords_combined)
gnn_combined_data = np.exp(-dist_mat_combined/10)

# The whole graph is fed as a single batch
mlp_combined_batched = np.expand_dims(X_combined_scaled, axis=0)
gnn_combined_batched = np.expand_dims(gnn_combined_data, axis=0)
y_combined_batched = np.expand_dims(y_combined_arr, axis=0)

# Build and train the final model
model_final = build_stacking_ensemble_model(mlp_input_dim)

# No validation data is passed to this fit, so early stopping can only watch
# the training loss.
early_stopping_final = EarlyStopping(monitor='loss', patience=15, restore_best_weights=True)

model_final.fit(
    x=[mlp_combined_batched, gnn_combined_batched],
    y=y_combined_batched,
    epochs=100,
    callbacks=[early_stopping_final],
    verbose=1,
)

# Build the test inputs from `test_orig`: features scaled with the final
# scaler, RBF adjacency from the test coordinates, targets from 'RI'.
y_test_arr = test_orig['RI'].values
mlp_test_scaled = scaler_final.transform(test_orig[numeric_cols])
coords_test = test_orig[['Long', 'Lat']].values
dist_mat_test = distance_matrix(coords_test, coords_test)
gnn_test_data = np.exp(-dist_mat_test/10)

# Single-batch versions for prediction
mlp_test_data_batched = np.expand_dims(mlp_test_scaled, axis=0)
gnn_test_data_batched = np.expand_dims(gnn_test_data, axis=0)

# Score the final model on the test graph
y_pred_test = model_final.predict([mlp_test_data_batched, gnn_test_data_batched]).flatten()
r2_test = r2_score(y_test_arr, y_pred_test)
mae_test = mean_absolute_error(y_test_arr, y_pred_test)
rmse_test = np.sqrt(mean_squared_error(y_test_arr, y_pred_test))
smape_test = smape(y_test_arr, y_pred_test)

print("\nFinal Model Performance on Test Data:")
for metric_label, metric_value in (
    ("R²", r2_test),
    ("MAE", mae_test),
    ("RMSE", rmse_test),
    ("SMAPE", smape_test),
):
    print(f"{metric_label} Test: {metric_value:.4f}")

# ==================== 7. Permutation Importance and Saving Results ==================== #

# Feature importance on the test data. The adjacency is forwarded to the
# model unchanged, so pass the RBF adjacency used for the test predictions
# (`gnn_test_data`, un-batched) rather than the raw distances `dist_mat_test`.
importance_results = calculate_permutation_importance(
    model=model_final,
    mlp_data=mlp_test_scaled,
    gnn_data=gnn_test_data,
    y_true=y_test_arr,
    feature_names=numeric_cols
)


In [3]:
import pandas as pd
import numpy as np
import glob
import os
import rasterio
from rasterio.windows import Window
from scipy.spatial import distance_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate, Dropout, Layer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import gc # Import garbage collector

# Buffer radius (in meters) shared by the whole analysis.
BUFFER_METERS = 500

# ==================== 1. Load Data ==================== #
# Both CSV paths are resolved relative to the directory the notebook runs in;
# adjust them if the notebook is executed from somewhere else.
orig = pd.read_csv("../../data/RainySeason.csv")
river_100 = pd.read_csv("../data/Samples_100.csv")

# Identifier / location columns are not model features, and 'RI' is the target.
drop_cols = ['Stations', 'River', 'Lat', 'Long', 'geometry']
numeric_cols = orig.columns.drop(drop_cols).drop('RI')

# Train-test split: 10 reproducibly sampled rows of the original table join
# the river samples for training; every other original row is held out.
train_orig = orig.sample(n=10, random_state=42)
test_orig = orig.drop(index=train_orig.index)
train_combined = pd.concat([river_100, train_orig], ignore_index=True)

# ==================== 2. Collect ALL Rasters ==================== #
# Folders are scanned in a fixed order so the path list keeps the same
# grouping: CalIndices first, then LULCMerged, then IDW.
raster_paths = [
    tif_path
    for folder_pattern in (
        "../CalIndices/*.tif",
        "../LULCMerged/*.tif",
        "../IDW/*.tif",
    )
    for tif_path in glob.glob(folder_pattern)
]

print("Note: Raster data is now included in the GNN ensemble model.")

# ==================== 4. Define GNN & MLP Custom Layers ==================== #

class GCNLayer(Layer):
    """
    Graph-convolution layer driven by a pre-computed similarity matrix.

    The layer is called on a pair ``[node_features, adjacency]`` and computes
    ``activation((adjacency @ node_features) @ kernel)``: every node first
    gathers a similarity-weighted sum of all node features, then the result is
    projected to ``units`` channels.

    The adjacency is used exactly as supplied; no degree normalisation is
    applied inside the layer.

    Args:
        units: Number of output channels per node.
        activation: Name (or callable) of the activation applied to the output.
        **kwargs: Forwarded to the Keras ``Layer`` constructor (e.g. ``name``).
    """
    def __init__(self, units, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # input_shape is a list of two shapes: [(batch, nodes, features), (batch, nodes, nodes)]
        feature_shape, _adjacency_shape = input_shape
        self.kernel = self.add_weight(
            shape=(feature_shape[-1], self.units),
            initializer="glorot_uniform",
            trainable=True,
            name="kernel",
        )
        super().build(input_shape)

    def call(self, inputs):
        node_features, adjacency = inputs
        # Neighbourhood aggregation: (B, N, N) x (B, N, F) -> (B, N, F)
        aggregated_features = tf.matmul(adjacency, node_features)
        # Linear projection: (B, N, F) x (F, U) -> (B, N, U)
        projected = tf.matmul(aggregated_features, self.kernel)
        return self.activation(projected)

    def get_config(self):
        # Expose the constructor arguments so a model containing this layer
        # can be saved and re-created from its config.
        config = super().get_config()
        config.update({
            "units": self.units,
            "activation": tf.keras.activations.serialize(self.activation),
        })
        return config

class GATLayer(Layer):
    """
    Multi-head graph-attention layer.

    The layer is called on a pair ``[node_features, adjacency]``. Node features
    are projected to ``num_heads`` groups of ``units`` channels; for every head
    an additive attention score is computed for each (target, source) node
    pair, softmax-normalised over the source nodes, and used to average the
    projected source features. The heads are concatenated, so the output has
    ``num_heads * units`` channels per node.

    The two attention vectors (``kernel_a_1`` / ``kernel_a_2``) have shape
    ``(units, 1)`` and are shared by all heads.

    ``adjacency`` only decides *which* pairs may attend to each other: any
    strictly positive entry counts as an edge, everything else is masked out.
    A 0/1 adjacency therefore behaves as a classic edge mask, and a dense
    similarity matrix (such as ``exp(-distance / 10)``) yields attention over
    all nodes instead of collapsing onto the diagonal.

    Args:
        units: Channels per attention head.
        num_heads: Number of attention heads.
        activation: Name (or callable) of the activation applied to the output.
        **kwargs: Forwarded to the Keras ``Layer`` constructor (e.g. ``name``).
    """
    def __init__(self, units, num_heads=4, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.num_heads = num_heads
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        feature_shape, _adjacency_shape = input_shape
        # Feature projection for all heads at once: (F, H * U)
        self.kernel_f = self.add_weight(
            shape=(feature_shape[-1], self.units * self.num_heads),
            initializer="glorot_uniform",
            trainable=True,
            name="kernel_f",
        )
        # Attention vectors: kernel_a_1 scores the row (target) node,
        # kernel_a_2 scores the column (source) node.
        self.kernel_a_1 = self.add_weight(
            shape=(self.units, 1),
            initializer="glorot_uniform",
            trainable=True,
            name="kernel_a_1",
        )
        self.kernel_a_2 = self.add_weight(
            shape=(self.units, 1),
            initializer="glorot_uniform",
            trainable=True,
            name="kernel_a_2",
        )
        super().build(input_shape)

    def call(self, inputs):
        mlp_input, gnn_input = inputs
        num_nodes = tf.shape(mlp_input)[1]

        # Linear transformation: (B, N, F) x (F, H*U) -> (B, N, H*U)
        features = tf.matmul(mlp_input, self.kernel_f)

        # Split the channel axis into heads: (B, N, H, U), then move the head
        # axis forward to (B, H, N, U) so every head is an independent batch.
        features_heads = tf.reshape(
            features, (-1, num_nodes, self.num_heads, self.units))
        features_heads_t = tf.transpose(features_heads, perm=[0, 2, 1, 3])

        # Per-node score contributions, each of shape (B, H, N, 1)
        e_input_1 = tf.matmul(features_heads_t, self.kernel_a_1)
        e_input_2_pre_t = tf.matmul(features_heads_t, self.kernel_a_2)
        # (B, H, N, 1) -> (B, H, 1, N) so the sum below broadcasts to (B, H, N, N)
        e_input_2 = tf.transpose(e_input_2_pre_t, perm=[0, 1, 3, 2])

        # e[b, h, i, j] = leaky_relu(score_1(i) + score_2(j))
        e = tf.nn.leaky_relu(e_input_1 + e_input_2, alpha=0.2)

        # Mask non-existent edges. The adjacency is binarised first: feeding
        # continuous similarities into -1e9 * (1 - a) would put a huge penalty
        # on every entry that is not exactly 1 and leave only self-attention.
        edge_exists = tf.cast(gnn_input > 0, e.dtype)
        # (B, N, N) -> (B, 1, N, N) so the mask broadcasts over the heads
        mask = -1e9 * (1.0 - tf.expand_dims(edge_exists, axis=1))
        attention_scores = e + mask

        # Softmax normalization across source nodes (the last axis)
        attention = tf.nn.softmax(attention_scores, axis=-1)

        # Aggregate features: (B, H, N, N) x (B, H, N, U) -> (B, H, N, U)
        aggregated_features = tf.matmul(attention, features_heads_t)

        # Move the head axis back next to the channel axis before merging.
        # Reshaping (B, H, N, U) directly to (B, N, H*U) would interleave
        # values that belong to different nodes.
        aggregated_features = tf.transpose(aggregated_features, perm=[0, 2, 1, 3])
        output = tf.reshape(
            aggregated_features, (-1, num_nodes, self.units * self.num_heads))
        return self.activation(output)

    def get_config(self):
        # Expose the constructor arguments so a model containing this layer
        # can be saved and re-created from its config.
        config = super().get_config()
        config.update({
            "units": self.units,
            "num_heads": self.num_heads,
            "activation": tf.keras.activations.serialize(self.activation),
        })
        return config

def build_simplified_gnn_model(mlp_dim, *, gcn_units=64, gat_units=32, num_heads=4,
                               dropout_rate=0.2, learning_rate=0.001):
    """
    Builds and compiles a simplified GNN model that merges GCN and GAT features early.

    One GCN layer and one GAT layer read the same node features and adjacency.
    Their outputs are concatenated with the raw node features and passed
    through a small dense head that emits one value per node. The model is
    compiled with Adam and mean-squared-error loss.

    Args:
        mlp_dim: Number of input features per node.
        gcn_units: Output channels of the GCN branch.
        gat_units: Channels per attention head in the GAT branch.
        num_heads: Number of attention heads in the GAT branch.
        dropout_rate: Dropout rate applied after the first dense layer.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` taking ``[mlp_input, gnn_input]`` with
        shapes ``(batch, nodes, mlp_dim)`` and ``(batch, nodes, nodes)`` and
        returning ``(batch, nodes, 1)``.

    The keyword defaults reproduce the previously hard-coded architecture.
    """
    # The node count is left unspecified so graphs of any size can be fed.
    mlp_input = Input(shape=(None, mlp_dim), name="mlp_input")
    gnn_input = Input(shape=(None, None), name="gnn_input")

    # --- GCN Feature Branch ---
    gcn_features = GCNLayer(gcn_units, name="gcn_layer_1")([mlp_input, gnn_input])

    # --- GAT Feature Branch ---
    gat_features = GATLayer(gat_units, num_heads=num_heads, name="gat_layer_1")(
        [mlp_input, gnn_input])

    # --- Combined Prediction Head ---
    # Raw node features are kept alongside both graph branches.
    combined_features = Concatenate(name="combined_features")(
        [mlp_input, gcn_features, gat_features])

    # Dense layers act on the last axis, i.e. independently on every node.
    prediction_head = Dense(32, activation="relu", name="dense_1")(combined_features)
    prediction_head = Dropout(dropout_rate)(prediction_head)
    prediction_head = Dense(16, activation="relu", name="dense_2")(prediction_head)

    # Final prediction layer: one output per node
    final_output = Dense(1, activation="linear", name="final_output")(prediction_head)

    # Build and compile the model
    model = Model(inputs=[mlp_input, gnn_input], outputs=final_output)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss="mse")
    return model

# Function to calculate SMAPE
def smape(y_true, y_pred):
    """
    Calculates the Symmetric Mean Absolute Percentage Error (SMAPE), in percent.

    Each sample contributes ``|y_pred - y_true| / ((|y_true| + |y_pred|) / 2)``;
    the mean of these ratios is multiplied by 100.

    Args:
        y_true: Array-like of observed values (list, tuple, ndarray, Series...).
        y_pred: Array-like of predicted values, broadcastable against y_true.

    Returns:
        The SMAPE as a float in [0, 200], or NaN when the input is empty.
    """
    # Coerce to arrays so plain Python sequences work too; elementwise
    # arithmetic on lists would raise a TypeError.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    abs_error = np.abs(y_pred - y_true)
    if abs_error.size == 0:
        # Nothing to average; avoid NumPy's "mean of empty slice" warning.
        return np.nan

    scale = (np.abs(y_true) + np.abs(y_pred)) / 2
    # The scale is zero only when both values are zero, in which case the
    # error is zero as well; a tiny stand-in denominator makes that sample
    # count as 0% instead of producing 0/0.
    safe_scale = np.where(scale == 0, 1e-10, scale)
    return np.mean(abs_error / safe_scale) * 100

def extract_raster_features(df, raster_paths):
    """
    Samples every raster in ``raster_paths`` at the locations listed in ``df``.

    ``df`` must provide 'Long' and 'Lat' columns. For each raster the first
    value of every sample is kept, and the values are stored in a column named
    after the raster file (base name without extension). Any exception raised
    while handling a raster is reported with a warning and that raster is
    skipped, so the result may contain fewer columns than there are paths.

    NOTE(review): the coordinates are handed to the raster as (Long, Lat)
    without any reprojection, which presumably requires the rasters to be in
    the same coordinate system as the table -- confirm.

    Returns a DataFrame with one column per successfully sampled raster.
    """
    raster_features = pd.DataFrame()
    for path in raster_paths:
        try:
            with rasterio.open(path) as dataset:
                # (x, y) pairs, one per row of df
                points = df[['Long', 'Lat']].values

                # Keep only the first value of each sample
                first_values = []
                for sample in dataset.sample(points):
                    first_values.append(sample[0])

                # Column name = file name without directory or extension
                file_name = os.path.basename(path)
                column_name, _extension = os.path.splitext(file_name)
                raster_features[column_name] = first_values
        except Exception as e:
            print(f"Warning: Could not read raster {path}. Error: {e}")

    return raster_features

# ==================== Run the Analysis ==================== #
print("\n" + "="*80)
print(f"Analyzing Simplified GNN Ensemble Model with Raster Data")
print("="*80)

# Carve a hold-out "test" part of size len(test_orig) out of train_combined;
# the remainder (train + validation) is split again further below.
# The same random_state is used for both calls so that the feature rows and
# the 'RI' targets are split identically.
# NOTE(review): the hold-out rows come from train_combined (river samples plus
# the 10 sampled original rows); test_orig itself is only used for its length
# here. Confirm that the "Test" metrics are meant to be computed on this
# internal hold-out rather than on test_orig.
mlp_train_val, mlp_test = train_test_split(train_combined, test_size=len(test_orig), random_state=42)
y_train_val, y_test = train_test_split(train_combined['RI'], test_size=len(test_orig), random_state=42)

# ==================== 3. Prepare GNN & MLP Input (with rasters) ==================== #
# Extract raster features for each dataset
print("\nExtracting raster data for each sample...")
raster_train_combined = extract_raster_features(mlp_train_val, raster_paths)
raster_test_orig = extract_raster_features(mlp_test, raster_paths)

# Concatenate raster features to the original dataframes
# Both sides get a fresh 0..n-1 index so the column-wise concat pairs rows by
# position instead of by the shuffled labels left over from the split.
mlp_train_val = pd.concat([mlp_train_val.reset_index(drop=True), raster_train_combined.reset_index(drop=True)], axis=1)
mlp_test = pd.concat([mlp_test.reset_index(drop=True), raster_test_orig.reset_index(drop=True)], axis=1)

# Replace every NaN in both frames (all columns, not only the raster ones)
# with 0 before scaling.
# NOTE(review): this assumes failed raster lookups show up as NaN. Presumably
# the sampler can also return a raster's nodata value, which would pass
# through here unchanged -- confirm.
mlp_train_val = mlp_train_val.fillna(0)
mlp_test = mlp_test.fillna(0)

# Redefine the full list of features for scaling
# The raster column names are taken from the train/val extraction, so the
# hold-out frame must contain the same raster columns.
full_feature_cols = list(numeric_cols) + list(raster_train_combined.columns)

# Second split: 80% training / 20% validation of the remaining rows.
# mlp_train_val was re-indexed above while y_train_val was not; the split is
# positional, so rows and targets still correspond.
mlp_train, mlp_val, y_train, y_val = train_test_split(
    mlp_train_val, y_train_val, test_size=0.2, random_state=42)

# Now, re-do the distance matrices and scaling with the new splits
coords_train = mlp_train[['Long', 'Lat']].values
coords_val = mlp_val[['Long', 'Lat']].values
coords_test = mlp_test[['Long', 'Lat']].values

# One graph per split: pairwise distances within the split, converted to
# similarities in (0, 1] with an exponential decay.
dist_mat_train = distance_matrix(coords_train, coords_train)
gnn_train_data = np.exp(-dist_mat_train/10)
dist_mat_val = distance_matrix(coords_val, coords_val)
gnn_val_data = np.exp(-dist_mat_val/10)
dist_mat_test = distance_matrix(coords_test, coords_test)
gnn_test_data = np.exp(-dist_mat_test/10)

# Scale the MLP features using StandardScaler. This now includes the raster data.
# The scaler is fitted on the training rows only and then applied unchanged
# to the validation and hold-out rows.
scaler = StandardScaler()
mlp_train_scaled = scaler.fit_transform(mlp_train[full_feature_cols])
mlp_val_scaled = scaler.transform(mlp_val[full_feature_cols])
mlp_test_scaled = scaler.transform(mlp_test[full_feature_cols])

# Convert target data to numpy arrays
y_train_arr = y_train.values
y_val_arr = y_val.values
y_test_arr = y_test.values

# Add a batch dimension to the data since we're using full-graph training
# (each split becomes a single batch element; note that the gnn_*_data names
# are rebound from 2-D to 3-D arrays here).
mlp_train_data = np.expand_dims(mlp_train_scaled, axis=0)
gnn_train_data = np.expand_dims(gnn_train_data, axis=0)
mlp_val_data = np.expand_dims(mlp_val_scaled, axis=0)
gnn_val_data = np.expand_dims(gnn_val_data, axis=0)
mlp_test_data = np.expand_dims(mlp_test_scaled, axis=0)
gnn_test_data = np.expand_dims(gnn_test_data, axis=0)

# Number of per-node input features (tabular + raster columns)
mlp_input_dim = mlp_train_scaled.shape[1]

# Instantiate the combined GCN + GAT network for the current feature width.
model = build_simplified_gnn_model(mlp_input_dim)

# ==================== 5. Train Model ==================== #
# Stop once the validation loss has not improved for 15 epochs and roll the
# weights back to the best epoch seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Targets get the same leading batch axis as the inputs (one graph per batch).
train_inputs = [mlp_train_data, gnn_train_data]
train_targets = np.expand_dims(y_train_arr, axis=0)
val_inputs = [mlp_val_data, gnn_val_data]
val_targets = np.expand_dims(y_val_arr, axis=0)

history = model.fit(
    x=train_inputs,
    y=train_targets,
    validation_data=(val_inputs, val_targets),
    epochs=100,
    callbacks=[early_stopping],
    verbose=1,
)

# ==================== 6. Evaluate ==================== #
def _score_split(node_features, adjacency, targets):
    """Predict one split with `model` and return (predictions, R², RMSE, MAE, SMAPE)."""
    predictions = model.predict([node_features, adjacency]).flatten()
    r2 = r2_score(targets, predictions)
    rmse = np.sqrt(mean_squared_error(targets, predictions))
    mae = mean_absolute_error(targets, predictions)
    return predictions, r2, rmse, mae, smape(targets, predictions)


# Training split
y_pred_train, r2_train, rmse_train, mae_train, smape_train = _score_split(
    mlp_train_data, gnn_train_data, y_train_arr)

# Validation split
y_pred_val, r2_val, rmse_val, mae_val, smape_val = _score_split(
    mlp_val_data, gnn_val_data, y_val_arr)

# Test split
y_pred_test, r2_test, rmse_test, mae_test, smape_test = _score_split(
    mlp_test_data, gnn_test_data, y_test_arr)


# One summary line per split
print(f"\n Simplified GNN Ensemble Model Performance:")
print(f"R² Train: {r2_train:.4f} | RMSE Train: {rmse_train:.4f} | MAE Train: {mae_train:.4f} | SMAPE Train: {smape_train:.2f}%")
print(f"R² Val: {r2_val:.4f} | RMSE Val: {rmse_val:.4f} | MAE Val: {mae_val:.4f} | SMAPE Val: {smape_val:.2f}%")
print(f"R² Test: {r2_test:.4f} | RMSE Test: {rmse_test:.4f} | MAE Test: {mae_test:.4f} | SMAPE Test: {smape_test:.2f}%")


Note: Raster data is now included in the GNN ensemble model.

Analyzing Simplified GNN Ensemble Model with Raster Data

Extracting raster data for each sample...
Epoch 1/100


  mlp_test = mlp_test.fillna(0)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 667ms/step - loss: 35123.9727 - val_loss: 30102.9668
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 35049.0273 - val_loss: 29947.2734
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 34953.6875 - val_loss: 29781.5098
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 34859.7148 - val_loss: 29606.0664
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 34772.3711 - val_loss: 29421.4648
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 34672.6875 - val_loss: 29226.7148
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 34598.1406 - val_loss: 29022.7109
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 34492.1211 - val_loss: 28806.6699
Epo