# WS

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor  # Import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# Load the data
file_path = "data_ra_norm_filled.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['WS']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
models = [
    Ridge(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    AdaBoostRegressor(),
    SVR(),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=1000)  # Adding MLPRegressor with default parameters
]

# Define hyperparameters to search for each model
param_grid = {
    'Ridge': {'ridge__alpha': [0.1, 0.5, 1.0]},
    'GradientBoostingRegressor': {},
    'AdaBoostRegressor': {},
    'SVR': {},
    'KNeighborsRegressor': {},
    'MLPRegressor': {}  # Add hyperparameters for tuning MLPRegressor if needed
}

# Train and tune hyperparameters for each model
best_models = {}

for model in models:
    model_name = model.__class__.__name__
    pipeline = make_pipeline(StandardScaler(), model)  # Scale features for some models
    
    # Perform grid search for hyperparameters
    if model_name in param_grid:
        grid_search = GridSearchCV(pipeline, param_grid[model_name], cv=5, scoring='neg_mean_squared_error')
        grid_search.fit(X_train, y_train)
        best_models[model_name] = grid_search.best_estimator_
        print(f"Best hyperparameters for {model_name}: {grid_search.best_params_}")
    else:
        pipeline.fit(X_train, y_train)
        best_models[model_name] = pipeline

# Make predictions using the best models
for model_name, model in best_models.items():
    print(f"Model: {model_name}")
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Show real and predicted results for the first 5 samples
    for i in range(5):
        print(f"Sample {i+1}: Real SR = {y_test.iloc[i]}, Predicted SR = {y_pred[i]}")
    
    # Calculate and print RMSE
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(f"RMSE: {rmse}")
    
    print("\n")


Best hyperparameters for Ridge: {'ridge__alpha': 1.0}
Best hyperparameters for GradientBoostingRegressor: {}
Best hyperparameters for AdaBoostRegressor: {}
Best hyperparameters for SVR: {}
Best hyperparameters for KNeighborsRegressor: {}




Best hyperparameters for MLPRegressor: {}
Model: Ridge
Sample 1: Real SR = 7.080039525691701, Predicted SR = 6.212853517959537
Sample 2: Real SR = 2.4, Predicted SR = 3.414812039080013
Sample 3: Real SR = -0.21, Predicted SR = 5.117461310708389
Sample 4: Real SR = 7.411773009599097, Predicted SR = 4.175634586993846
Sample 5: Real SR = 3.73, Predicted SR = 4.7391035552151655
RMSE: 2.1683879351275652


Model: DecisionTreeRegressor
Sample 1: Real SR = 7.080039525691701, Predicted SR = 4.54
Sample 2: Real SR = 2.4, Predicted SR = 5.12
Sample 3: Real SR = -0.21, Predicted SR = 7.461180124223602
Sample 4: Real SR = 7.411773009599097, Predicted SR = 1.51
Sample 5: Real SR = 3.73, Predicted SR = 6.546442687747036
RMSE: 3.0628751717677822


Model: RandomForestRegressor
Sample 1: Real SR = 7.080039525691701, Predicted SR = 4.586283794466404
Sample 2: Real SR = 2.4, Predicted SR = 4.109103105590062
Sample 3: Real SR = -0.21, Predicted SR = 4.3411033314511585
Sample 4: Real SR = 7.411773009599097,



In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf

# Load the data
file_path = "combined.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['WS']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the TensorFlow model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse}")

# Show real and predicted results for the first 5 samples
print("Sample predictions:")
for i in range(5):
    print(f"Sample {i+1}: Real WS = {y_test.iloc[i]}, Predicted WS = {y_pred[i][0]}")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - loss: 24.4825 - val_loss: 19.1260
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 24.2997 - val_loss: 16.8575
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 21.3540 - val_loss: 11.3443
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 14.0495 - val_loss: 6.9278
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 9.1286 - val_loss: 6.8749
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 7.5504 - val_loss: 5.5046
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 5.2445 - val_loss: 5.5433
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 5.2943 - val_loss: 5.4346
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

# WS Benefit

In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor  # Import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# Load the data
file_path = "WR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['WS_Benefit']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
models = [
    Ridge(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    AdaBoostRegressor(),
    SVR(),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=1000)  # Adding MLPRegressor with default parameters
]

# Define hyperparameters to search for each model
param_grid = {
    'Ridge': {'ridge__alpha': [0.1, 0.5, 1.0]},
    'GradientBoostingRegressor': {},
    'AdaBoostRegressor': {},
    'SVR': {},
    'KNeighborsRegressor': {},
    'MLPRegressor': {}  # Add hyperparameters for tuning MLPRegressor if needed
}

# Train and tune hyperparameters for each model
best_models = {}

for model in models:
    model_name = model.__class__.__name__
    pipeline = make_pipeline(StandardScaler(), model)  # Scale features for some models
    
    # Perform grid search for hyperparameters
    if model_name in param_grid:
        grid_search = GridSearchCV(pipeline, param_grid[model_name], cv=5, scoring='neg_mean_squared_error')
        grid_search.fit(X_train, y_train)
        best_models[model_name] = grid_search.best_estimator_
        print(f"Best hyperparameters for {model_name}: {grid_search.best_params_}")
    else:
        pipeline.fit(X_train, y_train)
        best_models[model_name] = pipeline

# Make predictions using the best models
for model_name, model in best_models.items():
    print(f"Model: {model_name}")
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Show real and predicted results for the first 5 samples
    for i in range(5):
        print(f"Sample {i+1}: Real SR = {y_test.iloc[i]}, Predicted SR = {y_pred[i]}")
    
    # Calculate and print RMSE
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(f"RMSE: {rmse}")
    
    print("\n")


Best hyperparameters for Ridge: {'ridge__alpha': 1.0}
Best hyperparameters for GradientBoostingRegressor: {}
Best hyperparameters for AdaBoostRegressor: {}
Best hyperparameters for SVR: {}
Best hyperparameters for KNeighborsRegressor: {}




Best hyperparameters for MLPRegressor: {}
Model: Ridge
Sample 1: Real SR = 0.0, Predicted SR = -0.16345155598544503
Sample 2: Real SR = 5.79, Predicted SR = 1.7106179560192878
Sample 3: Real SR = 0.0, Predicted SR = 4.088689619228337
Sample 4: Real SR = 0.0, Predicted SR = 1.9023440012585273
Sample 5: Real SR = 3.84, Predicted SR = 1.041113133528506
RMSE: 4.014613865831976


Model: DecisionTreeRegressor
Sample 1: Real SR = 0.0, Predicted SR = 0.0
Sample 2: Real SR = 5.79, Predicted SR = 10.0
Sample 3: Real SR = 0.0, Predicted SR = 0.0
Sample 4: Real SR = 0.0, Predicted SR = 0.0
Sample 5: Real SR = 3.84, Predicted SR = 0.0
RMSE: 5.124595573538296


Model: RandomForestRegressor
Sample 1: Real SR = 0.0, Predicted SR = 0.1
Sample 2: Real SR = 5.79, Predicted SR = 5.145166666666666
Sample 3: Real SR = 0.0, Predicted SR = 5.032640493156883
Sample 4: Real SR = 0.0, Predicted SR = 0.19330118764845608
Sample 5: Real SR = 3.84, Predicted SR = 0.33346904196357885
RMSE: 3.809767152896686


Model: 



In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf

# Load the data
file_path = "WR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['WS_Benefit']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the TensorFlow model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse}")

# Show real and predicted results for the first 5 samples
print("Sample predictions:")
for i in range(5):
    print(f"Sample {i+1}: Real WS = {y_test.iloc[i]}, Predicted WS = {y_pred[i][0]}")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - loss: 9.1573 - val_loss: 14.6555
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 7.3681 - val_loss: 13.7827
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 7.7386 - val_loss: 12.4934
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 7.1489 - val_loss: 12.6660
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 8.2186 - val_loss: 12.9255
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 7.2259 - val_loss: 13.5068
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 6.3695 - val_loss: 14.5614
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 7.0681 - val_loss: 15.8443
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m