# SR

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor  # Import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# Load the data
file_path = "SR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['SR']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
models = [
    Ridge(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    AdaBoostRegressor(),
    SVR(),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=1000)  # Adding MLPRegressor with default parameters
]

# Define hyperparameters to search for each model
param_grid = {
    'Ridge': {'ridge__alpha': [0.1, 0.5, 1.0]},
    'GradientBoostingRegressor': {},
    'AdaBoostRegressor': {},
    'SVR': {},
    'KNeighborsRegressor': {},
    'MLPRegressor': {}  # Add hyperparameters for tuning MLPRegressor if needed
}

# Train and tune hyperparameters for each model
best_models = {}

for model in models:
    model_name = model.__class__.__name__
    pipeline = make_pipeline(StandardScaler(), model)  # Scale features for some models
    
    # Perform grid search for hyperparameters
    if model_name in param_grid:
        grid_search = GridSearchCV(pipeline, param_grid[model_name], cv=5, scoring='neg_mean_squared_error')
        grid_search.fit(X_train, y_train)
        best_models[model_name] = grid_search.best_estimator_
        print(f"Best hyperparameters for {model_name}: {grid_search.best_params_}")
    else:
        pipeline.fit(X_train, y_train)
        best_models[model_name] = pipeline

# Make predictions using the best models
for model_name, model in best_models.items():
    print(f"Model: {model_name}")
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Show real and predicted results for the first 5 samples
    for i in range(5):
        print(f"Sample {i+1}: Real SR = {y_test.iloc[i]}, Predicted SR = {y_pred[i]}")
    
    # Calculate and print RMSE
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(f"RMSE: {rmse}")
    
    print("\n")


Best hyperparameters for Ridge: {'ridge__alpha': 1.0}
Best hyperparameters for GradientBoostingRegressor: {}
Best hyperparameters for AdaBoostRegressor: {}
Best hyperparameters for SVR: {}
Best hyperparameters for KNeighborsRegressor: {}




Best hyperparameters for MLPRegressor: {}
Model: Ridge
Sample 1: Real SR = 3.14, Predicted SR = 4.886618799911994
Sample 2: Real SR = 1.95, Predicted SR = 2.432774872830897
Sample 3: Real SR = 1.071415011787847, Predicted SR = 2.72497990132601
Sample 4: Real SR = 3.073673531374684, Predicted SR = 4.705534557114696
Sample 5: Real SR = 1.55, Predicted SR = 4.554366071870147
RMSE: 2.0174905910083494


Model: DecisionTreeRegressor
Sample 1: Real SR = 3.14, Predicted SR = 2.31
Sample 2: Real SR = 1.95, Predicted SR = 1.74
Sample 3: Real SR = 1.071415011787847, Predicted SR = 3.98
Sample 4: Real SR = 3.073673531374684, Predicted SR = 10.0
Sample 5: Real SR = 1.55, Predicted SR = 2.12
RMSE: 3.092262139303616


Model: RandomForestRegressor
Sample 1: Real SR = 3.14, Predicted SR = 3.4076319285993217
Sample 2: Real SR = 1.95, Predicted SR = 1.9437131145229185
Sample 3: Real SR = 1.071415011787847, Predicted SR = 2.642739919633944
Sample 4: Real SR = 3.073673531374684, Predicted SR = 6.3572101474



In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf

# Load the data
file_path = "SR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['SR']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the TensorFlow model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse}")

# Show real and predicted results for the first 5 samples
print("Sample predictions:")
for i in range(5):
    print(f"Sample {i+1}: Real WS = {y_test.iloc[i]}, Predicted WS = {y_pred[i][0]}")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - loss: 27.4112 - val_loss: 17.2367
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 24.3209 - val_loss: 11.9713
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 17.1867 - val_loss: 12.3347
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 11.8343 - val_loss: 11.2618
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 8.6968 - val_loss: 9.8816
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 10.0480 - val_loss: 10.2922
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 7.8018 - val_loss: 11.8472
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 7.0832 - val_loss: 10.6977
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

# SR Benefit

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor  # Import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# Load the data
file_path = "SR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['SR_Benefit']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
models = [
    Ridge(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    AdaBoostRegressor(),
    SVR(),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=1000)  # Adding MLPRegressor with default parameters
]

# Define hyperparameters to search for each model
param_grid = {
    'Ridge': {'ridge__alpha': [0.1, 0.5, 1.0]},
    'GradientBoostingRegressor': {},
    'AdaBoostRegressor': {},
    'SVR': {},
    'KNeighborsRegressor': {},
    'MLPRegressor': {}  # Add hyperparameters for tuning MLPRegressor if needed
}

# Train and tune hyperparameters for each model
best_models = {}

for model in models:
    model_name = model.__class__.__name__
    pipeline = make_pipeline(StandardScaler(), model)  # Scale features for some models
    
    # Perform grid search for hyperparameters
    if model_name in param_grid:
        grid_search = GridSearchCV(pipeline, param_grid[model_name], cv=5, scoring='neg_mean_squared_error')
        grid_search.fit(X_train, y_train)
        best_models[model_name] = grid_search.best_estimator_
        print(f"Best hyperparameters for {model_name}: {grid_search.best_params_}")
    else:
        pipeline.fit(X_train, y_train)
        best_models[model_name] = pipeline

# Make predictions using the best models
for model_name, model in best_models.items():
    print(f"Model: {model_name}")
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Show real and predicted results for the first 5 samples
    for i in range(5):
        print(f"Sample {i+1}: Real SR = {y_test.iloc[i]}, Predicted SR = {y_pred[i]}")
    
    # Calculate and print RMSE
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(f"RMSE: {rmse}")
    
    print("\n")


Best hyperparameters for Ridge: {'ridge__alpha': 1.0}
Best hyperparameters for GradientBoostingRegressor: {}
Best hyperparameters for AdaBoostRegressor: {}
Best hyperparameters for SVR: {}
Best hyperparameters for KNeighborsRegressor: {}




Best hyperparameters for MLPRegressor: {}
Model: Ridge
Sample 1: Real SR = 10.0, Predicted SR = 2.839159450397456
Sample 2: Real SR = 0.0, Predicted SR = 3.4942335530140616
Sample 3: Real SR = 5.045325726224918, Predicted SR = 2.472515056111075
Sample 4: Real SR = 4.077743931051089, Predicted SR = 4.672885015114274
Sample 5: Real SR = 4.54, Predicted SR = 2.7112895265777626
RMSE: 2.7649693835829985


Model: DecisionTreeRegressor
Sample 1: Real SR = 10.0, Predicted SR = 4.671852148658565
Sample 2: Real SR = 0.0, Predicted SR = 4.62
Sample 3: Real SR = 5.045325726224918, Predicted SR = 4.78
Sample 4: Real SR = 4.077743931051089, Predicted SR = 0.0
Sample 5: Real SR = 4.54, Predicted SR = 0.0
RMSE: 4.061723466680952


Model: RandomForestRegressor
Sample 1: Real SR = 10.0, Predicted SR = 4.65309123617293
Sample 2: Real SR = 0.0, Predicted SR = 4.32098120066949
Sample 3: Real SR = 5.045325726224918, Predicted SR = 2.5666383891180904
Sample 4: Real SR = 4.077743931051089, Predicted SR = 2.31



In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf

# Load the data
file_path = "SR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['SR_Benefit']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the TensorFlow model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse}")

# Show real and predicted results for the first 5 samples
print("Sample predictions:")
for i in range(5):
    print(f"Sample {i+1}: Real WS = {y_test.iloc[i]}, Predicted WS = {y_pred[i][0]}")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 33ms/step - loss: 15.1896 - val_loss: 17.0757
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 14.7350 - val_loss: 14.2639
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 15.1986 - val_loss: 9.3736
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 10.2663 - val_loss: 7.2975
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 10.0359 - val_loss: 7.3502
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 8.0080 - val_loss: 7.9843
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 8.3188 - val_loss: 8.1542
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 7.7510 - val_loss: 7.9650
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0