# PR

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor  # Import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# Load the data
file_path = "PR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['PR']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
models = [
    Ridge(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    AdaBoostRegressor(),
    SVR(),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=1000)  # Adding MLPRegressor with default parameters
]

# Define hyperparameters to search for each model
param_grid = {
    'Ridge': {'ridge__alpha': [0.1, 0.5, 1.0]},
    'GradientBoostingRegressor': {},
    'AdaBoostRegressor': {},
    'SVR': {},
    'KNeighborsRegressor': {},
    'MLPRegressor': {}  # Add hyperparameters for tuning MLPRegressor if needed
}

# Train and tune hyperparameters for each model
best_models = {}

for model in models:
    model_name = model.__class__.__name__
    pipeline = make_pipeline(StandardScaler(), model)  # Scale features for some models
    
    # Perform grid search for hyperparameters
    if model_name in param_grid:
        grid_search = GridSearchCV(pipeline, param_grid[model_name], cv=5, scoring='neg_mean_squared_error')
        grid_search.fit(X_train, y_train)
        best_models[model_name] = grid_search.best_estimator_
        print(f"Best hyperparameters for {model_name}: {grid_search.best_params_}")
    else:
        pipeline.fit(X_train, y_train)
        best_models[model_name] = pipeline

# Make predictions using the best models
for model_name, model in best_models.items():
    print(f"Model: {model_name}")
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Show real and predicted results for the first 5 samples
    for i in range(5):
        print(f"Sample {i+1}: Real SR = {y_test.iloc[i]}, Predicted SR = {y_pred[i]}")
    
    # Calculate and print RMSE
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(f"RMSE: {rmse}")
    
    print("\n")


Best hyperparameters for Ridge: {'ridge__alpha': 1.0}
Best hyperparameters for GradientBoostingRegressor: {}
Best hyperparameters for AdaBoostRegressor: {}
Best hyperparameters for SVR: {}
Best hyperparameters for KNeighborsRegressor: {}




Best hyperparameters for MLPRegressor: {}
Model: Ridge
Sample 1: Real SR = 4.1, Predicted SR = 5.560384929531084
Sample 2: Real SR = 4.95, Predicted SR = 3.233816404026099
Sample 3: Real SR = 3.658444467902927, Predicted SR = 6.004431625244407
Sample 4: Real SR = 2.408991972171535, Predicted SR = 4.623760874904971
Sample 5: Real SR = 5.13, Predicted SR = 6.255870188956573
RMSE: 1.992014607476453


Model: DecisionTreeRegressor
Sample 1: Real SR = 4.1, Predicted SR = 4.07
Sample 2: Real SR = 4.95, Predicted SR = 3.243721923126547
Sample 3: Real SR = 3.658444467902927, Predicted SR = 10.0
Sample 4: Real SR = 2.408991972171535, Predicted SR = 5.67
Sample 5: Real SR = 5.13, Predicted SR = 5.18
RMSE: 2.64936863511727


Model: RandomForestRegressor
Sample 1: Real SR = 4.1, Predicted SR = 4.030291428772503
Sample 2: Real SR = 4.95, Predicted SR = 4.047541511128539
Sample 3: Real SR = 3.658444467902927, Predicted SR = 6.926092053941104
Sample 4: Real SR = 2.408991972171535, Predicted SR = 5.597



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf

# Load the data
file_path = "PR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['PR']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the TensorFlow model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse}")

# Show real and predicted results for the first 5 samples
print("Sample predictions:")
for i in range(5):
    print(f"Sample {i+1}: Real WS = {y_test.iloc[i]}, Predicted WS = {y_pred[i][0]}")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - loss: 51.4010 - val_loss: 30.5699
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 49.8197 - val_loss: 28.2191
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 41.2999 - val_loss: 22.0039
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 30.8032 - val_loss: 10.6108
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 13.0344 - val_loss: 12.6436
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 13.8067 - val_loss: 6.5631
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 6.9509 - val_loss: 5.7234
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 5.8271 - val_loss: 5.3273
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

# PR Benefit

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor  # Import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# Load the data
file_path = "PR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['PR_Benefit']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
models = [
    Ridge(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    AdaBoostRegressor(),
    SVR(),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=1000)  # Adding MLPRegressor with default parameters
]

# Define hyperparameters to search for each model
param_grid = {
    'Ridge': {'ridge__alpha': [0.1, 0.5, 1.0]},
    'GradientBoostingRegressor': {},
    'AdaBoostRegressor': {},
    'SVR': {},
    'KNeighborsRegressor': {},
    'MLPRegressor': {}  # Add hyperparameters for tuning MLPRegressor if needed
}

# Train and tune hyperparameters for each model
best_models = {}

for model in models:
    model_name = model.__class__.__name__
    pipeline = make_pipeline(StandardScaler(), model)  # Scale features for some models
    
    # Perform grid search for hyperparameters
    if model_name in param_grid:
        grid_search = GridSearchCV(pipeline, param_grid[model_name], cv=5, scoring='neg_mean_squared_error')
        grid_search.fit(X_train, y_train)
        best_models[model_name] = grid_search.best_estimator_
        print(f"Best hyperparameters for {model_name}: {grid_search.best_params_}")
    else:
        pipeline.fit(X_train, y_train)
        best_models[model_name] = pipeline

# Make predictions using the best models
for model_name, model in best_models.items():
    print(f"Model: {model_name}")
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Show real and predicted results for the first 5 samples
    for i in range(5):
        print(f"Sample {i+1}: Real SR = {y_test.iloc[i]}, Predicted SR = {y_pred[i]}")
    
    # Calculate and print RMSE
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(f"RMSE: {rmse}")
    
    print("\n")


Best hyperparameters for Ridge: {'ridge__alpha': 1.0}
Best hyperparameters for GradientBoostingRegressor: {}
Best hyperparameters for AdaBoostRegressor: {}
Best hyperparameters for SVR: {}
Best hyperparameters for KNeighborsRegressor: {}




Best hyperparameters for MLPRegressor: {}
Model: Ridge
Sample 1: Real SR = 10.0, Predicted SR = 3.687516029398416
Sample 2: Real SR = 2.99, Predicted SR = 5.395610626275099
Sample 3: Real SR = 7.217671707260766, Predicted SR = 4.675305750622234
Sample 4: Real SR = 5.739039593674248, Predicted SR = 6.6170622110723265
Sample 5: Real SR = 6.41, Predicted SR = 3.5781275389527982
RMSE: 3.640196460069589


Model: DecisionTreeRegressor
Sample 1: Real SR = 10.0, Predicted SR = 10.0
Sample 2: Real SR = 2.99, Predicted SR = 6.57
Sample 3: Real SR = 7.217671707260766, Predicted SR = 10.0
Sample 4: Real SR = 5.739039593674248, Predicted SR = 10.0
Sample 5: Real SR = 6.41, Predicted SR = 0.0
RMSE: 4.97974036938447


Model: RandomForestRegressor
Sample 1: Real SR = 10.0, Predicted SR = 6.464243454923231
Sample 2: Real SR = 2.99, Predicted SR = 5.8658142807671405
Sample 3: Real SR = 7.217671707260766, Predicted SR = 5.131062602859451
Sample 4: Real SR = 5.739039593674248, Predicted SR = 3.28452530960



In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf

# Load the data
file_path = "PR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['PR_Benefit']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the TensorFlow model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse}")

# Show real and predicted results for the first 5 samples
print("Sample predictions:")
for i in range(5):
    print(f"Sample {i+1}: Real WS = {y_test.iloc[i]}, Predicted WS = {y_pred[i][0]}")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 25.1779 - val_loss: 44.4219
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 25.6706 - val_loss: 40.3633
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 24.9145 - val_loss: 31.1768
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 17.3544 - val_loss: 23.3201
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 14.2128 - val_loss: 24.6675
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 12.1808 - val_loss: 24.0924
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 11.9664 - val_loss: 24.1786
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 11.1872 - val_loss: 25.3155
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m