# NR

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor  # Import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# Load the data
file_path = "NR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['NR']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
models = [
    Ridge(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    AdaBoostRegressor(),
    SVR(),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=1000)  # Adding MLPRegressor with default parameters
]

# Define hyperparameters to search for each model
param_grid = {
    'Ridge': {'ridge__alpha': [0.1, 0.5, 1.0]},
    'GradientBoostingRegressor': {},
    'AdaBoostRegressor': {},
    'SVR': {},
    'KNeighborsRegressor': {},
    'MLPRegressor': {}  # Add hyperparameters for tuning MLPRegressor if needed
}

# Train and tune hyperparameters for each model
best_models = {}

for model in models:
    model_name = model.__class__.__name__
    pipeline = make_pipeline(StandardScaler(), model)  # Scale features for some models
    
    # Perform grid search for hyperparameters
    if model_name in param_grid:
        grid_search = GridSearchCV(pipeline, param_grid[model_name], cv=5, scoring='neg_mean_squared_error')
        grid_search.fit(X_train, y_train)
        best_models[model_name] = grid_search.best_estimator_
        print(f"Best hyperparameters for {model_name}: {grid_search.best_params_}")
    else:
        pipeline.fit(X_train, y_train)
        best_models[model_name] = pipeline

# Make predictions using the best models
for model_name, model in best_models.items():
    print(f"Model: {model_name}")
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Show real and predicted results for the first 5 samples
    for i in range(5):
        print(f"Sample {i+1}: Real SR = {y_test.iloc[i]}, Predicted SR = {y_pred[i]}")
    
    # Calculate and print RMSE
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(f"RMSE: {rmse}")
    
    print("\n")


Best hyperparameters for Ridge: {'ridge__alpha': 1.0}
Best hyperparameters for GradientBoostingRegressor: {}
Best hyperparameters for AdaBoostRegressor: {}
Best hyperparameters for SVR: {}
Best hyperparameters for KNeighborsRegressor: {}




Best hyperparameters for MLPRegressor: {}
Model: Ridge
Sample 1: Real SR = 3.86, Predicted SR = 4.751660297777801
Sample 2: Real SR = 2.0, Predicted SR = 1.2385615139224777
Sample 3: Real SR = 1.570516742590589, Predicted SR = 3.930684194391861
Sample 4: Real SR = 2.42215783927728, Predicted SR = 4.105345153871475
Sample 5: Real SR = 1.3, Predicted SR = 4.7541524192114935
RMSE: 2.4880086369141137


Model: DecisionTreeRegressor
Sample 1: Real SR = 3.86, Predicted SR = 3.17
Sample 2: Real SR = 2.0, Predicted SR = 0.75
Sample 3: Real SR = 1.570516742590589, Predicted SR = 1.382022977963089
Sample 4: Real SR = 2.42215783927728, Predicted SR = 2.484253220548712
Sample 5: Real SR = 1.3, Predicted SR = 1.91
RMSE: 2.860150586384024


Model: RandomForestRegressor
Sample 1: Real SR = 3.86, Predicted SR = 2.8762925276640137
Sample 2: Real SR = 2.0, Predicted SR = 2.4156927227553693
Sample 3: Real SR = 1.570516742590589, Predicted SR = 5.356629997703658
Sample 4: Real SR = 2.42215783927728, Predic



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf

# Load the data
file_path = "NR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['NR']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the TensorFlow model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse}")

# Show real and predicted results for the first 5 samples
print("Sample predictions:")
for i in range(5):
    print(f"Sample {i+1}: Real WS = {y_test.iloc[i]}, Predicted WS = {y_pred[i][0]}")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - loss: 42.1525 - val_loss: 21.5361
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 37.6182 - val_loss: 18.1290
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 28.1539 - val_loss: 12.9572
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 15.6422 - val_loss: 17.0783
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 14.2703 - val_loss: 13.4516
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 11.0323 - val_loss: 11.9429
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 9.4274 - val_loss: 13.0543
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 9.1790 - val_loss: 13.3465
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

# NR Benefit

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor  # Import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error

# Load the data
file_path = "NR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['NR_Benefit']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define models
models = [
    Ridge(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    AdaBoostRegressor(),
    SVR(),
    KNeighborsRegressor(),
    MLPRegressor(max_iter=1000)  # Adding MLPRegressor with default parameters
]

# Define hyperparameters to search for each model
param_grid = {
    'Ridge': {'ridge__alpha': [0.1, 0.5, 1.0]},
    'GradientBoostingRegressor': {},
    'AdaBoostRegressor': {},
    'SVR': {},
    'KNeighborsRegressor': {},
    'MLPRegressor': {}  # Add hyperparameters for tuning MLPRegressor if needed
}

# Train and tune hyperparameters for each model
best_models = {}

for model in models:
    model_name = model.__class__.__name__
    pipeline = make_pipeline(StandardScaler(), model)  # Scale features for some models
    
    # Perform grid search for hyperparameters
    if model_name in param_grid:
        grid_search = GridSearchCV(pipeline, param_grid[model_name], cv=5, scoring='neg_mean_squared_error')
        grid_search.fit(X_train, y_train)
        best_models[model_name] = grid_search.best_estimator_
        print(f"Best hyperparameters for {model_name}: {grid_search.best_params_}")
    else:
        pipeline.fit(X_train, y_train)
        best_models[model_name] = pipeline

# Make predictions using the best models
for model_name, model in best_models.items():
    print(f"Model: {model_name}")
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Show real and predicted results for the first 5 samples
    for i in range(5):
        print(f"Sample {i+1}: Real SR = {y_test.iloc[i]}, Predicted SR = {y_pred[i]}")
    
    # Calculate and print RMSE
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(f"RMSE: {rmse}")
    
    print("\n")


Best hyperparameters for Ridge: {'ridge__alpha': 1.0}
Best hyperparameters for GradientBoostingRegressor: {}
Best hyperparameters for AdaBoostRegressor: {}
Best hyperparameters for SVR: {}
Best hyperparameters for KNeighborsRegressor: {}




Best hyperparameters for MLPRegressor: {}
Model: Ridge
Sample 1: Real SR = 10.0, Predicted SR = 2.866575257360708
Sample 2: Real SR = 10.0, Predicted SR = 5.465519408396202
Sample 3: Real SR = 10.0, Predicted SR = 9.425904248557654
Sample 4: Real SR = 10.0, Predicted SR = 6.983678819804583
Sample 5: Real SR = 4.83, Predicted SR = 3.749116253974898
RMSE: 3.3614916466003413


Model: DecisionTreeRegressor
Sample 1: Real SR = 10.0, Predicted SR = 10.0
Sample 2: Real SR = 10.0, Predicted SR = 4.764957264957264
Sample 3: Real SR = 10.0, Predicted SR = 10.0
Sample 4: Real SR = 10.0, Predicted SR = 4.62
Sample 5: Real SR = 4.83, Predicted SR = 0.0
RMSE: 3.815769103104411


Model: RandomForestRegressor
Sample 1: Real SR = 10.0, Predicted SR = 6.671563532763537
Sample 2: Real SR = 10.0, Predicted SR = 4.8009846153846185
Sample 3: Real SR = 10.0, Predicted SR = 8.859399999999999
Sample 4: Real SR = 10.0, Predicted SR = 4.095919658119659
Sample 5: Real SR = 4.83, Predicted SR = 1.6073829059829061




In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf

# Load the data
file_path = "NR.xlsx"  # Update this with your file path
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'Wetland Type - Provincial Class',
    'Wetland Type - Federal Class',
    'Water Regime Indicator',
    'Specific Vegetation Type',
    '% Vegetation Cover for Specific Vegetation Cover Types',
    '% High Woody Canopy Cover (>5m)',
    'Phragmites present (Y/N)',
    'Soil Type',
    '% of Surface Water Present',
    'Depth of Saturation (cm)',
    'Average Depth of Living Moss (cm)',
    'Average Total Depth of Organics',
    'Average Organic Depth (cm)',
    'Hydrogeomorphic Class',
    '% Moss Cover'
]

results_columns = ['NR_Benefit']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]  # Assuming you want to predict 'SR'

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the TensorFlow model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"RMSE: {rmse}")

# Show real and predicted results for the first 5 samples
print("Sample predictions:")
for i in range(5):
    print(f"Sample {i+1}: Real WS = {y_test.iloc[i]}, Predicted WS = {y_pred[i][0]}")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - loss: 45.6305 - val_loss: 49.8157
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 43.7494 - val_loss: 43.6719
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 35.0830 - val_loss: 30.2552
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 24.3397 - val_loss: 23.7602
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 22.0304 - val_loss: 24.1385
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 16.6991 - val_loss: 24.9598
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 15.8218 - val_loss: 26.7113
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 14.1684 - val_loss: 31.6880
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m