In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV

In [None]:
# Train a random-forest turbidity model on a 70/30 split, report hold-out
# metrics, and save per-row predictions next to the original columns.
data = pd.read_csv('SMOF_dataset.csv')

# Features and target come from the frame loaded above (the original read
# them from an undefined `df`, which fails on a fresh kernel).
X = data[['b21', 'b26', 'b38', 'b48', 'b77']]
y = data['Turbidity']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
rf = RandomForestRegressor(n_estimators=120, max_depth=10, random_state=42)

# Fit on the training split only so the test split stays unseen by the model
rf.fit(X_train, y_train)

# Predict the held-out rows; these predictions drive the reported metrics
y_test_pred = rf.predict(X_test)

# Calculate evaluation metrics on the held-out split
r2 = r2_score(y_test, y_test_pred)
mae = mean_absolute_error(y_test, y_test_pred)
# Percentage error relative to |actual|; rows whose actual value is 0 yield
# inf here -- NOTE(review): confirm Turbidity is strictly positive.
mape = ((y_test - y_test_pred).abs() / y_test.abs()).mean() * 100

# Print evaluation metrics
print("R2:", r2)
print("MAE:", mae)
print("MAPE:", mape)

# Add predicted turbidity for every row to the original data. Rows that were
# in the training split are included, so this column will track the target
# more closely than the hold-out metrics above suggest.
y_pred = rf.predict(X)
data['Turbidity_pred'] = y_pred

# Save the updated data to CSV file
data.to_csv('BR02TURB_RF.csv', index=False)

In [None]:
import pandas as pd
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.feature_selection import RFE
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.model_selection import cross_val_predict
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

# Stacked-ensemble evaluation: eight base regressors feed a random-forest
# meta-model, and the stack is scored with 10-fold cross-validation using
# only rows that neither the base models nor the meta-model saw in that fold.

RANDOM_STATE = 42   # shared seed so fold assignment and model fits are repeatable
N_INNER_SPLITS = 5  # inner folds used to build the meta-model's training features

# Load the dataset
df = pd.read_csv('SMOF_dataset.csv')

# Handle missing values by carrying the previous row's value forward.
# fillna(method='ffill') is deprecated in recent pandas; ffill() is the
# supported spelling and avoids mutating the frame in place.
df = df.ffill()

# Separate features and target variable
X = df[['b21', 'b26', 'b38', 'b48', 'b77']]
y = df['Turbidity']

# Initialize base models (seeded where the estimator accepts a seed)
base_models = [
    CatBoostRegressor(random_seed=RANDOM_STATE, verbose=0),
    DecisionTreeRegressor(random_state=RANDOM_STATE),
    ExtraTreesRegressor(random_state=RANDOM_STATE),
    GradientBoostingRegressor(random_state=RANDOM_STATE),
    # With 5 input features and n_features_to_select=5, RFE keeps every
    # feature, so this entry behaves like a second decision tree.
    RFE(DecisionTreeRegressor(random_state=RANDOM_STATE), n_features_to_select=5),
    RandomForestRegressor(random_state=RANDOM_STATE),
    SVR(),
    XGBRegressor(random_state=RANDOM_STATE),
]

# Initialize meta model (Random Forest)
meta_model = RandomForestRegressor(random_state=RANDOM_STATE)

# Outer 10-fold split used for scoring; inner split used to produce
# out-of-fold base predictions for training the meta-model.
kf = KFold(n_splits=10, shuffle=True, random_state=RANDOM_STATE)
inner_kf = KFold(n_splits=N_INNER_SPLITS, shuffle=True, random_state=RANDOM_STATE)

# Per-fold meta-model predictions (one array per outer test fold) and scores
meta_model_predictions = []
rmse_scores = []
mape_scores = []
r2_scores = []

# Perform 10-fold cross-validation.
# Each outer fold runs N_INNER_SPLITS + 1 fits per base model, so this cell
# is noticeably slower than a single pass over the folds.
for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    meta_features_train = {}
    meta_features_test = {}
    for position, model in enumerate(base_models):
        column = f"base_{position}"

        # Out-of-fold predictions on the training rows: every value comes
        # from a copy of the model that did not see that row, so the
        # meta-model is not trained on over-optimistic base outputs.
        meta_features_train[column] = cross_val_predict(
            model, X_train, y_train, cv=inner_kf
        )

        # Refit on the whole training fold, then predict the held-out fold
        model.fit(X_train, y_train)
        meta_features_test[column] = model.predict(X_test)

    # One column per base model, same column order for train and test
    stacked_train = pd.DataFrame(meta_features_train)
    stacked_test = pd.DataFrame(meta_features_test)

    # Train the meta model on training-fold features and targets only
    meta_model.fit(stacked_train, y_train)

    # Predict the held-out fold through the full stack
    meta_model_prediction = meta_model.predict(stacked_test)
    meta_model_predictions.append(meta_model_prediction)

    # Calculate evaluation metrics on the held-out fold.
    # RMSE is taken as sqrt(MSE) because the `squared=False` argument has
    # been deprecated and removed in newer scikit-learn releases.
    rmse_scores.append(mean_squared_error(y_test, meta_model_prediction) ** 0.5)
    mape_scores.append(mean_absolute_percentage_error(y_test, meta_model_prediction))
    r2_scores.append(r2_score(y_test, meta_model_prediction))

# Calculate average metrics
average_rmse = sum(rmse_scores) / len(rmse_scores)
average_mape = sum(mape_scores) / len(mape_scores)
average_r2 = sum(r2_scores) / len(r2_scores)

print("Average RMSE:", average_rmse)
print("Average MAPE:", average_mape)
print("Average R2:", average_r2)

