In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load data
try:
    df = pd.read_csv('DO.csv')
except FileNotFoundError:
    # Fallback path if running in Colab or different environment
    df = pd.read_csv('/content/DO.csv')

# Features (unscaled) and regression target
X_raw = df[['ndvi', 'n-WST']]
y = df['DO']

# Train/test split is performed on the RAW features, before any scaling.
# Fitting the scaler on the full dataset would let the test rows influence
# the mean/std used to transform the training rows (data leakage).
# test_size and random_state are unchanged, so the same rows are held out.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=42
)

# Applying Standard Scaling: statistics are learned from the training rows
# only, then the same transformation is applied to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix transformed with the train-fitted scaler; kept so that
# any later cell that still reads `X_scaled` continues to work.
X_scaled = scaler.transform(X_raw)

# The reported mean/std are the training-set statistics held by the scaler.
print("Data scaled using StandardScaler.")
print(f"NDVI mean: {scaler.mean_[0]:.4f}, std: {scaler.scale_[0]:.4f}")
print(f"n-WST mean: {scaler.mean_[1]:.4f}, std: {scaler.scale_[1]:.4f}")

In [None]:
# Hyperparameter ranges for RF
rf_params = {
    'n_estimators': np.arange(100, 501, 100),
    'max_depth': np.arange(4, 41, 4),
    'min_samples_split': np.arange(2, 11, 1),
    'min_samples_leaf': np.arange(1, 11, 1),
    # 'auto' was deprecated for RandomForestRegressor in scikit-learn 1.1 and
    # removed in 1.3; on current versions every sampled candidate using it
    # fails parameter validation and is scored as NaN. For regressors 'auto'
    # meant "use all features", which is exactly what 1.0 expresses.
    'max_features': [1.0, 'sqrt', 'log2'],
    'bootstrap': [True, False]
}

rf = RandomForestRegressor(random_state=42)

# Randomized search: 30 sampled configurations, 5-fold CV on the training
# split, ranked by negative MSE (higher is better), using all CPU cores.
rf_search = RandomizedSearchCV(
    rf, rf_params, n_iter=30, cv=5, scoring='neg_mean_squared_error', random_state=42, verbose=2, n_jobs=-1
)
rf_search.fit(X_train, y_train)

# Save best hyperparameters to CSV
pd.DataFrame([rf_search.best_params_]).to_csv('best_rf_hyperparameters.csv', index=False)

# Predict and save test results
# best_estimator_ is the model refit on the whole training split with the
# best configuration found by the search.
rf_pred = rf_search.best_estimator_.predict(X_test)
rf_results = pd.DataFrame(X_test, columns=['ndvi_scaled', 'n-WST_scaled'])
# .values drops y_test's shuffled index so rows align positionally with X_test
rf_results['DO_actual'] = y_test.values
rf_results['DO_RF_pred'] = rf_pred
rf_results.to_csv('rf_test_predictions.csv', index=False)
print("RF: Best hyperparameters and test predictions (scaled features) saved as CSV.")


In [None]:
# Search space for the support-vector regressor.
# gamma mixes the two named heuristics with six evenly spaced numeric values.
svr_params = dict(
    C=np.linspace(0.1, 1000, 10),
    epsilon=np.linspace(0.01, 1, 10),
    gamma=['scale', 'auto'] + list(np.linspace(0.001, 1, 6)),
    kernel=['linear', 'rbf', 'poly'],
)

svr = SVR()

# Randomized search: 30 sampled configurations, 5-fold CV, ranked by negative
# MSE, parallelised over all available cores.
svr_search = RandomizedSearchCV(
    svr,
    svr_params,
    n_iter=30,
    cv=5,
    scoring='neg_mean_squared_error',
    random_state=42,
    verbose=2,
    n_jobs=-1,
)
svr_search.fit(X_train, y_train)

# Persist the winning configuration as a one-row CSV
pd.DataFrame([svr_search.best_params_]).to_csv(
    'best_svr_hyperparameters.csv', index=False
)

# Score the held-out rows with the refit best model and write them out
# alongside the scaled inputs and the true target values.
svr_pred = svr_search.best_estimator_.predict(X_test)
svr_results = pd.DataFrame(
    X_test, columns=['ndvi_scaled', 'n-WST_scaled']
).assign(DO_actual=y_test.values, DO_SVR_pred=svr_pred)
svr_results.to_csv('svr_test_predictions.csv', index=False)
print("SVR: Best hyperparameters and test predictions (scaled features) saved as CSV.")
