In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, StackingRegressor, ExtraTreesRegressor, BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import RobustScaler, PolynomialFeatures, PowerTransformer
from sklearn.decomposition import PCA
from sklearn.metrics import mean_absolute_error,r2_score,root_mean_squared_error
from sklearn.pipeline import Pipeline
from scipy.stats.mstats import winsorize


In [3]:
df= pd.read_csv("/content/drive/MyDrive/DataBase/154train.csv")
df1= pd.read_csv("/content/drive/MyDrive/DataBase/78test.csv")

In [4]:
df.columns = df.columns.str.strip()
df1.columns = df1.columns.str.strip()

In [5]:
df['q'] = df['Velocity']/df['Depth']
df['v/d50'] = df['Velocity']/df['D50']
df1['q'] = df1['Velocity']/df1['Depth']
df1['v/d50'] = df1['Velocity']/df1['D50']


In [None]:
df.shape

(154, 10)

In [6]:
df_scaled= df.copy()
num_cols = df.columns.drop("Scour")
scaler = RobustScaler()
df_scaled[num_cols] = scaler.fit_transform(df_scaled[num_cols])
df1_scaled= df1.copy()
df1_scaled[num_cols] = scaler.transform(df1_scaled[num_cols])
df.shape

(154, 10)

In [7]:
from sklearn.utils import resample

df_resampled = resample(df_scaled, replace=True, n_samples=len(df) * 2, random_state=42)  # Doubles row size
df_resampled.shape

(308, 10)

In [8]:
def score(model, X_test, y_test):
    y_pred = model.predict(X_test)
    rmse = root_mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    n = X_test.shape[0]  # Number of observations
    p = X_test.shape[1]  # Number of predictors
    adjusted_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)
    print(f"Adjusted R-squared: {adjusted_r2:.4f}")

    return rmse, r2


In [9]:
import random

def interpolate_data(df, n_samples):
    new_rows = []
    for _ in range(n_samples):
        # Pick two random rows
        idx1, idx2 = random.sample(range(len(df)), 2)
        row1, row2 = df.iloc[idx1], df.iloc[idx2]

        # Interpolate new row
        new_row = (row1 + row2) / 2
        new_rows.append(new_row)

    return pd.DataFrame(new_rows, columns=df.columns)

df_interpolated = interpolate_data(df_scaled, len(df))  # Generate as many new rows as original dataset

df_inter = pd.concat([df_scaled, df_interpolated], ignore_index=True)

print(df_inter.shape)  # Row count is doubled


(308, 10)


In [10]:
import numpy as np
import pandas as pd

# Copy original data
df_noisy = df_scaled.copy()

# Add Gaussian noise to numerical columns
noise = np.random.normal(loc=0, scale=0.01, size=df.shape)  # Small noise with mean=0, std=0.01
df_noisy.iloc[:, :-1] += noise[:, :-1]  # Exclude target variable from noise

# Combine original + noisy data
df_noised = pd.concat([df_scaled, df_noisy], ignore_index=True)

print(df_noised.shape)  # Row size is doubled


(308, 10)


In [None]:
!pip install ctgan


Collecting ctgan
  Downloading ctgan-0.11.0-py3-none-any.whl.metadata (10 kB)
Collecting rdt>=1.14.0 (from ctgan)
  Downloading rdt-1.14.0-py3-none-any.whl.metadata (10 kB)
Collecting Faker>=17 (from rdt>=1.14.0->ctgan)
  Downloading faker-37.0.0-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->ctgan)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->ctgan)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->ctgan)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->ctgan)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from tor

In [None]:
!pip install sdv --upgrade  # This line ensures the latest version of sdv library is installed, including CTGAN.
!pip install --upgrade ctgan

Collecting sdv
  Downloading sdv-1.18.0-py3-none-any.whl.metadata (13 kB)
Collecting boto3<2.0.0,>=1.28 (from sdv)
  Downloading boto3-1.37.9-py3-none-any.whl.metadata (6.6 kB)
Collecting botocore<2.0.0,>=1.31 (from sdv)
  Downloading botocore-1.37.9-py3-none-any.whl.metadata (5.7 kB)
Collecting copulas>=0.12.0 (from sdv)
  Downloading copulas-0.12.1-py3-none-any.whl.metadata (9.4 kB)
Collecting deepecho>=0.6.1 (from sdv)
  Downloading deepecho-0.7.0-py3-none-any.whl.metadata (10 kB)
Collecting sdmetrics>=0.17.0 (from sdv)
  Downloading sdmetrics-0.19.0-py3-none-any.whl.metadata (9.4 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3<2.0.0,>=1.28->sdv)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Collecting s3transfer<0.12.0,>=0.11.0 (from boto3<2.0.0,>=1.28->sdv)
  Downloading s3transfer-0.11.4-py3-none-any.whl.metadata (1.7 kB)
Downloading sdv-1.18.0-py3-none-any.whl (156 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.1/156.1 kB[0m [31m6.5 M

In [None]:

from sdv.single_table import CTGANSynthesizer # Import CTGANSynthesizer instead of CTGAN
from sdv.metadata import SingleTableMetadata

# Create metadata for the dataset
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df) # Detect metadata from the dataframe

# Train GAN on the dataset
ctgan = CTGANSynthesizer(metadata, epochs=100)  # Pass epochs to the constructor
ctgan.fit(df) #Remove epochs from the fit function

# Generate synthetic data (double the original size)
df_synthetic = ctgan.sample(len(df) * 3)

print(df_synthetic.shape)  # Dataset now has twice as many rows



(462, 10)


In [None]:
yj=PowerTransformer(method='yeo-johnson')
X_train_yj= yj.fit_transform(df.drop(["Scour"],axis=1))
X_train_yj = pd.DataFrame(X_train_yj, columns=df.drop(["Scour"], axis=1).columns)

In [None]:
from scipy.stats.mstats import winsorize
df_capped= df.copy()
num_cols = df.columns.drop("Scour")
df_capped[num_cols] = df[num_cols].apply(lambda x: pd.Series(winsorize(x.to_numpy(), limits=[0.01, 0.01])))

#Polynomial feature

In [None]:
poly= PolynomialFeatures(degree=2)
X_train_poly= poly.fit_transform(df_noised.drop(["Scour"],axis=1))
y_train= df_noised["Scour"]
X_test_poly= poly.transform(df1_scaled.drop(["Scour"],axis=1))
y_test= df1["Scour"]

In [None]:
y_train.shape

(308,)

#Linear Regression

In [None]:
X_train= df_noised.drop(["Scour"],axis=1)
y_train= df_noised["Scour"]
X_test= df1_scaled.drop(["Scour"],axis=1)
y_test= df1_scaled["Scour"]
lr= LinearRegression()
lr.fit(X_train,y_train)
mae,r2= score(lr,X_test,y_test)
print(f"rmse is : {mae}")
print(f"r2 is : {r2}")

Adjusted R-squared: 0.7189
rmse is : 0.5243993182249655
r2 is : 0.7517542065201503


In [None]:
X_train= df_resampled.drop(["Scour"],axis=1)
y_train= df_resampled["Scour"]
X_test= df1_scaled.drop(["Scour"],axis=1)
y_test= df1_scaled["Scour"]
lr= LinearRegression()
lr.fit(X_train,y_train)
mae,r2= score(lr,X_test,y_test)
print(f"rmse is : {mae}")
print(f"r2 is : {r2}")


Adjusted R-squared: 0.6912
rmse is : 0.5496359345914776
r2 is : 0.7272857078124686


In [None]:
X_train= df.drop(["Scour"],axis=1)
y_train= df["Scour"]
X_test= df1.drop(["Scour"],axis=1)
y_test= df1["Scour"]
lr= LinearRegression()
lr.fit(X_train,y_train)
mae,r2= score(lr,X_test,y_test)
print(f"rmse| is : {mae}")
print(f"r2 is : {r2}")


Adjusted R-squared: 0.7185
rmse| is : 0.5247269919106646
r2 is : 0.751443874201726


In [None]:
X_train= df_noised.drop(["Scour"],axis=1)
y_train= df_noised["Scour"]
X_test= df1_scaled.drop(["Scour"],axis=1)
y_test= df1_scaled["Scour"]
lr= LinearRegression()
params = {
    'copy_X':[True,False],
    'fit_intercept':[True,False],
    'n_jobs':[1,2,3,4,5,6,7,8,9,10],
    'positive':[True,False]
}
gd = GridSearchCV(lr,params,cv=5,scoring="r2")
gd.fit(X_train_poly,y_train)

best_lr= gd.best_estimator_
mae,r2= score(best_lr,X_test_poly,y_test)
print(f"rmse is : {mae}")
print(f"r2 is : {r2}")

Adjusted R-squared: -17.3345
rmse is : 2.4089153510375234
r2 is : -4.238425113351079


In [None]:
def GridSearch(models,params,X_train,X_test,y_test,y_train):
    results= {}
    best_score= float('-inf')
    best_model= None
    for model_name, model in models.items():
        gd= GridSearchCV(model,params[model_name],cv=5,scoring="r2")
        gd.fit(X_train,y_train)
        score= gd.score(X_test,y_test)
        if score> best_score:
            best_score= score
            best_model= gd.best_estimator_
        results[model_name]= gd.best_score_
    return results, best_model, best_score

In [None]:
X_train= df.drop(["Scour"],axis=1)
y_train= df["Scour"]
X_test= df1.drop(["Scour"],axis=1)
y_test= df1_scaled["Scour"]
models= {
    'ridge': Ridge(),
    'lasso': Lasso(),
    'lr': LinearRegression()

}
params_ridge= {
    'alpha': [100,150,200]}
params_lasso = {
    'alpha': [0.02,0.025,.04]
}
params_lr= {
     'copy_X':[True,False],
    'fit_intercept':[True,False],
    'n_jobs':[1,2,3,4,5,6,7,8,9,10],
    'positive':[True,False]
}
params = {
    'ridge': params_ridge,
    'lasso': params_lasso,
    'lr': params_lr
}
print(GridSearch(models,params,X_train,X_test,y_test,y_train))

({'ridge': np.float64(0.6795952179551356), 'lasso': np.float64(0.6787954190315614), 'lr': np.float64(0.6767872300837319)}, Lasso(alpha=0.04), 0.7637758047600168)


In [None]:
las= Lasso(alpha=0.02)
las.fit(X_train,y_train)
mae,r2= score(las,X_test,y_test)
print(f"rmse is : {mae}")
print(f"r2 is : {r2}")

Adjusted R-squared: 0.7327
rmse is : 0.5114049184747604
r2 is : 0.7639046342706597


#SVM

In [None]:
from sklearn.svm import SVR

from sklearn.metrics import mean_absolute_error

X_train= df.drop(["Scour"],axis=1)
y_train= df["Scour"]
X_test= df1.drop(["Scour"],axis=1)
y_test= df1["Scour"]


svr = SVR()


# grid_params = {
#     'C': [0.1, 1, 10],
#     'kernel': ['linear', 'rbf', 'poly'],
#     'gamma': ['scale', 'auto', 0.01, 0.1]
# }

# grid_search = GridSearchCV(svr, grid_params, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
# grid_search.fit(X_train, y_train)



# print("Best parameters (GridSearchCV):", grid_search.best_params_)
# print("Best MAE (GridSearchCV):", grid_search.best_score_)
# y_pred = grid_search.best_estimator_.predict(X_test)
# print("Test Accuracy:", mean_absolute_error(y_test, y_pred))


random_params = {
    'C': np.logspace(-2, 2, 10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1]
}


random_search = RandomizedSearchCV(svr, random_params, n_iter=10, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1, random_state=42)
random_search.fit(X_train, y_train)

print("Best parameters (RandomizedSearchCV):", random_search.best_params_)
print("Best MAE (RandomizedSearchCV):", -random_search.best_score_)

best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)
print("Test RMSE:", root_mean_squared_error(y_test, y_pred))


Best parameters (RandomizedSearchCV): {'kernel': 'rbf', 'gamma': 0.01, 'C': np.float64(0.5994842503189409)}
Best MAE (RandomizedSearchCV): 0.4620770347476547
Test RMSE: 0.5486029775911344


In [11]:
!pip install optuna




#Desicion tree


In [37]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor

X_train= df_noised.drop(["Scour"],axis=1)
y_train= df_noised["Scour"]
X_test= df1_scaled.drop(["Scour"],axis=1)
y_test= df1["Scour"]




def objective(trial):
    max_depth = trial.suggest_int('max_depth', 1, 30)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 20)

    dtr = DecisionTreeRegressor(max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, random_state=42)
    score = cross_val_score(dtr, X_train, y_train, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
    return -score.mean()

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

print("Best parameters:", study.best_params)
print("Best MAE:", study.best_value)

# Train best model
best_dtr = DecisionTreeRegressor(**study.best_params, random_state=42)
best_dtr.fit(X_train, y_train)

y_pred = best_dtr.predict(X_test)
print("Test MAE:", mean_absolute_error(y_test, y_pred))
#

[I 2025-03-20 06:33:56,068] A new study created in memory with name: no-name-23c55e02-bb67-499c-a4c3-7fd88f877608
[I 2025-03-20 06:33:56,290] Trial 0 finished with value: 0.2873984788822221 and parameters: {'max_depth': 9, 'min_samples_split': 17, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.2873984788822221.
[I 2025-03-20 06:33:56,404] Trial 1 finished with value: 0.3842779152652908 and parameters: {'max_depth': 18, 'min_samples_split': 9, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.2873984788822221.
[I 2025-03-20 06:33:56,520] Trial 2 finished with value: 0.28735521307410333 and parameters: {'max_depth': 16, 'min_samples_split': 9, 'min_samples_leaf': 5}. Best is trial 2 with value: 0.28735521307410333.
[I 2025-03-20 06:33:56,633] Trial 3 finished with value: 0.280666918126688 and parameters: {'max_depth': 7, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 3 with value: 0.280666918126688.
[I 2025-03-20 06:33:56,720] Trial 4 finished with value: 0.374

Best parameters: {'max_depth': 30, 'min_samples_split': 2, 'min_samples_leaf': 1}
Best MAE: 0.10599378241417926
Test MAE: 0.3621740216578102


In [38]:
#AMONG noised and resampled
#Noised give .25 mae at best better than resampled



poly= PolynomialFeatures(degree=2)
X_train_poly= poly.fit_transform(df_noised.drop(["Scour"],axis=1))
y_train= df_noised["Scour"]
X_test_poly= poly.transform(df1_scaled.drop(["Scour"],axis=1))
y_test= df1_scaled["Scour"]
from sklearn.tree import DecisionTreeRegressor




def objective(trial):
    max_depth = trial.suggest_int('max_depth', 1, 30)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 20)

    dtr = DecisionTreeRegressor(max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, random_state=42)
    score = cross_val_score(dtr, X_train_poly, y_train, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
    return -score.mean()

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

print("Best parameters:", study.best_params)
print("Best MAE:", study.best_value)

# Train best model
best_dtr = DecisionTreeRegressor(**study.best_params, random_state=42)
best_dtr.fit(X_train_poly, y_train)

y_pred = best_dtr.predict(X_test_poly)
print("Test MAE:", mean_absolute_error(y_test, y_pred))


[I 2025-03-20 06:34:07,717] A new study created in memory with name: no-name-57eead7e-e15b-40be-b3e6-34732c278c4c
[I 2025-03-20 06:34:07,783] Trial 0 finished with value: 0.3066933706179483 and parameters: {'max_depth': 12, 'min_samples_split': 5, 'min_samples_leaf': 7}. Best is trial 0 with value: 0.3066933706179483.
[I 2025-03-20 06:34:07,844] Trial 1 finished with value: 0.3510551574482613 and parameters: {'max_depth': 28, 'min_samples_split': 6, 'min_samples_leaf': 13}. Best is trial 0 with value: 0.3066933706179483.
[I 2025-03-20 06:34:07,904] Trial 2 finished with value: 0.3182609497465968 and parameters: {'max_depth': 10, 'min_samples_split': 18, 'min_samples_leaf': 7}. Best is trial 0 with value: 0.3066933706179483.
[I 2025-03-20 06:34:07,952] Trial 3 finished with value: 0.33101903792434717 and parameters: {'max_depth': 30, 'min_samples_split': 14, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.3066933706179483.
[I 2025-03-20 06:34:08,010] Trial 4 finished with value: 

Best parameters: {'max_depth': 17, 'min_samples_split': 5, 'min_samples_leaf': 1}
Best MAE: 0.14441865803485698
Test MAE: 0.30843432536143783


#Random Forest

In [21]:
#scaled
# BY INTERPOLATION TEST MAE IS .269
# BY NOISED TEST MAE IS .27
# BY RESAMPLED TEST MAE IS .27
import optuna

# BY INTERPOLATION TEST MAE IS .28
# BY NOISED TEST MAE IS .25
# BY RESAMPLED TEST MAE IS .3

import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor
X_train= df_inter.drop(["Scour"],axis=1)
y_train= df_inter["Scour"]
X_test= df1.drop(["Scour"],axis=1)
y_test= df1["Scour"]




def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    max_depth = trial.suggest_int('max_depth', 2, 30)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 20)

    rf = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth,
                               min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, random_state=42, n_jobs=-1)
    score = cross_val_score(rf, X_train, y_train, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
    return -score.mean()

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=25)

# Best parameters
print("Best parameters:", study.best_params)
print("Best MAE:", study.best_value)

# Train best model
best_rf = RandomForestRegressor(**study.best_params, random_state=42, n_jobs=-1)
best_rf.fit(X_train, y_train)

y_pred = best_rf.predict(X_test)
print("Test MAE:", mean_absolute_error(y_test, y_pred))



[I 2025-03-20 03:32:15,228] A new study created in memory with name: no-name-352de5df-018d-46bf-a0e6-97d73d955032
[I 2025-03-20 03:32:17,375] Trial 0 finished with value: 0.5096515731640346 and parameters: {'n_estimators': 168, 'max_depth': 18, 'min_samples_split': 17, 'min_samples_leaf': 16}. Best is trial 0 with value: 0.5096515731640346.
[I 2025-03-20 03:32:19,212] Trial 1 finished with value: 0.5100113599611478 and parameters: {'n_estimators': 154, 'max_depth': 25, 'min_samples_split': 15, 'min_samples_leaf': 16}. Best is trial 0 with value: 0.5096515731640346.
[I 2025-03-20 03:32:23,536] Trial 2 finished with value: 0.4297728419446062 and parameters: {'n_estimators': 260, 'max_depth': 19, 'min_samples_split': 10, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.4297728419446062.
[I 2025-03-20 03:32:26,114] Trial 3 finished with value: 0.5131447972981447 and parameters: {'n_estimators': 177, 'max_depth': 4, 'min_samples_split': 13, 'min_samples_leaf': 16}. Best is trial 2 with

Best parameters: {'n_estimators': 60, 'max_depth': 9, 'min_samples_split': 2, 'min_samples_leaf': 3}
Best MAE: 0.4250307577644709
Test MAE: 0.2833263073244704


In [34]:
#SCALED
#NOISED GIVES MAE OF .226
#INTERPOLATED GIVES MAE OF .28
#RESAMPLED GIVES MAE OF .23

#NOISED GIVES MAE OF .28
#INTERPOLATED GIVES MAE OF .28
#RESAMPLED GIVES MAE OF .57


poly= PolynomialFeatures(degree=2)
X_train= poly.fit_transform(df_noised.drop(["Scour"],axis=1))
y_train= df_noised["Scour"]
X_test= poly.transform(df1_scaled.drop(["Scour"],axis=1))
y_test= df1["Scour"]






def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    max_depth = trial.suggest_int('max_depth', 2, 30)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 20)

    rf = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth,
                               min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, random_state=42, n_jobs=-1)
    score = cross_val_score(rf, X_train, y_train, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
    return -score.mean()

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Best parameters
print("Best parameters:", study.best_params)
print("Best MAE:", study.best_value)

# Train best model
best_rf = RandomForestRegressor(**study.best_params, random_state=42, n_jobs=-1)
best_rf.fit(X_train, y_train)

y_pred = best_rf.predict(X_test)
print("Test MAE:", mean_absolute_error(y_test, y_pred))


[I 2025-03-20 04:30:48,619] A new study created in memory with name: no-name-cb2a1f36-5a91-4359-81c7-609e95d0e1ab
[I 2025-03-20 04:30:58,626] Trial 0 finished with value: 0.2826044612476979 and parameters: {'n_estimators': 162, 'max_depth': 22, 'min_samples_split': 7, 'min_samples_leaf': 5}. Best is trial 0 with value: 0.2826044612476979.
[I 2025-03-20 04:31:06,534] Trial 1 finished with value: 0.36837242708639584 and parameters: {'n_estimators': 286, 'max_depth': 5, 'min_samples_split': 6, 'min_samples_leaf': 19}. Best is trial 0 with value: 0.2826044612476979.
[I 2025-03-20 04:31:10,707] Trial 2 finished with value: 0.3614526214680486 and parameters: {'n_estimators': 129, 'max_depth': 29, 'min_samples_split': 7, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.2826044612476979.
[I 2025-03-20 04:31:14,438] Trial 3 finished with value: 0.36110509050415657 and parameters: {'n_estimators': 102, 'max_depth': 15, 'min_samples_split': 2, 'min_samples_leaf': 18}. Best is trial 0 with v

Best parameters: {'n_estimators': 214, 'max_depth': 18, 'min_samples_split': 8, 'min_samples_leaf': 1}
Best MAE: 0.21602448073104547
Test MAE: 0.23296105272856202


In [35]:
rmse,r2= score(best_rf,X_test,y_test)
print(f"rmse is : {rmse}")
print(f"r2 is : {r2}")

Adjusted R-squared: 0.7028
rmse is : 0.3067166760532033
r2 is : 0.9150756093510911


#XG BOOST

In [24]:
# MAE ON SIMPLE DATASET IS .24
# MAE ON NOISED DATASET IS .23
# MAE ON INTERPOLATED DATASET IS .234
# MAE ON REsAMPLED DATASET IS .237
import xgboost as xgb

X_train= df_resampled.drop(["Scour"],axis=1)
y_train= df_resampled["Scour"]
X_test= df1_scaled.drop(["Scour"],axis=1)
y_test= df1_scaled["Scour"]



def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3,log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-5, 1e1,log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 1e1,log=True),
        'objective': 'reg:squarederror'
    }

    model = xgb.XGBRegressor(**params)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)],  verbose=False)

    y_pred = model.predict(X_test)
    return mean_absolute_error(y_test, y_pred)

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Get best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)
print("\n")
# Train final model with best parameters
best_model = xgb.XGBRegressor(**best_params)
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"TEST MAE IS :{mae}")


[I 2025-03-20 03:49:37,441] A new study created in memory with name: no-name-2a0c5f5c-3783-4990-a189-45d81e93064b
[I 2025-03-20 03:49:37,750] Trial 0 finished with value: 0.5677591795340563 and parameters: {'n_estimators': 460, 'learning_rate': 0.07907969619621681, 'max_depth': 4, 'subsample': 0.9284107800245703, 'colsample_bytree': 0.6027465310361013, 'reg_alpha': 0.00014929192920976064, 'reg_lambda': 0.01260705073638346}. Best is trial 0 with value: 0.5677591795340563.
[I 2025-03-20 03:49:37,973] Trial 1 finished with value: 0.5382326061144854 and parameters: {'n_estimators': 347, 'learning_rate': 0.11985771273066344, 'max_depth': 6, 'subsample': 0.8671037703246612, 'colsample_bytree': 0.9595986014854453, 'reg_alpha': 3.869872619933389e-05, 'reg_lambda': 4.689892871294037}. Best is trial 1 with value: 0.5382326061144854.
[I 2025-03-20 03:49:38,089] Trial 2 finished with value: 0.5678182112100798 and parameters: {'n_estimators': 355, 'learning_rate': 0.09520058064285405, 'max_depth': 

Best Hyperparameters: {'n_estimators': 248, 'learning_rate': 0.011428972055105395, 'max_depth': 5, 'subsample': 0.5277188764940821, 'colsample_bytree': 0.6257261122795221, 'reg_alpha': 0.04972985729131906, 'reg_lambda': 2.4345308096902067e-05}


TEST MAE IS :0.49548457104426175


In [25]:
# MAE ON SIMPLE DATASET IS .23
# MAE ON NOISED DATASET IS .225
# MAE ON INTERPOLATED DATASET IS .25
# MAE ON REsAMPLED DATASET IS .23
# MAE ON scaled DATASET IS .23

poly= PolynomialFeatures(degree=2)
X_train= poly.fit_transform(df.drop(["Scour"],axis=1))
y_train= df["Scour"]
X_test= poly.transform(df1.drop(["Scour"],axis=1))
y_test= df1["Scour"]



def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3,log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-5, 1e1,log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 1e1,log=True),
        'objective': 'reg:squarederror'
    }

    model = xgb.XGBRegressor(**params)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)],  verbose=False)

    y_pred = model.predict(X_test)
    return mean_absolute_error(y_test, y_pred)

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Get best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)
print("\n")
# Train final model with best parameters
best_model = xgb.XGBRegressor(**best_params)
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"TEST MAE IS :{mae}")

[I 2025-03-20 04:09:34,606] A new study created in memory with name: no-name-878c9bf2-f16f-4cc6-980c-f6bba196017d
[I 2025-03-20 04:09:37,993] Trial 0 finished with value: 0.23743867274278257 and parameters: {'n_estimators': 326, 'learning_rate': 0.09574492669735048, 'max_depth': 3, 'subsample': 0.8258165573558212, 'colsample_bytree': 0.6929615584955646, 'reg_alpha': 0.1405321185901978, 'reg_lambda': 0.1431217403423535}. Best is trial 0 with value: 0.23743867274278257.
[I 2025-03-20 04:09:40,343] Trial 1 finished with value: 0.24750923980505043 and parameters: {'n_estimators': 327, 'learning_rate': 0.016831748554172944, 'max_depth': 4, 'subsample': 0.7405958026878071, 'colsample_bytree': 0.6019000634656904, 'reg_alpha': 0.0013213959343725509, 'reg_lambda': 4.3368314454177895e-05}. Best is trial 0 with value: 0.23743867274278257.
[I 2025-03-20 04:09:43,043] Trial 2 finished with value: 0.2516284305315752 and parameters: {'n_estimators': 285, 'learning_rate': 0.06662182377083789, 'max_dep

Best Hyperparameters: {'n_estimators': 350, 'learning_rate': 0.22852185642610515, 'max_depth': 9, 'subsample': 0.7212025810651304, 'colsample_bytree': 0.5871858192044206, 'reg_alpha': 0.006606710515084298, 'reg_lambda': 0.01610892127799281}


TEST MAE IS :0.22838079906426947


#ADA BOOST

In [None]:
# MAE ON SIMPLE DATASET IS .28
# MAE ON NOISED DATASET IS .224
# MAE ON INTERPOLATED DATASET IS .23
# MAE ON REsAMPLED DATASET IS .23

import optuna
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor

X_train= df.drop(["Scour"],axis=1)
y_train= df["Scour"]
X_test= df1.drop(["Scour"],axis=1)
y_test= df1["Scour"]

# Define Optuna objective function
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
    }

    base_model = DecisionTreeRegressor(max_depth=params['max_depth'])
    model = AdaBoostRegressor(estimator=base_model,
                              n_estimators=params['n_estimators'],
                              learning_rate=params['learning_rate'],
                              random_state=42)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return mean_absolute_error(y_test, y_pred)

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Get best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train final model with best parameters
best_base_model = DecisionTreeRegressor(max_depth=best_params['max_depth'])
best_model = AdaBoostRegressor(estimator=best_base_model,
                               n_estimators=best_params['n_estimators'],
                               learning_rate=best_params['learning_rate'],
                               random_state=42)

best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print("Optimized Mean Absolute Error:", mae)


[I 2025-03-19 19:53:07,105] A new study created in memory with name: no-name-7181897c-c8ea-4823-a556-c0463e98081d
[I 2025-03-19 19:53:07,415] Trial 0 finished with value: 0.3040232704912111 and parameters: {'n_estimators': 118, 'learning_rate': 0.04398762853734612, 'max_depth': 9}. Best is trial 0 with value: 0.3040232704912111.
[I 2025-03-19 19:53:08,543] Trial 1 finished with value: 0.2978779485055001 and parameters: {'n_estimators': 500, 'learning_rate': 0.4361109515114962, 'max_depth': 7}. Best is trial 1 with value: 0.2978779485055001.
[I 2025-03-19 19:53:08,749] Trial 2 finished with value: 0.30725781113986866 and parameters: {'n_estimators': 307, 'learning_rate': 0.17799114827054674, 'max_depth': 5}. Best is trial 1 with value: 0.2978779485055001.
[I 2025-03-19 19:53:08,897] Trial 3 finished with value: 0.31032769960948436 and parameters: {'n_estimators': 68, 'learning_rate': 0.018670464219053134, 'max_depth': 4}. Best is trial 1 with value: 0.2978779485055001.
[I 2025-03-19 19:

Best Hyperparameters: {'n_estimators': 439, 'learning_rate': 0.16168439489769781, 'max_depth': 7}
Optimized Mean Absolute Error: 0.2886577605628383


In [33]:
# MAE ON SIMPLE DATASET IS .23
# MAE ON NOISED DATASET IS .216
# MAE ON INTERPOLATED DATASET IS .225
# MAE ON REsAMPLED DATASET IS .217
# MAE ON scaled DATASET IS .23


poly= PolynomialFeatures(degree=2)
X_train= poly.fit_transform(df_resampled.drop(["Scour"],axis=1))
y_train= df_resampled["Scour"]
X_test= poly.transform(df1_scaled.drop(["Scour"],axis=1))
y_test= df1["Scour"]

# Define Optuna objective function
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
    }

    base_model = DecisionTreeRegressor(max_depth=params['max_depth'])
    model = AdaBoostRegressor(estimator=base_model,
                              n_estimators=params['n_estimators'],
                              learning_rate=params['learning_rate'],
                              random_state=42)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return mean_absolute_error(y_test, y_pred)

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Get best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train final model with best parameters
best_base_model = DecisionTreeRegressor(max_depth=best_params['max_depth'])
best_model = AdaBoostRegressor(estimator=best_base_model,
                               n_estimators=best_params['n_estimators'],
                               learning_rate=best_params['learning_rate'],
                               random_state=42)

best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print("Optimized Mean Absolute Error:", mae)

[I 2025-03-20 06:28:22,985] A new study created in memory with name: no-name-ed1089db-8a10-4615-b38a-59004b3b764c
[I 2025-03-20 06:28:24,903] Trial 0 finished with value: 0.2352914388134186 and parameters: {'n_estimators': 376, 'learning_rate': 0.14749430052782378, 'max_depth': 5}. Best is trial 0 with value: 0.2352914388134186.
[I 2025-03-20 06:28:27,149] Trial 1 finished with value: 0.23443624914770317 and parameters: {'n_estimators': 300, 'learning_rate': 0.684962119523845, 'max_depth': 5}. Best is trial 1 with value: 0.23443624914770317.
[I 2025-03-20 06:28:31,210] Trial 2 finished with value: 0.23347629708866008 and parameters: {'n_estimators': 289, 'learning_rate': 0.4913697887999055, 'max_depth': 5}. Best is trial 2 with value: 0.23347629708866008.
[I 2025-03-20 06:28:31,891] Trial 3 finished with value: 0.264887956635365 and parameters: {'n_estimators': 80, 'learning_rate': 0.03493472724704433, 'max_depth': 4}. Best is trial 2 with value: 0.23347629708866008.
[I 2025-03-20 06:2

Best Hyperparameters: {'n_estimators': 438, 'learning_rate': 0.9699024810995018, 'max_depth': 9}
Optimized Mean Absolute Error: 0.21750449126610014


In [34]:
rmse,r2= score(best_model,X_test,y_test)
print(f"rmse is : {rmse}")
print(f"r2 is : {r2}")

Adjusted R-squared: 0.7298
rmse is : 0.2924285235126249
r2 is : 0.9228035863977471


## CAT BOOST


In [12]:
!pip install catboost



In [39]:
pip install --upgrade numpy catboost


Collecting numpy
  Downloading numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m


In [24]:
import optuna
import pandas as pd
from catboost import CatBoostRegressor

# MAE ON SIMPLE DATASET IS .
# MAE ON NOISED DATASET IS .25
# MAE ON INTERPOLATED DATASET IS .258
# MAE ON REsAMPLED DATASET IS .247
# MAE ON scaled DATASET IS .

poly= PolynomialFeatures(degree=2)
X_train= poly.fit_transform(df_inter.drop(["Scour"],axis=1))
y_train= df_inter["Scour"]
X_test= poly.transform(df1_scaled.drop(["Scour"],axis=1))
y_test= df1["Scour"]


# Define Optuna objective function
def objective(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-5, 10, log=True),
        'random_strength': trial.suggest_float('random_strength', 1e-9, 10, log=True),
    }

    model = CatBoostRegressor(**params, verbose=0, random_state=42)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return mean_absolute_error(y_test, y_pred)

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Get best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train final model with best parameters
best_model = CatBoostRegressor(**best_params, verbose=0, random_state=42)
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print("Optimized Mean Absolute Error:", mae)


[I 2025-03-20 05:32:30,296] A new study created in memory with name: no-name-60f2ba26-aef5-4896-9885-34b4a3c3e324
[I 2025-03-20 05:32:34,998] Trial 0 finished with value: 0.2666793789906577 and parameters: {'iterations': 990, 'learning_rate': 0.01549015347996617, 'depth': 4, 'l2_leaf_reg': 0.0006424443114937264, 'random_strength': 3.683602422968101e-09}. Best is trial 0 with value: 0.2666793789906577.
[I 2025-03-20 05:32:39,695] Trial 1 finished with value: 0.2968423449800054 and parameters: {'iterations': 126, 'learning_rate': 0.03341549478021657, 'depth': 8, 'l2_leaf_reg': 0.06397098043392276, 'random_strength': 1.1399663678957334e-07}. Best is trial 0 with value: 0.2666793789906577.
[I 2025-03-20 05:33:36,237] Trial 2 finished with value: 0.28659294935665514 and parameters: {'iterations': 933, 'learning_rate': 0.01901988480284947, 'depth': 9, 'l2_leaf_reg': 0.04749473375797019, 'random_strength': 4.380939683354132}. Best is trial 0 with value: 0.2666793789906577.
[I 2025-03-20 05:34

Best Hyperparameters: {'iterations': 957, 'learning_rate': 0.01035241840911312, 'depth': 4, 'l2_leaf_reg': 0.0053035608070125485, 'random_strength': 0.06692693615149285}
Optimized Mean Absolute Error: 0.2588375019193312


#LIGHT GBM

In [22]:
import optuna
import pandas as pd
import lightgbm as lgb

# MAE ON SIMPLE DATASET IS .
# MAE ON NOISED DATASET IS .27
# MAE ON INTERPOLATED DATASET IS .23
# MAE ON REsAMPLED DATASET IS .247
# MAE ON scaled DATASET IS .

X_train= df_inter.drop(["Scour"],axis=1)
y_train= df_inter["Scour"]
X_test= df1_scaled.drop(["Scour"],axis=1)
y_test= df1_scaled["Scour"]



def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 1.0, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 10, 34),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 20),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-5, 10, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-5, 10, log=True),
    }

    model = lgb.LGBMRegressor(**params, random_state=42)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return mean_absolute_error(y_test, y_pred)

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=25)

# Get best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train final model with best parameters
best_model = lgb.LGBMRegressor(**best_params, random_state=42)
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print("Optimized Mean Absolute Error:", mae)


[I 2025-03-20 05:31:41,419] A new study created in memory with name: no-name-07897c03-4ffa-48a4-ae9f-9228e7e748d3
[I 2025-03-20 05:31:41,492] Trial 0 finished with value: 0.26885866103925504 and parameters: {'n_estimators': 192, 'learning_rate': 0.07765305204814493, 'num_leaves': 25, 'max_depth': 3, 'min_child_samples': 14, 'reg_alpha': 0.0003433399904610548, 'reg_lambda': 0.004940095091284336}. Best is trial 0 with value: 0.26885866103925504.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000126 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:42,016] Trial 1 finished with value: 0.2401145491049733 and parameters: {'n_estimators': 558, 'learning_rate': 0.022530800831349652, 'num_leaves': 21, 'max_depth': 9, 'min_child_samples': 8, 'reg_alpha': 0.0003170498030138008, 'reg_lambda': 0.058053583332098334}. Best is trial 1 with value: 0.2401145491049733.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:42,430] Trial 2 finished with value: 0.25238116362097424 and parameters: {'n_estimators': 451, 'learning_rate': 0.4583618265751958, 'num_leaves': 21, 'max_depth': 9, 'min_child_samples': 7, 'reg_alpha': 0.00031997243039696424, 'reg_lambda': 2.851699492967693}. Best is trial 1 with value: 0.2401145491049733.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:42,949] Trial 3 finished with value: 0.2587823308219742 and parameters: {'n_estimators': 868, 'learning_rate': 0.37215149606626596, 'num_leaves': 18, 'max_depth': 8, 'min_child_samples': 11, 'reg_alpha': 0.0005890637691211971, 'reg_lambda': 0.6588768382659321}. Best is trial 1 with value: 0.2401145491049733.
[I 2025-03-20 05:31:43,045] Trial 4 finished with value: 0.30746285267101836 and parameters: {'n_estimators': 461, 'learning_rate': 0.19071345816801477, 'num_leaves': 26, 'max_depth': 10, 'min_child_samples': 19, 'reg_alpha': 4.170463687714779, 'reg_lambda': 0.4208731493804344}. Best is trial 1 with value: 0.2401145491049733.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000090 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:43,540] Trial 5 finished with value: 0.2758666097507318 and parameters: {'n_estimators': 512, 'learning_rate': 0.02015993539214832, 'num_leaves': 31, 'max_depth': 12, 'min_child_samples': 17, 'reg_alpha': 0.04111785387428995, 'reg_lambda': 0.3873890764961646}. Best is trial 1 with value: 0.2401145491049733.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000104 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:43,666] Trial 6 finished with value: 0.2719225140402227 and parameters: {'n_estimators': 269, 'learning_rate': 0.8213287263238112, 'num_leaves': 32, 'max_depth': 9, 'min_child_samples': 19, 'reg_alpha': 4.824900054852752e-05, 'reg_lambda': 0.007354728792117375}. Best is trial 1 with value: 0.2401145491049733.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000085 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:43,945] Trial 7 finished with value: 0.23713070313265364 and parameters: {'n_estimators': 290, 'learning_rate': 0.11871526661398936, 'num_leaves': 20, 'max_depth': 9, 'min_child_samples': 11, 'reg_alpha': 0.5407381150673511, 'reg_lambda': 0.014125272148970093}. Best is trial 7 with value: 0.23713070313265364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000094 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:44,266] Trial 8 finished with value: 0.2591588030062895 and parameters: {'n_estimators': 584, 'learning_rate': 0.07511432874956003, 'num_leaves': 10, 'max_depth': 4, 'min_child_samples': 10, 'reg_alpha': 0.19060877478824226, 'reg_lambda': 0.0016454538184487184}. Best is trial 7 with value: 0.23713070313265364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000092 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:44,487] Trial 9 finished with value: 0.3288898052365993 and parameters: {'n_estimators': 690, 'learning_rate': 0.026400257983088996, 'num_leaves': 24, 'max_depth': 9, 'min_child_samples': 15, 'reg_alpha': 6.783961426459798, 'reg_lambda': 7.562614919069987}. Best is trial 7 with value: 0.23713070313265364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000090 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:44,704] Trial 10 finished with value: 0.24315315812459717 and parameters: {'n_estimators': 107, 'learning_rate': 0.14415577418285744, 'num_leaves': 14, 'max_depth': 6, 'min_child_samples': 6, 'reg_alpha': 0.6306608778410058, 'reg_lambda': 3.080988658596978e-05}. Best is trial 7 with value: 0.23713070313265364.




[I 2025-03-20 05:31:45,014] Trial 11 finished with value: 0.2652154894668926 and parameters: {'n_estimators': 327, 'learning_rate': 0.011096636376582059, 'num_leaves': 19, 'max_depth': 6, 'min_child_samples': 9, 'reg_alpha': 0.006067799018327684, 'reg_lambda': 0.06336264726882308}. Best is trial 7 with value: 0.23713070313265364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000090 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:45,648] Trial 12 finished with value: 0.23729808676116582 and parameters: {'n_estimators': 729, 'learning_rate': 0.04083951558627325, 'num_leaves': 15, 'max_depth': 11, 'min_child_samples': 5, 'reg_alpha': 1.223967001985751e-05, 'reg_lambda': 0.0003145101843812694}. Best is trial 7 with value: 0.23713070313265364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:46,319] Trial 13 finished with value: 0.25124603013721436 and parameters: {'n_estimators': 976, 'learning_rate': 0.04895530142008178, 'num_leaves': 16, 'max_depth': 12, 'min_child_samples': 12, 'reg_alpha': 1.073383514213938e-05, 'reg_lambda': 0.00021891429457681727}. Best is trial 7 with value: 0.23713070313265364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000093 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:46,825] Trial 14 finished with value: 0.24641905280413395 and parameters: {'n_estimators': 783, 'learning_rate': 0.04046866718084501, 'num_leaves': 13, 'max_depth': 11, 'min_child_samples': 5, 'reg_alpha': 0.009318063351490647, 'reg_lambda': 0.0003702336299292261}. Best is trial 7 with value: 0.23713070313265364.
[I 2025-03-20 05:31:47,028] Trial 15 finished with value: 0.25135135631737554 and parameters: {'n_estimators': 680, 'learning_rate': 0.18279916172254368, 'num_leaves': 28, 'max_depth': 11, 'min_child_samples': 13, 'reg_alpha': 0.4964538837126102, 'reg_lambda': 1.1063763567260407e-05}. Best is trial 7 with value: 0.23713070313265364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000100 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:47,249] Trial 16 finished with value: 0.2632907770312586 and parameters: {'n_estimators': 318, 'learning_rate': 0.04963847394569649, 'num_leaves': 12, 'max_depth': 7, 'min_child_samples': 16, 'reg_alpha': 0.08443275143810276, 'reg_lambda': 0.000569677212368913}. Best is trial 7 with value: 0.23713070313265364.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000075 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:47,426] Trial 17 finished with value: 0.2342808505136189 and parameters: {'n_estimators': 392, 'learning_rate': 0.11121018178608517, 'num_leaves': 17, 'max_depth': 11, 'min_child_samples': 8, 'reg_alpha': 0.0017856940944353742, 'reg_lambda': 0.03191765734668815}. Best is trial 17 with value: 0.2342808505136189.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000072 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:47,576] Trial 18 finished with value: 0.2556181893247114 and parameters: {'n_estimators': 373, 'learning_rate': 0.11535323820653577, 'num_leaves': 18, 'max_depth': 7, 'min_child_samples': 9, 'reg_alpha': 0.002000534116448351, 'reg_lambda': 0.04002487961669911}. Best is trial 17 with value: 0.2342808505136189.
[I 2025-03-20 05:31:47,694] Trial 19 finished with value: 0.2511465030066338 and parameters: {'n_estimators': 186, 'learning_rate': 0.294002891995167, 'num_leaves': 23, 'max_depth': 10, 'min_child_samples': 11, 'reg_alpha': 0.028175258046646764, 'reg_lambda': 0.023876013810435512}. Best is trial 17 with value: 0.2342808505136189.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000099 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:47,780] Trial 20 finished with value: 0.27188754431992107 and parameters: {'n_estimators': 397, 'learning_rate': 0.7320791807080306, 'num_leaves': 29, 'max_depth': 10, 'min_child_samples': 7, 'reg_alpha': 1.8315542342712137, 'reg_lambda': 0.18313934881351362}. Best is trial 17 with value: 0.2342808505136189.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000071 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:48,059] Trial 21 finished with value: 0.24530548551453274 and parameters: {'n_estimators': 639, 'learning_rate': 0.07794280194462738, 'num_leaves': 16, 'max_depth': 11, 'min_child_samples': 5, 'reg_alpha': 5.2726215638589724e-05, 'reg_lambda': 0.0024764523617409858}. Best is trial 17 with value: 0.2342808505136189.
[I 2025-03-20 05:31:48,286] Trial 22 finished with value: 0.2531851766471449 and parameters: {'n_estimators': 780, 'learning_rate': 0.23728307549335234, 'num_leaves': 16, 'max_depth': 12, 'min_child_samples': 7, 'reg_alpha': 0.0014875541071538466, 'reg_lambda': 0.00014176300136504495}. Best is trial 17 with value: 0.2342808505136189.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000102 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:48,422] Trial 23 finished with value: 0.24176077674710916 and parameters: {'n_estimators': 230, 'learning_rate': 0.09844096167752378, 'num_leaves': 20, 'max_depth': 11, 'min_child_samples': 8, 'reg_alpha': 1.4124762496858275e-05, 'reg_lambda': 0.014531286368521665}. Best is trial 17 with value: 0.2342808505136189.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000071 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000070 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455


[I 2025-03-20 05:31:48,606] Trial 24 finished with value: 0.24050874171071399 and parameters: {'n_estimators': 454, 'learning_rate': 0.03281496382937743, 'num_leaves': 15, 'max_depth': 8, 'min_child_samples': 5, 'reg_alpha': 7.07080383515573e-05, 'reg_lambda': 0.001531081753536558}. Best is trial 17 with value: 0.2342808505136189.


Best Hyperparameters: {'n_estimators': 392, 'learning_rate': 0.11121018178608517, 'num_leaves': 17, 'max_depth': 11, 'min_child_samples': 8, 'reg_alpha': 0.0017856940944353742, 'reg_lambda': 0.03191765734668815}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000156 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 547
[LightGBM] [Info] Number of data points in the train set: 308, number of used features: 9
[LightGBM] [Info] Start training from score 1.170455
Optimized Mean Absolute Error: 0.2342808505136189
