In [20]:
import numpy as np
import pandas as pd
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import re
import plotly.express as px

In [2]:
def convert_to_number(val):
    if pd.isna(val):
        return np.nan
    val = str(val).replace(' ', '')  # удаляем пробелы
    # обработка значений '<число'
    if val.startswith('<'):
        num = re.findall(r'<(\d+\.?\d*)', val)
        return float(num[0]) if num else np.nan
    # обработка значений с ±
    elif '±' in val:
        nums = re.findall(r'([\d\.]+)±([\d\.]+)', val)
        if nums:
            main, uncertainty = nums[0]
            return float(main)  # берём только среднее (первое число)
        else:
            return np.nan
    # обработка значений с '/'
    elif '/' in val:
        nums = re.findall(r'([\d\.]+)/([\d\.]+)', val)
        if nums:
            num1, num2 = nums[0]
            return (float(num1) + float(num2)) / 2
        else:
            return np.nan
    # пробуем просто преобразовать в число
    else:
        try:
            return float(val)
        except:
            return np.nan

In [6]:
# Load dataset
df = pd.read_csv("for_regr_with_descrip.csv")
df['raw_efficiency'] = df['raw_efficiency'].apply(convert_to_number)

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 877 entries, 0 to 876
Data columns (total 59 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id                    877 non-null    int64  
 1   sequence              877 non-null    object 
 2   extra_name            36 non-null     object 
 3   cpp_category          170 non-null    object 
 4   is_cpp                877 non-null    bool   
 5   cpp_type              11 non-null     object 
 6   origin                1 non-null      object 
 7   id_uptake             877 non-null    float64
 8   peptide               877 non-null    float64
 9   uptake_type           850 non-null    object 
 10  raw_efficiency        877 non-null    float64
 11  raw_toxicity          0 non-null      float64
 12  raw_concentration     0 non-null      float64
 13  id_experiment         875 non-null    float64
 14  peptide_experiment    875 non-null    float64
 15  raw_time              8

In [21]:
fig = px.box(df, x="uptake_type", y="raw_efficiency"
)

fig.show()

In [24]:
df.describe()

Unnamed: 0,id,id_uptake,peptide,raw_efficiency,raw_toxicity,raw_concentration,id_experiment,peptide_experiment,id_article,pubmed_id,...,Positive_AA,Negative_AA,MolWt,LogP,TPSA,HBD,HBA,RotBonds,Rings,Fsp3
count,877.0,877.0,877.0,877.0,0.0,0.0,875.0,875.0,288.0,0.0,...,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0
mean,2336.399088,858.465222,2433.510832,6171.126,,,1155.318857,2433.202286,1.715278,,...,6.605473,0.769669,2316.701556,-10.018074,1028.797423,38.883694,31.489168,77.941847,4.172178,0.619444
std,671.436748,614.014833,514.474697,53327.07,,,458.197301,514.662395,1.55352,,...,4.804808,1.707315,1806.097391,10.852696,743.684017,26.751737,24.706892,59.429384,4.624607,0.103138
min,19.0,2.0,1340.0,0.0,,,9.0,1340.0,1.0,,...,0.0,0.0,115.176,-167.81064,43.09,1.0,2.0,1.0,0.0,0.3
25%,1819.0,279.0,1963.0,9.762,,,871.5,1962.5,1.0,,...,4.0,0.0,1451.71,-12.83501,664.61,25.0,19.0,49.0,1.0,0.559322
50%,2607.0,724.0,2607.0,65.0,,,1259.0,2607.0,1.0,,...,6.0,0.0,1964.351,-7.72747,870.18,34.0,26.0,67.0,3.0,0.630435
75%,2847.0,1408.0,2847.0,555.0,,,1529.5,2846.5,1.0,,...,8.0,1.0,2610.914,-4.32486,1159.28,45.0,36.0,88.0,5.0,0.694737
max,3103.0,1965.0,3103.0,1037500.0,,,1788.0,3103.0,6.0,,...,42.0,13.0,23968.291,6.4769,9159.58,298.0,306.0,780.0,44.0,0.833333


# Код Кати + фильтрование по аптейку

In [25]:
def filter_data(df, arrays, cell_line_array=None, is_y=False):
    """
    Фильтрует данные по маске из столбца 'uptake_type' датафрейма df.
    
    Параметры:
    - df: pandas.DataFrame с исходными данными
    - arrays: список массивов для фильтрации и объединения (например, [X_numerical, blomap_pca])
    - cell_line_array: дополнительный массив (X_cell_line), который может отсутствовать
    - is_y: флаг, что обрабатываем целевую переменную y
    
    Возвращает:
    - Отфильтрованный массив или датафрейм
    """
    # Создаем булеву маску
    mask = df['uptake_type'].isin(['Mean Fluorescence intensity', 'Fluorescence intensity'])
    
    if is_y:
        # Для y просто применяем маску к столбцу
        return df['raw_efficiency'].values[mask]
    else:
        # Фильтруем все переданные массивы
        filtered_arrays = [arr[mask] for arr in arrays]
        
        # Объединяем массивы
        filtered_X = np.hstack(filtered_arrays)
        
        # Добавляем cell_line, если он не пуст
        if cell_line_array is not None and not cell_line_array.empty:
            filtered_cell_line = cell_line_array.values[mask]
            filtered_X = np.hstack([filtered_X, filtered_cell_line])
        
        return filtered_X

In [16]:
# Load embeddings
blomap_embeddings = np.load("blomap_regr.npy")
fingerprints_embeddings = np.load("fingerprints_regr.npy")
protbert_embeddings = np.load("protbert_regr.npy")

# Apply PCA to Blomap for XGBoost
pca_blomap = PCA(n_components=10, random_state=42)
blomap_pca = pca_blomap.fit_transform(blomap_embeddings)

# Select numerical features
selected_features = [
    "MW", "GRAVY", "pI", "Charge", "Charge_Density", "Aromaticity",
    "Flexibility", "Aliphatic_Index", "Boman_Index", "Hydrophobic_AA",
    "Polar_AA", "Positive_AA", "Negative_AA", "MolWt", "LogP",
    "TPSA", "HBD", "HBA", "RotBonds", "Rings", "Fsp3"
]
X_numerical = df[selected_features].copy()

# One-hot encoding for cell_line
if "cell_line" in df.columns:
    enc = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    cell_line_encoded = enc.fit_transform(df[['cell_line']])
    cell_line_feature_names = enc.get_feature_names_out(["cell_line"])
    X_cell_line = pd.DataFrame(cell_line_encoded, columns=cell_line_feature_names)
else:
    X_cell_line = pd.DataFrame()

# Prepare feature matrices
X_xgb = np.hstack([X_numerical.values, blomap_pca, fingerprints_embeddings, protbert_embeddings])
if not X_cell_line.empty:
    X_xgb = np.hstack([X_xgb, X_cell_line.values])

X_lgbm = np.hstack([X_numerical.values, blomap_embeddings, fingerprints_embeddings, protbert_embeddings])
if not X_cell_line.empty:
    X_lgbm = np.hstack([X_lgbm, X_cell_line.values])

y = df["raw_efficiency"].values

# Пример создания исходных переменных:
# (предполагается, что переменные X_numerical, blomap_pca, fingerprints_embeddings, 
#  protbert_embeddings, blomap_embeddings, X_cell_line и df уже определены)

# Для X_xgb:
# Список массивов для X_xgb
arrays_xgb = [
    X_numerical.values,
    blomap_pca,          # Из вашего исходного кода
    fingerprints_embeddings,
    protbert_embeddings
]

# Применяем фильтрацию
X_xgb = filter_data(
    df=df,
    arrays=arrays_xgb,
    cell_line_array=X_cell_line  # Передаем X_cell_line, если он не пуст
)

# Для X_lgbm:
# Список массивов для X_lgbm (здесь другой blomap)
arrays_lgbm = [
    X_numerical.values,
    blomap_embeddings,    # Разный признак относительно X_xgb
    fingerprints_embeddings,
    protbert_embeddings
]

# Применяем фильтрацию
X_lgbm = filter_data(
    df=df,
    arrays=arrays_lgbm,
    cell_line_array=X_cell_line
)

# Целевая переменная:
y = df["raw_efficiency"].values 

y = filter_data(
    df=df,
    arrays=[],  # Для y не нужны дополнительные массивы
    cell_line_array=None,
    is_y=True   # Указываем, что обрабатываем целевую переменную
)


valid_idx = ~np.isnan(y)
X_xgb, X_lgbm, y = X_xgb[valid_idx], X_lgbm[valid_idx], y[valid_idx]

# Handle missing values
imputer = SimpleImputer(strategy="mean")
X_xgb, X_lgbm = imputer.fit_transform(X_xgb), imputer.fit_transform(X_lgbm)

# Log-transform target variable
y = np.log1p(y)

# Train-test split
X_train_xgb, X_test_xgb, y_train, y_test = train_test_split(X_xgb, y, test_size=0.2, random_state=42)
X_train_lgbm, X_test_lgbm, _, _ = train_test_split(X_lgbm, y, test_size=0.2, random_state=42)

# Train XGBoost
xgb_model = XGBRegressor(n_estimators=754, max_depth=6, learning_rate=0.054886325307314195,
                         subsample=0.9967873263465272, colsample_bytree=0.8645926672674225,
                         random_state=42)
xgb_model.fit(X_train_xgb, y_train)
xgb_pred = np.expm1(xgb_model.predict(X_test_xgb))

# Train LightGBM
lgbm_model = LGBMRegressor(n_estimators=629, learning_rate=0.0114315426267485, num_leaves=77, 
                            min_data_in_leaf=9, max_depth=7, colsample_bytree=0.7, random_state=42)
lgbm_model.fit(X_train_lgbm, y_train)
lgbm_pred = np.expm1(lgbm_model.predict(X_test_lgbm))

# Ensemble predictions (90% XGBoost, 10% LightGBM)
ensemble_pred = (0.9 * xgb_pred + 0.1 * lgbm_pred)

# Evaluate model
print(f"MAE XGBoost: {mean_absolute_error(np.expm1(y_test), xgb_pred):.4f}")
print(f"MAE LightGBM: {mean_absolute_error(np.expm1(y_test), lgbm_pred):.4f}")
print(f"MAE Ensemble (90% XGBoost, 10% LightGBM): {mean_absolute_error(np.expm1(y_test), ensemble_pred):.4f}")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001706 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2205
[LightGBM] [Info] Number of data points in the train set: 249, number of used features: 326
[LightGBM] [Info] Start training from score 5.483091
MAE XGBoost: 26809.3662
MAE LightGBM: 27008.1357
MAE Ensemble (90% XGBoost, 10% LightGBM): 26817.8202




In [11]:
X_xgb

array([[ 3.15183620e+03, -1.49230769e+00,  1.16083223e+01, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 2.78519390e+03, -1.27272727e+00,  1.18393770e+01, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 2.07836580e+03, -1.12352941e+00,  1.18244848e+01, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       ...,
       [ 8.90932000e+01,  1.80000000e+00,  5.57001667e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 3.71297160e+03, -3.61153846e+00,  1.19999678e+01, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 3.71297160e+03, -3.61153846e+00,  1.19999678e+01, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])

In [17]:
import numpy as np
import pandas as pd
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# Load dataset
df = pd.read_csv("for_regr_with_descrip.csv")
df['raw_efficiency'] = df['raw_efficiency'].apply(convert_to_number)

# Load embeddings
blomap_embeddings = np.load("blomap_regr.npy")
fingerprints_embeddings = np.load("fingerprints_regr.npy")
protbert_embeddings = np.load("protbert_regr.npy")

# Apply PCA to Blomap for XGBoost
pca_blomap = PCA(n_components=10, random_state=42)
blomap_pca = pca_blomap.fit_transform(blomap_embeddings)

# Select numerical features
selected_features = [
    "MW", "GRAVY", "pI", "Charge", "Charge_Density", "Aromaticity",
    "Flexibility", "Aliphatic_Index", "Boman_Index", "Hydrophobic_AA",
    "Polar_AA", "Positive_AA", "Negative_AA", "MolWt", "LogP",
    "TPSA", "HBD", "HBA", "RotBonds", "Rings", "Fsp3"
]
X_numerical = df[selected_features].copy()

# One-hot encoding for cell_line
if "cell_line" in df.columns:
    enc = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    cell_line_encoded = enc.fit_transform(df[['cell_line']])
    cell_line_feature_names = enc.get_feature_names_out(["cell_line"])
    X_cell_line = pd.DataFrame(cell_line_encoded, columns=cell_line_feature_names)
else:
    X_cell_line = pd.DataFrame()

# Prepare feature matrices
X_xgb = np.hstack([X_numerical, blomap_pca, fingerprints_embeddings, protbert_embeddings])
if not X_cell_line.empty:
    X_xgb = np.hstack([X_xgb, X_cell_line])

X_lgbm = np.hstack([X_numerical, blomap_embeddings, fingerprints_embeddings, protbert_embeddings])
if not X_cell_line.empty:
    X_lgbm = np.hstack([X_lgbm, X_cell_line])

y = df["raw_efficiency"].values
valid_idx = ~np.isnan(y)
X_xgb, X_lgbm, y = X_xgb[valid_idx], X_lgbm[valid_idx], y[valid_idx]

# Handle missing values
imputer = SimpleImputer(strategy="mean")
X_xgb, X_lgbm = imputer.fit_transform(X_xgb), imputer.fit_transform(X_lgbm)

# Log-transform target variable
y = np.log1p(y)

# Train-test split
X_train_xgb, X_test_xgb, y_train, y_test = train_test_split(X_xgb, y, test_size=0.2, random_state=42)
X_train_lgbm, X_test_lgbm, _, _ = train_test_split(X_lgbm, y, test_size=0.2, random_state=42)

# Optimize XGBoost with Optuna
def objective_xgb(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'random_state': 42
    }
    model = XGBRegressor(**params)
    model.fit(X_train_xgb, y_train)
    pred = model.predict(X_test_xgb)
    return mean_absolute_error(y_test, pred)

study_xgb = optuna.create_study(direction='minimize')
study_xgb.optimize(objective_xgb, n_trials=50)
best_params_xgb = study_xgb.best_params
xgb_model = XGBRegressor(**best_params_xgb)
xgb_model.fit(X_train_xgb, y_train)
xgb_pred = np.expm1(xgb_model.predict(X_test_xgb))

# Optimize LightGBM with Optuna
def objective_lgbm(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 5, 50),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'random_state': 42
    }
    model = LGBMRegressor(**params)
    model.fit(X_train_lgbm, y_train)
    pred = model.predict(X_test_lgbm)
    return mean_absolute_error(y_test, pred)

study_lgbm = optuna.create_study(direction='minimize')
study_lgbm.optimize(objective_lgbm, n_trials=50)
best_params_lgbm = study_lgbm.best_params
lgbm_model = LGBMRegressor(**best_params_lgbm)
lgbm_model.fit(X_train_lgbm, y_train)
lgbm_pred = np.expm1(lgbm_model.predict(X_test_lgbm))

# Ensemble predictions (90% XGBoost, 10% LightGBM)
ensemble_pred = (0.9 * xgb_pred + 0.1 * lgbm_pred)

# Evaluate model
print(f"MAE XGBoost: {mean_absolute_error(np.expm1(y_test), xgb_pred):.4f}")
print(f"MAE LightGBM: {mean_absolute_error(np.expm1(y_test), lgbm_pred):.4f}")
print(f"MAE Ensemble (90% XGBoost, 10% LightGBM): {mean_absolute_error(np.expm1(y_test), ensemble_pred):.4f}")


[I 2025-03-18 16:39:25,287] A new study created in memory with name: no-name-bbecc130-1630-4536-904b-0cf9fa7cb157
[I 2025-03-18 16:39:28,660] Trial 0 finished with value: 1.8033489493874109 and parameters: {'n_estimators': 991, 'max_depth': 3, 'learning_rate': 0.18802617935350052, 'subsample': 0.5055348572807952, 'colsample_bytree': 0.5544211474901319}. Best is trial 0 with value: 1.8033489493874109.
[I 2025-03-18 16:39:36,598] Trial 1 finished with value: 1.6770690481408337 and parameters: {'n_estimators': 670, 'max_depth': 10, 'learning_rate': 0.04386714041880845, 'subsample': 0.7823025782026443, 'colsample_bytree': 0.7958185092986652}. Best is trial 1 with value: 1.6770690481408337.
[I 2025-03-18 16:39:39,690] Trial 2 finished with value: 1.6622340763770873 and parameters: {'n_estimators': 968, 'max_depth': 3, 'learning_rate': 0.07442712595122661, 'subsample': 0.5507971879257119, 'colsample_bytree': 0.5294418201943205}. Best is trial 2 with value: 1.6622340763770873.
[I 2025-03-18 1

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004474 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4089
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 315
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:09,518] Trial 0 finished with value: 1.84853920632397 and parameters: {'n_estimators': 587, 'learning_rate': 0.03521530329895833, 'num_leaves': 41, 'min_data_in_leaf': 32, 'max_depth': 6, 'colsample_bytree': 0.7210493430173269}. Best is trial 0 with value: 1.84853920632397.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003800 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4083
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 312
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:10,960] Trial 1 finished with value: 1.860731409532413 and parameters: {'n_estimators': 973, 'learning_rate': 0.04479817434308457, 'num_leaves': 80, 'min_data_in_leaf': 34, 'max_depth': 9, 'colsample_bytree': 0.8389518435418618}. Best is trial 0 with value: 1.84853920632397.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004586 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3964
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 280
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:11,328] Trial 2 finished with value: 2.078925134931094 and parameters: {'n_estimators': 564, 'learning_rate': 0.26392591473185534, 'num_leaves': 65, 'min_data_in_leaf': 47, 'max_depth': 3, 'colsample_bytree': 0.6503701015666923}. Best is trial 0 with value: 1.84853920632397.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006234 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4319
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 375
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:12,549] Trial 3 finished with value: 1.8471601760469598 and parameters: {'n_estimators': 988, 'learning_rate': 0.011888979977626135, 'num_leaves': 71, 'min_data_in_leaf': 16, 'max_depth': 7, 'colsample_bytree': 0.8423006881970787}. Best is trial 3 with value: 1.8471601760469598.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004336 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3956
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 276
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:12,940] Trial 4 finished with value: 1.8740794135052277 and parameters: {'n_estimators': 263, 'learning_rate': 0.028402413357041363, 'num_leaves': 65, 'min_data_in_leaf': 50, 'max_depth': 10, 'colsample_bytree': 0.8775158867535382}. Best is trial 3 with value: 1.8471601760469598.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014341 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8312
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1443
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:18,440] Trial 5 finished with value: 1.7833517577994307 and parameters: {'n_estimators': 954, 'learning_rate': 0.1999996718155121, 'num_leaves': 78, 'min_data_in_leaf': 6, 'max_depth': 10, 'colsample_bytree': 0.6653921727430552}. Best is trial 5 with value: 1.7833517577994307.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005775 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4373
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 395
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:19,011] Trial 6 finished with value: 1.83009960786231 and parameters: {'n_estimators': 423, 'learning_rate': 0.28088819002583026, 'num_leaves': 74, 'min_data_in_leaf': 13, 'max_depth': 7, 'colsample_bytree': 0.8319742212242854}. Best is trial 5 with value: 1.7833517577994307.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005208 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4087
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 314
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:19,321] Trial 7 finished with value: 1.861169041853031 and parameters: {'n_estimators': 197, 'learning_rate': 0.20614128899518794, 'num_leaves': 41, 'min_data_in_leaf': 33, 'max_depth': 9, 'colsample_bytree': 0.8381872478324492}. Best is trial 5 with value: 1.7833517577994307.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004737 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4191
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 340
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:19,813] Trial 8 finished with value: 1.8800600802548482 and parameters: {'n_estimators': 427, 'learning_rate': 0.02359929461816792, 'num_leaves': 67, 'min_data_in_leaf': 26, 'max_depth': 4, 'colsample_bytree': 0.546882633785447}. Best is trial 5 with value: 1.7833517577994307.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007954 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4157
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 335
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:20,189] Trial 9 finished with value: 1.854405812730141 and parameters: {'n_estimators': 417, 'learning_rate': 0.02655849966886273, 'num_leaves': 58, 'min_data_in_leaf': 27, 'max_depth': 5, 'colsample_bytree': 0.9648645812700358}. Best is trial 5 with value: 1.7833517577994307.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013690 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 8312
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1443
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:23,529] Trial 10 finished with value: 1.7204358456855404 and parameters: {'n_estimators': 774, 'learning_rate': 0.10315585182545343, 'num_leaves': 100, 'min_data_in_leaf': 6, 'max_depth': 10, 'colsample_bytree': 0.5063471121411427}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.015060 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:26,833] Trial 11 finished with value: 1.7210885200887787 and parameters: {'n_estimators': 788, 'learning_rate': 0.11481072285295443, 'num_leaves': 95, 'min_data_in_leaf': 5, 'max_depth': 10, 'colsample_bytree': 0.5817748717713679}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012829 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:29,023] Trial 12 finished with value: 1.79127522817035 and parameters: {'n_estimators': 741, 'learning_rate': 0.11091326206935101, 'num_leaves': 100, 'min_data_in_leaf': 5, 'max_depth': 8, 'colsample_bytree': 0.5067958554235653}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004445 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4319
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 375
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:30,072] Trial 13 finished with value: 1.7945278611858606 and parameters: {'n_estimators': 771, 'learning_rate': 0.07939563414520252, 'num_leaves': 96, 'min_data_in_leaf': 16, 'max_depth': 9, 'colsample_bytree': 0.5905305537513377}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004622 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4423
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 409
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:31,633] Trial 14 finished with value: 1.822401543242477 and parameters: {'n_estimators': 788, 'learning_rate': 0.11605302324582577, 'num_leaves': 89, 'min_data_in_leaf': 11, 'max_depth': 10, 'colsample_bytree': 0.5941345349432973}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005069 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4260
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 359
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:32,662] Trial 15 finished with value: 1.847479225115212 and parameters: {'n_estimators': 675, 'learning_rate': 0.0709683556993809, 'num_leaves': 26, 'min_data_in_leaf': 20, 'max_depth': 8, 'colsample_bytree': 0.5171273885255571}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009516 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6410
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 936
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:34,486] Trial 16 finished with value: 1.7292842349212345 and parameters: {'n_estimators': 838, 'learning_rate': 0.12473859870272268, 'num_leaves': 91, 'min_data_in_leaf': 9, 'max_depth': 8, 'colsample_bytree': 0.6042859424672754}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005336 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4252
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 356
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:35,465] Trial 17 finished with value: 1.7987735704274288 and parameters: {'n_estimators': 873, 'learning_rate': 0.06422399649684477, 'num_leaves': 86, 'min_data_in_leaf': 21, 'max_depth': 10, 'colsample_bytree': 0.7290019031077262}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004819 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4013
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 290
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:35,948] Trial 18 finished with value: 1.9576468494894557 and parameters: {'n_estimators': 667, 'learning_rate': 0.15843609660904295, 'num_leaves': 51, 'min_data_in_leaf': 44, 'max_depth': 6, 'colsample_bytree': 0.6663990750930568}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012490 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7307
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1182
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:38,108] Trial 19 finished with value: 1.736504685575511 and parameters: {'n_estimators': 879, 'learning_rate': 0.08762935428011849, 'num_leaves': 100, 'min_data_in_leaf': 8, 'max_depth': 9, 'colsample_bytree': 0.5650152758002674}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003730 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4032
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 299
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:38,617] Trial 20 finished with value: 1.8894918595950723 and parameters: {'n_estimators': 673, 'learning_rate': 0.05008940711628704, 'num_leaves': 85, 'min_data_in_leaf': 40, 'max_depth': 8, 'colsample_bytree': 0.7752786622254778}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006174 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5593
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 706
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:39,672] Trial 21 finished with value: 1.7802004768254471 and parameters: {'n_estimators': 851, 'learning_rate': 0.13083201069052278, 'num_leaves': 91, 'min_data_in_leaf': 10, 'max_depth': 8, 'colsample_bytree': 0.6238488107404393}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012537 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:42,545] Trial 22 finished with value: 1.7515019892376342 and parameters: {'n_estimators': 842, 'learning_rate': 0.16152840333513868, 'num_leaves': 93, 'min_data_in_leaf': 5, 'max_depth': 9, 'colsample_bytree': 0.5461618069773709}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004467 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4359
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 390
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:43,448] Trial 23 finished with value: 1.7807972262952638 and parameters: {'n_estimators': 729, 'learning_rate': 0.09960385097132665, 'num_leaves': 82, 'min_data_in_leaf': 14, 'max_depth': 10, 'colsample_bytree': 0.6127736685010029}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004372 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4260
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 359
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:44,438] Trial 24 finished with value: 1.8962903103592084 and parameters: {'n_estimators': 906, 'learning_rate': 0.14793938926926503, 'num_leaves': 94, 'min_data_in_leaf': 20, 'max_depth': 7, 'colsample_bytree': 0.5066341054984893}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007073 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 6410
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 936
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:45,430] Trial 25 finished with value: 1.8061120309148284 and parameters: {'n_estimators': 619, 'learning_rate': 0.20502804778315128, 'num_leaves': 100, 'min_data_in_leaf': 9, 'max_depth': 9, 'colsample_bytree': 0.7003875350074915}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004698 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4399
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 402
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:46,191] Trial 26 finished with value: 1.7903701775388166 and parameters: {'n_estimators': 464, 'learning_rate': 0.05605609301184845, 'num_leaves': 90, 'min_data_in_leaf': 12, 'max_depth': 10, 'colsample_bytree': 0.5699463836033581}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004182 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4303
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 372
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:46,863] Trial 27 finished with value: 1.8008543188180242 and parameters: {'n_estimators': 796, 'learning_rate': 0.08975218415508186, 'num_leaves': 76, 'min_data_in_leaf': 17, 'max_depth': 8, 'colsample_bytree': 0.6345865670502602}. Best is trial 10 with value: 1.7204358456855404.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7307
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1182
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:47,465] Trial 28 finished with value: 1.706551036925886 and parameters: {'n_estimators': 503, 'learning_rate': 0.1231827126672656, 'num_leaves': 87, 'min_data_in_leaf': 8, 'max_depth': 5, 'colsample_bytree': 0.7782686430330562}. Best is trial 28 with value: 1.706551036925886.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004285 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4215
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 346
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:47,833] Trial 29 finished with value: 1.8710249517671516 and parameters: {'n_estimators': 513, 'learning_rate': 0.039003049812874856, 'num_leaves': 84, 'min_data_in_leaf': 24, 'max_depth': 5, 'colsample_bytree': 0.7498851784253571}. Best is trial 28 with value: 1.706551036925886.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012673 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8244
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1421
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:48,573] Trial 30 finished with value: 1.74574488208225 and parameters: {'n_estimators': 609, 'learning_rate': 0.01853124277534837, 'num_leaves': 24, 'min_data_in_leaf': 7, 'max_depth': 5, 'colsample_bytree': 0.7784401206375583}. Best is trial 28 with value: 1.706551036925886.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011364 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5593
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 706
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:48,936] Trial 31 finished with value: 1.79481505030167 and parameters: {'n_estimators': 284, 'learning_rate': 0.13477686550180643, 'num_leaves': 95, 'min_data_in_leaf': 10, 'max_depth': 6, 'colsample_bytree': 0.6977475234330847}. Best is trial 28 with value: 1.706551036925886.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010850 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:50,077] Trial 32 finished with value: 1.6562390103494082 and parameters: {'n_estimators': 522, 'learning_rate': 0.10638228440933559, 'num_leaves': 88, 'min_data_in_leaf': 5, 'max_depth': 4, 'colsample_bytree': 0.5390104858820288}. Best is trial 32 with value: 1.6562390103494082.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013054 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:51,111] Trial 33 finished with value: 1.718882285006985 and parameters: {'n_estimators': 516, 'learning_rate': 0.06366908973174547, 'num_leaves': 81, 'min_data_in_leaf': 5, 'max_depth': 3, 'colsample_bytree': 0.536854169539785}. Best is trial 32 with value: 1.6562390103494082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003583 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4042
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 303
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:51,587] Trial 34 finished with value: 1.8517206751953716 and parameters: {'n_estimators': 519, 'learning_rate': 0.06953602382808902, 'num_leaves': 80, 'min_data_in_leaf': 38, 'max_depth': 3, 'colsample_bytree': 0.5479226773806761}. Best is trial 32 with value: 1.6562390103494082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003477 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4106
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 323
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:51,941] Trial 35 finished with value: 1.8500370445862324 and parameters: {'n_estimators': 337, 'learning_rate': 0.0418877669190413, 'num_leaves': 72, 'min_data_in_leaf': 30, 'max_depth': 4, 'colsample_bytree': 0.5331906982648436}. Best is trial 32 with value: 1.6562390103494082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004063 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4359
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 390
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:52,328] Trial 36 finished with value: 1.8251178619605044 and parameters: {'n_estimators': 564, 'learning_rate': 0.05064608006927608, 'num_leaves': 57, 'min_data_in_leaf': 14, 'max_depth': 4, 'colsample_bytree': 0.8857525644075371}. Best is trial 32 with value: 1.6562390103494082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8244
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1421
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:52,766] Trial 37 finished with value: 1.7655615594443583 and parameters: {'n_estimators': 481, 'learning_rate': 0.06215221095306384, 'num_leaves': 86, 'min_data_in_leaf': 7, 'max_depth': 3, 'colsample_bytree': 0.7843273021818438}. Best is trial 32 with value: 1.6562390103494082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004474 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4399
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 402
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:53,056] Trial 38 finished with value: 1.9223854466534873 and parameters: {'n_estimators': 357, 'learning_rate': 0.01018329068275289, 'num_leaves': 68, 'min_data_in_leaf': 12, 'max_depth': 4, 'colsample_bytree': 0.810070210801007}. Best is trial 32 with value: 1.6562390103494082.




[I 2025-03-18 16:42:53,361] Trial 39 finished with value: 1.8834869290913723 and parameters: {'n_estimators': 577, 'learning_rate': 0.18999328636999893, 'num_leaves': 78, 'min_data_in_leaf': 16, 'max_depth': 3, 'colsample_bytree': 0.8928456342302833}. Best is trial 32 with value: 1.6562390103494082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004405 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4319
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 375
[LightGBM] [Info] Start training from score 4.296787
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012321 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8244
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1421
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:53,672] Trial 40 finished with value: 1.733806882853001 and parameters: {'n_estimators': 121, 'learning_rate': 0.2593197653403817, 'num_leaves': 62, 'min_data_in_leaf': 7, 'max_depth': 5, 'colsample_bytree': 0.9370621604360614}. Best is trial 32 with value: 1.6562390103494082.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.020453 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:54,572] Trial 41 finished with value: 1.6289915906337555 and parameters: {'n_estimators': 519, 'learning_rate': 0.1019786244161141, 'num_leaves': 88, 'min_data_in_leaf': 5, 'max_depth': 4, 'colsample_bytree': 0.5815988893091064}. Best is trial 41 with value: 1.6289915906337555.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010634 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:55,718] Trial 42 finished with value: 1.6636302557584985 and parameters: {'n_estimators': 519, 'learning_rate': 0.09849431720494436, 'num_leaves': 80, 'min_data_in_leaf': 5, 'max_depth': 4, 'colsample_bytree': 0.5242708508950769}. Best is trial 41 with value: 1.6289915906337555.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009959 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7307
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1182
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:56,725] Trial 43 finished with value: 1.7739760576474688 and parameters: {'n_estimators': 524, 'learning_rate': 0.08097356490979896, 'num_leaves': 75, 'min_data_in_leaf': 8, 'max_depth': 4, 'colsample_bytree': 0.5297921490636571}. Best is trial 41 with value: 1.6289915906337555.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012104 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:57,145] Trial 44 finished with value: 1.6886932132703778 and parameters: {'n_estimators': 376, 'learning_rate': 0.09611643271024153, 'num_leaves': 81, 'min_data_in_leaf': 5, 'max_depth': 3, 'colsample_bytree': 0.654596370827399}. Best is trial 41 with value: 1.6289915906337555.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005402 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5593
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 706
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:57,526] Trial 45 finished with value: 1.8594536095599004 and parameters: {'n_estimators': 392, 'learning_rate': 0.09618212971507183, 'num_leaves': 71, 'min_data_in_leaf': 10, 'max_depth': 5, 'colsample_bytree': 0.6598657302228306}. Best is trial 41 with value: 1.6289915906337555.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005547 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4373
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 395
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:57,862] Trial 46 finished with value: 1.7494209578806965 and parameters: {'n_estimators': 458, 'learning_rate': 0.17252983218040902, 'num_leaves': 86, 'min_data_in_leaf': 13, 'max_depth': 4, 'colsample_bytree': 0.6414294825241835}. Best is trial 41 with value: 1.6289915906337555.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013230 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:58,581] Trial 47 finished with value: 1.6413594280454695 and parameters: {'n_estimators': 342, 'learning_rate': 0.08046321537484927, 'num_leaves': 79, 'min_data_in_leaf': 5, 'max_depth': 4, 'colsample_bytree': 0.5615073181207837}. Best is trial 41 with value: 1.6289915906337555.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012491 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787


[I 2025-03-18 16:42:59,105] Trial 48 finished with value: 1.7244612428503912 and parameters: {'n_estimators': 237, 'learning_rate': 0.0787591897522543, 'num_leaves': 78, 'min_data_in_leaf': 5, 'max_depth': 3, 'colsample_bytree': 0.5676286141728323}. Best is trial 41 with value: 1.6289915906337555.




[I 2025-03-18 16:42:59,419] Trial 49 finished with value: 1.9341802291560128 and parameters: {'n_estimators': 330, 'learning_rate': 0.09889816719715809, 'num_leaves': 71, 'min_data_in_leaf': 50, 'max_depth': 4, 'colsample_bytree': 0.5856205592098752}. Best is trial 41 with value: 1.6289915906337555.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002880 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3956
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 276
[LightGBM] [Info] Start training from score 4.296787
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011236 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 8429
[LightGBM] [Info] Number of data points in the train set: 701, number of used features: 1477
[LightGBM] [Info] Start training from score 4.296787
MAE XGBoost: 6270.6100
MAE LightGBM: 7614.9601
MAE Ensemble (90% XGBoost, 10% LightGBM): 6336.1096
