In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from src.feature_selectors import effectiveness_score
import pickle
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the feature matrix (space-separated, no header row) and name its
# 500 columns x0 .. x499.
X = pd.read_csv('data/x_train.txt', sep=' ', header=None)
X.columns = ['x' + str(i) for i in range(500)]
# Labels are read without a header row; y is the DataFrame returned by read_csv.
y = pd.read_csv('data/y_train.txt', header=None)

# 80/20 hold-out split, stratified on y, with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
# The scaler is fitted on the training part only and then applied to the hold-out part.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Wrap the scaled arrays back into DataFrames so columns can be selected by name.
# NOTE(review): these frames are built without an index argument, while
# y_train / y_test come straight from the split — treat rows as aligned by
# position, not by index label.
X_train = pd.DataFrame(X_train, columns=X.columns)
X_test = pd.DataFrame(X_test, columns=X.columns)

# Read the list of selected features from
# features/GreedyGainSelector_VotingClassifier_top_1_0.25_0.2_5_True.pkl
# NOTE: pickle.load can execute code embedded in the file; only load pickles from a trusted source.
with open('features/GreedyGainSelector_VotingClassifier_top_1_0.25_0.2_5_True.pkl', 'rb') as f:
    selected_features = pickle.load(f)

# Keep only the selected columns in both splits.
X_train = X_train[selected_features]
X_test = X_test[selected_features]

In [4]:
selected_features

['x101', 'x100', 'x105']

In [13]:
# Wrap effectiveness_score as an sklearn-style scorer so it can be passed as
# `scoring=` to cross-validation helpers. The number of selected features is
# forwarded to effectiveness_score on every call.
from sklearn.metrics import make_scorer

scorer = make_scorer(
    effectiveness_score,
    greater_is_better=True,
    n_features=len(selected_features),
)

In [14]:
scorer

make_scorer(effectiveness_score, response_method='predict', n_features=3)

In [15]:
def _top_fraction_labels(positive_scores, th):
    """Label the highest-scoring ``th`` fraction of rows 1 and every other row 0.

    Parameters
    ----------
    positive_scores : array-like, 1-D
        One score per row (here: the class-1 column of ``predict_proba``).
    th : float
        Fraction of rows to label 1; must lie in [0, 1].

    Returns
    -------
    numpy.ndarray
        0/1 array with the same length and dtype as the scores, containing
        exactly ``int(n_rows * th)`` ones.

    Raises
    ------
    ValueError
        If ``th`` is outside [0, 1].
    """
    if not 0.0 <= th <= 1.0:
        raise ValueError(f"th must be between 0 and 1, got {th!r}")
    scores = np.asarray(positive_scores)
    n_selected = int(len(scores) * th)
    labels = np.zeros(len(scores), dtype=scores.dtype)
    # n_selected == 0 must select nothing. Indexing a sorted array with -0
    # would pick the minimum as the cut-off and flag every row.
    if n_selected > 0:
        # Rank rows instead of comparing against a cut-off value, so tied
        # scores cannot push the number of positives above n_selected.
        # The stable sort breaks ties by original row order.
        top_rows = np.argsort(-scores, kind='stable')[:n_selected]
        labels[top_rows] = 1
    return labels


# VotingClassifier whose predict() flags the top `th` fraction of rows as class 1
class VotingClassifierWithThreshold(VotingClassifier):
    """VotingClassifier with a rank-based ``predict`` (uses ``predict_proba``)."""

    def predict(self, X, th=0.2):
        """Return 1 for the ``th`` fraction of rows with the highest class-1 probability, else 0."""
        return _top_fraction_labels(self.predict_proba(X)[:, 1], th)


# RandomForestClassifier whose predict() flags the top `th` fraction of rows as class 1
class RandomForestClassifierWithThreshold(RandomForestClassifier):
    """RandomForestClassifier with a rank-based ``predict`` (uses ``predict_proba``)."""

    def predict(self, X, th=0.2):
        """Return 1 for the ``th`` fraction of rows with the highest class-1 probability, else 0."""
        return _top_fraction_labels(self.predict_proba(X)[:, 1], th)


# XGBClassifier whose predict() flags the top `th` fraction of rows as class 1
class XGBClassifierWithThreshold(XGBClassifier):
    """XGBClassifier with a rank-based ``predict`` (uses ``predict_proba``)."""

    def predict(self, X, th=0.2):
        """Return 1 for the ``th`` fraction of rows with the highest class-1 probability, else 0."""
        return _top_fraction_labels(self.predict_proba(X)[:, 1], th)


# CatBoostClassifier whose predict() flags the top `th` fraction of rows as class 1
class CatBoostClassifierWithThreshold(CatBoostClassifier):
    """CatBoostClassifier with a rank-based ``predict`` (uses ``predict_proba``)."""

    def predict(self, X, th=0.2):
        """Return 1 for the ``th`` fraction of rows with the highest class-1 probability, else 0."""
        return _top_fraction_labels(self.predict_proba(X)[:, 1], th)

In [16]:
# Soft-voting ensemble of three stock base learners; only the ensemble wrapper
# uses the rank-based predict() of VotingClassifierWithThreshold.
clf1 = XGBClassifier(random_state=42, max_depth=5, n_estimators=100)
clf2 = CatBoostClassifier(random_state=42, depth=5, iterations=100, verbose=0)
clf3 = RandomForestClassifier(random_state=42, max_depth=5, n_estimators=100)
eclf = VotingClassifierWithThreshold(
    voting='soft',
    estimators=[('xgb', clf1), ('cat', clf2), ('rf', clf3)],
)

# fit() returns the fitted estimator, so the hold-out prediction is chained onto it.
y_pred = eclf.fit(X_train, y_train).predict(X_test, th=0.2)
# Last expression of the cell: effectiveness score on the hold-out split.
effectiveness_score(y_test.values.ravel(), y_pred, n_features=len(selected_features))

1220.0

In [17]:
# 4-fold cross-validation of the ensemble on the training split, scored with
# the effectiveness scorer.
from sklearn.model_selection import cross_val_score

scores = cross_val_score(
    estimator=eclf,
    X=X_train,
    y=y_train.values.ravel(),
    scoring=scorer,
    cv=4,
)
scores

array([1380., 1270., 1320., 1360.])

In [18]:
# Rank-based variants of the three learners, evaluated one at a time.
clf1 = XGBClassifierWithThreshold(n_estimators=100, max_depth=5, random_state=42)
clf2 = CatBoostClassifierWithThreshold(iterations=100, depth=5, random_state=42, verbose=0)
clf3 = RandomForestClassifierWithThreshold(n_estimators=100, max_depth=5, random_state=42)

# For each model, in order: fit on the training split, print the hold-out
# effectiveness score, then print the 4-fold cross-validation scores.
# y_pred ends up holding the predictions of the last model (clf3).
for model in (clf1, clf2, clf3):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test, th=0.2)
    print(effectiveness_score(y_test.values.ravel(), y_pred, n_features=len(selected_features)))
    print(cross_val_score(model, X_train, y_train.values.ravel(), cv=4, scoring=scorer))

1230.0
[1350. 1260. 1250. 1370.]
1290.0
[1380. 1300. 1350. 1320.]
1270.0
[1400. 1270. 1310. 1480.]


In [5]:
str(selected_features)

"['x101', 'x100', 'x105']"

In [24]:
# optuna for RandomForestClassifierWithThreshold
import optuna
from optuna.samplers import TPESampler

def objective_rf(trial):
    """Optuna objective for RandomForestClassifierWithThreshold.

    Samples max_depth, min_samples_split and min_samples_leaf from the trial
    (n_estimators is fixed at 1000), fits the model on the notebook-level
    X_train / y_train, scores it on X_test / y_test and with 4-fold
    cross-validation, and appends one row to logs/LOGS_RF.csv.

    Reads the notebook globals X_train, y_train, X_test, y_test,
    selected_features and scorer.

    Returns
    -------
    float
        Mean of the cross-validation scores (the value Optuna optimises).
    """
    import os  # local import: os is not among the imports of this part of the notebook

    try:
        LOGS_RF = pd.read_csv('logs/LOGS_RF.csv')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Only a missing or empty log starts a fresh table. Any other failure
        # must surface, otherwise the existing log would be overwritten below.
        LOGS_RF = pd.DataFrame(columns=['model', 'n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'es', 'cv1', 'cv2', 'cv3', 'cv4', 'cv_mean', 'cv_std', 'columns'])
    cv = 4
    n_estimators = 1000
    max_depth = trial.suggest_int('max_depth', 3, 8)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 16)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 16)

    clf = RandomForestClassifierWithThreshold(n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, random_state=42)
    clf.fit(X_train, y_train)
    # Score this trial's own hold-out predictions. Without this line the
    # function would read whatever global y_pred an earlier cell left behind.
    y_pred = clf.predict(X_test, th=0.2)
    es = effectiveness_score(y_test.values.ravel(), y_pred, n_features=len(selected_features))
    cv_score = cross_val_score(clf, X_train, y_train.values.ravel(), cv=cv, scoring=scorer)
    row = [clf, n_estimators, max_depth, min_samples_split, min_samples_leaf, es] + cv_score.tolist() + [cv_score.mean(), cv_score.std(), str(selected_features)]
    LOGS_RF = pd.concat([LOGS_RF, pd.DataFrame([row], columns=LOGS_RF.columns)], ignore_index=True)
    # Make sure the target directory exists so a finished trial is not lost on write.
    os.makedirs('logs', exist_ok=True)
    LOGS_RF.to_csv('logs/LOGS_RF.csv', index=False)
    return cv_score.mean()


# TPE sampler with a fixed seed (42).
sampler = TPESampler(seed=42)
# Study that maximises the objective's return value; no storage argument is
# given, and `study` / `sampler` are rebound by every later tuning run.
study = optuna.create_study(direction='maximize', sampler=sampler)
# Run 100 trials of objective_rf (each call also writes logs/LOGS_RF.csv).
study.optimize(objective_rf, n_trials=100)

[I 2024-05-27 16:41:21,710] A new study created in memory with name: no-name-c36fd49f-a097-4da7-91e6-0d0fc34728f6
[I 2024-05-27 16:41:36,662] Trial 0 finished with value: 1342.5 and parameters: {'max_depth': 5, 'min_samples_split': 16, 'min_samples_leaf': 12}. Best is trial 0 with value: 1342.5.
[I 2024-05-27 16:41:52,440] Trial 1 finished with value: 1350.0 and parameters: {'max_depth': 6, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 1 with value: 1350.0.
[I 2024-05-27 16:42:03,453] Trial 2 finished with value: 1327.5 and parameters: {'max_depth': 3, 'min_samples_split': 14, 'min_samples_leaf': 10}. Best is trial 1 with value: 1350.0.
[I 2024-05-27 16:42:18,875] Trial 3 finished with value: 1360.0 and parameters: {'max_depth': 7, 'min_samples_split': 2, 'min_samples_leaf': 16}. Best is trial 3 with value: 1360.0.
[I 2024-05-27 16:42:35,922] Trial 4 finished with value: 1352.5 and parameters: {'max_depth': 7, 'min_samples_split': 5, 'min_samples_leaf': 3}. Best is tria

In [None]:
# optuna for XGBClassifierWithThreshold

def objective_xgb(trial):
    """Optuna objective for XGBClassifierWithThreshold.

    Samples max_depth, learning_rate (log scale), subsample, colsample_bytree
    and min_child_weight from the trial (n_estimators is fixed at 1000), fits
    the model on the notebook-level X_train / y_train, scores it on
    X_test / y_test and with 4-fold cross-validation, and appends one row to
    logs/LOGS_XGB.csv.

    Reads the notebook globals X_train, y_train, X_test, y_test,
    selected_features and scorer.

    Returns
    -------
    float
        Mean of the cross-validation scores (the value Optuna optimises).
    """
    import os  # local import: os is not among the imports of this part of the notebook

    try:
        LOGS_XGB = pd.read_csv('logs/LOGS_XGB.csv')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Only a missing or empty log starts a fresh table. Any other failure
        # must surface, otherwise the existing log would be overwritten below.
        LOGS_XGB = pd.DataFrame(columns=['model', 'n_estimators', 'max_depth', 'learning_rate', 'subsample', 'colsample_bytree', 'min_child_weight', 'test_score', 'cv_1', 'cv_2', 'cv_3', 'cv_4', 'cv_mean', 'cv_std', 'columns'])
    cv = 4
    n_estimators = 1000
    max_depth = trial.suggest_int('max_depth', 3, 8)
    learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.1, log=True)
    subsample = trial.suggest_float('subsample', 0.05, 1.0)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.05, 1.0)
    min_child_weight = trial.suggest_int('min_child_weight', 1, 10)

    clf = XGBClassifierWithThreshold(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate, subsample=subsample, colsample_bytree=colsample_bytree, min_child_weight=min_child_weight, random_state=42)
    clf.fit(X_train, y_train)
    # Score this trial's own hold-out predictions. Without this line the
    # function would read whatever global y_pred an earlier cell left behind.
    y_pred = clf.predict(X_test, th=0.2)
    es = effectiveness_score(y_test.values.ravel(), y_pred, n_features=len(selected_features))
    cv_score = cross_val_score(clf, X_train, y_train.values.ravel(), cv=cv, scoring=scorer)
    row = [clf, n_estimators, max_depth, learning_rate, subsample, colsample_bytree, min_child_weight, es, *cv_score, cv_score.mean(), cv_score.std(), str(selected_features)]
    LOGS_XGB = pd.concat([LOGS_XGB, pd.DataFrame([row], columns=LOGS_XGB.columns)], ignore_index=True)
    # Make sure the target directory exists so a finished trial is not lost on write.
    os.makedirs('logs', exist_ok=True)
    LOGS_XGB.to_csv('logs/LOGS_XGB.csv', index=False)
    return cv_score.mean()

# TPE sampler with a fixed seed (42).
sampler = TPESampler(seed=42)
# Study that maximises the objective's return value; no storage argument is
# given, and `study` / `sampler` are rebound by every later tuning run.
study = optuna.create_study(direction='maximize', sampler=sampler)
# Run 100 trials of objective_xgb (each call also writes logs/LOGS_XGB.csv).
study.optimize(objective_xgb, n_trials=100)

[I 2024-05-27 16:37:01,253] A new study created in memory with name: no-name-702b8585-e325-4595-bbfd-10da52d9dcea
[I 2024-05-27 16:37:06,969] Trial 0 finished with value: 1232.5 and parameters: {'max_depth': 5, 'learning_rate': 0.07969454818643935, 'subsample': 0.7453942447208348, 'colsample_bytree': 0.6187255599871848, 'min_child_weight': 2}. Best is trial 0 with value: 1232.5.
[I 2024-05-27 16:37:11,488] Trial 1 finished with value: 1335.0 and parameters: {'max_depth': 3, 'learning_rate': 0.0013066739238053278, 'subsample': 0.8728673384861885, 'colsample_bytree': 0.6210592611560484, 'min_child_weight': 8}. Best is trial 1 with value: 1335.0.
[I 2024-05-27 16:37:15,772] Trial 2 finished with value: 1287.5 and parameters: {'max_depth': 3, 'learning_rate': 0.08706020878304858, 'subsample': 0.8408205087604007, 'colsample_bytree': 0.25172215514436236, 'min_child_weight': 2}. Best is trial 1 with value: 1335.0.
[I 2024-05-27 16:37:20,499] Trial 3 finished with value: 1332.5 and parameters:

In [None]:
# optuna for CatBoostClassifierWithThreshold

def objective_cat(trial):
    """Optuna objective for CatBoostClassifierWithThreshold.

    Samples depth, learning_rate (log scale), subsample, colsample_bylevel and
    min_data_in_leaf from the trial (iterations is fixed at 1000), fits the
    model on the notebook-level X_train / y_train, scores it on
    X_test / y_test and with 4-fold cross-validation, and appends one row to
    logs/LOGS_CAT.csv.

    Reads the notebook globals X_train, y_train, X_test, y_test,
    selected_features and scorer.

    Returns
    -------
    float
        Mean of the cross-validation scores (the value Optuna optimises).
    """
    import os  # local import: os is not among the imports of this part of the notebook

    try:
        LOGS_CAT = pd.read_csv('logs/LOGS_CAT.csv')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Only a missing or empty log starts a fresh table. Any other failure
        # must surface, otherwise the existing log would be overwritten below.
        LOGS_CAT = pd.DataFrame(columns=['model', 'iterations', 'depth', 'learning_rate', 'subsample', 'colsample_bylevel', 'min_data_in_leaf', 'test_score', 'cv_1', 'cv_2', 'cv_3', 'cv_4', 'cv_mean', 'cv_std', 'columns'])
    cv = 4
    iterations = 1000
    depth = trial.suggest_int('depth', 3, 8)
    learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.1, log=True)
    subsample = trial.suggest_float('subsample', 0.05, 1.0)
    colsample_bylevel = trial.suggest_float('colsample_bylevel', 0.05, 1.0)
    min_data_in_leaf = trial.suggest_int('min_data_in_leaf', 1, 10)

    clf = CatBoostClassifierWithThreshold(iterations=iterations, depth=depth, learning_rate=learning_rate, subsample=subsample, colsample_bylevel=colsample_bylevel, min_data_in_leaf=min_data_in_leaf, random_state=42, verbose=0)
    clf.fit(X_train, y_train)
    # Score this trial's own hold-out predictions. Without this line the
    # function would read whatever global y_pred an earlier cell left behind.
    y_pred = clf.predict(X_test, th=0.2)
    es = effectiveness_score(y_test.values.ravel(), y_pred, n_features=len(selected_features))
    cv_score = cross_val_score(clf, X_train, y_train.values.ravel(), cv=cv, scoring=scorer)
    row = [clf, iterations, depth, learning_rate, subsample, colsample_bylevel, min_data_in_leaf, es, *cv_score, cv_score.mean(), cv_score.std(), str(selected_features)]
    LOGS_CAT = pd.concat([LOGS_CAT, pd.DataFrame([row], columns=LOGS_CAT.columns)], ignore_index=True)
    # Make sure the target directory exists so a finished trial is not lost on write.
    os.makedirs('logs', exist_ok=True)
    LOGS_CAT.to_csv('logs/LOGS_CAT.csv', index=False)
    return cv_score.mean()

# TPE sampler with a fixed seed (42).
sampler = TPESampler(seed=42)
# Study that maximises the objective's return value; no storage argument is
# given, and `study` / `sampler` are rebound by every later tuning run.
study = optuna.create_study(direction='maximize', sampler=sampler)
# Run 100 trials of objective_cat (each call also writes logs/LOGS_CAT.csv).
study.optimize(objective_cat, n_trials=100)

In [8]:
import optuna
from optuna.samplers import TPESampler
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from src.feature_selectors import effectiveness_score
import pickle
import warnings
from sklearn.model_selection import cross_val_score
warnings.filterwarnings("ignore")
from sklearn.metrics import make_scorer

In [9]:
def _top_fraction_labels(positive_scores, th):
    """Label the highest-scoring ``th`` fraction of rows 1 and every other row 0.

    Parameters
    ----------
    positive_scores : array-like, 1-D
        One score per row (here: the class-1 column of ``predict_proba``).
    th : float
        Fraction of rows to label 1; must lie in [0, 1].

    Returns
    -------
    numpy.ndarray
        0/1 array with the same length and dtype as the scores, containing
        exactly ``int(n_rows * th)`` ones.

    Raises
    ------
    ValueError
        If ``th`` is outside [0, 1].
    """
    if not 0.0 <= th <= 1.0:
        raise ValueError(f"th must be between 0 and 1, got {th!r}")
    scores = np.asarray(positive_scores)
    n_selected = int(len(scores) * th)
    labels = np.zeros(len(scores), dtype=scores.dtype)
    # n_selected == 0 must select nothing. Indexing a sorted array with -0
    # would pick the minimum as the cut-off and flag every row.
    if n_selected > 0:
        # Rank rows instead of comparing against a cut-off value, so tied
        # scores cannot push the number of positives above n_selected.
        # The stable sort breaks ties by original row order.
        top_rows = np.argsort(-scores, kind='stable')[:n_selected]
        labels[top_rows] = 1
    return labels


# VotingClassifier whose predict() flags the top `th` fraction of rows as class 1
class VotingClassifierWithThreshold(VotingClassifier):
    """VotingClassifier with a rank-based ``predict`` (uses ``predict_proba``)."""

    def predict(self, X, th=0.2):
        """Return 1 for the ``th`` fraction of rows with the highest class-1 probability, else 0."""
        return _top_fraction_labels(self.predict_proba(X)[:, 1], th)


# RandomForestClassifier whose predict() flags the top `th` fraction of rows as class 1
class RandomForestClassifierWithThreshold(RandomForestClassifier):
    """RandomForestClassifier with a rank-based ``predict`` (uses ``predict_proba``)."""

    def predict(self, X, th=0.2):
        """Return 1 for the ``th`` fraction of rows with the highest class-1 probability, else 0."""
        return _top_fraction_labels(self.predict_proba(X)[:, 1], th)


# XGBClassifier whose predict() flags the top `th` fraction of rows as class 1
class XGBClassifierWithThreshold(XGBClassifier):
    """XGBClassifier with a rank-based ``predict`` (uses ``predict_proba``)."""

    def predict(self, X, th=0.2):
        """Return 1 for the ``th`` fraction of rows with the highest class-1 probability, else 0."""
        return _top_fraction_labels(self.predict_proba(X)[:, 1], th)


# CatBoostClassifier whose predict() flags the top `th` fraction of rows as class 1
class CatBoostClassifierWithThreshold(CatBoostClassifier):
    """CatBoostClassifier with a rank-based ``predict`` (uses ``predict_proba``)."""

    def predict(self, X, th=0.2):
        """Return 1 for the ``th`` fraction of rows with the highest class-1 probability, else 0."""
        return _top_fraction_labels(self.predict_proba(X)[:, 1], th)

In [10]:
# Collect every pickled feature list found in the features/ directory.
# Each list is sorted so that the same set of names always compares equal.
# NOTE: pickle.load can execute code embedded in the file; only read pickles you trust.
features = []
for file in os.listdir('features'):
    if not file.endswith('.pkl'):
        continue
    with open('features/' + file, 'rb') as f:
        features.append(sorted(pickle.load(f)))

In [11]:
# Remove duplicate feature lists.
# A set drops the duplicates, but its iteration order is not stable across
# kernel restarts (string hashing is randomised per process). Sort explicitly,
# shortest list first and then alphabetically, so every run processes the
# feature sets in the same order.
features = [
    list(feature_set)
    for feature_set in sorted(
        {tuple(feature_set) for feature_set in features},
        key=lambda names: (len(names), names),
    )
]
features

[['x100', 'x101', 'x105'],
 ['x100', 'x101', 'x102', 'x105'],
 ['x100', 'x101', 'x102', 'x105', 'x270']]

In [12]:
# Load the feature matrix (space-separated, no header row) and name its
# 500 columns x0 .. x499.
X = pd.read_csv('data/x_train.txt', sep=' ', header=None)
X.columns = ['x' + str(i) for i in range(500)]
# Labels are read without a header row; y is the DataFrame returned by read_csv.
y = pd.read_csv('data/y_train.txt', header=None)

# 80/20 hold-out split, stratified on y, with a fixed random_state.
X_train_all, X_test_all, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
# The scaler is fitted on the training part only and then applied to the hold-out part.
scaler = StandardScaler()
X_train_all = scaler.fit_transform(X_train_all)
X_test_all = scaler.transform(X_test_all)
# Wrap the scaled arrays back into DataFrames with all 500 named columns;
# per-feature-set column subsets are taken from these frames later.
X_train_all = pd.DataFrame(X_train_all, columns=X.columns)
X_test_all = pd.DataFrame(X_test_all, columns=X.columns)

# Tune all three model families once per candidate feature set.
# scorer, X_train, X_test and selected_features are rebound on every
# iteration; the objective functions defined further down in this loop read
# them as globals.
for selected_features in features:
    # The scorer forwards the size of the current feature set to effectiveness_score.
    scorer = make_scorer(effectiveness_score, n_features=len(selected_features), greater_is_better=True)
    
    # Restrict both splits to the columns of the current feature set.
    X_train = X_train_all[selected_features]
    X_test = X_test_all[selected_features]
    
    # Optuna for RandomForestClassifierWithThreshold
    def objective_rf(trial):
        """Optuna objective for RandomForestClassifierWithThreshold.

        Samples max_depth, min_samples_split and min_samples_leaf from the
        trial (n_estimators is fixed at 1000), fits the model on the current
        X_train / y_train, scores it on X_test / y_test and with 4-fold
        cross-validation, and appends one row to logs/LOGS_RF.csv.

        Reads the globals X_train, y_train, X_test, y_test, selected_features
        and scorer as they are set for the current feature set.

        Returns
        -------
        float
            Mean of the cross-validation scores (the value Optuna optimises).
        """
        try:
            LOGS_RF = pd.read_csv('logs/LOGS_RF.csv')
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # Only a missing or empty log starts a fresh table. Any other
            # failure must surface, otherwise the existing log would be
            # overwritten below.
            LOGS_RF = pd.DataFrame(columns=['model', 'n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'es', 'cv1', 'cv2', 'cv3', 'cv4', 'cv_mean', 'cv_std', 'columns'])
        cv = 4
        n_estimators = 1000
        max_depth = trial.suggest_int('max_depth', 3, 8)
        min_samples_split = trial.suggest_int('min_samples_split', 2, 16)
        min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 16)

        clf = RandomForestClassifierWithThreshold(n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, random_state=42)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test, th=0.2)
        es = effectiveness_score(y_test.values.ravel(), y_pred, n_features=len(selected_features))
        cv_score = cross_val_score(clf, X_train, y_train.values.ravel(), cv=cv, scoring=scorer)
        row = [clf, n_estimators, max_depth, min_samples_split, min_samples_leaf, es] + cv_score.tolist() + [cv_score.mean(), cv_score.std(), str(selected_features)]
        LOGS_RF = pd.concat([LOGS_RF, pd.DataFrame([row], columns=LOGS_RF.columns)], ignore_index=True)
        # Make sure the target directory exists so a finished trial is not lost on write.
        os.makedirs('logs', exist_ok=True)
        LOGS_RF.to_csv('logs/LOGS_RF.csv', index=False)
        return cv_score.mean()


    # TPE sampler with a fixed seed (42), recreated for every study.
    sampler = TPESampler(seed=42)
    # Study that maximises the objective's return value; no storage argument
    # is given, and `study` is rebound by the next tuning run.
    study = optuna.create_study(direction='maximize', sampler=sampler)
    # Run 100 trials of objective_rf for the current feature set.
    study.optimize(objective_rf, n_trials=100)

    # Optuna for XGBClassifierWithThreshold
    def objective_xgb(trial):
        """Optuna objective for XGBClassifierWithThreshold.

        Samples max_depth, learning_rate (log scale), subsample,
        colsample_bytree and min_child_weight from the trial (n_estimators is
        fixed at 1000), fits the model on the current X_train / y_train,
        scores it on X_test / y_test and with 4-fold cross-validation, and
        appends one row to logs/LOGS_XGB.csv.

        Reads the globals X_train, y_train, X_test, y_test, selected_features
        and scorer as they are set for the current feature set.

        Returns
        -------
        float
            Mean of the cross-validation scores (the value Optuna optimises).
        """
        try:
            LOGS_XGB = pd.read_csv('logs/LOGS_XGB.csv')
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # Only a missing or empty log starts a fresh table. Any other
            # failure must surface, otherwise the existing log would be
            # overwritten below.
            LOGS_XGB = pd.DataFrame(columns=['model', 'n_estimators', 'max_depth', 'learning_rate', 'subsample', 'colsample_bytree', 'min_child_weight', 'test_score', 'cv_1', 'cv_2', 'cv_3', 'cv_4', 'cv_mean', 'cv_std', 'columns'])
        cv = 4
        n_estimators = 1000
        max_depth = trial.suggest_int('max_depth', 3, 8)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.1, log=True)
        subsample = trial.suggest_float('subsample', 0.05, 1.0)
        colsample_bytree = trial.suggest_float('colsample_bytree', 0.05, 1.0)
        min_child_weight = trial.suggest_int('min_child_weight', 1, 10)

        clf = XGBClassifierWithThreshold(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate, subsample=subsample, colsample_bytree=colsample_bytree, min_child_weight=min_child_weight, random_state=42)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test, th=0.2)
        es = effectiveness_score(y_test.values.ravel(), y_pred, n_features=len(selected_features))
        cv_score = cross_val_score(clf, X_train, y_train.values.ravel(), cv=cv, scoring=scorer)
        row = [clf, n_estimators, max_depth, learning_rate, subsample, colsample_bytree, min_child_weight, es, *cv_score, cv_score.mean(), cv_score.std(), str(selected_features)]
        LOGS_XGB = pd.concat([LOGS_XGB, pd.DataFrame([row], columns=LOGS_XGB.columns)], ignore_index=True)
        # Make sure the target directory exists so a finished trial is not lost on write.
        os.makedirs('logs', exist_ok=True)
        LOGS_XGB.to_csv('logs/LOGS_XGB.csv', index=False)
        return cv_score.mean()

    # TPE sampler with a fixed seed (42), recreated for every study.
    sampler = TPESampler(seed=42)
    # Study that maximises the objective's return value; no storage argument
    # is given, and `study` is rebound by the next tuning run.
    study = optuna.create_study(direction='maximize', sampler=sampler)
    # Run 100 trials of objective_xgb for the current feature set.
    study.optimize(objective_xgb, n_trials=100)
    
    # Optuna for CatBoostClassifierWithThreshold
    def objective_cat(trial):
        """Optuna objective for CatBoostClassifierWithThreshold.

        Samples depth, learning_rate (log scale), subsample, colsample_bylevel
        and min_data_in_leaf from the trial (iterations is fixed at 1000),
        fits the model on the current X_train / y_train, scores it on
        X_test / y_test and with 4-fold cross-validation, and appends one row
        to logs/LOGS_CAT.csv.

        Reads the globals X_train, y_train, X_test, y_test, selected_features
        and scorer as they are set for the current feature set.

        Returns
        -------
        float
            Mean of the cross-validation scores (the value Optuna optimises).
        """
        try:
            LOGS_CAT = pd.read_csv('logs/LOGS_CAT.csv')
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # Only a missing or empty log starts a fresh table. Any other
            # failure must surface, otherwise the existing log would be
            # overwritten below.
            LOGS_CAT = pd.DataFrame(columns=['model', 'iterations', 'depth', 'learning_rate', 'subsample', 'colsample_bylevel', 'min_data_in_leaf', 'test_score', 'cv_1', 'cv_2', 'cv_3', 'cv_4', 'cv_mean', 'cv_std', 'columns'])
        cv = 4
        iterations = 1000
        depth = trial.suggest_int('depth', 3, 8)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 0.1, log=True)
        subsample = trial.suggest_float('subsample', 0.05, 1.0)
        colsample_bylevel = trial.suggest_float('colsample_bylevel', 0.05, 1.0)
        min_data_in_leaf = trial.suggest_int('min_data_in_leaf', 1, 10)

        clf = CatBoostClassifierWithThreshold(iterations=iterations, depth=depth, learning_rate=learning_rate, subsample=subsample, colsample_bylevel=colsample_bylevel, min_data_in_leaf=min_data_in_leaf, random_state=42, verbose=0)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test, th=0.2)
        es = effectiveness_score(y_test.values.ravel(), y_pred, n_features=len(selected_features))
        cv_score = cross_val_score(clf, X_train, y_train.values.ravel(), cv=cv, scoring=scorer)
        row = [clf, iterations, depth, learning_rate, subsample, colsample_bylevel, min_data_in_leaf, es, *cv_score, cv_score.mean(), cv_score.std(), str(selected_features)]
        LOGS_CAT = pd.concat([LOGS_CAT, pd.DataFrame([row], columns=LOGS_CAT.columns)], ignore_index=True)
        # Make sure the target directory exists so a finished trial is not lost on write.
        os.makedirs('logs', exist_ok=True)
        LOGS_CAT.to_csv('logs/LOGS_CAT.csv', index=False)
        return cv_score.mean()

    # TPE sampler with a fixed seed (42), recreated for every study.
    sampler = TPESampler(seed=42)
    # Study that maximises the objective's return value; no storage argument
    # is given, and `study` is rebound by the next tuning run.
    study = optuna.create_study(direction='maximize', sampler=sampler)
    # Run 100 trials of objective_cat for the current feature set.
    study.optimize(objective_cat, n_trials=100)

[I 2024-05-27 17:12:32,691] A new study created in memory with name: no-name-ac3678d7-56ee-4ef2-8569-2d2f5034b339
[I 2024-05-27 17:12:46,467] Trial 0 finished with value: 1347.5 and parameters: {'max_depth': 5, 'min_samples_split': 16, 'min_samples_leaf': 12}. Best is trial 0 with value: 1347.5.
[I 2024-05-27 17:13:01,150] Trial 1 finished with value: 1357.5 and parameters: {'max_depth': 6, 'min_samples_split': 4, 'min_samples_leaf': 3}. Best is trial 1 with value: 1357.5.
[I 2024-05-27 17:13:11,470] Trial 2 finished with value: 1317.5 and parameters: {'max_depth': 3, 'min_samples_split': 14, 'min_samples_leaf': 10}. Best is trial 1 with value: 1357.5.
[I 2024-05-27 17:13:26,308] Trial 3 finished with value: 1357.5 and parameters: {'max_depth': 7, 'min_samples_split': 2, 'min_samples_leaf': 16}. Best is trial 1 with value: 1357.5.
[I 2024-05-27 17:13:41,512] Trial 4 finished with value: 1355.0 and parameters: {'max_depth': 7, 'min_samples_split': 5, 'min_samples_leaf': 3}. Best is tria