In [1]:
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from category_encoders import TargetEncoder
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from scipy.stats.mstats import winsorize
from sklearn.ensemble import IsolationForest
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
import optuna
import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


# Handle outlier

In [2]:
class OutlierHandler:
    """
    Handles outlier detection and removal using multiple methods:
    - Winsorization for heavy skewed features
    - Hybrid approach: IsolationForest + Mahalanobis distance
    """
    
    def __init__(self, random_state=42):
        self.random_state = random_state
        self.report = {}

    def detect_heavy_skew(self, df, numeric_cols, threshold=1.0):
        """Detect columns with high skewness"""
        valid_cols = [col for col in numeric_cols if col in df.columns]
        if not valid_cols:
            return [], {}

        skewed = df[valid_cols].skew().abs().sort_values(ascending=False)
        heavy_skew = skewed[skewed > threshold].index.tolist()

        return heavy_skew, skewed[skewed > threshold].to_dict()

    def winsorize_columns(self, df, columns, lower_pct=0.01, upper_pct=0.99):
        """Apply winsorization to specified columns"""
        df_wins = df.copy()

        for col in columns:
            if col not in df_wins.columns:
                continue
            arr = df_wins[col].astype(float)
            low_val, high_val = arr.quantile(lower_pct), arr.quantile(upper_pct)
            df_wins[col] = arr.clip(lower=low_val, upper=high_val)

        return df_wins

    def hybrid_outlier_detection(self, df, numeric_cols, contamination='auto'):
        """
        Detect outliers using both IsolationForest and Mahalanobis distance
        Only removes rows flagged by both methods (conservative approach)
        """
        valid_cols = [col for col in numeric_cols if col in df.columns]
        if not valid_cols or len(df) < 100:
            return np.ones(len(df), dtype=bool), {}

        X = df[valid_cols].copy()

        # Auto contamination
        if contamination == 'auto':
            iqr_outliers = []
            for col in valid_cols:
                Q1, Q3 = df[col].quantile([0.25, 0.75])
                IQR = Q3 - Q1
                iqr_outliers.append(((df[col] < Q1 - 1.5*IQR) | (df[col] > Q3 + 1.5*IQR)).sum())
            contamination = min(0.05, max(0.01, np.mean(iqr_outliers) / len(df)))

        # Impute and detect
        Xf = SimpleImputer(strategy='median').fit_transform(X)

        # IsolationForest
        iso = IsolationForest(contamination=contamination, random_state=self.random_state, n_estimators=100)
        iso_mask = iso.fit_predict(Xf) == -1

        # PCA + Mahalanobis
        try:
            Xs = StandardScaler().fit_transform(Xf)
            pca = PCA(n_components=0.90, random_state=self.random_state)
            pcs = pca.fit_transform(Xs)

            cov = np.cov(pcs, rowvar=False)
            inv_cov = np.linalg.pinv(cov)
            mean_vec = pcs.mean(axis=0)
            dists = np.sqrt(np.sum((pcs - mean_vec) @ inv_cov * (pcs - mean_vec), axis=1))

            mahal_mask = dists > np.percentile(dists, 100*(1-contamination))
        except:
            mahal_mask = np.zeros(len(X), dtype=bool)

        # Conservative: remove only if both agree
        outlier_mask = iso_mask & mahal_mask
        keep_mask = ~outlier_mask

        print(f"Outliers detected: IsoForest={iso_mask.sum()}, Mahalanobis={mahal_mask.sum()}, Both={outlier_mask.sum()}")
        print(f"Keep: {keep_mask.sum()}/{len(df)} ({100*keep_mask.mean():.1f}%)")

        return keep_mask, {'consensus_outliers': outlier_mask.sum(), 'contamination': contamination}

    def process_outliers(self, df, numeric_cols, method='both', skew_threshold=1.0,
                        contamination='auto', action='remove'):
        """Main outlier processing pipeline"""
        print(f"\n{'='*60}\nOUTLIER PROCESSING: {method} | {action}")
        print(f"Original: {df.shape}")

        df_clean = df.copy()

        # 1. Detect & Winsorize heavy skew
        if method in ['winsorize_only', 'both']:
            heavy_skew_cols, _ = self.detect_heavy_skew(df_clean, numeric_cols, skew_threshold)
            if heavy_skew_cols:
                df_clean = self.winsorize_columns(df_clean, heavy_skew_cols)

        # 2. Hybrid outlier detection
        if method in ['hybrid', 'both']:
            keep_mask, details = self.hybrid_outlier_detection(df_clean, numeric_cols, contamination)

            if action == 'remove':
                df_clean = df_clean[keep_mask].reset_index(drop=True)
                print(f"Removed {(~keep_mask).sum()} rows")

        print(f"Final: {df_clean.shape} | Removed: {len(df)-len(df_clean)} rows ({100*(len(df)-len(df_clean))/len(df):.1f}%)")
        print("="*60)

        return df_clean, self.report

# DATA PREPROCESSING

In [3]:
def load_and_clean_data(filepath):
    """Load data and drop unnecessary columns"""
    house_df = pd.read_csv(filepath)
    house_df = house_df.drop(["Id", "Alley", "PoolQC", "Fence", "MiscFeature"], axis=1)
    return house_df


def prepare_features(train_df, test_df, use_polynomial=False, poly_degree=2, top_n_features=10):
    
    # Extract target
    y_train = train_df["SalePrice"].values
    y_test = test_df["SalePrice"].values
    x_train = train_df.drop(["SalePrice"], axis=1)
    x_test = test_df.drop(["SalePrice"], axis=1)

    # Identify column types
    num_cols = [col for col in x_train.columns if x_train[col].dtype in ["float64", "int64"]]
    cat_cols = [col for col in x_train.columns if x_train[col].dtype not in ["float64", "int64"]]

    # Fill missing values for categorical columns
    x_train[cat_cols] = x_train[cat_cols].fillna("none")
    x_test[cat_cols] = x_test[cat_cols].fillna("none")

    # Target encoding for categorical variables
    target_encoder = TargetEncoder(
        cols=cat_cols,
        smoothing=1.0,
        min_samples_leaf=1,
        handle_unknown='value',
        handle_missing='value'
    )
    X_train_encoded = target_encoder.fit_transform(x_train, y_train)
    X_test_encoded = target_encoder.transform(x_test)

    # Impute numerical values
    imputer = SimpleImputer()
    X_train_encoded[num_cols] = imputer.fit_transform(x_train[num_cols])
    X_test_encoded[num_cols] = imputer.transform(x_test[num_cols])

    # Apply polynomial features if requested
    if use_polynomial:
        print(f"\n{'='*60}")
        print(f"APPLYING POLYNOMIAL FEATURES (degree={poly_degree})")
        print(f"{'='*60}")
        
        # Select top correlated features for polynomial transformation
        correlations = x_train[num_cols].corrwith(pd.Series(y_train, index=x_train.index)).abs()
        top_features = correlations.nlargest(top_n_features).index.tolist()
        print(f"Top {top_n_features} features selected for polynomial transformation:")
        print(top_features)
        
        # Apply polynomial features to selected numerical features
        poly = PolynomialFeatures(degree=poly_degree, include_bias=False)
        X_train_poly = poly.fit_transform(x_train[top_features])
        X_test_poly = poly.transform(x_test[top_features])
        
        # Combine with categorical features
        X_train_cat = X_train_encoded[cat_cols].values
        X_test_cat = X_test_encoded[cat_cols].values
        
        X_train_final = np.hstack([X_train_poly, X_train_cat])
        X_test_final = np.hstack([X_test_poly, X_test_cat])
        
        print(f"Shape after polynomial features: {X_train_final.shape}")
    else:
        X_train_final = X_train_encoded.values
        X_test_final = X_test_encoded.values

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_final)
    X_test_scaled = scaler.transform(X_test_final)

    return X_train_scaled, X_test_scaled, y_train, y_test

# MODEL OPTIMIZATION

In [4]:
def get_model_configs():
    return {
        'Ridge': {
            'model_class': Ridge,
            'params': lambda trial: {
                'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True)
            }
        },
        'Lasso': {
            'model_class': Lasso,
            'params': lambda trial: {
                'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True)
            }
        },
        'ElasticNet': {
            'model_class': ElasticNet,
            'params': lambda trial: {
                'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
                'l1_ratio': trial.suggest_float('l1_ratio', 0.0, 1.0)
            }
        },
        'RandomForest': {
            'model_class': RandomForestRegressor,
            'params': lambda trial: {
                'n_estimators': trial.suggest_int('n_estimators', 100, 600),
                'max_depth': trial.suggest_int('max_depth', 3, 30),
                'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
                'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', 1.0]),
                'random_state': 42,
                'n_jobs': -1
            }
        },
        'GradientBoosting': {
            'model_class': GradientBoostingRegressor,
            'params': lambda trial: {
                'n_estimators': trial.suggest_int('n_estimators', 100, 500),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
                'max_depth': trial.suggest_int('max_depth', 2, 8),
                'subsample': trial.suggest_float('subsample', 0.5, 1.0),
                'random_state': 42
            }
        },
        'XGBoost': {
            'model_class': XGBRegressor,
            'params': lambda trial: {
                'n_estimators': trial.suggest_int('n_estimators', 200, 1000),
                'max_depth': trial.suggest_int('max_depth', 3, 12),
                'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.3, log=True),
                'subsample': trial.suggest_float('subsample', 0.5, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
                'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10, log=True),
                'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10, log=True),
                'random_state': 42,
                'n_jobs': -1,
                'tree_method': "hist"
            }
        },
        'LightGBM': {
            'model_class': LGBMRegressor,
            'params': lambda trial: {
                'n_estimators': trial.suggest_int('n_estimators', 200, 1000),
                'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.2, log=True),
                'num_leaves': trial.suggest_int('num_leaves', 15, 127),
                'max_depth': trial.suggest_int('max_depth', -1, 10),
                'min_child_samples': trial.suggest_int('min_child_samples', 5, 30),
                'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 1.0),
                'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 1.0),
                'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10, log=True),
                'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10, log=True),
                'random_state': 42,
                'n_jobs': -1,
                'verbose': -1
            }
        },
        'CatBoost': {
            'model_class': CatBoostRegressor,
            'params': lambda trial: {
                'iterations': trial.suggest_int('iterations', 300, 1000),
                'depth': trial.suggest_int('depth', 4, 10),
                'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.3, log=True),
                'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10, log=True),
                'random_seed': 42,
                'verbose': False
            }
        }
    }


def objective(trial, model_name, model_config, X_train, y_train, X_test, y_test):
    """Objective function for Optuna optimization"""
    # Get model parameters
    params = model_config['params'](trial)
    
    # Create and train model
    model = model_config['model_class'](**params)
    model.fit(X_train, y_train)
    
    # Predict and evaluate
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    
    # Store RMSE as user attribute
    trial.set_user_attr("rmse", rmse)
    
    return r2  # Optuna will maximize this


def optimize_models(X_train, y_train, X_test, y_test, model_names=None, n_trials=30):
    
    model_configs = get_model_configs()
    
    if model_names is None:
        model_names = list(model_configs.keys())
    
    best_results = {}
    
    for model_name in model_names:
        if model_name not in model_configs:
            print(f"Warning: {model_name} not found in model configs. Skipping.")
            continue
            
        print(f"\n{'='*60}")
        print(f"Optimizing {model_name}...")
        print(f"{'='*60}")
        
        model_config = model_configs[model_name]
        
        # Create study
        study = optuna.create_study(direction="maximize")
        study.optimize(
            lambda trial: objective(trial, model_name, model_config, X_train, y_train, X_test, y_test),
            n_trials=n_trials,
            show_progress_bar=True
        )
        
        # Store best results
        best_results[model_name] = {
            "best_r2": study.best_value,
            "best_rmse": study.best_trial.user_attrs["rmse"],
            "best_params": study.best_params
        }
    
    return best_results


def print_results(results, title="RESULTS"):
    print(f"{title}")
    
    results_df = pd.DataFrame(results).T
    results_df = results_df.sort_values("best_r2", ascending=False)
    print(results_df)
    
    print("DETAILED RESULTS")
    
    for model_name, result in results.items():
        print(f"\n{model_name}:")
        print(f"   R²: {result['best_r2']:.4f}")
        print(f"   RMSE: {result['best_rmse']:.2f}")
        print(f"   Parameters:")
        for param, value in result['best_params'].items():
            if isinstance(value, float):
                print(f"      {param}: {value:.6f}")
            else:
                print(f"      {param}: {value}")

# COMPARISON 

In [5]:

def compare_models_with_and_without_poly(
    df_clean,
    models_to_test=None,
    n_trials=30,
    poly_degree=2,
    top_n_features=10
):
    # Split data
    train_df, test_df = train_test_split(df_clean, test_size=0.25, random_state=42)
    
    # WITHOUT POLYNOMIAL FEATURES 
    print("\n" + "="*70)
    print("STAGE 1: STANDARD FEATURES (No Polynomial)")
    print("="*70)
    
    X_train_std, X_test_std, y_train, y_test = prepare_features(
        train_df, test_df, use_polynomial=False
    )
    
    results_standard = optimize_models(
        X_train_std, y_train, X_test_std, y_test,
        model_names=models_to_test,
        n_trials=n_trials
    )
    
    print_results(results_standard, "RESULTS - STANDARD FEATURES")
    
    #  WITH POLYNOMIAL FEATURES 
    print("\n" + "="*70)
    print("STAGE 2: POLYNOMIAL FEATURES")
    print("="*70)
    
    X_train_poly, X_test_poly, y_train, y_test = prepare_features(
        train_df, test_df, 
        use_polynomial=True,
        poly_degree=poly_degree,
        top_n_features=top_n_features
    )
    
    results_poly = optimize_models(
        X_train_poly, y_train, X_test_poly, y_test,
        model_names=models_to_test,
        n_trials=n_trials
    )
    
    print_results(results_poly, "RESULTS - POLYNOMIAL FEATURES")
    
    #  COMPARISON 
    print("FINAL COMPARISON: STANDARD vs POLYNOMIAL")
    
    comparison_data = []
    for model_name in results_standard.keys():
        comparison_data.append({
            'Model': model_name,
            'Standard_R2': results_standard[model_name]['best_r2'],
            'Standard_RMSE': results_standard[model_name]['best_rmse'],
            'Poly_R2': results_poly[model_name]['best_r2'],
            'Poly_RMSE': results_poly[model_name]['best_rmse'],
            'R2_Improvement': results_poly[model_name]['best_r2'] - results_standard[model_name]['best_r2'],
            'RMSE_Improvement': results_standard[model_name]['best_rmse'] - results_poly[model_name]['best_rmse']
        })
    
    comparison_df = pd.DataFrame(comparison_data)
    comparison_df = comparison_df.sort_values('Poly_R2', ascending=False)
    
    print("\n", comparison_df.to_string(index=False))
    
    print("SUMMARY")
    print(f"Models improved with polynomial features: {(comparison_df['R2_Improvement'] > 0).sum()}/{len(comparison_df)}")
    print(f"Best model (Standard): {comparison_df.loc[comparison_df['Standard_R2'].idxmax(), 'Model']}")
    print(f"Best model (Polynomial): {comparison_df.loc[comparison_df['Poly_R2'].idxmax(), 'Model']}")
    
    return results_standard, results_poly, comparison_df



In [6]:
# MAIN EXECUTION
def main(
    filepath,
    models_to_test=None,
    n_trials=30,
    poly_degree=2,
    top_n_features=10
):
    """
    Main execution function
    
    Parameters:
    -----------
    filepath : str
        Path to the dataset
    models_to_test : list or None
        List of model names to test. Available models:
        ['Ridge', 'Lasso', 'ElasticNet', 'RandomForest', 'GradientBoosting', 
         'XGBoost', 'LightGBM', 'CatBoost']
        If None, test all models.
    n_trials : int
        Number of optimization trials per model
    poly_degree : int
        Degree of polynomial features
    top_n_features : int
        Number of top features to apply polynomial transformation
    """
    print("="*70)
    print("UNIFIED MODEL COMPARISON: STANDARD vs POLYNOMIAL FEATURES")
    print("="*70)
    
    # Load and clean data
    print("\n[1/4] Loading and cleaning data...")
    house_df = load_and_clean_data(filepath)
    print(f"Data loaded: {house_df.shape}")
    
    # Handle outliers
    print("\n[2/4] Processing outliers...")
    num_cols_org = [col for col in house_df.columns if house_df[col].dtype in ["float64", "int64"]]
    handler = OutlierHandler(random_state=42)
    df_clean, report = handler.process_outliers(
        house_df,
        num_cols_org,
        method='both',
        skew_threshold=1.0,
        contamination='auto',
        action='remove'
    )
    
    # Compare models
    print("\n[3/4] Optimizing models...")
    results_standard, results_poly, comparison_df = compare_models_with_and_without_poly(
        df_clean,
        models_to_test=models_to_test,
        n_trials=n_trials,
        poly_degree=poly_degree,
        top_n_features=top_n_features
    )
    
    print("\n[4/4] Done!")
    
    return results_standard, results_poly, comparison_df


In [9]:
results_std, results_poly, comparison = main(filepath="/home/huy/quan_nguyen/module 5/project/train-house-prices-advanced-regression-techniques.csv", 
    n_trials=10)

UNIFIED MODEL COMPARISON: STANDARD vs POLYNOMIAL FEATURES

[1/4] Loading and cleaning data...
Data loaded: (1460, 76)

[2/4] Processing outliers...

OUTLIER PROCESSING: both | remove
Original: (1460, 76)
Outliers detected: IsoForest=45, Mahalanobis=45, Both=13
Keep: 1447/1460 (99.1%)
Removed 13 rows
Final: (1447, 76) | Removed: 13 rows (0.9%)

[3/4] Optimizing models...

STAGE 1: STANDARD FEATURES (No Polynomial)


[I 2025-11-01 20:49:25,525] A new study created in memory with name: no-name-0d692bba-963c-42e8-9fef-449fb5bbd5eb



Optimizing Ridge...


Best trial: 3. Best value: 0.894286: 100%|██████████| 10/10 [00:00<00:00, 312.99it/s]
[I 2025-11-01 20:49:25,559] A new study created in memory with name: no-name-fbd2e064-89e9-4f73-9b5c-ec245889c72c


[I 2025-11-01 20:49:25,528] Trial 0 finished with value: 0.8930085828972408 and parameters: {'alpha': 0.00459199414868515}. Best is trial 0 with value: 0.8930085828972408.
[I 2025-11-01 20:49:25,531] Trial 1 finished with value: 0.8930043978204523 and parameters: {'alpha': 0.0019340147881435157}. Best is trial 0 with value: 0.8930085828972408.
[I 2025-11-01 20:49:25,534] Trial 2 finished with value: 0.8930287108671465 and parameters: {'alpha': 0.017456372648248803}. Best is trial 2 with value: 0.8930287108671465.
[I 2025-11-01 20:49:25,537] Trial 3 finished with value: 0.8942861333349107 and parameters: {'alpha': 1.2398980042060979}. Best is trial 3 with value: 0.8942861333349107.
[I 2025-11-01 20:49:25,541] Trial 4 finished with value: 0.8933535311152285 and parameters: {'alpha': 0.24563064418057737}. Best is trial 3 with value: 0.8942861333349107.
[I 2025-11-01 20:49:25,544] Trial 5 finished with value: 0.8930036923060392 and parameters: {'alpha': 0.0014865028621929967}. Best is tria

Best trial: 0. Best value: 0.893767:  30%|███       | 3/10 [00:00<00:00, 34.92it/s]

Best trial: 0. Best value: 0.893767:  60%|██████    | 6/10 [00:00<00:00, 44.91it/s]

[I 2025-11-01 20:49:25,578] Trial 0 finished with value: 0.8937669785289044 and parameters: {'alpha': 9.57265863494453}. Best is trial 0 with value: 0.8937669785289044.
[I 2025-11-01 20:49:25,601] Trial 1 finished with value: 0.8930077838004625 and parameters: {'alpha': 0.015545382303216877}. Best is trial 0 with value: 0.8937669785289044.
[I 2025-11-01 20:49:25,623] Trial 2 finished with value: 0.8934351712269037 and parameters: {'alpha': 4.886306777664494}. Best is trial 0 with value: 0.8937669785289044.
[I 2025-11-01 20:49:25,645] Trial 3 finished with value: 0.8935752114919493 and parameters: {'alpha': 6.608626940599729}. Best is trial 0 with value: 0.8937669785289044.
[I 2025-11-01 20:49:25,669] Trial 4 finished with value: 0.8930066184384866 and parameters: {'alpha': 0.0023119624802029203}. Best is trial 0 with value: 0.8937669785289044.
[I 2025-11-01 20:49:25,697] Trial 5 finished with value: 0.8930066433792627 and parameters: {'alpha': 0.002595164054624644}. Best is trial 0 wit

Best trial: 0. Best value: 0.893767:  80%|████████  | 8/10 [00:00<00:00, 44.91it/s]

[I 2025-11-01 20:49:25,744] Trial 7 finished with value: 0.8930128060524569 and parameters: {'alpha': 0.07259429194230486}. Best is trial 0 with value: 0.8937669785289044.
[I 2025-11-01 20:49:25,766] Trial 8 finished with value: 0.8934487577781298 and parameters: {'alpha': 5.043179936652947}. Best is trial 0 with value: 0.8937669785289044.


Best trial: 0. Best value: 0.893767: 100%|██████████| 10/10 [00:00<00:00, 42.88it/s]
[I 2025-11-01 20:49:25,794] A new study created in memory with name: no-name-1a17d1d7-a313-4b74-8466-f7bb26588095


[I 2025-11-01 20:49:25,791] Trial 9 finished with value: 0.8930072863582431 and parameters: {'alpha': 0.009896414556235444}. Best is trial 0 with value: 0.8937669785289044.

Optimizing ElasticNet...


Best trial: 5. Best value: 0.898915:  80%|████████  | 8/10 [00:00<00:00, 66.77it/s]

[I 2025-11-01 20:49:25,812] Trial 0 finished with value: 0.8959930542699249 and parameters: {'alpha': 0.007701743519570461, 'l1_ratio': 0.026849579580478178}. Best is trial 0 with value: 0.8959930542699249.
[I 2025-11-01 20:49:25,816] Trial 1 finished with value: 0.8481481477200716 and parameters: {'alpha': 7.60044812805759, 'l1_ratio': 0.6321222893417215}. Best is trial 0 with value: 0.8959930542699249.
[I 2025-11-01 20:49:25,840] Trial 2 finished with value: 0.8947320733625385 and parameters: {'alpha': 0.007176588203453187, 'l1_ratio': 0.7354830056459625}. Best is trial 0 with value: 0.8959930542699249.
[I 2025-11-01 20:49:25,860] Trial 3 finished with value: 0.8952790199111211 and parameters: {'alpha': 0.06054351555178143, 'l1_ratio': 0.942769938590144}. Best is trial 0 with value: 0.8959930542699249.
[I 2025-11-01 20:49:25,864] Trial 4 finished with value: 0.8868001625282337 and parameters: {'alpha': 2.8490817175753613, 'l1_ratio': 0.6150661217715907}. Best is trial 0 with value: 0

Best trial: 5. Best value: 0.898915: 100%|██████████| 10/10 [00:00<00:00, 63.64it/s]
[I 2025-11-01 20:49:25,953] A new study created in memory with name: no-name-c85b5992-f565-46bf-a1a5-761676842651


[I 2025-11-01 20:49:25,950] Trial 9 finished with value: 0.8947513639873435 and parameters: {'alpha': 0.004687932424658501, 'l1_ratio': 0.5863134289270041}. Best is trial 5 with value: 0.8989150075764972.

Optimizing RandomForest...


Best trial: 1. Best value: 0.901922:  20%|██        | 2/10 [00:00<00:02,  3.68it/s]

[I 2025-11-01 20:49:26,380] Trial 0 finished with value: 0.8767593635424795 and parameters: {'n_estimators': 480, 'max_depth': 6, 'min_samples_split': 4, 'max_features': 'log2'}. Best is trial 0 with value: 0.8767593635424795.
[I 2025-11-01 20:49:26,542] Trial 1 finished with value: 0.9019224497110213 and parameters: {'n_estimators': 167, 'max_depth': 9, 'min_samples_split': 8, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9019224497110213.


Best trial: 2. Best value: 0.901975:  30%|███       | 3/10 [00:00<00:01,  4.79it/s]

[I 2025-11-01 20:49:26,675] Trial 2 finished with value: 0.9019753788441867 and parameters: {'n_estimators': 114, 'max_depth': 30, 'min_samples_split': 6, 'max_features': 'sqrt'}. Best is trial 2 with value: 0.9019753788441867.


Best trial: 2. Best value: 0.901975:  40%|████      | 4/10 [00:01<00:01,  3.60it/s]

[I 2025-11-01 20:49:27,059] Trial 3 finished with value: 0.8961453447638213 and parameters: {'n_estimators': 417, 'max_depth': 11, 'min_samples_split': 10, 'max_features': 'log2'}. Best is trial 2 with value: 0.9019753788441867.


Best trial: 4. Best value: 0.908036:  60%|██████    | 6/10 [00:01<00:00,  4.24it/s]

[I 2025-11-01 20:49:27,381] Trial 4 finished with value: 0.9080360496885709 and parameters: {'n_estimators': 341, 'max_depth': 15, 'min_samples_split': 3, 'max_features': 'log2'}. Best is trial 4 with value: 0.9080360496885709.
[I 2025-11-01 20:49:27,506] Trial 5 finished with value: 0.9006411537207104 and parameters: {'n_estimators': 112, 'max_depth': 12, 'min_samples_split': 7, 'max_features': 'log2'}. Best is trial 4 with value: 0.9080360496885709.


Best trial: 4. Best value: 0.908036:  70%|███████   | 7/10 [00:01<00:00,  3.47it/s]

[I 2025-11-01 20:49:27,901] Trial 6 finished with value: 0.9062393155426494 and parameters: {'n_estimators': 430, 'max_depth': 20, 'min_samples_split': 4, 'max_features': 'log2'}. Best is trial 4 with value: 0.9080360496885709.


Best trial: 4. Best value: 0.908036:  80%|████████  | 8/10 [00:02<00:00,  3.17it/s]

[I 2025-11-01 20:49:28,274] Trial 7 finished with value: 0.8998840780954196 and parameters: {'n_estimators': 415, 'max_depth': 10, 'min_samples_split': 7, 'max_features': 'log2'}. Best is trial 4 with value: 0.9080360496885709.


Best trial: 4. Best value: 0.908036:  90%|█████████ | 9/10 [00:02<00:00,  2.66it/s]

[I 2025-11-01 20:49:28,784] Trial 8 finished with value: 0.9065344299526306 and parameters: {'n_estimators': 471, 'max_depth': 7, 'min_samples_split': 8, 'max_features': 1.0}. Best is trial 4 with value: 0.9080360496885709.


Best trial: 4. Best value: 0.908036: 100%|██████████| 10/10 [00:03<00:00,  3.13it/s]
[I 2025-11-01 20:49:29,146] A new study created in memory with name: no-name-6eace7b8-8626-4e34-bee9-6bc0241dc128


[I 2025-11-01 20:49:29,142] Trial 9 finished with value: 0.9034653076012467 and parameters: {'n_estimators': 386, 'max_depth': 10, 'min_samples_split': 7, 'max_features': 'sqrt'}. Best is trial 4 with value: 0.9080360496885709.

Optimizing GradientBoosting...


Best trial: 0. Best value: 0.899105:  10%|█         | 1/10 [00:00<00:04,  1.98it/s]

[I 2025-11-01 20:49:29,651] Trial 0 finished with value: 0.899104526984649 and parameters: {'n_estimators': 179, 'learning_rate': 0.21193786622879746, 'max_depth': 4, 'subsample': 0.5252249566460491}. Best is trial 0 with value: 0.899104526984649.


Best trial: 1. Best value: 0.904904:  20%|██        | 2/10 [00:01<00:07,  1.11it/s]

[I 2025-11-01 20:49:30,827] Trial 1 finished with value: 0.9049036252612022 and parameters: {'n_estimators': 415, 'learning_rate': 0.2984592074793242, 'max_depth': 3, 'subsample': 0.7391284116610479}. Best is trial 1 with value: 0.9049036252612022.


Best trial: 2. Best value: 0.922146:  30%|███       | 3/10 [00:02<00:06,  1.04it/s]

[I 2025-11-01 20:49:31,857] Trial 2 finished with value: 0.9221461544191886 and parameters: {'n_estimators': 358, 'learning_rate': 0.02703300200452259, 'max_depth': 4, 'subsample': 0.5460245689074819}. Best is trial 2 with value: 0.9221461544191886.


Best trial: 2. Best value: 0.922146:  40%|████      | 4/10 [00:05<00:09,  1.61s/it]

[I 2025-11-01 20:49:34,477] Trial 3 finished with value: 0.9099593725391896 and parameters: {'n_estimators': 340, 'learning_rate': 0.014531971110610137, 'max_depth': 7, 'subsample': 0.8851298962934405}. Best is trial 2 with value: 0.9221461544191886.


Best trial: 2. Best value: 0.922146:  50%|█████     | 5/10 [00:06<00:08,  1.62s/it]

[I 2025-11-01 20:49:36,101] Trial 4 finished with value: 0.91701063254776 and parameters: {'n_estimators': 360, 'learning_rate': 0.04736725704904297, 'max_depth': 6, 'subsample': 0.5730753664644026}. Best is trial 2 with value: 0.9221461544191886.


Best trial: 2. Best value: 0.922146:  60%|██████    | 6/10 [00:08<00:06,  1.73s/it]

[I 2025-11-01 20:49:38,052] Trial 5 finished with value: 0.9001232275979149 and parameters: {'n_estimators': 337, 'learning_rate': 0.20691405965482212, 'max_depth': 5, 'subsample': 0.9824947281001281}. Best is trial 2 with value: 0.9221461544191886.


Best trial: 2. Best value: 0.922146:  70%|███████   | 7/10 [00:09<00:03,  1.29s/it]

[I 2025-11-01 20:49:38,432] Trial 6 finished with value: 0.9108465339968642 and parameters: {'n_estimators': 182, 'learning_rate': 0.1850739366886803, 'max_depth': 3, 'subsample': 0.5021711397306967}. Best is trial 2 with value: 0.9221461544191886.


Best trial: 2. Best value: 0.922146:  80%|████████  | 8/10 [00:13<00:04,  2.12s/it]

[I 2025-11-01 20:49:42,323] Trial 7 finished with value: 0.899447409825712 and parameters: {'n_estimators': 413, 'learning_rate': 0.02181355529814656, 'max_depth': 8, 'subsample': 0.9629187460822841}. Best is trial 2 with value: 0.9221461544191886.


Best trial: 2. Best value: 0.922146:  90%|█████████ | 9/10 [00:14<00:01,  1.75s/it]

[I 2025-11-01 20:49:43,268] Trial 8 finished with value: 0.9140283923957907 and parameters: {'n_estimators': 406, 'learning_rate': 0.21449809234900508, 'max_depth': 2, 'subsample': 0.9278717611810983}. Best is trial 2 with value: 0.9221461544191886.


Best trial: 2. Best value: 0.922146: 100%|██████████| 10/10 [00:16<00:00,  1.60s/it]
[I 2025-11-01 20:49:45,186] A new study created in memory with name: no-name-2ca1d9b6-f541-452b-8ff2-cf35ad6fd047


[I 2025-11-01 20:49:45,182] Trial 9 finished with value: 0.9129106811540675 and parameters: {'n_estimators': 299, 'learning_rate': 0.03279407214818141, 'max_depth': 6, 'subsample': 0.8852193138925812}. Best is trial 2 with value: 0.9221461544191886.

Optimizing XGBoost...


Best trial: 0. Best value: 0.771233:  10%|█         | 1/10 [00:02<00:20,  2.31s/it]

[I 2025-11-01 20:49:47,497] Trial 0 finished with value: 0.7712334142074437 and parameters: {'n_estimators': 402, 'max_depth': 11, 'learning_rate': 0.00255899363757537, 'subsample': 0.8234273257665476, 'colsample_bytree': 0.7373659372920733, 'reg_lambda': 0.0070890947422283445, 'reg_alpha': 8.89654261979606}. Best is trial 0 with value: 0.7712334142074437.


Best trial: 1. Best value: 0.831277:  20%|██        | 2/10 [00:05<00:20,  2.56s/it]

[I 2025-11-01 20:49:50,224] Trial 1 finished with value: 0.8312772196194965 and parameters: {'n_estimators': 899, 'max_depth': 11, 'learning_rate': 0.0016988593058010447, 'subsample': 0.9067880977407752, 'colsample_bytree': 0.9901728771390131, 'reg_lambda': 0.45598959340742923, 'reg_alpha': 0.04119870292873043}. Best is trial 1 with value: 0.8312772196194965.


Best trial: 2. Best value: 0.895455:  30%|███       | 3/10 [00:06<00:15,  2.16s/it]

[I 2025-11-01 20:49:51,908] Trial 2 finished with value: 0.895454585693576 and parameters: {'n_estimators': 520, 'max_depth': 12, 'learning_rate': 0.02062732425390824, 'subsample': 0.956477384986284, 'colsample_bytree': 0.9036608810608646, 'reg_lambda': 1.1703366510467794, 'reg_alpha': 0.01645423614074407}. Best is trial 2 with value: 0.895454585693576.


Best trial: 3. Best value: 0.921475:  40%|████      | 4/10 [00:07<00:09,  1.58s/it]

[I 2025-11-01 20:49:52,602] Trial 3 finished with value: 0.9214753933669273 and parameters: {'n_estimators': 979, 'max_depth': 7, 'learning_rate': 0.014218354466639674, 'subsample': 0.6431175139819383, 'colsample_bytree': 0.6603024917133412, 'reg_lambda': 0.355591203847894, 'reg_alpha': 0.32284829488113714}. Best is trial 3 with value: 0.9214753933669273.


Best trial: 3. Best value: 0.921475:  50%|█████     | 5/10 [00:07<00:06,  1.22s/it]

[I 2025-11-01 20:49:53,179] Trial 4 finished with value: 0.9118015576867788 and parameters: {'n_estimators': 795, 'max_depth': 6, 'learning_rate': 0.06154960383124444, 'subsample': 0.6828179928659316, 'colsample_bytree': 0.8169075687672167, 'reg_lambda': 0.0011200922235075593, 'reg_alpha': 0.0016923657730989596}. Best is trial 3 with value: 0.9214753933669273.


Best trial: 3. Best value: 0.921475:  60%|██████    | 6/10 [00:09<00:05,  1.30s/it]

[I 2025-11-01 20:49:54,623] Trial 5 finished with value: 0.8890071294629752 and parameters: {'n_estimators': 853, 'max_depth': 8, 'learning_rate': 0.002642017638296212, 'subsample': 0.8537187579316315, 'colsample_bytree': 0.9676992101577142, 'reg_lambda': 0.0022057074376045974, 'reg_alpha': 0.0016420654990662868}. Best is trial 3 with value: 0.9214753933669273.


Best trial: 3. Best value: 0.921475:  70%|███████   | 7/10 [00:09<00:02,  1.06it/s]

[I 2025-11-01 20:49:54,835] Trial 6 finished with value: 0.9061840631439569 and parameters: {'n_estimators': 790, 'max_depth': 3, 'learning_rate': 0.1750111641389535, 'subsample': 0.5894178442272794, 'colsample_bytree': 0.6054287599986556, 'reg_lambda': 1.1990515920158165, 'reg_alpha': 0.10886906010333951}. Best is trial 3 with value: 0.9214753933669273.


Best trial: 3. Best value: 0.921475:  80%|████████  | 8/10 [00:10<00:01,  1.09it/s]

[I 2025-11-01 20:49:55,715] Trial 7 finished with value: 0.8441558349150605 and parameters: {'n_estimators': 744, 'max_depth': 8, 'learning_rate': 0.002405832021981238, 'subsample': 0.7913981107482979, 'colsample_bytree': 0.8447309984236737, 'reg_lambda': 2.5929512752287738, 'reg_alpha': 0.00822324964403327}. Best is trial 3 with value: 0.9214753933669273.


Best trial: 3. Best value: 0.921475:  90%|█████████ | 9/10 [00:11<00:00,  1.05it/s]

[I 2025-11-01 20:49:56,735] Trial 8 finished with value: 0.9109966171735563 and parameters: {'n_estimators': 656, 'max_depth': 11, 'learning_rate': 0.06956475187780894, 'subsample': 0.7668685357652552, 'colsample_bytree': 0.6165709412553837, 'reg_lambda': 4.59046261541478, 'reg_alpha': 0.009677649499188087}. Best is trial 3 with value: 0.9214753933669273.


Best trial: 9. Best value: 0.923778: 100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
[I 2025-11-01 20:49:56,969] A new study created in memory with name: no-name-7d3a512c-4446-42a6-bcb4-1964cdefc13b


[I 2025-11-01 20:49:56,964] Trial 9 finished with value: 0.9237783390026577 and parameters: {'n_estimators': 702, 'max_depth': 4, 'learning_rate': 0.0449032312395194, 'subsample': 0.9964877370599412, 'colsample_bytree': 0.8069325468271975, 'reg_lambda': 0.00561921999931866, 'reg_alpha': 1.7906632448154662}. Best is trial 9 with value: 0.9237783390026577.

Optimizing LightGBM...


Best trial: 0. Best value: 0.379566:  10%|█         | 1/10 [00:00<00:02,  4.02it/s]

[I 2025-11-01 20:49:57,218] Trial 0 finished with value: 0.37956602622030167 and parameters: {'n_estimators': 303, 'learning_rate': 0.001105865976239355, 'num_leaves': 33, 'max_depth': 6, 'min_child_samples': 30, 'feature_fraction': 0.9682487812525853, 'bagging_fraction': 0.7646642522324135, 'reg_alpha': 0.39448252035836495, 'reg_lambda': 1.253603912195714}. Best is trial 0 with value: 0.37956602622030167.


Best trial: 2. Best value: 0.916097:  20%|██        | 2/10 [00:00<00:03,  2.44it/s]

[I 2025-11-01 20:49:57,738] Trial 1 finished with value: 0.7496996669363705 and parameters: {'n_estimators': 617, 'learning_rate': 0.0015770317816115526, 'num_leaves': 65, 'max_depth': 6, 'min_child_samples': 12, 'feature_fraction': 0.9690478147217156, 'bagging_fraction': 0.8989902666524956, 'reg_alpha': 0.4688308334826678, 'reg_lambda': 0.002541343570883065}. Best is trial 1 with value: 0.7496996669363705.
[I 2025-11-01 20:49:57,809] Trial 2 finished with value: 0.916096715289652 and parameters: {'n_estimators': 532, 'learning_rate': 0.03620778169004457, 'num_leaves': 42, 'max_depth': 2, 'min_child_samples': 8, 'feature_fraction': 0.8633807532257345, 'bagging_fraction': 0.9596900527131045, 'reg_alpha': 1.0187185358439768, 'reg_lambda': 0.0026754730124093273}. Best is trial 2 with value: 0.916096715289652.


Best trial: 3. Best value: 0.917884:  40%|████      | 4/10 [00:01<00:02,  2.29it/s]

[I 2025-11-01 20:49:58,642] Trial 3 finished with value: 0.917884182616346 and parameters: {'n_estimators': 953, 'learning_rate': 0.023499077005114832, 'num_leaves': 41, 'max_depth': -1, 'min_child_samples': 21, 'feature_fraction': 0.8169443299503744, 'bagging_fraction': 0.6656331184871536, 'reg_alpha': 0.04418788283555805, 'reg_lambda': 0.0015925978533784819}. Best is trial 3 with value: 0.917884182616346.


Best trial: 5. Best value: 0.924877:  60%|██████    | 6/10 [00:02<00:01,  2.61it/s]

[I 2025-11-01 20:49:59,255] Trial 4 finished with value: 0.9040645732551131 and parameters: {'n_estimators': 229, 'learning_rate': 0.09239557848143741, 'num_leaves': 98, 'max_depth': -1, 'min_child_samples': 7, 'feature_fraction': 0.8706099124038759, 'bagging_fraction': 0.5481844127116524, 'reg_alpha': 0.0028992203417317203, 'reg_lambda': 0.022889654113167568}. Best is trial 3 with value: 0.917884182616346.
[I 2025-11-01 20:49:59,394] Trial 5 finished with value: 0.9248766726600354 and parameters: {'n_estimators': 894, 'learning_rate': 0.025997296185327486, 'num_leaves': 28, 'max_depth': 3, 'min_child_samples': 14, 'feature_fraction': 0.8595892445709297, 'bagging_fraction': 0.7555414590544511, 'reg_alpha': 0.033070090189139166, 'reg_lambda': 0.003257855581306415}. Best is trial 5 with value: 0.9248766726600354.


Best trial: 5. Best value: 0.924877:  70%|███████   | 7/10 [00:02<00:01,  2.28it/s]

[I 2025-11-01 20:49:59,956] Trial 6 finished with value: 0.7361761458293079 and parameters: {'n_estimators': 779, 'learning_rate': 0.0012543304440550223, 'num_leaves': 68, 'max_depth': 0, 'min_child_samples': 25, 'feature_fraction': 0.6338193825453153, 'bagging_fraction': 0.9123364677940143, 'reg_alpha': 4.0637366849467025, 'reg_lambda': 0.6534942823443388}. Best is trial 5 with value: 0.9248766726600354.


Best trial: 5. Best value: 0.924877:  80%|████████  | 8/10 [00:03<00:01,  1.92it/s]

[I 2025-11-01 20:50:00,670] Trial 7 finished with value: 0.9213487616850404 and parameters: {'n_estimators': 1000, 'learning_rate': 0.02162635772435214, 'num_leaves': 125, 'max_depth': 3, 'min_child_samples': 28, 'feature_fraction': 0.7478433763096966, 'bagging_fraction': 0.9498045677906108, 'reg_alpha': 0.0024359766474909947, 'reg_lambda': 0.008084733224583788}. Best is trial 5 with value: 0.9248766726600354.


Best trial: 5. Best value: 0.924877:  90%|█████████ | 9/10 [00:04<00:00,  1.59it/s]

[I 2025-11-01 20:50:01,552] Trial 8 finished with value: 0.909162768526086 and parameters: {'n_estimators': 477, 'learning_rate': 0.14298455132653481, 'num_leaves': 75, 'max_depth': 0, 'min_child_samples': 9, 'feature_fraction': 0.7825808201523052, 'bagging_fraction': 0.5991774496559612, 'reg_alpha': 0.010120919182566935, 'reg_lambda': 0.38815262811829737}. Best is trial 5 with value: 0.9248766726600354.


Best trial: 5. Best value: 0.924877: 100%|██████████| 10/10 [00:04<00:00,  2.05it/s]
[I 2025-11-01 20:50:01,849] A new study created in memory with name: no-name-8848ef70-8937-4d76-bab7-358b74002384


[I 2025-11-01 20:50:01,843] Trial 9 finished with value: 0.3792537630608289 and parameters: {'n_estimators': 325, 'learning_rate': 0.0010038591394083743, 'num_leaves': 103, 'max_depth': 7, 'min_child_samples': 22, 'feature_fraction': 0.8093398759040062, 'bagging_fraction': 0.9137728322160086, 'reg_alpha': 0.03162280777411102, 'reg_lambda': 0.8342036272973228}. Best is trial 5 with value: 0.9248766726600354.

Optimizing CatBoost...


Best trial: 0. Best value: 0.900218:  10%|█         | 1/10 [00:01<00:11,  1.23s/it]

[I 2025-11-01 20:50:03,082] Trial 0 finished with value: 0.9002183372497115 and parameters: {'iterations': 770, 'depth': 8, 'learning_rate': 0.23201811723184843, 'l2_leaf_reg': 0.1740128304136401}. Best is trial 0 with value: 0.9002183372497115.


Best trial: 1. Best value: 0.923608:  20%|██        | 2/10 [00:02<00:11,  1.40s/it]

[I 2025-11-01 20:50:04,597] Trial 1 finished with value: 0.9236077030204173 and parameters: {'iterations': 943, 'depth': 8, 'learning_rate': 0.013183737066205943, 'l2_leaf_reg': 1.7247636976968137}. Best is trial 1 with value: 0.9236077030204173.


Best trial: 1. Best value: 0.923608:  30%|███       | 3/10 [00:04<00:11,  1.59s/it]

[I 2025-11-01 20:50:06,423] Trial 2 finished with value: 0.9092704817533699 and parameters: {'iterations': 524, 'depth': 9, 'learning_rate': 0.16697669879561755, 'l2_leaf_reg': 0.014241121105101659}. Best is trial 1 with value: 0.9236077030204173.


Best trial: 1. Best value: 0.923608:  40%|████      | 4/10 [00:05<00:07,  1.20s/it]

[I 2025-11-01 20:50:07,026] Trial 3 finished with value: 0.9198539512630367 and parameters: {'iterations': 791, 'depth': 6, 'learning_rate': 0.014668155966602696, 'l2_leaf_reg': 8.143166484816808}. Best is trial 1 with value: 0.9236077030204173.


Best trial: 1. Best value: 0.923608:  50%|█████     | 5/10 [00:05<00:04,  1.11it/s]

[I 2025-11-01 20:50:07,397] Trial 4 finished with value: 0.48555846398667213 and parameters: {'iterations': 321, 'depth': 7, 'learning_rate': 0.0016190584321904467, 'l2_leaf_reg': 0.019207998139811267}. Best is trial 1 with value: 0.9236077030204173.


Best trial: 5. Best value: 0.924714:  60%|██████    | 6/10 [00:06<00:03,  1.25it/s]

[I 2025-11-01 20:50:07,999] Trial 5 finished with value: 0.9247137607435154 and parameters: {'iterations': 815, 'depth': 6, 'learning_rate': 0.014212054080694722, 'l2_leaf_reg': 0.001394318417383248}. Best is trial 5 with value: 0.9247137607435154.


Best trial: 5. Best value: 0.924714:  70%|███████   | 7/10 [00:06<00:02,  1.43it/s]

[I 2025-11-01 20:50:08,488] Trial 6 finished with value: 0.8637375148155189 and parameters: {'iterations': 790, 'depth': 5, 'learning_rate': 0.0028823996609160327, 'l2_leaf_reg': 0.36388479982648025}. Best is trial 5 with value: 0.9247137607435154.


Best trial: 7. Best value: 0.925797:  80%|████████  | 8/10 [00:07<00:01,  1.49it/s]

[I 2025-11-01 20:50:09,105] Trial 7 finished with value: 0.9257974609267078 and parameters: {'iterations': 845, 'depth': 6, 'learning_rate': 0.10617066636158129, 'l2_leaf_reg': 0.0046197835287303335}. Best is trial 7 with value: 0.9257974609267078.


Best trial: 7. Best value: 0.925797: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]


[I 2025-11-01 20:50:12,378] Trial 8 finished with value: 0.8454670974186074 and parameters: {'iterations': 971, 'depth': 9, 'learning_rate': 0.0020240183195658974, 'l2_leaf_reg': 0.5763300274392945}. Best is trial 7 with value: 0.9257974609267078.
[I 2025-11-01 20:50:12,534] Trial 9 finished with value: 0.9137512995294383 and parameters: {'iterations': 324, 'depth': 4, 'learning_rate': 0.019587734243813482, 'l2_leaf_reg': 0.003584321931426912}. Best is trial 7 with value: 0.9257974609267078.
RESULTS - STANDARD FEATURES
                   best_r2     best_rmse  \
CatBoost          0.925797   19101.63032   
LightGBM          0.924877  19219.782129   
XGBoost           0.923778  19359.772828   
GradientBoosting  0.922146  19565.956665   
RandomForest      0.908036  21265.219099   
ElasticNet        0.898915  22294.841751   
Ridge             0.894286   22799.58971   
Lasso             0.893767  22855.504891   

                                                        best_params  
CatBoost

[I 2025-11-01 20:50:12,693] A new study created in memory with name: no-name-de0c5a2d-9183-460d-b524-68006dd97770



APPLYING POLYNOMIAL FEATURES (degree=2)
Top 10 features selected for polynomial transformation:
['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea', 'TotalBsmtSF', '1stFlrSF', 'TotRmsAbvGrd', 'YearBuilt', 'FullBath', 'YearRemodAdd']
Shape after polynomial features: (1085, 104)

Optimizing Ridge...


Best trial: 8. Best value: 0.905707: 100%|██████████| 10/10 [00:00<00:00, 296.65it/s]
[I 2025-11-01 20:50:12,729] A new study created in memory with name: no-name-883c7fdb-73c3-47b8-95ca-ec1d0ca122fb


[I 2025-11-01 20:50:12,696] Trial 0 finished with value: 0.9004128999552419 and parameters: {'alpha': 0.0611435825051707}. Best is trial 0 with value: 0.9004128999552419.
[I 2025-11-01 20:50:12,700] Trial 1 finished with value: 0.901882438005117 and parameters: {'alpha': 1.0036573215413225}. Best is trial 1 with value: 0.901882438005117.
[I 2025-11-01 20:50:12,703] Trial 2 finished with value: 0.9028238941896851 and parameters: {'alpha': 1.540617345226494}. Best is trial 2 with value: 0.9028238941896851.
[I 2025-11-01 20:50:12,706] Trial 3 finished with value: 0.8999467907401157 and parameters: {'alpha': 0.1786295319609986}. Best is trial 2 with value: 0.9028238941896851.
[I 2025-11-01 20:50:12,710] Trial 4 finished with value: 0.9055080775590088 and parameters: {'alpha': 6.0539847259180615}. Best is trial 4 with value: 0.9055080775590088.
[I 2025-11-01 20:50:12,713] Trial 5 finished with value: 0.9000240187034618 and parameters: {'alpha': 0.2442701912153439}. Best is trial 4 with valu

Best trial: 0. Best value: 0.898598:  30%|███       | 3/10 [00:00<00:00, 28.70it/s]

[I 2025-11-01 20:50:12,763] Trial 0 finished with value: 0.8985982319525658 and parameters: {'alpha': 3.67060061045516}. Best is trial 0 with value: 0.8985982319525658.
[I 2025-11-01 20:50:12,799] Trial 1 finished with value: 0.8980088543055689 and parameters: {'alpha': 0.0013435224493099438}. Best is trial 0 with value: 0.8985982319525658.
[I 2025-11-01 20:50:12,833] Trial 2 finished with value: 0.8980088538019084 and parameters: {'alpha': 0.0013404427722060255}. Best is trial 0 with value: 0.8985982319525658.
[I 2025-11-01 20:50:12,867] Trial 3 finished with value: 0.8985493066449646 and parameters: {'alpha': 3.3652522517298933}. Best is trial 0 with value: 0.8985982319525658.


Best trial: 0. Best value: 0.898598:  60%|██████    | 6/10 [00:00<00:00, 28.98it/s]

[I 2025-11-01 20:50:12,902] Trial 4 finished with value: 0.8980323389887204 and parameters: {'alpha': 0.14598458317795815}. Best is trial 0 with value: 0.8985982319525658.
[I 2025-11-01 20:50:12,936] Trial 5 finished with value: 0.898011468878706 and parameters: {'alpha': 0.017346416277945375}. Best is trial 0 with value: 0.8985982319525658.


Best trial: 6. Best value: 0.899478: 100%|██████████| 10/10 [00:00<00:00, 29.04it/s]
[I 2025-11-01 20:50:13,075] A new study created in memory with name: no-name-58149624-eb6f-4408-931e-78b21bd83c00


[I 2025-11-01 20:50:12,970] Trial 6 finished with value: 0.8994775634267509 and parameters: {'alpha': 8.757794473576809}. Best is trial 6 with value: 0.8994775634267509.
[I 2025-11-01 20:50:13,004] Trial 7 finished with value: 0.8980145900223637 and parameters: {'alpha': 0.03648608208931796}. Best is trial 6 with value: 0.8994775634267509.
[I 2025-11-01 20:50:13,038] Trial 8 finished with value: 0.8982401850416252 and parameters: {'alpha': 1.4301578877235384}. Best is trial 6 with value: 0.8994775634267509.
[I 2025-11-01 20:50:13,073] Trial 9 finished with value: 0.8980095064434246 and parameters: {'alpha': 0.0053326597473741764}. Best is trial 6 with value: 0.8994775634267509.

Optimizing ElasticNet...


Best trial: 1. Best value: 0.903245:  10%|█         | 1/10 [00:00<00:00, 13.72it/s]

[I 2025-11-01 20:50:13,110] Trial 0 finished with value: 0.899840094814713 and parameters: {'alpha': 0.0012946154693481027, 'l1_ratio': 0.7677451284161289}. Best is trial 0 with value: 0.899840094814713.
[I 2025-11-01 20:50:13,148] Trial 1 finished with value: 0.9032445856672779 and parameters: {'alpha': 0.3413747391734399, 'l1_ratio': 0.03551067685920273}. Best is trial 1 with value: 0.9032445856672779.


Best trial: 2. Best value: 0.905361:  80%|████████  | 8/10 [00:00<00:00, 36.09it/s]

[I 2025-11-01 20:50:13,184] Trial 2 finished with value: 0.905360527385567 and parameters: {'alpha': 0.02395051365278193, 'l1_ratio': 0.4481505061690233}. Best is trial 2 with value: 0.905360527385567.
[I 2025-11-01 20:50:13,213] Trial 3 finished with value: 0.9031108235618576 and parameters: {'alpha': 0.0017314952175341801, 'l1_ratio': 0.15964258036743606}. Best is trial 2 with value: 0.905360527385567.
[I 2025-11-01 20:50:13,219] Trial 4 finished with value: 0.8797973802926767 and parameters: {'alpha': 9.70862749112542, 'l1_ratio': 0.6846692188899542}. Best is trial 2 with value: 0.905360527385567.
[I 2025-11-01 20:50:13,247] Trial 5 finished with value: 0.8999890732593767 and parameters: {'alpha': 0.0011981728829925055, 'l1_ratio': 0.7237070989549043}. Best is trial 2 with value: 0.905360527385567.
[I 2025-11-01 20:50:13,275] Trial 6 finished with value: 0.9039159399033316 and parameters: {'alpha': 0.2552019288145275, 'l1_ratio': 0.6743658335795656}. Best is trial 2 with value: 0.90

Best trial: 2. Best value: 0.905361: 100%|██████████| 10/10 [00:00<00:00, 37.10it/s]
[I 2025-11-01 20:50:13,347] A new study created in memory with name: no-name-7b2123e8-7dae-44a9-8e0a-fa5fea9e8bc1


[I 2025-11-01 20:50:13,313] Trial 8 finished with value: 0.884355535942124 and parameters: {'alpha': 2.6884499792589227, 'l1_ratio': 0.07644533135650433}. Best is trial 2 with value: 0.905360527385567.
[I 2025-11-01 20:50:13,344] Trial 9 finished with value: 0.9034173022303018 and parameters: {'alpha': 0.4991908949331913, 'l1_ratio': 0.4115209870722977}. Best is trial 2 with value: 0.905360527385567.

Optimizing RandomForest...


Best trial: 0. Best value: 0.90539:  10%|█         | 1/10 [00:00<00:05,  1.58it/s]

[I 2025-11-01 20:50:13,978] Trial 0 finished with value: 0.9053899031004082 and parameters: {'n_estimators': 314, 'max_depth': 30, 'min_samples_split': 8, 'max_features': 1.0}. Best is trial 0 with value: 0.9053899031004082.


Best trial: 1. Best value: 0.906155:  20%|██        | 2/10 [00:01<00:05,  1.43it/s]

[I 2025-11-01 20:50:14,720] Trial 1 finished with value: 0.9061547361081315 and parameters: {'n_estimators': 376, 'max_depth': 12, 'min_samples_split': 8, 'max_features': 1.0}. Best is trial 1 with value: 0.9061547361081315.


Best trial: 1. Best value: 0.906155:  30%|███       | 3/10 [00:01<00:03,  1.89it/s]

[I 2025-11-01 20:50:15,048] Trial 2 finished with value: 0.8995399210795262 and parameters: {'n_estimators': 343, 'max_depth': 27, 'min_samples_split': 10, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9061547361081315.


Best trial: 1. Best value: 0.906155:  40%|████      | 4/10 [00:02<00:02,  2.19it/s]

[I 2025-11-01 20:50:15,396] Trial 3 finished with value: 0.8956778910195209 and parameters: {'n_estimators': 380, 'max_depth': 8, 'min_samples_split': 6, 'max_features': 'log2'}. Best is trial 1 with value: 0.9061547361081315.


Best trial: 1. Best value: 0.906155:  50%|█████     | 5/10 [00:02<00:02,  2.28it/s]

[I 2025-11-01 20:50:15,799] Trial 4 finished with value: 0.8981391938685793 and parameters: {'n_estimators': 431, 'max_depth': 27, 'min_samples_split': 4, 'max_features': 'log2'}. Best is trial 1 with value: 0.9061547361081315.


Best trial: 1. Best value: 0.906155:  60%|██████    | 6/10 [00:02<00:01,  2.54it/s]

[I 2025-11-01 20:50:16,110] Trial 5 finished with value: 0.8987418264153655 and parameters: {'n_estimators': 320, 'max_depth': 28, 'min_samples_split': 6, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9061547361081315.


Best trial: 1. Best value: 0.906155:  70%|███████   | 7/10 [00:03<00:01,  1.80it/s]

[I 2025-11-01 20:50:16,999] Trial 6 finished with value: 0.9058031167465059 and parameters: {'n_estimators': 426, 'max_depth': 13, 'min_samples_split': 5, 'max_features': 1.0}. Best is trial 1 with value: 0.9061547361081315.


Best trial: 1. Best value: 0.906155:  80%|████████  | 8/10 [00:04<00:01,  1.82it/s]

[I 2025-11-01 20:50:17,537] Trial 7 finished with value: 0.9055155075110344 and parameters: {'n_estimators': 303, 'max_depth': 9, 'min_samples_split': 10, 'max_features': 1.0}. Best is trial 1 with value: 0.9061547361081315.


Best trial: 1. Best value: 0.906155:  90%|█████████ | 9/10 [00:04<00:00,  1.96it/s]

[I 2025-11-01 20:50:17,959] Trial 8 finished with value: 0.8675537983865812 and parameters: {'n_estimators': 467, 'max_depth': 4, 'min_samples_split': 3, 'max_features': 'log2'}. Best is trial 1 with value: 0.9061547361081315.


Best trial: 1. Best value: 0.906155: 100%|██████████| 10/10 [00:04<00:00,  2.01it/s]
[I 2025-11-01 20:50:18,319] A new study created in memory with name: no-name-06e3a0a1-e6ac-4d9f-a8f0-c7c38d3df4af


[I 2025-11-01 20:50:18,315] Trial 9 finished with value: 0.8973834806391544 and parameters: {'n_estimators': 372, 'max_depth': 9, 'min_samples_split': 6, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9061547361081315.

Optimizing GradientBoosting...


Best trial: 0. Best value: 0.903538:  10%|█         | 1/10 [00:05<00:52,  5.82s/it]

[I 2025-11-01 20:50:24,138] Trial 0 finished with value: 0.9035380433601469 and parameters: {'n_estimators': 296, 'learning_rate': 0.18339149700619675, 'max_depth': 7, 'subsample': 0.7466051381235386}. Best is trial 0 with value: 0.9035380433601469.


Best trial: 0. Best value: 0.903538:  20%|██        | 2/10 [00:09<00:34,  4.30s/it]

[I 2025-11-01 20:50:27,380] Trial 1 finished with value: 0.8804664941248221 and parameters: {'n_estimators': 385, 'learning_rate': 0.20997894230066083, 'max_depth': 4, 'subsample': 0.5357808100938541}. Best is trial 0 with value: 0.9035380433601469.


Best trial: 2. Best value: 0.906391:  30%|███       | 3/10 [00:14<00:32,  4.65s/it]

[I 2025-11-01 20:50:32,449] Trial 2 finished with value: 0.9063905373198677 and parameters: {'n_estimators': 312, 'learning_rate': 0.05732787777170617, 'max_depth': 5, 'subsample': 0.8582587713729755}. Best is trial 2 with value: 0.9063905373198677.


Best trial: 3. Best value: 0.908212:  40%|████      | 4/10 [00:15<00:19,  3.29s/it]

[I 2025-11-01 20:50:33,657] Trial 3 finished with value: 0.9082120737987214 and parameters: {'n_estimators': 191, 'learning_rate': 0.13721607281685616, 'max_depth': 2, 'subsample': 0.7989791241401982}. Best is trial 3 with value: 0.9082120737987214.


Best trial: 3. Best value: 0.908212:  50%|█████     | 5/10 [00:18<00:15,  3.20s/it]

[I 2025-11-01 20:50:36,682] Trial 4 finished with value: 0.9003159676686082 and parameters: {'n_estimators': 136, 'learning_rate': 0.14320960304485972, 'max_depth': 8, 'subsample': 0.7428291125832351}. Best is trial 3 with value: 0.9082120737987214.


Best trial: 5. Best value: 0.910674:  60%|██████    | 6/10 [00:22<00:13,  3.36s/it]

[I 2025-11-01 20:50:40,346] Trial 5 finished with value: 0.9106736783510843 and parameters: {'n_estimators': 210, 'learning_rate': 0.08058561343345905, 'max_depth': 7, 'subsample': 0.6572012894609323}. Best is trial 5 with value: 0.9106736783510843.


Best trial: 5. Best value: 0.910674:  70%|███████   | 7/10 [00:25<00:09,  3.23s/it]

[I 2025-11-01 20:50:43,330] Trial 6 finished with value: 0.9025869307076941 and parameters: {'n_estimators': 182, 'learning_rate': 0.08993039646158281, 'max_depth': 5, 'subsample': 0.8635432107092078}. Best is trial 5 with value: 0.9106736783510843.


Best trial: 5. Best value: 0.910674:  80%|████████  | 8/10 [00:33<00:09,  4.93s/it]

[I 2025-11-01 20:50:51,900] Trial 7 finished with value: 0.8754507044607523 and parameters: {'n_estimators': 302, 'learning_rate': 0.22953870475558, 'max_depth': 8, 'subsample': 0.960930056115326}. Best is trial 5 with value: 0.9106736783510843.


Best trial: 5. Best value: 0.910674:  90%|█████████ | 9/10 [00:36<00:04,  4.34s/it]

[I 2025-11-01 20:50:54,933] Trial 8 finished with value: 0.9093704808225324 and parameters: {'n_estimators': 234, 'learning_rate': 0.04186135208243348, 'max_depth': 5, 'subsample': 0.6753080401675539}. Best is trial 5 with value: 0.9106736783510843.


Best trial: 9. Best value: 0.915034: 100%|██████████| 10/10 [00:41<00:00,  4.10s/it]
[I 2025-11-01 20:50:59,366] A new study created in memory with name: no-name-7af502ca-2afd-4a89-aa7d-608d6202d992


[I 2025-11-01 20:50:59,363] Trial 9 finished with value: 0.915034174703132 and parameters: {'n_estimators': 490, 'learning_rate': 0.05683171713009057, 'max_depth': 3, 'subsample': 0.7730410208744503}. Best is trial 9 with value: 0.915034174703132.

Optimizing XGBoost...


Best trial: 0. Best value: 0.916509:  10%|█         | 1/10 [00:00<00:03,  2.53it/s]

[I 2025-11-01 20:50:59,758] Trial 0 finished with value: 0.9165085361668776 and parameters: {'n_estimators': 948, 'max_depth': 3, 'learning_rate': 0.03278023586014394, 'subsample': 0.688141108681324, 'colsample_bytree': 0.7593474251339866, 'reg_lambda': 0.057776846747947676, 'reg_alpha': 6.715704667488095}. Best is trial 0 with value: 0.9165085361668776.


Best trial: 0. Best value: 0.916509:  20%|██        | 2/10 [00:02<00:12,  1.50s/it]

[I 2025-11-01 20:51:02,031] Trial 1 finished with value: 0.9053513511322098 and parameters: {'n_estimators': 429, 'max_depth': 9, 'learning_rate': 0.02945348005764864, 'subsample': 0.866251780355443, 'colsample_bytree': 0.5717952549199894, 'reg_lambda': 0.0018639915850999615, 'reg_alpha': 0.2692329884008286}. Best is trial 0 with value: 0.9165085361668776.


Best trial: 0. Best value: 0.916509:  30%|███       | 3/10 [00:04<00:11,  1.59s/it]

[I 2025-11-01 20:51:03,729] Trial 2 finished with value: 0.9002421520673677 and parameters: {'n_estimators': 276, 'max_depth': 10, 'learning_rate': 0.02058153578055985, 'subsample': 0.7379985559289963, 'colsample_bytree': 0.9334689299185137, 'reg_lambda': 0.9174891035438344, 'reg_alpha': 0.0010507284295247776}. Best is trial 0 with value: 0.9165085361668776.


Best trial: 0. Best value: 0.916509:  40%|████      | 4/10 [00:05<00:08,  1.38s/it]

[I 2025-11-01 20:51:04,800] Trial 3 finished with value: 0.8715234858226174 and parameters: {'n_estimators': 407, 'max_depth': 8, 'learning_rate': 0.18468207104509832, 'subsample': 0.9827824719438683, 'colsample_bytree': 0.9454946647026278, 'reg_lambda': 0.07247535072015417, 'reg_alpha': 0.8958265018931225}. Best is trial 0 with value: 0.9165085361668776.


Best trial: 0. Best value: 0.916509:  50%|█████     | 5/10 [00:05<00:05,  1.05s/it]

[I 2025-11-01 20:51:05,246] Trial 4 finished with value: 0.9100971561536334 and parameters: {'n_estimators': 411, 'max_depth': 6, 'learning_rate': 0.03156543870431595, 'subsample': 0.9654929461336281, 'colsample_bytree': 0.5486952810876706, 'reg_lambda': 3.9305862529644333, 'reg_alpha': 0.04215998868613273}. Best is trial 0 with value: 0.9165085361668776.


Best trial: 0. Best value: 0.916509:  60%|██████    | 6/10 [00:06<00:03,  1.26it/s]

[I 2025-11-01 20:51:05,541] Trial 5 finished with value: 0.6204586343328372 and parameters: {'n_estimators': 368, 'max_depth': 5, 'learning_rate': 0.0018205093575904374, 'subsample': 0.8225645218759972, 'colsample_bytree': 0.599454330684334, 'reg_lambda': 1.4147028505460078, 'reg_alpha': 0.0024771640454983477}. Best is trial 0 with value: 0.9165085361668776.


Best trial: 0. Best value: 0.916509:  70%|███████   | 7/10 [00:19<00:14,  4.90s/it]

[I 2025-11-01 20:51:18,896] Trial 6 finished with value: 0.8955592791469794 and parameters: {'n_estimators': 905, 'max_depth': 12, 'learning_rate': 0.01068401977832737, 'subsample': 0.9003696776022116, 'colsample_bytree': 0.7027685734869875, 'reg_lambda': 0.0028353178064621544, 'reg_alpha': 9.412041963458302}. Best is trial 0 with value: 0.9165085361668776.


Best trial: 0. Best value: 0.916509:  80%|████████  | 8/10 [00:20<00:07,  3.74s/it]

[I 2025-11-01 20:51:20,161] Trial 7 finished with value: 0.879639722471797 and parameters: {'n_estimators': 497, 'max_depth': 12, 'learning_rate': 0.268021765297684, 'subsample': 0.9825932473265045, 'colsample_bytree': 0.6106529305201738, 'reg_lambda': 0.0014411890004123076, 'reg_alpha': 2.3718431440674146}. Best is trial 0 with value: 0.9165085361668776.


Best trial: 0. Best value: 0.916509:  90%|█████████ | 9/10 [00:23<00:03,  3.55s/it]

[I 2025-11-01 20:51:23,291] Trial 8 finished with value: 0.8709046466422629 and parameters: {'n_estimators': 658, 'max_depth': 10, 'learning_rate': 0.003642357252532457, 'subsample': 0.864100988803566, 'colsample_bytree': 0.6327892316429342, 'reg_lambda': 9.737998408718337, 'reg_alpha': 9.117964724856252}. Best is trial 0 with value: 0.9165085361668776.


Best trial: 0. Best value: 0.916509: 100%|██████████| 10/10 [00:27<00:00,  2.72s/it]
[I 2025-11-01 20:51:26,596] A new study created in memory with name: no-name-26b2cf43-f567-4b59-89b4-bc7b1bb305fe


[I 2025-11-01 20:51:26,593] Trial 9 finished with value: 0.9071587722171774 and parameters: {'n_estimators': 515, 'max_depth': 12, 'learning_rate': 0.0456949888782517, 'subsample': 0.6422666802422348, 'colsample_bytree': 0.917832632264988, 'reg_lambda': 0.07706192475886807, 'reg_alpha': 0.9869905331088796}. Best is trial 0 with value: 0.9165085361668776.

Optimizing LightGBM...


Best trial: 0. Best value: 0.912131:  10%|█         | 1/10 [00:00<00:01,  5.54it/s]

[I 2025-11-01 20:51:26,776] Trial 0 finished with value: 0.9121306161039866 and parameters: {'n_estimators': 295, 'learning_rate': 0.04095100896927524, 'num_leaves': 79, 'max_depth': 7, 'min_child_samples': 20, 'feature_fraction': 0.5664696108646707, 'bagging_fraction': 0.8708519467554883, 'reg_alpha': 9.817818139653314, 'reg_lambda': 4.258839831331433}. Best is trial 0 with value: 0.9121306161039866.


Best trial: 0. Best value: 0.912131:  20%|██        | 2/10 [00:01<00:04,  1.72it/s]

[I 2025-11-01 20:51:27,634] Trial 1 finished with value: 0.9057536950361736 and parameters: {'n_estimators': 830, 'learning_rate': 0.11153507113877219, 'num_leaves': 76, 'max_depth': 9, 'min_child_samples': 7, 'feature_fraction': 0.5063689231719428, 'bagging_fraction': 0.5508592180433881, 'reg_alpha': 0.08035309351261961, 'reg_lambda': 0.0016120772277126083}. Best is trial 0 with value: 0.9121306161039866.


Best trial: 0. Best value: 0.912131:  30%|███       | 3/10 [00:01<00:03,  1.98it/s]

[I 2025-11-01 20:51:28,047] Trial 2 finished with value: 0.5503219756538071 and parameters: {'n_estimators': 302, 'learning_rate': 0.001667020599198814, 'num_leaves': 38, 'max_depth': 8, 'min_child_samples': 7, 'feature_fraction': 0.5992214211852618, 'bagging_fraction': 0.7602338724938438, 'reg_alpha': 0.12967171129472155, 'reg_lambda': 0.005772165508410403}. Best is trial 0 with value: 0.9121306161039866.


Best trial: 0. Best value: 0.912131:  40%|████      | 4/10 [00:01<00:02,  2.38it/s]

[I 2025-11-01 20:51:28,341] Trial 3 finished with value: 0.9062440272210237 and parameters: {'n_estimators': 679, 'learning_rate': 0.07234771429415066, 'num_leaves': 84, 'max_depth': 7, 'min_child_samples': 22, 'feature_fraction': 0.6165967311385437, 'bagging_fraction': 0.6785723658863387, 'reg_alpha': 0.2507798757861156, 'reg_lambda': 0.012636888551899627}. Best is trial 0 with value: 0.9121306161039866.


Best trial: 0. Best value: 0.912131:  50%|█████     | 5/10 [00:01<00:01,  2.83it/s]

[I 2025-11-01 20:51:28,574] Trial 4 finished with value: 0.5038379039246184 and parameters: {'n_estimators': 405, 'learning_rate': 0.0012351852640504963, 'num_leaves': 17, 'max_depth': 9, 'min_child_samples': 15, 'feature_fraction': 0.8453109172382999, 'bagging_fraction': 0.9048023845134405, 'reg_alpha': 0.01634968198563174, 'reg_lambda': 5.4773616589564265}. Best is trial 0 with value: 0.9121306161039866.


Best trial: 0. Best value: 0.912131:  80%|████████  | 8/10 [00:02<00:00,  3.53it/s]

[I 2025-11-01 20:51:29,346] Trial 5 finished with value: 0.8983494668639807 and parameters: {'n_estimators': 562, 'learning_rate': 0.16640326836357533, 'num_leaves': 103, 'max_depth': -1, 'min_child_samples': 23, 'feature_fraction': 0.6924305809178614, 'bagging_fraction': 0.7410820339533961, 'reg_alpha': 1.840838343537754, 'reg_lambda': 0.014943177087102834}. Best is trial 0 with value: 0.9121306161039866.
[I 2025-11-01 20:51:29,406] Trial 6 finished with value: 0.5677080797626273 and parameters: {'n_estimators': 287, 'learning_rate': 0.0025955920056078582, 'num_leaves': 23, 'max_depth': 2, 'min_child_samples': 8, 'feature_fraction': 0.5300620847161583, 'bagging_fraction': 0.6896380776133602, 'reg_alpha': 4.181333972350086, 'reg_lambda': 9.745879578354527}. Best is trial 0 with value: 0.9121306161039866.
[I 2025-11-01 20:51:29,475] Trial 7 finished with value: 0.9113400113781459 and parameters: {'n_estimators': 228, 'learning_rate': 0.15885247683926718, 'num_leaves': 98, 'max_depth': 3

Best trial: 0. Best value: 0.912131:  80%|████████  | 8/10 [00:02<00:00,  3.53it/s]

[I 2025-11-01 20:51:29,565] Trial 8 finished with value: 0.906060924671761 and parameters: {'n_estimators': 764, 'learning_rate': 0.12273286470964075, 'num_leaves': 78, 'max_depth': 1, 'min_child_samples': 30, 'feature_fraction': 0.7824753002515916, 'bagging_fraction': 0.6568926806763477, 'reg_alpha': 0.008762486183430231, 'reg_lambda': 0.0014904466156005808}. Best is trial 0 with value: 0.9121306161039866.


Best trial: 0. Best value: 0.912131: 100%|██████████| 10/10 [00:03<00:00,  2.54it/s]
[I 2025-11-01 20:51:30,536] A new study created in memory with name: no-name-0053e0fd-ea54-4023-a9cf-beb596e35a96


[I 2025-11-01 20:51:30,532] Trial 9 finished with value: 0.6898373138526754 and parameters: {'n_estimators': 741, 'learning_rate': 0.001032474438274991, 'num_leaves': 111, 'max_depth': 8, 'min_child_samples': 18, 'feature_fraction': 0.7117842171401947, 'bagging_fraction': 0.8143722160902489, 'reg_alpha': 0.017884320649229898, 'reg_lambda': 0.05904851796839728}. Best is trial 0 with value: 0.9121306161039866.

Optimizing CatBoost...


Best trial: 0. Best value: 0.732713:  10%|█         | 1/10 [00:00<00:02,  3.59it/s]

[I 2025-11-01 20:51:30,813] Trial 0 finished with value: 0.73271307697511 and parameters: {'iterations': 300, 'depth': 5, 'learning_rate': 0.003701530832298852, 'l2_leaf_reg': 0.004922986678426076}. Best is trial 0 with value: 0.73271307697511.


Best trial: 1. Best value: 0.915993:  20%|██        | 2/10 [00:12<00:56,  7.05s/it]

[I 2025-11-01 20:51:42,607] Trial 1 finished with value: 0.915993136182695 and parameters: {'iterations': 412, 'depth': 10, 'learning_rate': 0.015251244517442733, 'l2_leaf_reg': 0.03280245700127665}. Best is trial 1 with value: 0.915993136182695.


Best trial: 1. Best value: 0.915993:  30%|███       | 3/10 [00:12<00:29,  4.26s/it]

[I 2025-11-01 20:51:43,535] Trial 2 finished with value: 0.9140181672581867 and parameters: {'iterations': 484, 'depth': 7, 'learning_rate': 0.08803934479692867, 'l2_leaf_reg': 0.03625872200299648}. Best is trial 1 with value: 0.915993136182695.


Best trial: 1. Best value: 0.915993:  40%|████      | 4/10 [00:13<00:17,  2.94s/it]

[I 2025-11-01 20:51:44,454] Trial 3 finished with value: 0.86402981109021 and parameters: {'iterations': 508, 'depth': 7, 'learning_rate': 0.00402245538705774, 'l2_leaf_reg': 1.2494963609313827}. Best is trial 1 with value: 0.915993136182695.


Best trial: 1. Best value: 0.915993:  50%|█████     | 5/10 [00:14<00:10,  2.04s/it]

[I 2025-11-01 20:51:44,911] Trial 4 finished with value: 0.9093355585201591 and parameters: {'iterations': 459, 'depth': 5, 'learning_rate': 0.11316204867610015, 'l2_leaf_reg': 0.69759039677679}. Best is trial 1 with value: 0.915993136182695.


Best trial: 1. Best value: 0.915993:  60%|██████    | 6/10 [00:38<00:37,  9.41s/it]

[I 2025-11-01 20:52:08,617] Trial 5 finished with value: 0.9144747795970906 and parameters: {'iterations': 841, 'depth': 10, 'learning_rate': 0.06657210616873033, 'l2_leaf_reg': 0.0577456432507764}. Best is trial 1 with value: 0.915993136182695.


Best trial: 1. Best value: 0.915993:  70%|███████   | 7/10 [00:40<00:21,  7.10s/it]

[I 2025-11-01 20:52:10,974] Trial 6 finished with value: 0.9135428406047229 and parameters: {'iterations': 383, 'depth': 8, 'learning_rate': 0.11670175115468587, 'l2_leaf_reg': 0.010453141067320332}. Best is trial 1 with value: 0.915993136182695.


Best trial: 1. Best value: 0.915993:  80%|████████  | 8/10 [00:40<00:09,  4.99s/it]

[I 2025-11-01 20:52:11,433] Trial 7 finished with value: 0.9142879163588865 and parameters: {'iterations': 720, 'depth': 4, 'learning_rate': 0.18218168094746442, 'l2_leaf_reg': 0.010634222465154596}. Best is trial 1 with value: 0.915993136182695.


Best trial: 1. Best value: 0.915993:  90%|█████████ | 9/10 [00:41<00:03,  3.61s/it]

[I 2025-11-01 20:52:12,005] Trial 8 finished with value: 0.9139636739071701 and parameters: {'iterations': 745, 'depth': 5, 'learning_rate': 0.0724990688731641, 'l2_leaf_reg': 0.0400813727037115}. Best is trial 1 with value: 0.915993136182695.


Best trial: 1. Best value: 0.915993: 100%|██████████| 10/10 [00:41<00:00,  4.19s/it]

[I 2025-11-01 20:52:12,457] Trial 9 finished with value: 0.4417903403549983 and parameters: {'iterations': 390, 'depth': 6, 'learning_rate': 0.00118196537836435, 'l2_leaf_reg': 3.285121776352345}. Best is trial 1 with value: 0.915993136182695.
RESULTS - POLYNOMIAL FEATURES
                   best_r2     best_rmse  \
XGBoost           0.916509  20261.989706   
CatBoost          0.915993  20324.432989   
GradientBoosting  0.915034  20440.108309   
LightGBM          0.912131  20786.426945   
RandomForest      0.906155  21481.630027   
Ridge             0.905707  21532.764711   
ElasticNet        0.905361  21572.337607   
Lasso             0.899478   22232.71783   

                                                        best_params  
XGBoost           {'n_estimators': 948, 'max_depth': 3, 'learnin...  
CatBoost          {'iterations': 412, 'depth': 10, 'learning_rat...  
GradientBoosting  {'n_estimators': 490, 'learning_rate': 0.05683...  
LightGBM          {'n_estimators': 295, 'learning


