In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.metrics import *
from catboost import Pool, CatBoostClassifier, CatBoostRegressor
import cupy as cp, gc
import cudf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, callbacks
from tensorflow.keras.models import Sequential
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

2025-06-04 14:19:16.729271: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749046756.948475      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749046757.011813      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from tensorflow import keras
from tensorflow.keras import layers, optimizers, callbacks
from tensorflow.keras.models import Sequential
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
import warnings
warnings.filterwarnings('ignore')


In [3]:
from sklearn.base import BaseEstimator, TransformerMixin
import pandas as pd
import numpy as np

class MathFeatureCreator(BaseEstimator, TransformerMixin):
    """Append pairwise arithmetic combinations of selected columns to a DataFrame.

    For every unordered pair ``(var1, var2)`` taken from ``variables`` (in the
    order given) and for every requested operation, one new column named
    ``{var1}_{op}_{var2}`` is appended to a copy of the input frame.

    Parameters
    ----------
    variables : list of str, optional
        Names of the columns to combine.
    operations : list of str, optional
        Any of ``'add'``, ``'sub'``, ``'mul'``, ``'div'``. A falsy value
        selects all four.
    """

    # Operation key -> function of the two column Series.
    _PAIRWISE_OPS = {
        'add': lambda left, right: left + right,
        'sub': lambda left, right: left - right,
        'mul': lambda left, right: left * right,
        # Zeros in the divisor are replaced by NaN before dividing.
        'div': lambda left, right: left / right.replace(0, np.nan),
    }

    def __init__(self, variables=None, operations=None):
        self.variables = variables
        self.operations = operations or ['add','sub','mul', 'div']

    def fit(self, X, y=None):
        """Stateless transformer: nothing is learned, ``self`` is returned."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the pairwise feature columns appended.

        Raises
        ------
        ValueError
            If ``operations`` contains a key other than add/sub/mul/div.
        """
        X = X.copy()
        variables = list(self.variables) if self.variables is not None else []

        unknown = [op for op in self.operations if op not in self._PAIRWISE_OPS]
        if unknown:
            raise ValueError(f"Unsupported operations: {unknown}")

        # Each feature is built together with its name, so a column label can
        # never drift away from the operation that actually produced it,
        # whatever order ``operations`` was given in.
        new_features = []
        new_feature_names = []
        for i, var1 in enumerate(variables):
            for var2 in variables[i + 1:]:
                for op in self.operations:
                    new_features.append(self._PAIRWISE_OPS[op](X[var1], X[var2]))
                    new_feature_names.append(f'{var1}_{op}_{var2}')

        # Fewer than two variables (or no operations) yields nothing to add;
        # pd.concat would raise on an empty list.
        if not new_features:
            return X

        # Concatenate all new features at once, then attach them to the input.
        new_features_df = pd.concat(new_features, axis=1)
        new_features_df.columns = new_feature_names
        return pd.concat([X, new_features_df], axis=1)

In [8]:
import numpy as np
import pandas as pd

class ExponentFeatureCreator(BaseEstimator, TransformerMixin):
    """Append element-wise power/log style transforms of selected columns.

    Parameters
    ----------
    operations : list of str, optional
        Any of ``'sqrt'``, ``'cbrt'``, ``'log'`` (computed with ``np.log1p``),
        ``'inverse'`` and ``'exp'``. A falsy value selects ``['sqrt', 'cbrt']``.
    features : list of str, optional
        Names of the columns to transform.
    """

    def __init__(self, operations=None, features=None):
        self.operations = operations or ['sqrt','cbrt']
        # Stored under the parameter's own name so that BaseEstimator's
        # get_params()/clone()/repr can find it ...
        self.features = features
        # ... and under the original attribute name, which transform() reads.
        self.variables = features

    def fit(self, X, y=None):
        """Stateless transformer: nothing is learned, ``self`` is returned.

        ``y`` is accepted (and ignored) so the transformer can be fitted with
        a target, e.g. via ``fit_transform(X, y)``.
        """
        return self

    def transform(self, X):
        """Return a copy of ``X`` with one ``{column}_{operation}`` column per
        selected column and requested operation appended."""
        X = X.copy()
        new_feature_dict = {}

        for var in self.variables:
            if 'sqrt' in self.operations:
                new_feature_dict[f"{var}_sqrt"] = np.sqrt(X[var])
            if 'cbrt' in self.operations:
                new_feature_dict[f"{var}_cbrt"] = np.cbrt(X[var])
            if 'log' in self.operations:
                new_feature_dict[f"{var}_log"] = np.log1p(X[var])
            if 'inverse' in self.operations:
                # Zero entries map to NaN rather than to the result of 1 / 0.
                new_feature_dict[f"{var}_inverse"] = np.where(X[var] != 0, 1 / X[var], np.nan)
            if 'exp' in self.operations:
                new_feature_dict[f"{var}_exp"] = np.exp(X[var])

        # Concatenate new features to original DataFrame
        new_features_df = pd.DataFrame(new_feature_dict, index=X.index)
        X = pd.concat([X, new_features_df], axis=1)

        return X


In [13]:

def _expand_per_layer(value, n_layers, name):
    """Return ``value`` as a sequence holding one entry per hidden layer.

    A scalar is repeated ``n_layers`` times; a list/tuple that is too short is
    padded with its last element; a list/tuple that is already long enough is
    returned unchanged.
    """
    if not isinstance(value, (list, tuple)):
        return [value] * n_layers
    if len(value) >= n_layers:
        return value
    if len(value) == 0:
        raise ValueError(f"'{name}' must not be an empty sequence")
    return list(value) + [value[-1]] * (n_layers - len(value))


def create_keras_model(input_dim, params, problem_type='regression'):
    """Build and compile a fully connected Keras ``Sequential`` model.

    Parameters
    ----------
    input_dim : int
        Number of input features.
    params : dict
        Read-only configuration. Keys used (with defaults):
        ``hidden_layer_sizes`` ((64, 32); a single int means one layer),
        ``activation`` ('relu'; scalar or per-layer sequence),
        ``dropout_rate`` (0.2; scalar or per-layer sequence),
        ``batch_normalization`` (False), ``l1_reg`` / ``l2_reg`` (0.0),
        ``kernel_initializer`` ('glorot_uniform'),
        ``solver`` ('adam'; also 'sgd', 'rmsprop', 'adagrad'),
        ``momentum`` (0.9, SGD only), ``learning_rate_init`` (0.001),
        ``loss`` (MSE for regression, binary cross-entropy otherwise).
    problem_type : str
        ``'regression'`` gives a linear single-unit output; any other value
        gives a sigmoid single-unit output.

    Returns
    -------
    The compiled model.

    Raises
    ------
    ValueError
        If ``solver`` is not a supported optimizer name, or a per-layer
        sequence is empty while hidden layers exist.
    """
    is_regression = problem_type == 'regression'

    # Extract parameters for model architecture
    hidden_layers = params.get('hidden_layer_sizes', (64, 32))
    if isinstance(hidden_layers, int):
        hidden_layers = (hidden_layers,)
    n_layers = len(hidden_layers)
    learning_rate = params.get('learning_rate_init', 0.001)

    # Dropout and activation may each be one value or a per-layer sequence.
    dropout_rates = _expand_per_layer(params.get('dropout_rate', 0.2), n_layers, 'dropout_rate')
    activations = _expand_per_layer(params.get('activation', 'relu'), n_layers, 'activation')

    use_batch_norm = params.get('batch_normalization', False)
    l1_reg = params.get('l1_reg', 0.0)
    l2_reg = params.get('l2_reg', 0.0)
    kernel_regularizer = None
    if l1_reg > 0 or l2_reg > 0:
        kernel_regularizer = keras.regularizers.l1_l2(l1=l1_reg, l2=l2_reg)

    # Define model
    model = Sequential()
    model.add(layers.Input(shape=(input_dim,)))

    # Hidden layers: Dense -> (BatchNorm) -> Activation -> (Dropout)
    for i, units in enumerate(hidden_layers):
        model.add(layers.Dense(
            units,
            activation=None,  # Apply activation after batch norm if used
            kernel_regularizer=kernel_regularizer,
            kernel_initializer=params.get('kernel_initializer', 'glorot_uniform')
        ))

        if use_batch_norm:
            model.add(layers.BatchNormalization())

        model.add(layers.Activation(activations[i]))

        if dropout_rates[i] > 0:
            model.add(layers.Dropout(dropout_rates[i]))

    # Output layer: linear for regression, sigmoid for binary classification
    model.add(layers.Dense(1, activation='linear' if is_regression else 'sigmoid'))

    # Optimizer selection
    optimizer_name = params.get('solver', 'adam').lower()
    plain_optimizers = {
        'adam': optimizers.Adam,
        'rmsprop': optimizers.RMSprop,
        'adagrad': optimizers.Adagrad,
    }
    if optimizer_name == 'sgd':
        optimizer = optimizers.SGD(learning_rate=learning_rate, momentum=params.get('momentum', 0.9))
    elif optimizer_name in plain_optimizers:
        optimizer = plain_optimizers[optimizer_name](learning_rate=learning_rate)
    else:
        raise ValueError(f"Unknown optimizer: {optimizer_name}")

    # Compile with the caller's loss, or the default for the problem type
    default_loss = 'mean_squared_error' if is_regression else 'binary_crossentropy'
    model.compile(
        optimizer=optimizer,
        loss=params.get('loss', default_loss),
    )

    return model



In [14]:
def keras_ann_model(train, test, TARGET, params, folds=5, return_proba=True):
    """Train a Keras network with K-fold CV and return OOF / test predictions.

    Parameters
    ----------
    train, test : pandas.DataFrame
        ``train`` contains the feature columns plus ``TARGET``; ``test`` must
        contain the same feature columns.
    TARGET : str
        Name of the target column in ``train``.
    params : dict
        Model/training configuration. It is not modified. Keys read here:
        ``eval_metric`` ('rmse'), ``random_state`` (42), ``batch_size`` (32),
        ``max_iter`` (100, used as the epoch count), ``patience`` (3),
        ``verbose`` (1). The remaining keys are passed to
        ``create_keras_model``.
    folds : int
        Number of shuffled K-fold splits.
    return_proba : bool
        Accepted for interface compatibility; it is not used by this function.

    Returns
    -------
    (oof_preds, test_preds) : tuple of numpy.ndarray
        Out-of-fold predictions for every training row, and test predictions
        averaged over the folds.
    """
    # Metrics that imply a binary-classification network (sigmoid output).
    classification_metrics = {'accuracy', 'f1', 'precision', 'recall', 'auc', 'roc_auc', 'logloss'}

    # Work on a shallow copy: popping 'eval_metric' from the caller's dict
    # would make a second call with the same dict silently fall back to 'rmse'.
    params = dict(params)
    metric = params.pop('eval_metric', 'rmse')

    # get_metric matches names case-sensitively and knows ROC-AUC as 'roc_auc'.
    metric_key = metric.lower()
    if metric_key == 'auc':
        metric_key = 'roc_auc'

    # Determine problem type based on evaluation metric
    problem_type = 'classification' if metric_key in classification_metrics else 'regression'

    random_state = params.get('random_state', 42)
    batch_size = params.get('batch_size', 32)
    epochs = params.get('max_iter', 100)
    patience = params.get('patience', 3)
    verbose = params.get('verbose', 1)

    # Set seed for reproducibility
    tf.random.set_seed(random_state)
    np.random.seed(random_state)

    # Prepare data
    FOLDS = folds
    oof_preds = np.zeros(len(train))
    test_preds = np.zeros(len(test))
    FEATURES = [col for col in train.columns if col != TARGET]

    # reset_index returns new frames, so the scaling below is applied to
    # local objects and positional fold indices match the row labels.
    train = train.reset_index(drop=True)
    test = test.reset_index(drop=True)

    # NOTE(review): the scaler is fitted on the full training frame before the
    # CV split, so validation-fold rows contribute to the scaling statistics.
    scaler = StandardScaler()
    train[FEATURES] = scaler.fit_transform(train[FEATURES])
    test[FEATURES] = scaler.transform(test[FEATURES])

    # The test matrix is identical for every fold.
    X_test = test[FEATURES].values

    # Prepare cross-validation
    kf = KFold(n_splits=FOLDS, shuffle=True, random_state=random_state)

    print(f' Training Keras ANN for {problem_type}...')

    # Train across folds
    for i, (train_idx, val_idx) in enumerate(kf.split(train)):
        print(f' Fold {i + 1}/{FOLDS}')

        # Prepare training and validation sets
        X_train = train.loc[train_idx, FEATURES].values
        y_train = train.loc[train_idx, TARGET].values
        X_val = train.loc[val_idx, FEATURES].values
        y_val = train.loc[val_idx, TARGET].values

        # A fresh model is built for every fold
        model = create_keras_model(
            input_dim=len(FEATURES),
            params=params,
            problem_type=problem_type
        )

        # Stop when validation loss stalls and roll back to the best weights
        callbacks_list = [
            callbacks.EarlyStopping(
                monitor='val_loss',
                patience=patience,
                restore_best_weights=True
            )
        ]

        # Fit the model
        model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks_list,
            verbose=verbose
        )

        # Make predictions
        val_preds = model.predict(X_val, verbose=0).flatten()
        test_fold_preds = model.predict(X_test, verbose=0).flatten()

        # Store predictions; test predictions are averaged across folds
        oof_preds[val_idx] = val_preds
        test_preds += test_fold_preds / FOLDS

        # Evaluate fold performance
        fold_score = get_metric(metric_key, y_val, val_preds)
        print(f' Fold {metric.upper()}: {fold_score:.5f}')

    # Evaluate overall performance
    overall_score = get_metric(metric_key, train[TARGET], oof_preds)
    print(f'Overall OOF {metric.upper()}: {overall_score:.5f}')

    # Clear Keras backend session
    keras.backend.clear_session()

    return oof_preds, test_preds

In [15]:
import xgboost as xgb
from sklearn.model_selection import KFold
import numpy as np

def xgb_model(train, test, TARGET, params, folds):
    """Train XGBoost with K-fold CV and return OOF / test predictions.

    Parameters
    ----------
    train, test : pandas.DataFrame
        ``train`` holds the feature columns plus ``TARGET``; ``test`` must
        hold the same feature columns.
    TARGET : str
        Name of the target column in ``train``.
    params : dict
        Parameters forwarded to ``xgb.train``. The dict is not modified;
        ``device`` defaults to ``'cuda'`` when absent.
    folds : int
        Number of shuffled K-fold splits (fixed ``random_state=42``).

    Returns
    -------
    (oof_xgb, pred_xgb) : tuple of numpy.ndarray
        Out-of-fold predictions for every training row, and test predictions
        averaged over the folds.
    """
    # Copy so the default below is not written into the caller's dict.
    params = dict(params)
    params.setdefault('device', 'cuda')

    FOLDS = folds
    oof_xgb = np.zeros(len(train))
    pred_xgb = np.zeros(len(test))
    FEATURES = [col for col in train if col != TARGET]
    train = train.reset_index(drop=True)
    test = test.reset_index(drop=True)

    # The test matrix is identical for every fold, so build it once.
    dtest = xgb.DMatrix(test[FEATURES])

    kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)
    print('Training XGBoost model...')

    for i, (train_index, val_index) in enumerate(kf.split(train)):
        print(f'Fold {i + 1}')
        X_train = train.loc[train_index, FEATURES]
        y_train_fold = train.loc[train_index, TARGET]
        X_val = train.loc[val_index, FEATURES]
        y_val = train.loc[val_index, TARGET]

        dtrain = xgb.DMatrix(X_train, label=y_train_fold)
        dval = xgb.DMatrix(X_val, label=y_val)

        model_xgb = xgb.train(
            params=params,
            dtrain=dtrain,
            num_boost_round=10000,
            evals=[(dval, 'validation')],
            early_stopping_rounds=3,
            verbose_eval=False
        )

        # After training, print validation results (from the best iteration)
        print(f"Validation results after Fold {i + 1}:")
        print(f"Best iteration: {model_xgb.best_iteration}, Best score: {model_xgb.best_score}")

        # Restrict prediction to the trees up to the best iteration; without
        # iteration_range the booster also uses the rounds trained after it
        # while waiting for early stopping to trigger.
        best_range = (0, model_xgb.best_iteration + 1)
        oof_xgb[val_index] = model_xgb.predict(dval, iteration_range=best_range)
        pred_xgb += model_xgb.predict(dtest, iteration_range=best_range)

    pred_xgb /= FOLDS

    return oof_xgb, pred_xgb


In [16]:
def lgb_model(train, test, TARGET, params, folds):
    """Train LightGBM with K-fold CV and return OOF / test predictions.

    Parameters
    ----------
    train, test : pandas.DataFrame
        ``train`` holds the feature columns plus ``TARGET``; ``test`` must
        hold the same feature columns.
    TARGET : str
        Name of the target column in ``train``.
    params : dict
        Parameters forwarded to ``lgb.train``. The dict is not modified;
        ``verbosity`` defaults to ``-1`` and ``device`` to ``'gpu'`` when
        absent.
    folds : int
        Number of shuffled K-fold splits (fixed ``random_state=42``).

    Returns
    -------
    (oof_lgb, pred_lgb) : tuple of numpy.ndarray
        Out-of-fold predictions for every training row, and test predictions
        averaged over the folds.
    """
    # Copy so the defaults below are not written into the caller's dict.
    params = dict(params)
    params.setdefault('verbosity', -1)
    params.setdefault('device', 'gpu')

    FOLDS = folds
    oof_lgb = np.zeros(len(train))
    pred_lgb = np.zeros(len(test))
    FEATURES = [col for col in train if col != TARGET]
    train = train.reset_index(drop=True)
    test = test.reset_index(drop=True)

    # The test features are identical for every fold.
    X_test = test[FEATURES]

    kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)
    print('Training LightGBM model...')

    for i, (train_index, val_index) in enumerate(kf.split(train)):
        print(f'Fold {i + 1}')
        X_train = train.loc[train_index, FEATURES]
        y_train_fold = train.loc[train_index, TARGET]
        X_val = train.loc[val_index, FEATURES]
        y_val = train.loc[val_index, TARGET]

        dtrain = lgb.Dataset(X_train, label=y_train_fold)
        dval = lgb.Dataset(X_val, label=y_val)

        model_lgb = lgb.train(
            params=params,
            train_set=dtrain,
            num_boost_round=10000,
            valid_sets=[dval],
            callbacks=[
                lgb.early_stopping(stopping_rounds=3),
                lgb.log_evaluation(0),
            ],
        )

        # Predict with the best iteration found by early stopping
        best_iteration = model_lgb.best_iteration
        oof_lgb[val_index] = model_lgb.predict(X_val, num_iteration=best_iteration)
        pred_lgb += model_lgb.predict(X_test, num_iteration=best_iteration)

    pred_lgb /= FOLDS

    return oof_lgb, pred_lgb


In [17]:
def catboost_model(train, test, TARGET, params, folds):
    """Train CatBoost with K-fold CV and return OOF / test predictions.

    Parameters
    ----------
    train, test : pandas.DataFrame
        ``train`` holds the feature columns plus ``TARGET``; ``test`` must
        hold the same feature columns.
    TARGET : str
        Name of the target column in ``train``.
    params : dict
        Constructor parameters for the CatBoost model. The dict is not
        modified. ``loss_function`` is required; ``verbose`` (False),
        ``task_type`` ('GPU') and ``iterations`` (10000) are defaulted when
        absent. A ``loss_function`` containing 'Logloss' or 'CrossEntropy'
        selects ``CatBoostClassifier``; anything else selects
        ``CatBoostRegressor``.
    folds : int
        Number of shuffled K-fold splits (fixed ``random_state=42``).

    Returns
    -------
    (oof_cb, pred_cb) : tuple of numpy.ndarray
        Out-of-fold predictions (positive-class probabilities for
        classification) and test predictions averaged over the folds.
    """
    # Copy so the defaults below are not written into the caller's dict.
    params = dict(params)
    params.setdefault('verbose', False)
    params.setdefault('task_type', 'GPU')
    params.setdefault('iterations', 10000)

    FOLDS = folds
    oof_cb = np.zeros(len(train))  # Out-of-Fold predictions
    pred_cb = np.zeros(len(test))  # Test predictions
    FEATURES = [col for col in train if col != TARGET]
    train = train.reset_index(drop=True)
    test = test.reset_index(drop=True)

    # The test features are identical for every fold.
    X_test = test[FEATURES]

    # Determine if the task is classification or regression based on params
    loss_function = params['loss_function']
    is_classification = 'Logloss' in loss_function or 'CrossEntropy' in loss_function

    kf = KFold(n_splits=FOLDS, shuffle=True, random_state=42)
    print('Training CatBoost model...')

    for i, (train_index, val_index) in enumerate(kf.split(train)):
        print(f'Fold {i + 1}')
        X_train = train.loc[train_index, FEATURES]
        y_train_fold = train.loc[train_index, TARGET]
        X_val = train.loc[val_index, FEATURES]
        y_val = train.loc[val_index, TARGET]

        dtrain = Pool(data=X_train, label=y_train_fold)
        dval = Pool(data=X_val, label=y_val)

        # Choose the correct CatBoost model based on classification or regression
        if is_classification:
            model_cb = CatBoostClassifier(**params)
        else:
            model_cb = CatBoostRegressor(**params)

        # Train the model
        model_cb.fit(
            dtrain,
            eval_set=dval,
            early_stopping_rounds=3,
            verbose=False
        )

        # Get predictions
        if is_classification:
            val_preds = model_cb.predict_proba(X_val)[:, 1]  # Probabilities for the positive class
            pred_cb += model_cb.predict_proba(X_test)[:, 1]
        else:
            val_preds = model_cb.predict(X_val)  # Continuous predictions for regression
            pred_cb += model_cb.predict(X_test)

        # Report the validation score. When 'eval_metric' was not passed in
        # params, get_param returns None, so fall back to the first metric
        # that CatBoost recorded for the validation set.
        best_iter = model_cb.get_best_iteration()
        validation_scores = model_cb.get_best_score().get('validation', {})
        metric_name = model_cb.get_param('eval_metric')
        if metric_name not in validation_scores and validation_scores:
            metric_name = next(iter(validation_scores))
        val_score = validation_scores.get(metric_name, float('nan'))

        print(f'Best Iteration: {best_iter} | Validation {metric_name}: {val_score:.5f}')

        # Store the predictions
        oof_cb[val_index] = val_preds

    # Average predictions for the test set
    pred_cb /= FOLDS

    return oof_cb, pred_cb

In [18]:
def weighted_mean_absolute_error(y_true, y_pred, weights):
    """Weighted mean absolute error.

    Each absolute residual ``|y_true - y_pred|`` is multiplied by its weight;
    the weighted residuals are summed and divided by the sum of the weights.
    """
    weighted_residuals = weights * np.abs(y_true - y_pred)
    total_weight = np.sum(weights)
    return np.sum(weighted_residuals) / total_weight

def rmsLe(y_true, y_pred):
    """Root mean squared logarithmic error.

    Predictions are floored at ``1e-6`` before ``mean_squared_log_error`` is
    applied, and the square root of that value is returned.
    """
    y_pred = np.maximum(y_pred, 1e-6)
    return np.sqrt(mean_squared_log_error(y_true, y_pred))


# Conventional lower-case spelling, kept alongside the original name so the
# function can be called under either one.
rmsle = rmsLe

def mape(y_true, y_pred):
    """Mean absolute percentage error, delegated to
    ``mean_absolute_percentage_error``; its result is returned unchanged."""
    score = mean_absolute_percentage_error(y_true, y_pred)
    return score

def logloss(y_true, y_prob, eps=1e-15):
    """Binary cross-entropy averaged over all samples.

    Probabilities are clipped to ``[eps, 1 - eps]`` first so that neither
    logarithm is evaluated at zero.
    """
    clipped = np.clip(y_prob, eps, 1 - eps)  # Avoid log(0)
    positive_term = y_true * np.log(clipped)
    negative_term = (1 - y_true) * np.log(1 - clipped)
    return -np.mean(positive_term + negative_term)



In [19]:
def get_metric(metric, y_true, y_pred, num_classes=None, weights=None, custom_metric=None):
    """Compute a named evaluation metric on the CPU.

    Parameters
    ----------
    metric : str
        One of 'roc_auc', 'accuracy', 'f1', 'precision', 'recall', 'mae',
        'r2', 'rmse', 'wmae', 'rmsle', 'mse', 'mape', 'logloss', 'custom'.
        Names are matched case-sensitively.
    y_true, y_pred : array-like
        Targets and predictions. For the label-based metrics (accuracy, f1,
        precision, recall) ``y_pred.round()`` is scored.
    num_classes : int, optional
        When greater than 2, f1/precision/recall use ``average='weighted'``
        and ROC-AUC uses ``multi_class="ovr"``.
    weights : array-like, optional
        Required for 'wmae'.
    custom_metric : callable, optional
        Called as ``custom_metric(y_true, y_pred)`` for 'custom'.

    Raises
    ------
    ValueError
        For an unknown metric name, 'wmae' without ``weights``, or 'custom'
        without a callable ``custom_metric``.
    """
    is_multiclass = bool(num_classes and num_classes > 2)

    if metric == 'roc_auc':
        # `multi_class` is only passed when it is needed: None is not one of
        # the values roc_auc_score accepts for that argument.
        if is_multiclass:
            return roc_auc_score(y_true, y_pred, multi_class="ovr")
        return roc_auc_score(y_true, y_pred)
    elif metric == 'accuracy':
        return accuracy_score(y_true, y_pred.round())
    elif metric in ('f1', 'precision', 'recall'):
        scorer = {'f1': f1_score, 'precision': precision_score, 'recall': recall_score}[metric]
        labels = y_pred.round()
        if is_multiclass:
            return scorer(y_true, labels, average='weighted')
        return scorer(y_true, labels)
    elif metric == 'mae':
        return mean_absolute_error(y_true, y_pred)
    elif metric == 'r2':
        return r2_score(y_true, y_pred)
    elif metric == 'rmse':
        # The square root is taken explicitly instead of passing `squared=False`,
        # which scikit-learn deprecated and later removed.
        return np.sqrt(mean_squared_error(y_true, y_pred))
    elif metric == 'wmae' and weights is not None:
        return weighted_mean_absolute_error(y_true, y_pred, weights)
    elif metric == 'rmsle':
        return rmsLe(y_true, y_pred)
    elif metric == 'mse':
        return mean_squared_error(y_true, y_pred)
    elif metric == "mape":
        return mape(y_true, y_pred)
    elif metric == 'logloss':
        return logloss(y_true, y_pred)
    elif metric == 'custom' and callable(custom_metric):
        return custom_metric(y_true, y_pred)
    else:
        raise ValueError(f"Unsupported metric '{metric}'")


In [20]:
import cupy as cp

def get_metric_gpu(metric, y_true, y_pred, num_classes=None, weights=None, custom_metric=None):
    """Score one or many candidate prediction vectors on the GPU with CuPy.

    ``y_pred`` may be 1-D (a single prediction vector) or 2-D with one
    candidate per column. Every built-in metric is reduced over the sample
    axis only, so a 2-D ``y_pred`` yields one score per column; this is what
    lets a caller pick the best column with ``argmin`` / ``argmax``.

    Parameters
    ----------
    metric : str
        One of 'accuracy', 'roc_auc', 'precision', 'recall', 'f1', 'mae',
        'mse', 'rmse', 'rmsle', 'r2', 'mape', 'wmae', 'logloss', 'custom'.
    y_true : array-like
        Targets; pandas, NumPy and CuPy inputs are accepted. 1-D input is
        treated as a single column that is broadcast against every candidate.
    y_pred : array-like
        Predictions, 1-D or (n_samples, n_candidates).
    num_classes : int, optional
        When greater than 2, precision/recall/f1 are support-weighted averages
        over classes ``0 .. num_classes - 1``; otherwise class 1 is scored.
    weights : array-like, optional
        Per-sample weights, required for 'wmae'.
    custom_metric : callable, optional
        For 'custom': called as ``custom_metric(y_true, y_pred)`` with both
        arguments as 2-D CuPy arrays; its result is returned unchanged.

    Returns
    -------
    cupy.ndarray
        A 0-d array for 1-D ``y_pred``, otherwise a 1-D array with one score
        per column of ``y_pred``.

    Notes
    -----
    * Label-based metrics (accuracy, precision, recall, f1) score
      ``cp.rint(y_pred)``, mirroring the ``y_pred.round()`` of the CPU
      ``get_metric``.
    * 'mape' is returned as a fraction (not multiplied by 100) and 'rmsle'
      floors predictions at 1e-6, again to agree with the CPU versions.
    * 'roc_auc' supports binary targets (labels 0 and 1) only.

    Raises
    ------
    ValueError
        For an unknown metric, 'wmae' without ``weights`` or 'custom' without
        a callable ``custom_metric``.
    """
    def _as_gpu_array(values):
        # pandas containers are unwrapped first; the rest goes through cp.asarray.
        if isinstance(values, (pd.Series, pd.DataFrame)):
            values = values.values
        return cp.asarray(values)

    y_true = _as_gpu_array(y_true)
    y_pred = _as_gpu_array(y_pred)

    # Work with column layout throughout: rows are samples, columns candidates.
    single_candidate = y_pred.ndim == 1
    if y_true.ndim == 1:
        y_true = y_true[:, cp.newaxis]
    if single_candidate:
        y_pred = y_pred[:, cp.newaxis]

    eps = 1e-8
    multiclass = bool(num_classes and num_classes > 2)

    def accuracy_score_gpu():
        return cp.mean(y_true == cp.rint(y_pred), axis=0)

    def _precision_recall_f1(which):
        """Per-candidate precision, recall or F1 (``which`` selects the score)."""
        labels = cp.rint(y_pred)
        classes = range(num_classes) if multiclass else (1,)
        scores, supports = [], []
        for cls in classes:
            actual = y_true == cls
            predicted = labels == cls
            tp = cp.sum(actual & predicted, axis=0)
            fp = cp.sum(~actual & predicted, axis=0)
            fn = cp.sum(actual & ~predicted, axis=0)
            precision = tp / (tp + fp + eps)
            recall = tp / (tp + fn + eps)
            if which == 'precision':
                score = precision
            elif which == 'recall':
                score = recall
            else:
                score = 2 * precision * recall / (precision + recall + eps)
            scores.append(score)
            supports.append(cp.sum(actual, axis=0))
        if not multiclass:
            return scores[0]
        # Support-weighted average over classes: rows are classes, columns candidates.
        scores = cp.stack(scores)
        supports = cp.stack(supports)
        return cp.sum(scores * supports, axis=0) / (cp.sum(supports, axis=0) + eps)

    def mae_gpu():
        return cp.mean(cp.abs(y_true - y_pred), axis=0)

    def mse_gpu():
        return cp.mean((y_true - y_pred) ** 2, axis=0)

    def rmse_gpu():
        return cp.sqrt(mse_gpu())

    def rmsle_gpu():
        y_true_log = cp.log1p(cp.clip(y_true, a_min=0, a_max=None))
        y_pred_log = cp.log1p(cp.maximum(y_pred, 1e-6))
        return cp.sqrt(cp.mean((y_true_log - y_pred_log) ** 2, axis=0))

    def r2_score_gpu():
        ss_res = cp.sum((y_true - y_pred) ** 2, axis=0)
        ss_tot = cp.sum((y_true - cp.mean(y_true, axis=0)) ** 2, axis=0)
        return 1 - ss_res / (ss_tot + eps)

    def mape_gpu():
        # Guard the denominator by magnitude so negative targets keep their scale.
        denominator = cp.maximum(cp.abs(y_true), float(np.finfo(np.float64).eps))
        return cp.mean(cp.abs(y_true - y_pred) / denominator, axis=0)

    def weighted_mae_gpu():
        sample_weights = _as_gpu_array(weights)
        if sample_weights.ndim == 1:
            sample_weights = sample_weights[:, cp.newaxis]
        weighted_abs = cp.abs(y_true - y_pred) * sample_weights
        return cp.sum(weighted_abs, axis=0) / (cp.sum(sample_weights, axis=0) + eps)

    def _roc_auc_single(labels, scores):
        """Area under the ROC curve for one score vector (0-d CuPy array)."""
        positives = labels == 1
        negatives = labels == 0
        n_pos = int(cp.sum(positives))
        n_neg = int(cp.sum(negatives))
        if n_pos == 0 or n_neg == 0:
            return cp.asarray(cp.nan)  # Undefined AUC
        order = cp.argsort(-scores)
        sorted_scores = scores[order]
        cum_pos = cp.cumsum(positives[order])
        cum_neg = cp.cumsum(negatives[order])
        # One ROC point per distinct score: keep the last index of every run
        # of tied scores so ties do not depend on the sort order.
        run_ends = cp.concatenate([
            cp.flatnonzero(cp.diff(sorted_scores)),
            cp.asarray([scores.shape[0] - 1]),
        ])
        tpr = cp.concatenate([cp.zeros(1), cum_pos[run_ends] / n_pos])
        fpr = cp.concatenate([cp.zeros(1), cum_neg[run_ends] / n_neg])
        # Trapezoidal rule, starting from the (0, 0) corner.
        return cp.sum(cp.diff(fpr) * (tpr[1:] + tpr[:-1]) / 2.0)

    def roc_auc_score_gpu():
        shared_labels = y_true.shape[1] == 1
        return cp.stack([
            _roc_auc_single(y_true[:, 0 if shared_labels else col], y_pred[:, col])
            for col in range(y_pred.shape[1])
        ])

    def logloss_gpu(clip_eps=1e-15):
        y_prob = cp.clip(y_pred, clip_eps, 1 - clip_eps)  # Avoid log(0)
        return -cp.mean(y_true * cp.log(y_prob) + (1 - y_true) * cp.log(1 - y_prob), axis=0)

    scorers = {
        'accuracy': accuracy_score_gpu,
        'roc_auc': roc_auc_score_gpu,
        'precision': lambda: _precision_recall_f1('precision'),
        'recall': lambda: _precision_recall_f1('recall'),
        'f1': lambda: _precision_recall_f1('f1'),
        'mae': mae_gpu,
        'mse': mse_gpu,
        'rmse': rmse_gpu,
        'rmsle': rmsle_gpu,
        'r2': r2_score_gpu,
        'mape': mape_gpu,
        'logloss': logloss_gpu,
    }

    if metric == 'custom' and callable(custom_metric):
        return custom_metric(y_true, y_pred)
    if metric == 'wmae' and weights is not None:
        scores = weighted_mae_gpu()
    elif metric in scorers:
        scores = scorers[metric]()
    else:
        raise ValueError(f"Unsupported metric '{metric}'")

    # A 1-D y_pred gets a 0-d result, matching the shape of a plain reduction.
    if single_candidate and scores.size == 1:
        return scores.reshape(())
    return scores


In [22]:
class HillBoost():
    def __init__(self, models, folds, hc_metric):
        if not isinstance(models, dict):
            raise TypeError("'models' must be a dict with keys as model_name & values as another dict for parameters")

        self.models = models
        self.hc_metric = hc_metric
        self.folds = folds

    def hill_climb_df(self, train, test, target):
        FEATURES = [col for col in train.columns if col != target]
        oof_df = pd.DataFrame()
        pred_df = pd.DataFrame()

        for idx, (model, params) in enumerate(self.models.items()):
            print(f'Model : {model} is on the run..')
            if model.startswith('xgboost'):
                model_oof, model_pred = xgb_model(train, test, target, params, self.folds)
                oof_df[f'{model}'] = model_oof
                pred_df[f'{model}'] = model_pred

            if model.startswith('lightgbm'):
                model_oof, model_pred = lgb_model(train, test, target, params, self.folds)
                oof_df[f'{model}'] = model_oof
                pred_df[f'{model}'] = model_pred

            if model.startswith('catboost'):
                model_oof, model_pred = catboost_model(train, test, target, params, self.folds)
                oof_df[f'{model}'] = model_oof
                pred_df[f'{model}'] = model_pred

            if model.startswith(('mlp','ann')):
                model_oof, model_pred = keras_ann_model(train, test, target, params, self.folds)
                oof_df[f'{model}'] = model_oof
                pred_df[f'{model}'] = model_pred
                
        return oof_df, pred_df

    def HillClimb(self,train, test, target, iterations=10, min_improvement=1e-7):
        """
        Enhanced hill climbing algorithm for ensemble building
        
        Parameters:
        - train: Training dataframe with OOF predictions
        - test: Test dataframe with predictions
        - target: Target column name or array
        - iterations: Number of passes through the model set (default: 10)
        - min_improvement: Minimum improvement threshold (default: 1e-7)
        """
        import pandas as pd
        import numpy as np
        import cupy as cp  # Using GPU acceleration
        import time
        
        # Define metrics that should be maximized or minimized
        maximize_metrics = [
            'roc_auc',
            'accuracy',
            'f1',
            'precision',
            'recall',
            'r2'
        ]
        
        minimize_metrics = [
            'mae',
            'rmse',
            'wmae',
            'rmsle',
            'mse',
            'mape',
            'logloss'
        ]
    
        oof_df,pred_df=self.hill_climb_df(train,test,target)
        # Algorithm parameters - adjusted for better performance
        USE_NEGATIVE_WGT = True       # Allow negative weights for potentially better combinations
        MAX_MODELS = 20               # Maximum number of models in ensemble
        TOL = min_improvement         # Use a smaller tolerance for more precise optimization
        metric = self.hc_metric              # Metric to optimize
        
        
        # Handle target as column name or direct array
        if isinstance(target, str):
            TARGET = train[target].values
            # Remove target column from predictions if it's there
            if target in oof_df.columns:
                oof_df = oof_df.drop(columns=[target])
            if target in pred_df.columns:
                pred_df = pred_df.drop(columns=[target])
        else:
            TARGET = target
        
        model_names = oof_df.columns
        print(f"Working with {len(model_names)} models: {', '.join(model_names)}")
        
        # Find the best individual model
        best_score = float('inf') if metric in minimize_metrics else -float('inf')
        best_index = -1
        
        for k, name in enumerate(model_names):
            metric_calculated = get_metric(metric, TARGET, oof_df.iloc[:, k])
            if metric in minimize_metrics:
                if metric_calculated < best_score:
                    best_score = metric_calculated
                    best_index = k
            else:  # maximize metric
                if metric_calculated > best_score:
                    best_score = metric_calculated
                    best_index = k
            print(f'{metric} {metric_calculated:0.5f} {name}')
    
        print(f'Best single model is {model_names[best_index]} with {metric} = {best_score:0.5f}')
        
        # Initialize the ensemble with the best model
        indices = [best_index]
        old_best_score = best_score
        print(f'0 We begin with best single model {metric} {best_score:0.5f} from "{model_names[best_index]}"')
        
        # Prepare variables for GPU processing
        x_train2 = cp.array(oof_df.values)  # GPU
        x_train3 = cp.array(pred_df.values)  # GPU
        best_ensemble = x_train2[:, best_index].copy()  # GPU - explicitly copy to avoid reference issues
        truth = cp.array(TARGET)  # GPU
        
        # Improved weight range with finer granularity
        start = -0.60 if USE_NEGATIVE_WGT else 0.01
        # Use even finer weight steps for more precise optimization
        ww = cp.concatenate([
            cp.arange(start, -0.01, 0.05),     # Coarse steps for negative weights
            cp.arange(-0.01, 0.01, 0.001),     # Very fine steps around zero
            cp.arange(0.01, 0.11, 0.005),      # Fine steps for small weights
            cp.arange(0.11, 1.01, 0.01)        # Regular steps for larger weights
        ])
        nn = len(ww)
        print(f"Testing {nn} different weight combinations")
        
        # Begin hill climbing with multiple iterations
        new_rows = []
        final_pred = x_train3[:, best_index].copy()  # Explicit copy
        ensemble_weights = {model_names[best_index]: 1.0}  # Track all model weights
        
        improvement_found = False
        
        for iteration in range(iterations):
            print(f"\nIteration {iteration+1}/{iterations}")
            models_added = 0
            
            # Shuffle the model order for more exploration in each iteration
            model_indices = list(range(len(model_names)))
            if iteration > 0:  # Only shuffle after first iteration to keep logging consistent
                np.random.shuffle(model_indices)
            
            # Try each model that hasn't been added yet
            for idx in model_indices:
                model = model_names[idx]
                
                if len(indices) >= MAX_MODELS:
                    print(f"Reached maximum number of models ({MAX_MODELS})")
                    break
                    
                # Skip the best model in first iteration (it's already included)
                if iteration == 0 and idx == best_index:
                    continue
                
                # Create potential ensembles with different weights
                new_model = x_train2[:, idx]  # GPU
                m1 = cp.repeat(best_ensemble[:, cp.newaxis], nn, axis=1) * (1-ww)  # GPU
                m2 = cp.repeat(new_model[:, cp.newaxis], nn, axis=1) * ww  # GPU
                mm = m1 + m2  # GPU
    
                new_metrics = get_metric_gpu(metric, truth, mm)
    
                if metric in minimize_metrics:
                    new_score = cp.min(new_metrics).item()
                    new_idx = cp.argmin(new_metrics).item()
                    is_better = new_score < old_best_score
                    diff = old_best_score - new_score
                else:  # maximize metrics
                    new_score = cp.max(new_metrics).item()
                    new_idx = cp.argmax(new_metrics).item()
                    is_better = new_score > old_best_score
                    diff = new_score - old_best_score
                    
                if is_better and abs(diff) > TOL:
                    improvement_found = True
                    print(f'Improvement found by {abs(diff):.6f}')
                    best_ensemble = mm[:, new_idx].copy()  # Explicit copy
                    best_weight = ww[new_idx].item()
                    old_best_score = new_score
                    
                    # Track which models we've used (even if re-used in later iterations)
                    if idx not in indices:
                        indices.append(idx)
                    
                    # Update ensemble weights and predictions
                    for existing_model in list(ensemble_weights.keys()):
                        ensemble_weights[existing_model] *= (1-best_weight)
                    
                    # Add or update weight for the current model
                    if model in ensemble_weights:
                        ensemble_weights[model] += best_weight
                    else:
                        ensemble_weights[model] = best_weight
                    
                    final_pred = final_pred * (1-best_weight) + x_train3[:, idx] * best_weight
                    new_row = {
                        'iteration': iteration + 1,
                        'model_added': model,
                        'additive_weight': best_weight,
                        'total_weight': ensemble_weights[model],
                        'metric_value': new_score,
                        'improvement': abs(diff)
                    }
                    new_rows.append(new_row)
                    models_added += 1
                    print(f'Added {model} with weight {best_weight:.4f} (total weight: {ensemble_weights[model]:.4f})')
                    
            if models_added == 0:
                print(f"No improvements found in iteration {iteration+1}")
                if iteration > 0 and not improvement_found:  # Only break if we've done at least one iteration with no improvements
                    break
        
        # Calculate final score
        if isinstance(best_ensemble, cp.ndarray):
            best_ensemble_np = cp.asnumpy(best_ensemble)
        else:
            best_ensemble_np = best_ensemble
            
        final_new_score = get_metric(metric, TARGET, best_ensemble_np)
        df = pd.DataFrame(new_rows)
        
        if df.empty:
            print(f'No additive gain in {metric}, consider:')
            print('1. Using more diverse models')
            print('2. Decreasing min_improvement parameter')
            print('3. Trying different metrics')
            print('4. Analyzing model correlations - highly correlated models may not ensemble well')
        else:
            print(f'\nEnsemble Summary:')
            print(f'Improvement: {abs(final_new_score - best_score):.6f}')
            print(f'Initial best score: {best_score:.6f}')
            print(f'Final ensemble score: {final_new_score:.6f}')
            
            print("\nFinal ensemble weights:")
            for model, weight in sorted(ensemble_weights.items(), key=lambda x: abs(x[1]), reverse=True):
                if abs(weight) > 0.001:  # Only show models with non-negligible weights
                    print(f"  {model}: {weight:.4f}")
    
        # Save predictions
        np.save('pred_hill_climb', cp.asnumpy(final_pred) if isinstance(final_pred, cp.ndarray) else final_pred)
        np.save('oof_hill_climb', best_ensemble_np)
        
        # Return the DataFrame of added models, the ensemble OOF predictions, the test predictions, and the ensemble weights dict
        return df, best_ensemble_np, (cp.asnumpy(final_pred) if isinstance(final_pred, cp.ndarray) else final_pred), ensemble_weights
    
    # Example usage
    # df, oof_preds, test_preds, weights = HillClimb(train, test, 'target', iterations=10, min_improvement=1e-7)