In [1]:
import os
os.chdir('..')

In [2]:
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', 500)

train_df = pd.read_csv("data/train.csv")
y_target = np.log1p( train_df.Calories.values )
train_df.head()

Unnamed: 0,id,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,0,male,36,189.0,82.0,26.0,101.0,41.0,150.0
1,1,female,64,163.0,60.0,8.0,85.0,39.7,34.0
2,2,female,51,161.0,64.0,7.0,84.0,39.8,29.0
3,3,male,20,192.0,90.0,25.0,105.0,40.7,140.0
4,4,female,38,166.0,61.0,25.0,102.0,40.6,146.0


In [3]:
files = []
X_train = []
PATH = "data/oof_preds/"

print("Loading files...")
for c in ['xgb','cat','nn']:
    print(f"=> {c} ",end="")
    oof_preds = np.load(f"{PATH}oof_{c}.npy")

    # IF NOT LOG1P THEN APPLY LOG1P
    if oof_preds.mean() > 10: 
        oof_preds = np.log1p(oof_preds)

    X_train.append(oof_preds)
    files.append(f"oof_{c}")

Loading files...
=> xgb => cat => nn 

In [4]:
import numpy as np
from sklearn.metrics import mean_squared_error, root_mean_squared_error

class HillClimbEnsembler:
    def __init__(self, 
                 evaluation_metric=root_mean_squared_error,
                 weight_search_space=None,
                 maximum_number_of_models=50,
                 tolerance=1e-5,
                 allow_negative_weights=False):
        """
        Initialize the ensembler.
        
        Parameters:
        - metric: function to optimize (e.g., root_mean_squared_error)
        - weight_range: array of weights to try when combining models
        - max_models: maximum number of models to include
        - tol: minimum improvement required to continue
        - use_negative_weights: allow negative weights in ensemble
        """
        self.evaluation_metric = evaluation_metric
        self.maximum_number_of_models = maximum_number_of_models
        self.tolerance = tolerance
        self.allow_negative_weights = allow_negative_weights
        self.weight_search_space = (
            weight_search_space 
            if weight_search_space is not None 
            else np.linspace(0.5, 0, 51)
        )
        if allow_negative_weights:
            self.weight_search_space = np.linspace(0.5, -0.5, 101)

        self.selected_model_indices = []
        self.model_weights = []
        self.performance_history = []

    def fit(self, oof_predictions, y_true):
        """
        Train the hill climbing ensemble.
        
        Parameters:
        - oof_predictions: list of numpy arrays (each of shape (n_samples,))
        - y_true: numpy array of true target values (shape: (n_samples,))
        """
        n_models = len(oof_predictions)
        y_true = np.array(y_true)

        # Step 1: Find the best single model
        scores = [self.evaluation_metric(y_true, pred) for pred in oof_predictions]
        best_index = np.argmin(scores)
        best_score = scores[best_index]
        current_ensemble = oof_predictions[best_index].copy()

        # Initialize tracking variables
        self.selected_model_indices = [best_index]
        self.model_weights = [1.0]
        self.performance_history = [best_score]

        print(f"Initial model {files[best_index]} selected with score: {best_score:.5f}")

        for iteration in range(self.maximum_number_of_models):
            best_candidate_index = None
            best_candidate_weight = None
            best_candidate_score = best_score
            best_candidate_prediction = None

            for candidate_index in range(n_models):
                if candidate_index in self.selected_model_indices:
                    continue

                for weight in self.weight_search_space:
                    candidate_pred = (
                        (1 - weight) * current_ensemble + weight * oof_predictions[candidate_index]
                    )
                    score = self.evaluation_metric(y_true, candidate_pred)

                    if score < best_candidate_score:
                        best_candidate_score = score
                        best_candidate_index = candidate_index
                        best_candidate_weight = weight
                        best_candidate_prediction = candidate_pred

            
            if best_candidate_index is None:
                print(f"No further improvement after {iteration} iterations.")
                break

            # Break out of the loop if the score is not improving by the tolerance
            if best_score < best_candidate_score + self.tolerance:
                print(f"Score not improving by {self.tolerance} after {iteration} iterations.")
                break

            # Update ensemble
            current_ensemble = best_candidate_prediction
            self.model_weights = [
                (1 - best_candidate_weight) * w for w in self.model_weights
            ]
            self.model_weights.append(best_candidate_weight)
            self.selected_model_indices.append(best_candidate_index)
            self.performance_history.append(best_candidate_score)

            print(
                f"Iteration {iteration + 1}: added model {files[best_candidate_index]} "
                f"with weight {best_candidate_weight:.3f} | New score: {best_candidate_score:.5f}"
            )

            best_score = best_candidate_score

    def predict(self, prediction_list):
        """
        Generate ensemble predictions using learned model indices and weights.

        Parameters:
        - prediction_list: list of numpy arrays (each of shape (n_samples,))
                        Must match the ordering of models used during training.

        Returns:
        - ensemble_prediction: numpy array of shape (n_samples,)
        """
        if not self.selected_model_indices or not self.model_weights:
            raise ValueError("The ensemble has not been trained yet. Call fit() first.")

        selected_predictions = [
            prediction_list[i] * weight
            for i, weight in zip(self.selected_model_indices, self.model_weights)
        ]

        ensemble_prediction = np.sum(selected_predictions, axis=0)
        return ensemble_prediction
    
    def get_ensemble_info(self):
        """
        Return all key information needed to reproduce or audit the ensemble.

        Returns:
            dict with:
            - 'selected_model_indices': list of model indices used in the ensemble
            - 'model_weights': list of corresponding weights
            - 'performance_history': list of metric values at each ensemble iteration
            - 'final_score': best achieved ensemble score
            - 'hyperparameters': dictionary of relevant init settings
        """
        if not self.selected_model_indices or not self.model_weights:
            raise ValueError("No ensemble has been trained yet. Call fit() first.")

        return {
            "selected_model_indices": self.selected_model_indices,
            "model_weights": self.model_weights,
            "performance_history": self.performance_history,
            "final_score": self.performance_history[-1] if self.performance_history else None,
            "hyperparameters": {
                "evaluation_metric": self.evaluation_metric.__name__ if hasattr(self.evaluation_metric, '__name__') else str(self.evaluation_metric),
                "tolerance": self.tolerance,
                "maximum_number_of_models": self.maximum_number_of_models,
                "allow_negative_weights": self.allow_negative_weights,
                "weight_search_space": (self.weight_search_space[0], self.weight_search_space[-1], len(self.weight_search_space))
            }
        }


In [5]:
# Assuming you already trained it
ensembler = HillClimbEnsembler()
ensembler.fit(X_train, y_target)

Initial model oof_cat selected with score: 0.05982
Iteration 1: added model oof_xgb with weight 0.480 | New score: 0.05940
Iteration 2: added model oof_nn with weight 0.190 | New score: 0.05932
No further improvement after 2 iterations.


In [6]:
ensembler.get_ensemble_info()

{'selected_model_indices': [np.int64(1), 0, 2],
 'model_weights': [np.float64(0.4212),
  np.float64(0.38880000000000003),
  np.float64(0.19)],
 'performance_history': [0.059821028159519334,
  0.05940478825277589,
  0.059318550201120056],
 'final_score': 0.059318550201120056,
 'hyperparameters': {'evaluation_metric': 'root_mean_squared_error',
  'tolerance': 1e-05,
  'maximum_number_of_models': 50,
  'allow_negative_weights': False,
  'weight_search_space': (np.float64(0.5), np.float64(0.0), 51)}}