In [2]:
%pip install cupy-cuda12x

[33mDEPRECATION: Loading egg at /home/ika1/.conda/envs/py311/lib/python3.11/site-packages/nms-0.0.0-py3.11-linux-x86_64.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
Note: you may need to restart the kernel to use updated packages.


In [3]:
from hill_climbing import Climber
import cupy as cp
import pandas as pd 
import numpy as np

# --- Out-of-fold (OOF) predictions: one saved NumPy array per base model ---
oof_cb = np.load("oofs/cb_oof_preds.npy")
oof_xgb = np.load("oofs/xgb_oof_preds.npy")

# One column per model. Only the CatBoost array is passed through log1p here.
# NOTE(review): presumably the cb OOFs were saved on the raw target scale and
# the xgb OOFs already in log1p space -- confirm against the training code.
oofs = pd.DataFrame(dict(cb=np.log1p(oof_cb), xgb=oof_xgb))

# --- Targets: the "Calories" column of train.csv, moved into log1p space ---
raw_labels = pd.read_csv("train.csv")["Calories"]
labels = np.log1p(raw_labels)

# --- Test-set predictions, exposed under the same column names as `oofs` ---
# The columns are copied as-is; no transformation is applied to them.
# NOTE(review): the cb OOFs above go through log1p but the cb test column does
# not -- verify that test_preds/gbdt.csv already stores cb in log1p space.
test_preds_gbdt = pd.read_csv("test_preds/gbdt.csv")
test_preds = pd.DataFrame({
    "cb": test_preds_gbdt["cb"],
    "xgb": test_preds_gbdt["xgb_best"],
})


def rmse(y_true, y_pred):
    """Root-mean-squared error between ``y_true`` and ``y_pred``.

    The array library is chosen from ``y_true`` alone: CuPy routines are used
    when it is a ``cupy.ndarray``, NumPy routines otherwise. ``y_pred`` is not
    inspected.
    """
    xp = cp if isinstance(y_true, cp.ndarray) else np
    squared_error = (y_true - y_pred) ** 2
    return xp.sqrt(xp.mean(squared_error))

# Configure the hill-climbing ensembler: minimise `rmse`, with negative
# weights disallowed.
# NOTE(review): the recorded run printed "Using GPU: False" even though
# use_gpu=True is requested here -- check the GPU/CuPy setup if that matters.
climber = Climber(
    objective="minimize",
    eval_metric=rmse,
    allow_negative_weights=False,
    precision=0.0001,
    score_decimal_places=6,
    use_gpu=True,
    n_jobs=1,
)

# Fit on the OOF frame against the log1p labels; `climber` is rebound to
# whatever fit() returns, exactly as a chained call would do.
climber = climber.fit(oofs, labels)

# The labels are in log1p space, so predictions from this climber need
# np.expm1 before they are on the raw target scale.

[1m[94mConfiguration[0m

   Metric:                       rmse
   Objective:                    minimize
   Precision:                    0.0001
   Allow negative weights:       False
   Starting model:               best
   Number of parallel jobs:      1
   Number of models:             2
   Using GPU:                    False


[1m[94mModels[0m

   [92mcb    0.059315 █ (best)[0m
   xgb   0.059892 ████████████████████████████████████████


[1m[94mRunning Hill Climbing[0m

   Iter   Model     Weight        Score      Improvement         Time
   ────────────────────────────────────────────────────────────────
   [92m   0   cb      1.0000     0.059315                -            -[0m
   [92m   1   xgb     0.3209     0.059148         0.000167        55.10[0m


[1m[94mResults[0m

   Number of models in ensemble:      2
   Overall improvement:               [92m+0.000167 (+0.28%)[0m
   Total time:                        55.14 seconds
   Average iteration time:          

In [4]:
# Blend the test predictions with the fitted climber, then undo the log1p
# target transform so `preds` ends up on the raw target scale.
blended_log_preds = climber.predict(test_preds)
preds = np.expm1(blended_log_preds)

In [5]:
# Build the hill-climbing submission: test ids plus the blended predictions.
test_df = pd.read_csv("test.csv")

# `preds` already had np.expm1 applied when it was computed from
# climber.predict(...). Applying expm1 a second time here would exponentiate
# the predictions twice, so they are written out unchanged.
submission = pd.DataFrame({"id": test_df["id"], "prediction": preds})

submission.to_csv("submission_hillclimb.csv", index=False)


In [8]:
import numpy as np
from scipy.optimize import minimize

# Pull plain NumPy arrays out of the pandas objects for the optimizer.
X = oofs.to_numpy()    # one row per sample, one column per model
y = labels.to_numpy()  # log1p-transformed targets

# 2. Define objective function
def objective(weights):
    """Return the RMSE of the weighted blend ``X @ weights`` against ``y``.

    ``X`` and ``y`` are read from the enclosing (notebook) namespace.
    """
    residuals = y - X.dot(weights)
    return np.sqrt(np.mean(residuals ** 2))

# 3. Constraint: the blend weights must sum to 1.
#    Written as an explicit list of constraint dicts (the original parentheses
#    around a single dict did not create a tuple).
constraints = [
    {'type': 'eq', 'fun': lambda w: np.sum(w) - 1},
]

# 4. Bounds: keep every weight inside [0, 1] (no negative weights).
n_models = X.shape[1]
bounds = [(0, 1) for _ in range(n_models)]

# 5. Initial guess: equal weight for every model.
initial_weights = np.ones(n_models) / n_models

# 6. Run the constrained optimisation (SLSQP supports bounds + equality constraints).
result = minimize(
    objective,
    x0=initial_weights,
    method='SLSQP',
    bounds=bounds,
    constraints=constraints,
    options={'maxiter': 1000, 'ftol': 1e-8}
)

# Do not silently use weights from a failed or truncated run: `result.x` is
# populated even when the solver stops without converging.
if not result.success:
    raise RuntimeError(
        f"Ensemble weight optimisation did not converge: {result.message}"
    )

# 7. Extract the optimal weights; their order matches the columns of `oofs`.
best_weights = result.x
print("Optimal weights:", dict(zip(oofs.columns, best_weights)))


# Blend the test predictions with the weights the optimizer found, instead of
# float literals pasted from an earlier run's printout (those go stale as soon
# as the OOFs or the model list change). `best_weights` is ordered like
# `oofs.columns`, so the test columns are selected in that same order; this
# also keeps the cell re-runnable after "final_preds" has been added.
blend_columns = list(oofs.columns)
test_preds["final_preds"] = test_preds[blend_columns].to_numpy() @ best_weights

# The weights were fitted against log1p labels, so the blend is mapped back
# with expm1 exactly once before it is written out.
submission = pd.DataFrame({"id": test_df["id"], "prediction": np.expm1(test_preds["final_preds"])})

submission.to_csv("submission_optimized_weights.csv", index=False)

Optimal weights: {'cb': 0.6792815957311169, 'xgb': 0.3207184042688831}
