<!-- # Copyright (c) 2025 takotime808 -->

# Grid Search Example #

In [None]:
# Grid search over regressors that report predictive uncertainty, implemented without the uncertainty_toolbox dependency.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.datasets import make_regression
from sklearn.base import BaseEstimator, RegressorMixin
from typing import Tuple, Union, Optional
import pandas as pd
import seaborn as sns

class RandomForestWithUncertainty(RandomForestRegressor):
    """Random forest regressor whose ``predict`` can also report ensemble spread.

    The point prediction is the mean of the individual trees' predictions; the
    optional uncertainty is the standard deviation of those same per-tree
    predictions.
    """

    def predict(self, X, return_std=False):
        """Predict targets for ``X``, optionally with a per-sample spread.

        Args:
            X: Samples to predict; handed unchanged to every fitted tree.
            return_std: When true, also return the standard deviation of the
                per-tree predictions.

        Returns:
            The mean of the tree predictions, or a ``(mean, std)`` tuple when
            ``return_std`` is true.
        """
        # Axis 1 indexes the ensemble members (one slice per tree).
        per_tree = np.stack(
            [member.predict(X) for member in self.estimators_],
            axis=1,
        )
        ensemble_mean = per_tree.mean(axis=1)
        if not return_std:
            return ensemble_mean
        return ensemble_mean, per_tree.std(axis=1)

# Gradient-boosting wrapper: n_estimators is passed explicitly to every internal regressor.

class GradientBoostingWithUncertainty(BaseEstimator, RegressorMixin):
    """Gradient boosting regressor with a quantile-based uncertainty estimate.

    Three ``GradientBoostingRegressor`` models are fitted on the same data:
    a squared-error model for the point prediction and two quantile models
    for the lower and upper ends of a central interval with coverage
    ``alpha``. The reported uncertainty is half the width of that interval.

    Args:
        alpha: Nominal coverage of the central interval; the quantile models
            target ``(1 - alpha) / 2`` and ``1 - (1 - alpha) / 2``.
        n_estimators: Number of boosting stages used by each of the three
            internal regressors.

    Attributes set by ``fit``:
        lower, upper, mid: The fitted lower-quantile, upper-quantile and
            squared-error regressors.
    """

    def __init__(self, alpha=0.95, n_estimators=100):
        # Only store the hyper-parameters here. The internal regressors are
        # built in ``fit`` so that values changed later through ``set_params``
        # (as GridSearchCV does after cloning) are actually used.
        self.alpha = alpha
        self.n_estimators = n_estimators

    def _build_estimators(self):
        """Create the three internal regressors from the current parameters."""
        tail = (1 - self.alpha) / 2
        self.lower = GradientBoostingRegressor(
            loss="quantile", alpha=tail, n_estimators=self.n_estimators
        )
        self.upper = GradientBoostingRegressor(
            loss="quantile", alpha=1 - tail, n_estimators=self.n_estimators
        )
        self.mid = GradientBoostingRegressor(
            loss="squared_error", n_estimators=self.n_estimators
        )

    def fit(self, X, y):
        """Fit the lower, upper and point-prediction models on ``(X, y)``.

        Returns:
            ``self``, to allow call chaining.
        """
        # Rebuild on every fit so the models always reflect the current
        # ``alpha`` and ``n_estimators``.
        self._build_estimators()
        self.lower.fit(X, y)
        self.upper.fit(X, y)
        self.mid.fit(X, y)
        return self

    def predict(self, X, return_std=False):
        """Predict targets for ``X``, optionally with an uncertainty estimate.

        Args:
            X: Samples to predict.
            return_std: When true, also return half the width of the
                predicted quantile interval for each sample.

        Returns:
            The squared-error model's prediction, or a ``(prediction, std)``
            tuple when ``return_std`` is true.
        """
        y_pred = self.mid.predict(X)
        if not return_std:
            return y_pred
        lower = self.lower.predict(X)
        upper = self.upper.predict(X)
        # The quantile models are fitted independently, so their predictions
        # can cross; take the absolute width so the uncertainty is never
        # negative (negative error-bar sizes are rejected by matplotlib).
        std = np.abs(upper - lower) / 2
        return y_pred, std



class ModelWrapper(BaseEstimator, RegressorMixin):
    """Thin wrapper that lets the wrapped regressor itself be a searchable parameter.

    Exposing the regressor as the ``estimator`` parameter allows a parameter
    grid to swap whole models (``"estimator": [...]``) and tune their own
    hyper-parameters through the ``estimator__<name>`` syntax.

    Args:
        estimator: The regressor to delegate to. It must be set (directly or
            through ``set_params``) before ``fit`` is called.
    """

    def __init__(self, estimator=None):
        self.estimator = estimator

    def fit(self, X, y):
        """Fit the wrapped estimator in place on ``(X, y)`` and return ``self``."""
        self.estimator.fit(X, y)
        return self

    def predict(self, X, return_std=False):
        """Delegate prediction to the wrapped estimator.

        Args:
            X: Samples to predict.
            return_std: When true, the flag is forwarded to the wrapped
                estimator, which must then support it.

        Returns:
            Whatever the wrapped estimator's ``predict`` returns.
        """
        # Forward the keyword only when it is requested, so regressors whose
        # ``predict`` has no ``return_std`` argument still work for plain
        # predictions (e.g. during scoring).
        if return_std:
            return self.estimator.predict(X, return_std=return_std)
        return self.estimator.predict(X)

def run_uncertainty_gridsearch_with_plots():
    """Grid-search several uncertainty-aware regressors and plot the winner.

    Generates a synthetic regression problem, holds out the first 50 samples,
    runs a 3-fold ``GridSearchCV`` over a Gaussian process, a random forest
    and a gradient-boosting wrapper on the remaining samples, and then, for
    the best model, shows two plots of the held-out predictions and prints a
    short summary. Returns nothing.
    """
    X, y = make_regression(n_samples=200, n_features=3, noise=5.0, random_state=42)

    # Hold out the first 50 samples so the plotted predictions and
    # uncertainties are out-of-sample; evaluating on rows the model was
    # fitted on makes every model look better than it is. A plain slice is
    # enough because make_regression shuffles the samples by default.
    n_test = 50
    X_test, y_true = X[:n_test], y[:n_test]
    X_train, y_train = X[n_test:], y[n_test:]

    # One sub-grid per model family; "estimator__<name>" reaches the wrapped
    # regressor's own hyper-parameters through ModelWrapper.
    param_grid = [
        {
            "estimator": [GaussianProcessRegressor()],
            "estimator__kernel": [RBF(), Matern(nu=1.5)],
            "estimator__alpha": [1e-10, 1e-5],
        },
        {
            "estimator": [RandomForestWithUncertainty(n_estimators=100)],
            "estimator__max_depth": [3, 5],
        },
        {
            "estimator": [GradientBoostingWithUncertainty()],
            "estimator__alpha": [0.9, 0.95],
            "estimator__n_estimators": [100],
        },
    ]

    grid = GridSearchCV(ModelWrapper(), param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
    grid.fit(X_train, y_train)

    best_model = grid.best_estimator_
    y_pred, y_std = best_model.predict(X_test, return_std=True)

    df = pd.DataFrame({
        "Prediction": y_pred,
        "True": y_true,
        "Uncertainty": y_std
    })

    # Plot 1: per-sample predictions with error bars against the true values.
    sample_index = np.arange(len(y_pred))
    _, ax_err = plt.subplots(figsize=(10, 6))
    ax_err.errorbar(sample_index, y_pred, yerr=y_std, fmt='o', label="Prediction ± Uncertainty")
    ax_err.plot(sample_index, y_true, 'k--', label="True")
    ax_err.set_title("Predictions with Uncertainty")
    ax_err.set_xlabel("Sample Index")
    ax_err.set_ylabel("Target Value")
    ax_err.legend()
    plt.tight_layout()
    plt.show()

    # Plot 2: predicted vs. true values, coloured and sized by uncertainty.
    # Use an explicit new figure: with a non-interactive backend plt.show()
    # does not close the first figure, so drawing on the implicit current
    # axes would overlay this plot on the previous one.
    _, ax_scatter = plt.subplots()
    sns.scatterplot(
        x="True",
        y="Prediction",
        hue="Uncertainty",
        size="Uncertainty",
        sizes=(20, 200),
        palette="coolwarm",
        data=df,
        ax=ax_scatter,
    )
    ax_scatter.set_title("Prediction vs. True with Uncertainty")
    # Diagonal reference line: points on it are predicted exactly.
    ax_scatter.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--')
    ax_scatter.set_xlabel("True Value")
    ax_scatter.set_ylabel("Predicted Value")
    plt.tight_layout()
    plt.show()

    print("Best model:", best_model)
    print("Best params:", grid.best_params_)
    print("Mean Uncertainty:", np.mean(y_std))

# Run the demo: fits the grid search, shows both plots and prints the summary.
run_uncertainty_gridsearch_with_plots()
