# Test of custom kernels (Gamma kernel) written with ChatGPT

- creation date 2025-02-27
- last update : 2025-02-28 : Not working

In [None]:
import numpy as np
from sklearn.utils.optimize import _check_optimize_result
from functools import partial
import scipy.optimize
from scipy.optimize import minimize
from sklearn.preprocessing import StandardScaler

In [None]:
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process.kernels import ExpSineSquared
from sklearn.gaussian_process.kernels import RationalQuadratic
from sklearn.gaussian_process.kernels import WhiteKernel
from sklearn.gaussian_process.kernels import ConstantKernel
from sklearn.gaussian_process.kernels import Matern
from sklearn.gaussian_process.kernels import Kernel, Hyperparameter

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Kernel, Hyperparameter

In [None]:
from sklearn.gaussian_process.kernels import Kernel, Hyperparameter

class CustomKernel(Kernel):
    """Example kernel ``k(x, y) = exp(-param * ||x - y||)`` with one tunable hyperparameter.

    ``param`` is declared through ``hyperparameter_param`` with bounds
    ``[1e-2, 10]`` so that it can be optimised during fitting.
    """

    def __init__(self, param=1.0):
        self.param = param

    @property
    def hyperparameter_param(self):
        """Declare ``param`` as a numeric hyperparameter with its optimisation bounds."""
        return Hyperparameter("param", "numeric", (1e-2, 10.0))

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel matrix and, optionally, its gradient.

        Parameters
        ----------
        X : array-like
            Left argument; promoted to 2-D with ``np.atleast_2d``.
        Y : array-like or None
            Right argument; when None, ``k(X, X)`` is evaluated.
        eval_gradient : bool
            When True, also return the gradient with respect to
            ``log(param)``. Only supported when ``Y`` is None.

        Returns
        -------
        K : ndarray of shape (len(X), len(Y))
        grad_K : ndarray of shape (len(X), len(X), 1)
            Only returned when ``eval_gradient`` is True.

        Raises
        ------
        ValueError
            If ``eval_gradient`` is True while ``Y`` is given.
        """
        X = np.atleast_2d(X)
        if Y is None:
            Y = X
        elif eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")
        else:
            Y = np.atleast_2d(Y)

        dists = np.linalg.norm(X[:, np.newaxis] - Y, axis=2)
        K = np.exp(-self.param * dists)

        if eval_gradient:
            # scikit-learn optimises theta = log(param), so the gradient must be
            # dK/dlog(param) = param * dK/dparam = -param * dists * K.
            grad_K = (-self.param * dists * K)[:, :, np.newaxis]
            return K, grad_K

        return K

    def diag(self, X):
        """Return the diagonal of ``k(X, X)``: all ones, since ``exp(0) == 1``."""
        return np.ones(X.shape[0])

    def is_stationary(self):
        """Return True: the kernel depends only on ``||x - y||``."""
        return True


In [None]:
# Toy 1-D regression set: a sine wave sampled at 50 evenly spaced points on
# [0, 10], perturbed by Gaussian noise with standard deviation 0.1.
X = np.linspace(0, 10, num=50)[:, np.newaxis]
y = np.sin(X[:, 0]) + np.random.normal(loc=0, scale=0.1, size=X.shape[0])
custom_kernel = CustomKernel(param=1.0)

# Fit a GP with the custom kernel, then predict back on the training inputs.
gp = GaussianProcessRegressor(kernel=custom_kernel).fit(X, y)
y_pred, sigma = gp.predict(X, return_std=True)

In [None]:
# Evaluate the kernel and its gradient on the training inputs, then run two
# sanity checks on the gradient array.
K, grad_K = custom_kernel(X, eval_gradient=True)
print("Gradient NaN ?", np.any(np.isnan(grad_K)))  # expected: False
print("Gradient shape :", np.shape(grad_K))  # expected: (n_samples, n_samples, n_hyperparameters)

In [None]:
# With optimizer=None the regressor does no hyperparameter search: the kernel
# is used with the parameter values it was constructed with.
gp = GaussianProcessRegressor(
    kernel=custom_kernel,
    optimizer=None,
)

In [None]:
# Fit on the training data and predict mean and standard deviation at the
# same inputs.
y_pred, sigma = gp.fit(X, y).predict(X, return_std=True)

In [None]:
class PoissonKernel(Kernel):
    """Exponential kernel ``k(x, y) = exp(-intensity * ||x - y||)``.

    ``intensity`` is declared as an optimisable numeric hyperparameter with
    bounds ``[1e-2, 10]``.
    """

    def __init__(self, intensity=1.0):
        self.intensity = intensity

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel matrix and, optionally, its gradient.

        Parameters
        ----------
        X : array-like
            Left argument; promoted to 2-D with ``np.atleast_2d``.
        Y : array-like or None
            Right argument; when None, ``k(X, X)`` is evaluated.
        eval_gradient : bool
            When True, also return the gradient with respect to
            ``log(intensity)``. Only supported when ``Y`` is None.

        Returns
        -------
        K : ndarray of shape (len(X), len(Y))
        grad_K : ndarray of shape (len(X), len(X), 1)
            Only returned when ``eval_gradient`` is True.

        Raises
        ------
        ValueError
            If ``eval_gradient`` is True while ``Y`` is given.
        """
        X = np.atleast_2d(X)
        if Y is None:
            Y = X
        elif eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")
        else:
            Y = np.atleast_2d(Y)

        dists = np.linalg.norm(X[:, np.newaxis] - Y, axis=2)
        K = np.exp(-self.intensity * dists)

        if eval_gradient:
            # scikit-learn works with theta = log(intensity), hence the chain-rule
            # factor `intensity`: dK/dlog(i) = i * dK/di = -i * dists * K.
            grad_K = (-self.intensity * dists * K)[:, :, np.newaxis]
            return K, grad_K

        return K

    def diag(self, X):
        """Return the diagonal of ``k(X, X)``: all ones, since ``exp(0) == 1``."""
        return np.ones(X.shape[0])

    def is_stationary(self):
        """Return True: the kernel depends only on ``||x - y||``."""
        return True

    @property
    def hyperparameter_intensity(self):
        """Declare ``intensity`` as a free numeric hyperparameter with its bounds."""
        return Hyperparameter("intensity", "numeric", (1e-2, 10), fixed=False)


In [None]:
# Fit a GP with the Poisson kernel; fitting tunes the kernel's single
# hyperparameter, `intensity`.
poisson_kernel = PoissonKernel(intensity=1.0)
gp = GaussianProcessRegressor(
    kernel=poisson_kernel,
)
gp.fit(X, y)

In [None]:
from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
import numpy as np
from scipy.special import gamma

class GammaKernel(Kernel):
    """Gamma-function kernel ``k = exp(-G(a + d) / (G(a) * G(d + 1e-6)))``.

    ``G`` is the gamma function, ``a`` is ``alpha`` and
    ``d = ||x - y|| / length_scale``. Both ``alpha`` and ``length_scale`` are
    declared as optimisable hyperparameters with bounds ``[1e-2, 10]``.
    """

    def __init__(self, alpha=1.0, length_scale=1.0):
        self.alpha = alpha
        self.length_scale = length_scale

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel matrix and, optionally, its gradient.

        Parameters
        ----------
        X : array-like
            Left argument; promoted to 2-D with ``np.atleast_2d``.
        Y : array-like or None
            Right argument; when None, ``k(X, X)`` is evaluated.
        eval_gradient : bool
            When True, also return the gradient with respect to
            ``log(alpha)`` and ``log(length_scale)`` (in that order).
            Only supported when ``Y`` is None.

        Returns
        -------
        K : ndarray of shape (len(X), len(Y))
        grad_K : ndarray of shape (len(X), len(X), 2)
            Only returned when ``eval_gradient`` is True.

        Raises
        ------
        ValueError
            If ``eval_gradient`` is True while ``Y`` is given.
        """
        # Local import: the surrounding cell only imports `gamma`.
        from scipy.special import digamma, gammaln

        X = np.atleast_2d(X)
        symmetric = Y is None
        if symmetric:
            Y = X
        elif eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")
        else:
            Y = np.atleast_2d(Y)

        eps = 1e-6  # keeps gamma(d + eps) finite at d == 0
        dists = np.linalg.norm(X[:, np.newaxis] - Y, axis=2) / self.length_scale

        # ratio = G(a + d) / (G(a) * G(d + eps)), evaluated in log space because
        # gamma() itself overflows for arguments above ~171 and would yield
        # inf / inf = NaN. All arguments are positive within the declared
        # bounds, so gammaln equals log(gamma).
        log_ratio = (
            gammaln(self.alpha + dists)
            - gammaln(self.alpha)
            - gammaln(dists + eps)
        )
        with np.errstate(over="ignore"):
            ratio = np.exp(log_ratio)  # may be +inf, in which case K == 0
        K = np.exp(-ratio)

        grad_K = None
        if eval_gradient:
            # K * ratio written as exp(log_ratio - ratio) so an overflowing
            # ratio gives 0 instead of 0 * inf.
            k_ratio = np.exp(log_ratio - ratio)
            psi_sum = digamma(self.alpha + dists)
            grad_K = np.empty(K.shape + (2,))
            # dK/dlog(a) = -a * K * ratio * (psi(a + d) - psi(a))
            grad_K[:, :, 0] = -self.alpha * k_ratio * (psi_sum - digamma(self.alpha))
            # dK/dlog(l) = K * ratio * d * (psi(a + d) - psi(d + eps)),
            # using dd/dlog(l) = -d.
            grad_K[:, :, 1] = k_ratio * dists * (psi_sum - digamma(dists + eps))

        if symmetric:
            # Symmetrising and shifting eigenvalues only makes sense for the
            # square matrix k(X, X); a rectangular cross-covariance would make
            # K + K.T fail.
            K = (K + K.T) / 2
            min_eig = np.min(np.linalg.eigvalsh(K))
            if min_eig < 0:
                # NOTE: this diagonal shift is not reflected in grad_K.
                K -= 10 * min_eig * np.eye(*K.shape)

        if eval_gradient:
            return K, grad_K
        return K

    def diag(self, X):
        """Return ones as the diagonal of ``k(X, X)``.

        The exact value is ``exp(-1 / G(1e-6))``, which differs from 1 by
        about 1e-6.
        """
        return np.ones(X.shape[0])

    def is_stationary(self):
        """Return True: the kernel depends only on ``||x - y||``."""
        return True

    @property
    def hyperparameter_alpha(self):
        """Declare ``alpha`` as a free numeric hyperparameter with its bounds."""
        return Hyperparameter("alpha", "numeric", (1e-2, 10), fixed=False)

    @property
    def hyperparameter_length_scale(self):
        """Declare ``length_scale`` as a free numeric hyperparameter with its bounds."""
        return Hyperparameter("length_scale", "numeric", (1e-2, 10), fixed=False)


In [None]:
# Fit a GP with the Gamma kernel. The regressor's `alpha` (unrelated to the
# kernel's `alpha`) regularises the training covariance.
gamma_kernel = GammaKernel(length_scale=1.0, alpha=2.0)
gp = GaussianProcessRegressor(
    kernel=gamma_kernel,
    alpha=1e-6,
)
gp.fit(X, y)


In [None]:
from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
import numpy as np
from scipy.special import gamma

class GammaKernel(Kernel):
    """Gamma-function kernel ``k = exp(-G(a + d) / (G(a) * G(d + 1e-6)))``.

    ``G`` is the gamma function, ``a`` is ``alpha`` and
    ``d = ||x - y|| / length_scale``.

    Safeguards built into the evaluation:

    - a small offset (1e-6) avoids the singularity of the gamma function at 0;
    - ``k(X, X)`` is symmetrised as ``(K + K.T) / 2``;
    - if the smallest eigenvalue of ``k(X, X)`` is negative, the diagonal is
      shifted so the matrix stays invertible.
    """

    def __init__(self, alpha=1.0, length_scale=1.0):
        self.alpha = alpha
        self.length_scale = length_scale

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel matrix and, optionally, its gradient.

        Parameters
        ----------
        X : array-like
            Left argument; promoted to 2-D with ``np.atleast_2d``.
        Y : array-like or None
            Right argument; when None, ``k(X, X)`` is evaluated.
        eval_gradient : bool
            When True, also return the gradient with respect to
            ``log(alpha)`` and ``log(length_scale)`` (in that order).
            Only supported when ``Y`` is None.

        Returns
        -------
        K : ndarray of shape (len(X), len(Y))
        grad_K : ndarray of shape (len(X), len(X), 2)
            Only returned when ``eval_gradient`` is True.

        Raises
        ------
        ValueError
            If ``eval_gradient`` is True while ``Y`` is given.
        """
        # Local import: the surrounding cell only imports `gamma`.
        from scipy.special import digamma, gammaln

        X = np.atleast_2d(X)
        symmetric = Y is None
        if symmetric:
            Y = X
        elif eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")
        else:
            Y = np.atleast_2d(Y)

        eps = 1e-6  # keeps gamma(d + eps) finite at d == 0
        dists = np.linalg.norm(X[:, np.newaxis] - Y, axis=2) / self.length_scale

        # ratio = G(a + d) / (G(a) * G(d + eps)), evaluated in log space because
        # gamma() itself overflows for arguments above ~171 and would yield
        # inf / inf = NaN. All arguments are positive within the declared
        # bounds, so gammaln equals log(gamma).
        log_ratio = (
            gammaln(self.alpha + dists)
            - gammaln(self.alpha)
            - gammaln(dists + eps)
        )
        with np.errstate(over="ignore"):
            ratio = np.exp(log_ratio)  # may be +inf, in which case K == 0
        K = np.exp(-ratio)

        grad_K = None
        if eval_gradient:
            # K * ratio written as exp(log_ratio - ratio) so an overflowing
            # ratio gives 0 instead of 0 * inf.
            k_ratio = np.exp(log_ratio - ratio)
            psi_sum = digamma(self.alpha + dists)
            grad_K = np.empty(K.shape + (2,))
            # dK/dlog(a) = -a * K * ratio * (psi(a + d) - psi(a))
            grad_K[:, :, 0] = -self.alpha * k_ratio * (psi_sum - digamma(self.alpha))
            # dK/dlog(l) = K * ratio * d * (psi(a + d) - psi(d + eps)),
            # using dd/dlog(l) = -d.
            grad_K[:, :, 1] = k_ratio * dists * (psi_sum - digamma(dists + eps))

        if symmetric:
            # Symmetrising and shifting eigenvalues only makes sense for the
            # square matrix k(X, X); a rectangular cross-covariance would make
            # K + K.T fail.
            K = (K + K.T) / 2
            min_eig = np.min(np.linalg.eigvalsh(K))
            if min_eig < 0:
                # NOTE: this diagonal shift is not reflected in grad_K.
                K -= 10 * min_eig * np.eye(*K.shape)

        if eval_gradient:
            return K, grad_K
        return K

    def diag(self, X):
        """Return ones as the diagonal of ``k(X, X)``.

        The exact value is ``exp(-1 / G(1e-6))``, which differs from 1 by
        about 1e-6.
        """
        return np.ones(X.shape[0])

    def is_stationary(self):
        """Return True: the kernel depends only on ``||x - y||``."""
        return True

    @property
    def hyperparameter_alpha(self):
        """Declare ``alpha`` as a free numeric hyperparameter with its bounds."""
        return Hyperparameter("alpha", "numeric", (1e-2, 10), fixed=False)

    @property
    def hyperparameter_length_scale(self):
        """Declare ``length_scale`` as a free numeric hyperparameter with its bounds."""
        return Hyperparameter("length_scale", "numeric", (1e-2, 10), fixed=False)


In [None]:
# Re-run the fit with the revised Gamma kernel. The regressor's `alpha`
# (unrelated to the kernel's `alpha`) regularises the training covariance.
gamma_kernel = GammaKernel(length_scale=1.0, alpha=2.0)
gp = GaussianProcessRegressor(
    kernel=gamma_kernel,
    alpha=1e-6,
)
gp.fit(X, y)

In [None]:
class GammaKernel(Kernel):
    """Exponential stand-in for the gamma kernel: ``k = exp(-alpha * ||x - y|| / length_scale)``.

    The gamma-function expression is replaced by a plain exponential for
    numerical stability. Only the quotient ``alpha / length_scale`` enters the
    formula, so the two hyperparameters cannot be identified separately.
    """

    def __init__(self, alpha=1.0, length_scale=1.0):
        self.alpha = alpha
        self.length_scale = length_scale

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel matrix and, optionally, its gradient.

        Parameters
        ----------
        X : array-like
            Left argument; promoted to 2-D with ``np.atleast_2d``.
        Y : array-like or None
            Right argument; when None, ``k(X, X)`` is evaluated.
        eval_gradient : bool
            When True, also return the gradient with respect to
            ``log(alpha)`` and ``log(length_scale)`` (in that order).
            Only supported when ``Y`` is None.

        Returns
        -------
        K : ndarray of shape (len(X), len(Y))
        grad_K : ndarray of shape (len(X), len(X), 2)
            Only returned when ``eval_gradient`` is True.

        Raises
        ------
        ValueError
            If ``eval_gradient`` is True while ``Y`` is given.
        """
        X = np.atleast_2d(X)
        symmetric = Y is None
        if symmetric:
            Y = X
        elif eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")
        else:
            Y = np.atleast_2d(Y)

        dists = np.linalg.norm(X[:, np.newaxis] - Y, axis=2) / self.length_scale
        K = np.exp(-self.alpha * dists)

        grad_K = None
        if eval_gradient:
            # With d = r / l: dK/dlog(alpha) = -alpha * d * K and
            # dK/dlog(l) = +alpha * d * K (because dd/dlog(l) = -d).
            decay = self.alpha * dists * K
            grad_K = np.dstack((-decay, decay))

        if symmetric:
            # Symmetrising and shifting eigenvalues only makes sense for the
            # square matrix k(X, X); a rectangular cross-covariance would make
            # K + K.T fail.
            K = (K + K.T) / 2
            min_eig = np.min(np.linalg.eigvalsh(K))
            if min_eig < 0:
                # NOTE: this diagonal shift is not reflected in grad_K.
                K -= 10 * min_eig * np.eye(*K.shape)

        if eval_gradient:
            return K, grad_K
        return K

    def diag(self, X):
        """Return the diagonal of ``k(X, X)``: all ones, since ``exp(0) == 1``."""
        return np.ones(X.shape[0])

    def is_stationary(self):
        """Return True: the kernel depends only on ``||x - y||``."""
        return True

    @property
    def hyperparameter_alpha(self):
        """Declare ``alpha`` as a free numeric hyperparameter with its bounds."""
        return Hyperparameter("alpha", "numeric", (1e-2, 10), fixed=False)

    @property
    def hyperparameter_length_scale(self):
        """Declare ``length_scale`` as a free numeric hyperparameter with its bounds."""
        return Hyperparameter("length_scale", "numeric", (1e-2, 10), fixed=False)


In [None]:
# Fit a GP with the exponential variant of the Gamma kernel. The regressor's
# `alpha` (unrelated to the kernel's `alpha`) regularises the covariance.
gamma_kernel = GammaKernel(length_scale=1.0, alpha=2.0)
gp = GaussianProcessRegressor(
    kernel=gamma_kernel,
    alpha=1e-6,
)
gp.fit(X, y)

In [None]:
class GammaKernel(Kernel):
    """Exponential stand-in for the gamma kernel: ``k = exp(-alpha * ||x - y|| / length_scale)``.

    The gamma-function expression is replaced by a plain exponential for
    numerical stability. Only the quotient ``alpha / length_scale`` enters the
    formula, so the two hyperparameters cannot be identified separately.
    """

    def __init__(self, alpha=1.0, length_scale=1.0):
        self.alpha = alpha
        self.length_scale = length_scale

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel matrix and, optionally, its gradient.

        Parameters
        ----------
        X : array-like
            Left argument; promoted to 2-D with ``np.atleast_2d``.
        Y : array-like or None
            Right argument; when None, ``k(X, X)`` is evaluated.
        eval_gradient : bool
            When True, also return the gradient with respect to
            ``log(alpha)`` and ``log(length_scale)`` (in that order).
            Only supported when ``Y`` is None.

        Returns
        -------
        K : ndarray of shape (len(X), len(Y))
        grad_K : ndarray of shape (len(X), len(X), 2)
            Only returned when ``eval_gradient`` is True.

        Raises
        ------
        ValueError
            If ``eval_gradient`` is True while ``Y`` is given.
        """
        X = np.atleast_2d(X)
        symmetric = Y is None
        if symmetric:
            Y = X
        elif eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")
        else:
            Y = np.atleast_2d(Y)

        dists = np.linalg.norm(X[:, np.newaxis] - Y, axis=2) / self.length_scale
        K = np.exp(-self.alpha * dists)

        grad_K = None
        if eval_gradient:
            # With d = r / l: dK/dlog(alpha) = -alpha * d * K and
            # dK/dlog(l) = +alpha * d * K (because dd/dlog(l) = -d).
            decay = self.alpha * dists * K
            grad_K = np.dstack((-decay, decay))

        if symmetric:
            # Symmetrising and shifting eigenvalues only makes sense for the
            # square matrix k(X, X); a rectangular cross-covariance would make
            # K + K.T fail.
            K = (K + K.T) / 2
            min_eig = np.min(np.linalg.eigvalsh(K))
            if min_eig < 0:
                # NOTE: this diagonal shift is not reflected in grad_K.
                K -= 10 * min_eig * np.eye(*K.shape)

        if eval_gradient:
            return K, grad_K
        return K

    def diag(self, X):
        """Return the diagonal of ``k(X, X)``: all ones, since ``exp(0) == 1``."""
        return np.ones(X.shape[0])

    def is_stationary(self):
        """Return True: the kernel depends only on ``||x - y||``."""
        return True

    @property
    def hyperparameter_alpha(self):
        """Declare ``alpha`` as a free numeric hyperparameter with its bounds."""
        return Hyperparameter("alpha", "numeric", (1e-2, 10), fixed=False)

    @property
    def hyperparameter_length_scale(self):
        """Declare ``length_scale`` as a free numeric hyperparameter with its bounds."""
        return Hyperparameter("length_scale", "numeric", (1e-2, 10), fixed=False)


In [None]:
# Fit a GP with the current GammaKernel definition. The regressor's `alpha`
# (unrelated to the kernel's `alpha`) regularises the training covariance.
gamma_kernel = GammaKernel(length_scale=1.0, alpha=2.0)
gp = GaussianProcessRegressor(
    kernel=gamma_kernel,
    alpha=1e-6,
)
gp.fit(X, y)

In [None]:
def optimizer(obj_func, x0, bounds):
    """Minimise ``obj_func`` with L-BFGS-B, allowing up to 20,000 iterations.

    Parameters
    ----------
    obj_func : callable
        Must return ``(value, gradient)`` for a given point, because the
        solver is called with ``jac=True``.
    x0 : array-like
        Starting point.
    bounds : sequence of (min, max) pairs
        Box constraints passed to the solver.

    Returns
    -------
    tuple
        ``(x, fun)``: the point found by the solver and the objective value there.
    """
    solver_options = {'maxiter': 20_000}
    result = scipy.optimize.minimize(
        obj_func,
        x0,
        method="L-BFGS-B",
        jac=True,
        bounds=bounds,
        options=solver_options,
    )
    return result.x, result.fun

In [None]:
# Generate example data: 20 noisy samples of 0.5 * sin(3x) on [0, 5].
rng = np.random.RandomState(4)
X = rng.uniform(0, 5, 20)[:, np.newaxis]
y = 0.5 * np.sin(3 * X[:, 0]) + rng.normal(0, 0.5, X.shape[0])

# Standardise the inputs.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Build the custom gamma kernel.
kernel = GammaKernel(length_scale=2.0, alpha=1.0)

# Build the Gaussian-process regressor. The custom `optimizer` callable is
# passed so the L-BFGS-B iteration cap set there applies.
gp = GaussianProcessRegressor(
    kernel=kernel,
    n_restarts_optimizer=10,
    alpha=1e-2,
    optimizer=optimizer,
)

# Fit the model to the data.
gp.fit(X_scaled, y)

# Evaluation grid in the original input units. It is built once and reused for
# both prediction and plotting so the two cannot drift apart.
X_test = np.linspace(0, 5, 100)
X_test_scaled = scaler.transform(X_test[:, np.newaxis])
y_pred, sigma = gp.predict(X_test_scaled, return_std=True)

# Plot observations, predictive mean and a +/- 1 standard deviation band.
import matplotlib.pyplot as plt

plt.figure()
plt.plot(X, y, 'r.', markersize=10, label='Données observées')
plt.plot(X_test, y_pred, 'b-', label='Prédiction')
plt.fill_between(X_test,
                 y_pred - sigma,
                 y_pred + sigma,
                 alpha=0.2,
                 color='blue',
                 label='Incertitude')
plt.xlabel('X')
plt.ylabel('y')
plt.title('Régression de processus gaussien avec noyau Gamma')
plt.legend()
plt.show()
