more restructuring
MarcusMNoack committed Mar 22, 2024
1 parent b74f51d commit bd09870
Showing 9 changed files with 249 additions and 245 deletions.
1 change: 1 addition & 0 deletions fvgp/gp.py
@@ -31,6 +31,7 @@
# when using gp2Scale but the solution is dense we should go with dense linalg [Done]
# init hps and bounds should only be initiated for the default kernel
# the mcmc in default mode should not need proposal distributions explicitly
# reshape posteriors if x_out

class GP:
"""
39 changes: 27 additions & 12 deletions fvgp/gp2.py
@@ -16,7 +16,12 @@
from .gp2Scale import gp2Scale as gp2S
from dask.distributed import Client
from scipy.stats import norm

from .gp_prior import GPrior
from .gp_data import GPdata
from .gp_marginal_density import GPMarginalDensity
from .gp_likelihood import GPlikelihood
from .gp_training import GPtraining
from .gp_posterior import GPosterior

# TODO: search below "TODO"
# self.V is calculated at init and then again in calculate/update gp prior. That's not good. --> has to be because of training. Can we take it out of the init/update?
@@ -28,7 +33,7 @@
# neither minres nor random logdet are doing a good job, cg is better but we need a preconditioner
# when using gp2Scale but the solution is dense we should go with dense linalg

class GP(Gprior, GPlikelihood, GPtraining):
class GP():
"""
This class provides all the tools for a single-task Gaussian Process (GP).
Use fvGP for multitask GPs. However, the fvGP class inherits all methods from this class.
@@ -215,6 +220,7 @@ def __init__(
gp_mean_function=None,
gp_mean_function_grad=None,
store_inv=True,
online=False,
ram_economy=False,
args=None,
info=False
@@ -233,32 +239,41 @@
###init prior instance##################
########################################
# hps, mean function kernel function
self.prior = GPdata(self.data,
self.prior = GPrior(self.data,
init_hyperparameters=init_hyperparameters,
gp_kernel_function=gp_kernel_function,
gp_mean_function=gp_mean_function,
gp_kernel_function_grad=gp_kernel_function_grad,
gp_mean_function_grad=gp_mean_function_grad,
online=False)
online=online)

########################################
###init likelihood instance#############
########################################
# likelihood needs hps, noise function
self.likelihood(self.data,
init_hyperparameters=init_hyperparameters,
gp_noise_function=gp_noise_function,
gp_noise_function_grad=gp_noise_function_grad,
online=False)
self.likelihood = GPlikelihood(hyperparameters=self.prior.hyperparameters,
noise_variances=noise_variances,
gp_noise_function=gp_noise_function,
gp_noise_function_grad=gp_noise_function_grad,
ram_economy=ram_economy,
online=online)

##########################################
#######prepare marginal density###########
##########################################
self.marginal_density = GPMarginalDensity(self.data, self.prior, self.likelihood)


##########################################
#######prepare training###################
##########################################
# needs init hps, bounds
self.trainer = GPtraining(self.data,
self.prior,
self.likelihood,
hyperparameter_bounds)
gp_kernel_function,
gp_mean_function,
gp_noise_function,
init_hyperparameters=self.prior.hyperparameters,
hyperparameter_bounds=hyperparameter_bounds)

##########################################
#######prepare posterior evaluations######
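Note on the restructuring above: the old mixin-style `class GP(Gprior, GPlikelihood, GPtraining)` is replaced by composition, with data, prior, likelihood, marginal-density, and training logic held in separate objects. The sketch below illustrates that wiring with hypothetical stand-in classes; only the attribute names mirror the diff, and the real fvgp constructors take more arguments than shown.

import numpy as np

# Hypothetical stand-ins; the real GPdata/GPrior/GPlikelihood/GPMarginalDensity
# classes in fvgp take additional arguments (kernels, noise functions, bounds, ...).
class GPdata:
    def __init__(self, x_data, y_data):
        self.x_data, self.y_data = x_data, y_data

class GPrior:
    def __init__(self, data, hyperparameters):
        self.data, self.hyperparameters = data, np.asarray(hyperparameters)

class GPlikelihood:
    def __init__(self, hyperparameters, noise_variances=None):
        self.hyperparameters, self.noise_variances = hyperparameters, noise_variances

class GPMarginalDensity:
    def __init__(self, data, prior, likelihood):
        self.data, self.prior, self.likelihood = data, prior, likelihood

class GP:
    # Composition instead of inheritance: each concern lives in its own object.
    def __init__(self, x_data, y_data, init_hyperparameters, noise_variances=None):
        self.data = GPdata(x_data, y_data)
        self.prior = GPrior(self.data, init_hyperparameters)
        self.likelihood = GPlikelihood(self.prior.hyperparameters, noise_variances)
        self.marginal_density = GPMarginalDensity(self.data, self.prior, self.likelihood)

gp = GP(np.random.rand(10, 2), np.random.rand(10), init_hyperparameters=[1.0, 0.5])
print(gp.prior.hyperparameters, gp.marginal_density.likelihood.noise_variances)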
3 changes: 3 additions & 0 deletions fvgp/gpMCMC.py
@@ -245,6 +245,9 @@ def __init__(self, prop_dist,
Arguments that will be available as obj attribute in `prop_dist` and `adapt_callable`.
"""
#consider:
#axis_std = np.linalg.norm(hps_bounds, axis=1) / 10.
#init_s = np.diag(axis_std ** 2)

self.prop_dist = prop_dist
self.indices = indices
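The new commented-out lines in gpMCMC.py suggest seeding the MCMC proposal distribution from the hyperparameter bounds. A minimal sketch of that idea, assuming `hps_bounds` is an (N, 2) array of lower/upper bounds; the bound values and the random-walk step below are illustrative, not part of the commit.

import numpy as np

# Illustrative bounds: one (lower, upper) row per hyperparameter.
hps_bounds = np.array([[0.001, 100.0],
                       [0.01, 10.0],
                       [0.01, 10.0]])

# Per-axis proposal scale taken as ~1/10 of the bound magnitude, as in the comment.
axis_std = np.linalg.norm(hps_bounds, axis=1) / 10.
init_s = np.diag(axis_std ** 2)  # diagonal proposal covariance

# One Gaussian random-walk proposal around the current hyperparameters.
rng = np.random.default_rng(0)
current = np.array([1.0, 0.5, 0.5])
proposal = rng.multivariate_normal(current, init_s)
print(proposal)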
2 changes: 1 addition & 1 deletion fvgp/data.py → fvgp/gp_data.py
@@ -1,7 +1,7 @@
import numpy as np


class Data:
class GPdata:
def __init__(self, x_data, y_data, noise_variances=None):
# make sure the inputs are in the right format
assert isinstance(x_data, np.ndarray) or isinstance(x_data, list)
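The rename from `Data` to `GPdata` changes only the class name; the constructor visible in the diff keeps the `(x_data, y_data, noise_variances=None)` signature. A minimal usage sketch under that signature; the data values are made up and any additional validation inside the class is not shown.

import numpy as np
from fvgp.gp_data import GPdata  # module path as renamed in this commit

x_data = np.random.rand(20, 3)          # 20 input points in 3 dimensions
y_data = np.sin(x_data.sum(axis=1))     # toy scalar observations
noise = np.full(20, 1e-4)               # optional per-point noise variances

data = GPdata(x_data, y_data, noise_variances=noise)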
36 changes: 1 addition & 35 deletions fvgp/gp_kernels.py
@@ -1,38 +1,4 @@
import torch
from torch import nn


class Network(nn.Module): # pragma: no cover
def __init__(dim, layer_width):
super().__init__()
# Inputs to hidden layer linear transformation
layer1 = nn.Linear(dim, layer_width)
layer2 = nn.Linear(layer_width, layer_width)
layer3 = nn.Linear(layer_width, dim)

def forward(x):
x = torch.Tensor(x)
x = torch.nn.functional.relu(layer1(x))
x = torch.nn.functional.relu(layer2(x))
x = torch.nn.functional.relu(layer3(x))
return x.detach().numpy()

def set_weights(w1, w2, w3):
with torch.no_grad(): layer1.weight = nn.Parameter(torch.from_numpy(w1).float())
with torch.no_grad(): layer2.weight = nn.Parameter(torch.from_numpy(w2).float())
with torch.no_grad(): layer3.weight = nn.Parameter(torch.from_numpy(w3).float())

def set_biases(b1, b2, b3):
with torch.no_grad(): layer1.bias = nn.Parameter(torch.from_numpy(b1).float())
with torch.no_grad(): layer2.bias = nn.Parameter(torch.from_numpy(b2).float())
with torch.no_grad(): layer3.bias = nn.Parameter(torch.from_numpy(b3).float())

def get_weights(self):
return layer1.weight, layer2.weight, layer3.weight

def get_biases(self):
return layer1.bias, layer2.bias, layer3.bias

import numpy as np

def squared_exponential_kernel(distance, length):
"""
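With the torch-based Network class removed, gp_kernels.py is left with plain NumPy kernel helpers such as `squared_exponential_kernel(distance, length)`. For orientation, a stand-alone version of the standard squared-exponential form is sketched below; the library's own definition may differ in details (e.g. a signal-variance prefactor).

import numpy as np

def squared_exponential_kernel(distance, length):
    # Standard squared-exponential (RBF) kernel evaluated on pairwise distances.
    return np.exp(-distance ** 2 / (2.0 * length ** 2))

# Pairwise Euclidean distances between two small point sets.
x1 = np.random.rand(5, 2)
x2 = np.random.rand(4, 2)
d = np.linalg.norm(x1[:, None, :] - x2[None, :, :], axis=-1)

K = squared_exponential_kernel(d, length=0.5)  # (5, 4) covariance block
print(K.shape)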
174 changes: 14 additions & 160 deletions fvgp/gp_likelihood.py
@@ -1,9 +1,16 @@
import numpy

import numpy as np
import warnings

class GPlikelihood: # pragma: no cover
def __init__(self, prior_obj, noise_variances, gp_noise_function):
self.prior_obj = prior_obj
def __init__(self, hyperparameters=None,
noise_variances=None,
gp_noise_function=None,
gp_noise_function_grad=None,
ram_economy=False,
online=False):

assert isinstance(hyperparameters, np.ndarray) and np.ndim(hyperparameters) == 1


if noise_variances is not None and callable(gp_noise_function):
warnings.warn("Noise function and measurement noise provided. noise_variances set to None.", stacklevel=2)
@@ -20,174 +27,21 @@ def __init__(self, prior_obj, noise_variances, gp_noise_function):
if callable(gp_noise_function_grad):
self.noise_function_grad = gp_noise_function_grad
elif callable(gp_noise_function):
if self.ram_economy is True:
if ram_economy is True:
self.noise_function_grad = self._finitediff_dnoise_dh_econ
else:
self.noise_function_grad = self._finitediff_dnoise_dh
else:
if self.ram_economy is True:
if ram_economy is True:
self.noise_function_grad = self._default_dnoise_dh_econ
else:
self.noise_function_grad = self._default_dnoise_dh

##################################################################################

def log_likelihood(self, hyperparameters=None):
"""
Function that computes the marginal log-likelihood
Parameters
----------
hyperparameters : np.ndarray, optional
Vector of hyperparameters of shape (N).
If not provided, the covariance will not be recomputed.
Return
------
log marginal likelihood of the data : float
"""
if hyperparameters is None:
K, KV, KVinvY, KVlogdet, FO, KVinv, mean, cov = \
(self.K, self.KV, self.KVinvY, self.KVlogdet, self.factorization_obj,
self.KVinv, self.prior_mean_vec, self.V)
else:
K, KV, KVinvY, KVlogdet, FO, KVinv, mean, cov = \
self._compute_GPpriorV(self.x_data, self.y_data, hyperparameters, calc_inv=False)
n = len(self.y_data)
return -(0.5 * ((self.y_data - mean).T @ KVinvY)) - (0.5 * KVlogdet) - (0.5 * n * np.log(2.0 * np.pi))

##################################################################################
def neg_log_likelihood(self, hyperparameters=None):
"""
Function that computes the marginal log-likelihood
Parameters
----------
hyperparameters : np.ndarray, optional
Vector of hyperparameters of shape (N)
If not provided, the covariance will not be recomputed.
Return
------
negative log marginal likelihood of the data : float
"""
return -self.log_likelihood(hyperparameters=hyperparameters)
self.online = online

##################################################################################
def neg_log_likelihood_gradient(self, hyperparameters=None):
"""
Function that computes the gradient of the marginal log-likelihood.

Parameters
----------
hyperparameters : np.ndarray, optional
Vector of hyperparameters of shape (N).
If not provided, the covariance will not be recomputed.
Return
------
Gradient of the negative log marginal likelihood : np.ndarray
"""
logger.debug("log-likelihood gradient is being evaluated...")
if hyperparameters is None:
K, KV, KVinvY, KVlogdet, FO, KVinv, mean, cov = \
self.K, self.KV, self.KVinvY, self.KVlogdet, self.factorization_obj, \
self.KVinv, self.prior_mean_vec, self.V
else:
K, KV, KVinvY, KVlogdet, FO, KVinv, mean, cov = \
self._compute_GPpriorV(self.x_data, self.y_data, hyperparameters, calc_inv=False)

b = KVinvY
y = self.y_data - mean
if self.ram_economy is False:
try:
dK_dH = self.dk_dh(self.x_data, self.x_data, hyperparameters, self) + self.noise_function_grad(
self.x_data, hyperparameters, self)
except Exception as e:
raise Exception(
"The gradient evaluation dK/dh + dNoise/dh was not successful. \n \
That normally means the combination of ram_economy and definition \
of the gradient function is wrong. ",
str(e))
KV = np.array([KV, ] * len(hyperparameters))
a = self._solve(KV, dK_dH)
bbT = np.outer(b, b.T)
dL_dH = np.zeros((len(hyperparameters)))
dL_dHm = np.zeros((len(hyperparameters)))
dm_dh = self.dm_dh(self.x_data, hyperparameters, self)

for i in range(len(hyperparameters)):
dL_dHm[i] = -dm_dh[i].T @ b
if self.ram_economy is False:
matr = a[i]
else:
try:
dK_dH = self.dk_dh(self.x_data, self.x_data, i, hyperparameters, self) + self.noise_function_grad(
self.x_data, i, hyperparameters, self)
except:
raise Exception(
"The gradient evaluation dK/dh + dNoise/dh was not successful. \n \
That normally means the combination of ram_economy and definition of \
the gradient function is wrong.")
matr = np.linalg.solve(KV, dK_dH)
if dL_dHm[i] == 0.0:
if self.ram_economy is False:
mtrace = np.einsum('ij,ji->', bbT, dK_dH[i])
else:
mtrace = np.einsum('ij,ji->', bbT, dK_dH)
dL_dH[i] = - 0.5 * (mtrace - np.trace(matr))
else:
dL_dH[i] = 0.0
logger.debug("gradient norm: {}", np.linalg.norm(dL_dH + dL_dHm))
return dL_dH + dL_dHm

##################################################################################
def neg_log_likelihood_hessian(self, hyperparameters=None):
"""
Function that computes the Hessian of the marginal log-likelihood.
It does so by a first-order approximation of the exact gradient.
Parameters
----------
hyperparameters : np.ndarray
Vector of hyperparameters of shape (N).
If not provided, the covariance will not be recomputed.
Return
------
Hessian of the negative log marginal likelihood : np.ndarray
"""
##implemented as first-order approximation
len_hyperparameters = len(hyperparameters)
d2L_dmdh = np.zeros((len_hyperparameters, len_hyperparameters))
epsilon = 1e-6
grad_at_hps = self.neg_log_likelihood_gradient(hyperparameters=hyperparameters)
for i in range(len_hyperparameters):
hps_temp = np.array(hyperparameters)
hps_temp[i] = hps_temp[i] + epsilon
d2L_dmdh[i, i:] = ((self.neg_log_likelihood_gradient(hyperparameters=hps_temp) - grad_at_hps) / epsilon)[i:]
return d2L_dmdh + d2L_dmdh.T - np.diag(np.diag(d2L_dmdh))

def test_log_likelihood_gradient(self, hyperparameters):
thps = np.array(hyperparameters)
grad = np.empty((len(thps)))
eps = 1e-6
for i in range(len(thps)):
thps_aux = np.array(thps)
thps_aux[i] = thps_aux[i] + eps
grad[i] = (self.log_likelihood(hyperparameters=thps_aux) - self.log_likelihood(hyperparameters=thps)) / eps
analytical = -self.neg_log_likelihood_gradient(hyperparameters=thps)
if np.linalg.norm(grad - analytical) > np.linalg.norm(grad) / 100.0:
print("Gradient possibly wrong")
print("finite diff appr: ", grad)
print("analytical : ", analytical)
else:
print("Gradient correct")
print("finite diff appr: ", grad)
print("analytical : ", analytical)
assert np.linalg.norm(grad - analytical) < np.linalg.norm(grad) / 100.0

return grad, analytical

def _default_noise_function(self, x, hyperparameters, gp_obj):
noise = np.ones((len(x))) * (np.mean(abs(self.y_data)) / 100.0)
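The log-likelihood, gradient, and Hessian methods removed above evaluate the standard Gaussian marginal log-likelihood -0.5*(y - m)^T (K + V)^{-1} (y - m) - 0.5*log|K + V| - 0.5*n*log(2*pi), matching the expression in the deleted `log_likelihood` method (presumably moving into the new GPMarginalDensity machinery). A self-contained sketch of that computation via a Cholesky factorization, independent of the fvgp classes and using made-up toy data:

import numpy as np
from scipy.linalg import cho_factor, cho_solve

def gaussian_log_marginal_likelihood(y, mean, K, V):
    # log N(y | mean, K + V) for prior covariance K and noise covariance V.
    KV = K + V
    c, low = cho_factor(KV)                    # KV = L L^T
    alpha = cho_solve((c, low), y - mean)      # (K + V)^{-1} (y - mean)
    logdet = 2.0 * np.sum(np.log(np.diag(c)))  # log |K + V|
    n = len(y)
    return -0.5 * (y - mean) @ alpha - 0.5 * logdet - 0.5 * n * np.log(2.0 * np.pi)

rng = np.random.default_rng(1)
x = rng.random((10, 1))
K = np.exp(-(x - x.T) ** 2)                    # simple squared-exponential prior covariance
V = np.diag(np.full(10, 1e-3))                 # diagonal observation noise
y = rng.standard_normal(10)
print(gaussian_log_marginal_likelihood(y, np.zeros(10), K, V))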
