more restructuring
MarcusMNoack committed Mar 22, 2024
1 parent b74f51d commit bd09870
Showing 9 changed files with 249 additions and 245 deletions.
1 change: 1 addition & 0 deletions fvgp/gp.py
@@ -31,6 +31,7 @@
# when using gp2Scale but the solution is dense we should go with dense linalg [Done]
# init hps and bounds should only be initiated for the default kernel
# the mcmc in default mode should not need proposal distributions explicitly
# reshape posteriors if x_out

class GP:
"""
39 changes: 27 additions & 12 deletions fvgp/gp2.py
@@ -16,7 +16,12 @@
from .gp2Scale import gp2Scale as gp2S
from dask.distributed import Client
from scipy.stats import norm

from .gp_prior import GPrior
from .gp_data import GPdata
from .gp_marginal_density import GPMarginalDensity
from .gp_likelihood import GPlikelihood
from .gp_training import GPtraining
from .gp_posterior import GPosterior

# TODO: search below "TODO"
# self.V is calculated at init and then again in calculate/update gp prior. That's not good. --> has to be because of training. Can we take it out of the init/update?
@@ -28,7 +33,7 @@
# neither minres nor random logdet are doing a good job, cg is better but we need a preconditioner
# when using gp2Scale but the solution is dense we should go with dense linalg

class GP(Gprior, GPlikelihood, GPtraining):
class GP():
"""
This class provides all the tools for a single-task Gaussian Process (GP).
Use fvGP for multitask GPs. However, the fvGP class inherits all methods from this class.
@@ -215,6 +220,7 @@ def __init__(
gp_mean_function=None,
gp_mean_function_grad=None,
store_inv=True,
online=False,
ram_economy=False,
args=None,
info=False
@@ -233,32 +239,41 @@
###init prior instance##################
########################################
# hps, mean function kernel function
self.prior = GPdata(self.data,
self.prior = GPrior(self.data,
init_hyperparameters=init_hyperparameters,
gp_kernel_function=gp_kernel_function,
gp_mean_function=gp_mean_function,
gp_kernel_function_grad=gp_kernel_function_grad,
gp_mean_function_grad=gp_mean_function_grad,
online=False)
online=online)

########################################
###init likelihood instance#############
########################################
# likelihood needs hps, noise function
self.likelihood(self.data,
init_hyperparameters=init_hyperparameters,
gp_noise_function=gp_noise_function,
gp_noise_function_grad=gp_noise_function_grad,
online=False)
self.likelihood = GPlikelihood(hyperparameters=self.prior.hyperparameters,
noise_variances=noise_variances,
gp_noise_function=gp_noise_function,
gp_noise_function_grad=gp_noise_function_grad,
ram_economy=ram_economy,
online=online)

##########################################
#######prepare marginal density###########
##########################################
self.marginal_density = GPMarginalDensity(self.data, self.prior, self.likelihood)


##########################################
#######prepare training###################
##########################################
# needs init hps, bounds
self.trainer = GPtraining(self.data,
self.prior,
self.likelihood,
hyperparameter_bounds)
gp_kernel_function,
gp_mean_function,
gp_noise_function,
init_hyperparameters=self.prior.hyperparameters,
hyperparameter_bounds=hyperparameter_bounds)

##########################################
#######prepare posterior evaluations######
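Note on the restructuring above: the old mixin-style `class GP(Gprior, GPlikelihood, GPtraining)` is replaced by composition, with data, prior, likelihood, marginal-density, and training logic held in separate objects. The sketch below illustrates that wiring with hypothetical stand-in classes; only the attribute names mirror the diff, and the real fvgp constructors take more arguments than shown.

import numpy as np

# Hypothetical stand-ins; the real GPdata/GPrior/GPlikelihood/GPMarginalDensity
# classes in fvgp take additional arguments (kernels, noise functions, bounds, ...).
class GPdata:
    def __init__(self, x_data, y_data):
        self.x_data, self.y_data = x_data, y_data

class GPrior:
    def __init__(self, data, hyperparameters):
        self.data, self.hyperparameters = data, np.asarray(hyperparameters)

class GPlikelihood:
    def __init__(self, hyperparameters, noise_variances=None):
        self.hyperparameters, self.noise_variances = hyperparameters, noise_variances

class GPMarginalDensity:
    def __init__(self, data, prior, likelihood):
        self.data, self.prior, self.likelihood = data, prior, likelihood

class GP:
    # Composition instead of inheritance: each concern lives in its own object.
    def __init__(self, x_data, y_data, init_hyperparameters, noise_variances=None):
        self.data = GPdata(x_data, y_data)
        self.prior = GPrior(self.data, init_hyperparameters)
        self.likelihood = GPlikelihood(self.prior.hyperparameters, noise_variances)
        self.marginal_density = GPMarginalDensity(self.data, self.prior, self.likelihood)

gp = GP(np.random.rand(10, 2), np.random.rand(10), init_hyperparameters=[1.0, 0.5])
print(gp.prior.hyperparameters, gp.marginal_density.likelihood.noise_variances)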
3 changes: 3 additions & 0 deletions fvgp/gpMCMC.py
@@ -245,6 +245,9 @@ def __init__(self, prop_dist,
Arguments that will be available as obj attribute in `prop_dist` and `adapt_callable`.
"""
#consider:
#axis_std = np.linalg.norm(hps_bounds, axis=1) / 10.
#init_s = np.diag(axis_std ** 2)

self.prop_dist = prop_dist
self.indices = indices
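The new commented-out lines in gpMCMC.py suggest seeding the MCMC proposal distribution from the hyperparameter bounds. A minimal sketch of that idea, assuming `hps_bounds` is an (N, 2) array of lower/upper bounds; the bound values and the random-walk step below are illustrative, not part of the commit.

import numpy as np

# Illustrative bounds: one (lower, upper) row per hyperparameter.
hps_bounds = np.array([[0.001, 100.0],
                       [0.01, 10.0],
                       [0.01, 10.0]])

# Per-axis proposal scale taken as ~1/10 of the bound magnitude, as in the comment.
axis_std = np.linalg.norm(hps_bounds, axis=1) / 10.
init_s = np.diag(axis_std ** 2)  # diagonal proposal covariance

# One Gaussian random-walk proposal around the current hyperparameters.
rng = np.random.default_rng(0)
current = np.array([1.0, 0.5, 0.5])
proposal = rng.multivariate_normal(current, init_s)
print(proposal)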
2 changes: 1 addition & 1 deletion fvgp/data.py → fvgp/gp_data.py
@@ -1,7 +1,7 @@
import numpy as np


class Data:
class GPdata:
def __init__(self, x_data, y_data, noise_variances=None):
# make sure the inputs are in the right format
assert isinstance(x_data, np.ndarray) or isinstance(x_data, list)
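The rename from `Data` to `GPdata` changes only the class name; the constructor visible in the diff keeps the `(x_data, y_data, noise_variances=None)` signature. A minimal usage sketch under that signature; the data values are made up and any additional validation inside the class is not shown.

import numpy as np
from fvgp.gp_data import GPdata  # module path as renamed in this commit

x_data = np.random.rand(20, 3)          # 20 input points in 3 dimensions
y_data = np.sin(x_data.sum(axis=1))     # toy scalar observations
noise = np.full(20, 1e-4)               # optional per-point noise variances

data = GPdata(x_data, y_data, noise_variances=noise)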
36 changes: 1 addition & 35 deletions fvgp/gp_kernels.py
@@ -1,38 +1,4 @@
import torch
from torch import nn


class Network(nn.Module): # pragma: no cover
def __init__(dim, layer_width):
super().__init__()
# Inputs to hidden layer linear transformation
layer1 = nn.Linear(dim, layer_width)
layer2 = nn.Linear(layer_width, layer_width)
layer3 = nn.Linear(layer_width, dim)

def forward(x):
x = torch.Tensor(x)
x = torch.nn.functional.relu(layer1(x))
x = torch.nn.functional.relu(layer2(x))
x = torch.nn.functional.relu(layer3(x))
return x.detach().numpy()

def set_weights(w1, w2, w3):
with torch.no_grad(): layer1.weight = nn.Parameter(torch.from_numpy(w1).float())
with torch.no_grad(): layer2.weight = nn.Parameter(torch.from_numpy(w2).float())
with torch.no_grad(): layer3.weight = nn.Parameter(torch.from_numpy(w3).float())

def set_biases(b1, b2, b3):
with torch.no_grad(): layer1.bias = nn.Parameter(torch.from_numpy(b1).float())
with torch.no_grad(): layer2.bias = nn.Parameter(torch.from_numpy(b2).float())
with torch.no_grad(): layer3.bias = nn.Parameter(torch.from_numpy(b3).float())

def get_weights(self):
return layer1.weight, layer2.weight, layer3.weight

def get_biases(self):
return layer1.bias, layer2.bias, layer3.bias

import numpy as np

def squared_exponential_kernel(distance, length):
"""
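With the torch-based Network class removed, gp_kernels.py is left with plain NumPy kernel helpers such as `squared_exponential_kernel(distance, length)`. For orientation, a stand-alone version of the standard squared-exponential form is sketched below; the library's own definition may differ in details (e.g. a signal-variance prefactor).

import numpy as np

def squared_exponential_kernel(distance, length):
    # Standard squared-exponential (RBF) kernel evaluated on pairwise distances.
    return np.exp(-distance ** 2 / (2.0 * length ** 2))

# Pairwise Euclidean distances between two small point sets.
x1 = np.random.rand(5, 2)
x2 = np.random.rand(4, 2)
d = np.linalg.norm(x1[:, None, :] - x2[None, :, :], axis=-1)

K = squared_exponential_kernel(d, length=0.5)  # (5, 4) covariance block
print(K.shape)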
174 changes: 14 additions & 160 deletions fvgp/gp_likelihood.py
@@ -1,9 +1,16 @@
import numpy

import numpy as np
import warnings

class GPlikelihood: # pragma: no cover
def __init__(self, prior_obj, noise_variances, gp_noise_function):
self.prior_obj = prior_obj
def __init__(self, hyperparameters=None,
noise_variances=None,
gp_noise_function=None,
gp_noise_function_grad=None,
ram_economy=False,
online=False):

assert isinstance(hyperparameters, np.ndarray) and np.ndim(hyperparameters) == 1


if noise_variances is not None and callable(gp_noise_function):
warnings.warn("Noise function and measurement noise provided. noise_variances set to None.", stacklevel=2)
@@ -20,174 +27,21 @@ def __init__(self, prior_obj, noise_variances, gp_noise_function):
if callable(gp_noise_function_grad):
self.noise_function_grad = gp_noise_function_grad
elif callable(gp_noise_function):
if self.ram_economy is True:
if ram_economy is True:
self.noise_function_grad = self._finitediff_dnoise_dh_econ
else:
self.noise_function_grad = self._finitediff_dnoise_dh
else:
if self.ram_economy is True:
if ram_economy is True:
self.noise_function_grad = self._default_dnoise_dh_econ
else:
self.noise_function_grad = self._default_dnoise_dh

##################################################################################

def log_likelihood(self, hyperparameters=None):
"""
Function that computes the marginal log-likelihood
Parameters
----------
hyperparameters : np.ndarray, optional
Vector of hyperparameters of shape (N).
If not provided, the covariance will not be recomputed.
Return
------
log marginal likelihood of the data : float
"""
if hyperparameters is None:
K, KV, KVinvY, KVlogdet, FO, KVinv, mean, cov = \
(self.K, self.KV, self.KVinvY, self.KVlogdet, self.factorization_obj,
self.KVinv, self.prior_mean_vec, self.V)
else:
K, KV, KVinvY, KVlogdet, FO, KVinv, mean, cov = \
self._compute_GPpriorV(self.x_data, self.y_data, hyperparameters, calc_inv=False)
n = len(self.y_data)
return -(0.5 * ((self.y_data - mean).T @ KVinvY)) - (0.5 * KVlogdet) - (0.5 * n * np.log(2.0 * np.pi))

##################################################################################
def neg_log_likelihood(self, hyperparameters=None):
"""
Function that computes the marginal log-likelihood
Parameters
----------
hyperparameters : np.ndarray, optional
Vector of hyperparameters of shape (N)
If not provided, the covariance will not be recomputed.
Return
------
negative log marginal likelihood of the data : float
"""
return -self.log_likelihood(hyperparameters=hyperparameters)
self.online = online

##################################################################################
def neg_log_likelihood_gradient(self, hyperparameters=None):
"""
Function that computes the gradient of the marginal log-likelihood.

Parameters
----------
hyperparameters : np.ndarray, optional
Vector of hyperparameters of shape (N).
If not provided, the covariance will not be recomputed.
Return
------
Gradient of the negative log marginal likelihood : np.ndarray
"""
logger.debug("log-likelihood gradient is being evaluated...")
if hyperparameters is None:
K, KV, KVinvY, KVlogdet, FO, KVinv, mean, cov = \
self.K, self.KV, self.KVinvY, self.KVlogdet, self.factorization_obj, \
self.KVinv, self.prior_mean_vec, self.V
else:
K, KV, KVinvY, KVlogdet, FO, KVinv, mean, cov = \
self._compute_GPpriorV(self.x_data, self.y_data, hyperparameters, calc_inv=False)

b = KVinvY
y = self.y_data - mean
if self.ram_economy is False:
try:
dK_dH = self.dk_dh(self.x_data, self.x_data, hyperparameters, self) + self.noise_function_grad(
self.x_data, hyperparameters, self)
except Exception as e:
raise Exception(
"The gradient evaluation dK/dh + dNoise/dh was not successful. \n \
That normally means the combination of ram_economy and definition \
of the gradient function is wrong. ",
str(e))
KV = np.array([KV, ] * len(hyperparameters))
a = self._solve(KV, dK_dH)
bbT = np.outer(b, b.T)
dL_dH = np.zeros((len(hyperparameters)))
dL_dHm = np.zeros((len(hyperparameters)))
dm_dh = self.dm_dh(self.x_data, hyperparameters, self)

for i in range(len(hyperparameters)):
dL_dHm[i] = -dm_dh[i].T @ b
if self.ram_economy is False:
matr = a[i]
else:
try:
dK_dH = self.dk_dh(self.x_data, self.x_data, i, hyperparameters, self) + self.noise_function_grad(
self.x_data, i, hyperparameters, self)
except:
raise Exception(
"The gradient evaluation dK/dh + dNoise/dh was not successful. \n \
That normally means the combination of ram_economy and definition of \
the gradient function is wrong.")
matr = np.linalg.solve(KV, dK_dH)
if dL_dHm[i] == 0.0:
if self.ram_economy is False:
mtrace = np.einsum('ij,ji->', bbT, dK_dH[i])
else:
mtrace = np.einsum('ij,ji->', bbT, dK_dH)
dL_dH[i] = - 0.5 * (mtrace - np.trace(matr))
else:
dL_dH[i] = 0.0
logger.debug("gradient norm: {}", np.linalg.norm(dL_dH + dL_dHm))
return dL_dH + dL_dHm

##################################################################################
def neg_log_likelihood_hessian(self, hyperparameters=None):
"""
Function that computes the Hessian of the marginal log-likelihood.
It does so by a first-order approximation of the exact gradient.
Parameters
----------
hyperparameters : np.ndarray
Vector of hyperparameters of shape (N).
If not provided, the covariance will not be recomputed.
Return
------
Hessian of the negative log marginal likelihood : np.ndarray
"""
##implemented as first-order approximation
len_hyperparameters = len(hyperparameters)
d2L_dmdh = np.zeros((len_hyperparameters, len_hyperparameters))
epsilon = 1e-6
grad_at_hps = self.neg_log_likelihood_gradient(hyperparameters=hyperparameters)
for i in range(len_hyperparameters):
hps_temp = np.array(hyperparameters)
hps_temp[i] = hps_temp[i] + epsilon
d2L_dmdh[i, i:] = ((self.neg_log_likelihood_gradient(hyperparameters=hps_temp) - grad_at_hps) / epsilon)[i:]
return d2L_dmdh + d2L_dmdh.T - np.diag(np.diag(d2L_dmdh))

def test_log_likelihood_gradient(self, hyperparameters):
thps = np.array(hyperparameters)
grad = np.empty((len(thps)))
eps = 1e-6
for i in range(len(thps)):
thps_aux = np.array(thps)
thps_aux[i] = thps_aux[i] + eps
grad[i] = (self.log_likelihood(hyperparameters=thps_aux) - self.log_likelihood(hyperparameters=thps)) / eps
analytical = -self.neg_log_likelihood_gradient(hyperparameters=thps)
if np.linalg.norm(grad - analytical) > np.linalg.norm(grad) / 100.0:
print("Gradient possibly wrong")
print("finite diff appr: ", grad)
print("analytical : ", analytical)
else:
print("Gradient correct")
print("finite diff appr: ", grad)
print("analytical : ", analytical)
assert np.linalg.norm(grad - analytical) < np.linalg.norm(grad) / 100.0

return grad, analytical

def _default_noise_function(self, x, hyperparameters, gp_obj):
noise = np.ones((len(x))) * (np.mean(abs(self.y_data)) / 100.0)
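The log-likelihood, gradient, and Hessian methods removed above evaluate the standard Gaussian marginal log-likelihood -0.5*(y - m)^T (K + V)^{-1} (y - m) - 0.5*log|K + V| - 0.5*n*log(2*pi), matching the expression in the deleted `log_likelihood` method (presumably moving into the new GPMarginalDensity machinery). A self-contained sketch of that computation via a Cholesky factorization, independent of the fvgp classes and using made-up toy data:

import numpy as np
from scipy.linalg import cho_factor, cho_solve

def gaussian_log_marginal_likelihood(y, mean, K, V):
    # log N(y | mean, K + V) for prior covariance K and noise covariance V.
    KV = K + V
    c, low = cho_factor(KV)                    # KV = L L^T
    alpha = cho_solve((c, low), y - mean)      # (K + V)^{-1} (y - mean)
    logdet = 2.0 * np.sum(np.log(np.diag(c)))  # log |K + V|
    n = len(y)
    return -0.5 * (y - mean) @ alpha - 0.5 * logdet - 0.5 * n * np.log(2.0 * np.pi)

rng = np.random.default_rng(1)
x = rng.random((10, 1))
K = np.exp(-(x - x.T) ** 2)                    # simple squared-exponential prior covariance
V = np.diag(np.full(10, 1e-3))                 # diagonal observation noise
y = rng.standard_normal(10)
print(gaussian_log_marginal_likelihood(y, np.zeros(10), K, V))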
