Skip to content

Commit

Permalink
minor cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcusMNoack committed Feb 26, 2024
1 parent 9427372 commit 1827fc3
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 107 deletions.
45 changes: 25 additions & 20 deletions fvgp/gp.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@


# TODO: search below "TODO"
# self.V is calculated at init and then again in calculate/update gp prior. That's not good. --> has to be because of training. Can we take it out of the init/update?

class GP:
"""
Expand Down Expand Up @@ -413,9 +414,8 @@ def __init__(
else:
raise Exception("Variances are not given in an allowed format. Give variances as 1d numpy array")


##########################################
# compute the prior########################
# compute the prior#######################
##########################################
self.K, self.KV, self.KVinvY, self.KVlogdet, self.factorization_obj, self.KVinv, self.prior_mean_vec, self.V = self._compute_GPpriorV(
self.x_data, self.y_data, self.hyperparameters, calc_inv=self.store_inv)
Expand Down Expand Up @@ -456,6 +456,7 @@ def update_gp_data(
if self.input_space_dim != len(x_new[0]): raise ValueError(
"The input space dimension is not in agreement with the provided x_data.")
if np.ndim(y_new) == 2: y_new = y_new[:, 0]
if callable(noise_variances): raise Exception("The update noise_variances cannot be a callable")

#update class instance x_data, and y_data, and set noise
if overwrite:
Expand All @@ -469,6 +470,7 @@ def update_gp_data(
self.y_data = np.append(self.y_data, y_new)
if noise_variances is not None: noise_variances = np.append(np.diag(self.V), noise_variances)
self.point_number = len(self.x_data)

##########################################
#######prepare noise covariances##########
##########################################
Expand Down Expand Up @@ -504,7 +506,7 @@ def update_gp_data(
else:
self.K, self.KV, self.KVinvY, self.KVlogdet, self.factorization_obj, \
self.KVinv, self.prior_mean_vec, self.V = self._update_GPpriorV(
x_data_old, x_new, y_data_old, y_new, self.hyperparameters, calc_inv=self.store_inv)
x_data_old, x_new, self.y_data, self.hyperparameters, calc_inv=self.store_inv)

###################################################################################
###################################################################################
Expand Down Expand Up @@ -1154,7 +1156,7 @@ def test_log_likelihood_gradient(self, hyperparameters):
##################################################################################
def _compute_GPpriorV(self, x_data, y_data, hyperparameters, calc_inv=False):
# get the prior mean
prior_mean_vec = self.mean_function(x_data, hyperparameters, self) #only update if overwrite=False
prior_mean_vec = self.mean_function(x_data, hyperparameters, self)
assert np.ndim(prior_mean_vec) == 1
# get the latest noise
if callable(self.noise_function): V = self.noise_function(x_data, hyperparameters, self)
Expand All @@ -1166,13 +1168,12 @@ def _compute_GPpriorV(self, x_data, y_data, hyperparameters, calc_inv=False):
if self.gp2Scale:
st = time.time()
K = self.gp2Scale_obj.compute_covariance(x_data, hyperparameters, self.gp2Scale_dask_client)
#K = self.gp2Scale_obj.update_covariance(hyperparameters, self.gp2Scale_dask_client)
Ksparsity = float(K.nnz) / float(len(x_data) ** 2)
if isinstance(V,np.ndarray): raise Exception("You are running gp2Scale. \
if isinstance(V, np.ndarray): raise Exception("You are running gp2Scale. \
Your noise model has to return a `scipy.sparse.coo_matrix`.")
if len(x_data) < 50000 and Ksparsity < 0.0001: try_sparse_LU = True
if self.info: print("Transferring the covariance matrix to host done after ", time.time() - st,
" seconds. sparsity = ", Ksparsity, flush=True)
if self.info: print("Computing and transferring the covariance matrix took ", time.time() - st,
" seconds | sparsity = ", Ksparsity, flush=True)
else:
K = self._compute_K(x_data, x_data, hyperparameters)

Expand All @@ -1190,7 +1191,10 @@ def _compute_GPpriorV(self, x_data, y_data, hyperparameters, calc_inv=False):

##################################################################################

def _update_GPpriorV(self, x_data, x_new, y_data, y_new, hyperparameters, calc_inv=False):
def _update_GPpriorV(self, x_data_old, x_new, y_data, hyperparameters, calc_inv=False):
#where is the new variance?
#do I need the x_data here? Or can it be self.x_data

# get the prior mean
prior_mean_vec = np.append(self.prior_mean_vec, self.mean_function(x_new, hyperparameters, self))
assert np.ndim(prior_mean_vec) == 1
Expand All @@ -1203,12 +1207,12 @@ def _update_GPpriorV(self, x_data, x_new, y_data, y_new, hyperparameters, calc_i
if self.gp2Scale:
st = time.time()
K = self.gp2Scale_obj.update_covariance(x_new, hyperparameters, self.gp2Scale_dask_client, self.K)
Ksparsity = float(K.nnz) / float(len(x_data) ** 2)
if len(x_data) < 50000 and Ksparsity < 0.0001: try_sparse_LU = True
if self.info: print("Transferring the covariance matrix to host done after ", time.time() - st,
" seconds. sparsity = ", Ksparsity, flush=True)
Ksparsity = float(K.nnz) / float(len(self.x_data) ** 2)
if len(self.x_data) < 50000 and Ksparsity < 0.0001: try_sparse_LU = True
if self.info: print("Computing and transferring the covariance matrix took ", time.time() - st,
" seconds | sparsity = ", Ksparsity, flush=True)
else:
off_diag = self._compute_K(x_data, x_new, hyperparameters)
off_diag = self._compute_K(x_data_old, x_new, hyperparameters)
K = np.block([
[self.K, off_diag],
[off_diag.T, self._compute_K(x_new, x_new, hyperparameters)]
Expand All @@ -1218,7 +1222,7 @@ def _update_GPpriorV(self, x_data, x_new, y_data, y_new, hyperparameters, calc_i

# get K + V
KV = K + V
y_data = np.append(y_data, y_new)
#y_data = np.append(y_data, y_new)
# get Kinv/KVinvY, LU, Chol, logdet(KV)

KVinvY, KVlogdet, factorization_obj, KVinv = self._compute_gp_linalg(y_data-prior_mean_vec, KV,
Expand All @@ -1228,8 +1232,9 @@ def _update_GPpriorV(self, x_data, x_new, y_data, y_new, hyperparameters, calc_i

##################################################################################
def _compute_gp_linalg(self, vec, KV, calc_inv=False, try_sparse_LU=False):
st = time.time()

if self.gp2Scale:
st = time.time()
from imate import logdet
# try fast but RAM intensive SuperLU first
if try_sparse_LU:
Expand All @@ -1239,7 +1244,7 @@ def _compute_gp_linalg(self, vec, KV, calc_inv=False, try_sparse_LU=False):
KVinvY = LU.solve(vec)
upper_diag = abs(LU.U.diagonal())
KVlogdet = np.sum(np.log(upper_diag))
if self.info: print("LU done after", time.time() - st, "seconds.")
if self.info: print("LU compute time: ", time.time() - st, "seconds.")
# if that did not work, do random lin algebra magic
except:
KVinvY, exit_code = minres(KV.tocsc(), vec)
Expand All @@ -1250,20 +1255,20 @@ def _compute_gp_linalg(self, vec, KV, calc_inv=False, try_sparse_LU=False):
KVlogdet, info_slq = logdet(KV, method='slq', min_num_samples=10, max_num_samples=100,
lanczos_degree=20, error_rtol=0.1, gpu=gpu,
return_info=True, plot=False, verbose=self.info)
if self.info: print("logdet/LU done after ", time.time() - st, "seconds.")
if self.info: print("logdet/LU compute time: ", time.time() - st, "seconds.")
# if the problem is large go with rand. lin. algebra straight away
else:
if self.info: print("MINRES solve in progress ...", time.time() - st, "seconds.")
factorization_obj = ("gp2Scale", None)
KVinvY, exit_code = minres(KV.tocsc(), vec)
if self.info: print("MINRES solve done after ", time.time() - st, "seconds.")
if self.info: print("MINRES solve compute time: ", time.time() - st, "seconds.")
if self.compute_device == "gpu": gpu = True
else: gpu = False
if self.info: print("logdet() in progress ... ", time.time() - st, "seconds.")
KVlogdet, info_slq = logdet(KV, method='slq', min_num_samples=10, max_num_samples=100,
lanczos_degree=20, error_rtol=0.1, orthogonalize=0, gpu=gpu,
return_info=True, plot=False, verbose=self.info)
if self.info: print("logdet/LU done after ", time.time() - st, "seconds.")
if self.info: print("logdet/LU compute time: ", time.time() - st, "seconds.")
KVinv = None
else:
c, l = cho_factor(KV)
Expand Down
89 changes: 2 additions & 87 deletions fvgp/gp2Scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ def __init__(
if not worker_info: raise Exception("No workers available")
self.compute_workers = list(worker_info)

#scatter_data = self.x_data
#self.scatter_future = covariance_dask_client.scatter(
# scatter_data, workers=self.compute_workers, broadcast=False)
##scatter the data to compute workers, TEST if broadcast is better
self.x_data_scatter_future = None
self.x_new_scatter_future = None
self.x_data = None
Expand Down Expand Up @@ -173,95 +169,14 @@ def calculate_sparse_noise_covariance(self, vector):
return diag


#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################
#########################################################################
class gpm2Scale(gp2Scale): # pragma: no cover
def __init__(self, input_space_dim,
output_space_dim,
x_data, ##data in the original input space
init_hyperparameters,
batch_size,
variances=None,
init_y_data=None, # initial latent space positions
gp_kernel_function=None,
gp_mean_function=None, covariance_dask_client=None,
info=False):

if input_space_dim != len(x_data[0]): raise ValueError(
"input space dimensions are not in agreement with the point positions given")
self.input_dim = input_space_dim
self.output_dim = output_space_dim
self.x_data = x_data
self.point_number = len(self.x_data)
if init_y_data is None: init_y_data = np.random.rand(len(x_data), self.output_dim)
self.y_data = init_y_data
self.batch_size = batch_size
self.num_batches = self.point_number // self.batch_size
self.info = info
##########################################
#######prepare variances##################
##########################################
if variances is None:
self.variances = np.ones((self.x_data.shape[0])) * \
abs(np.mean(self.x_data) / 100.0)
print("CAUTION: you have not provided data variances in fvGP,")
print("they will be set to 1 percent of the data values!")
if len(self.variances[self.variances < 0]) > 0: raise Exception(
"Negative measurement variances communicated to fvgp.")
##########################################
#######define kernel and mean function####
##########################################
if gp_kernel_function == "robust":
self.kernel = sparse_stat_kernel_robust
elif callable(gp_kernel_function):
self.kernel = gp_kernel_function
else:
raise Exception("A kernel callable has to be provided!")

self.gp_mean_function = gp_mean_function
if callable(gp_mean_function):
self.mean_function = gp_mean_function
else:
self.mean_function = self.default_mean_function

##########################################
#######prepare hyper parameters###########
##########################################
self.hyperparameters = np.array(init_hyperparameters)
##########################################
# compute the prior########################
##########################################

self.covariance_dask_client = covariance_dask_client
scatter_data = {"x_data": self.x_data} ##data that can be scattered
self.scatter_future = covariance_dask_client.scatter(scatter_data,
workers=self.compute_worker_set) ##scatter the data

self.st = time.time()
self.compute_covariance(self.y_data, self.y_data, self.hyperparameters, variances, covariance_dask_client)
if self.info:
sp = self.SparsePriorCovariance.get_result().result()
print("gp2Scale successfully initiated, here is some info about the prior covariance matrix:")
print("non zero elements: ", sp.nnz)
print("Size in GBits: ", sp.data.nbytes / 1e9)
print("Sparsity: ", sp.nnz / float(self.point_number) ** 2)

if self.point_number <= 5000:
print("Here is an image:")
plt.imshow(sp.toarray())
plt.show()
def __init__(self, input_space_dim,):
self.input_space = input_space_dim

def train(self,
hyperparameter_bounds,
Expand Down

0 comments on commit 1827fc3

Please sign in to comment.