diff --git a/crates/ego/benches/ego.rs b/crates/ego/benches/ego.rs
index c912f92f..96ffebcc 100644
--- a/crates/ego/benches/ego.rs
+++ b/crates/ego/benches/ego.rs
@@ -1,6 +1,7 @@
 use criterion::{Criterion, criterion_group, criterion_main};
-use egobox_ego::{EgorBuilder, InfillStrategy};
+use egobox_ego::{EGOBOX_LOG, EgorBuilder, InfillStrategy};
 use egobox_moe::{CorrelationSpec, RegressionSpec};
+use env_logger::{Builder, Env};
 use ndarray::{Array2, ArrayView2, Zip, array};
 
 /// Ackley test function: min f(x)=0 at x=(0, 0, 0)
@@ -15,7 +16,13 @@ fn ackley(x: &ArrayView2<f64>) -> Array2<f64> {
 fn criterion_ego(c: &mut Criterion) {
     let xlimits = array![[-32.768, 32.768], [-32.768, 32.768], [-32.768, 32.768]];
     let mut group = c.benchmark_group("ego");
+    group.sample_size(20);
     group.bench_function("ego ackley", |b| {
+        let env = Env::new().filter_or(EGOBOX_LOG, "error");
+        let mut builder = Builder::from_env(env);
+        let builder = builder.target(env_logger::Target::Stdout);
+        builder.try_init().ok();
+
         b.iter(|| {
             std::hint::black_box(
                 EgorBuilder::optimize(ackley)
@@ -23,7 +30,7 @@ fn criterion_ego(c: &mut Criterion) {
                     config
                         .configure_gp(|conf| {
                             conf.regression_spec(RegressionSpec::CONSTANT)
-                                .correlation_spec(CorrelationSpec::ABSOLUTEEXPONENTIAL)
+                                .correlation_spec(CorrelationSpec::MATERN52)
                         })
                         .infill_strategy(InfillStrategy::WB2S)
                         .max_iters(10)
diff --git a/crates/ego/src/criteria/ei.rs b/crates/ego/src/criteria/ei.rs
index eef05db4..39d7b4c9 100644
--- a/crates/ego/src/criteria/ei.rs
+++ b/crates/ego/src/criteria/ei.rs
@@ -28,23 +28,20 @@ impl InfillCriterion for ExpectedImprovement {
         _scale: Option<f64>,
     ) -> f64 {
         let pt = ArrayView::from_shape((1, x.len()), x).unwrap();
-        match obj_model.predict(&pt) {
-            Ok(p) => match obj_model.predict_var(&pt) {
-                Ok(s) => {
-                    if s[0] < f64::EPSILON {
-                        0.0
-                    } else {
-                        let pred = p[0];
-                        let k = sigma_weight.unwrap_or(1.0);
-                        let sigma = k * s[0].sqrt();
-                        let args0 = (fmin - pred) / sigma;
-                        let args1 = args0 * norm_cdf(args0);
-                        let args2 = norm_pdf(args0);
-                        sigma * (args1 + args2)
-                    }
+        match obj_model.predict_valvar(&pt) {
+            Ok((p, s)) => {
+                if s[0] < f64::EPSILON {
+                    0.0
+                } else {
+                    let pred = p[0];
+                    let k = sigma_weight.unwrap_or(1.0);
+                    let sigma = k * s[0].sqrt();
+                    let args0 = (fmin - pred) / sigma;
+                    let args1 = args0 * norm_cdf(args0);
+                    let args2 = norm_pdf(args0);
+                    sigma * (args1 + args2)
                 }
-                _ => 0.0,
-            },
+            }
             _ => 0.0,
         }
     }
@@ -60,36 +57,32 @@ impl InfillCriterion for ExpectedImprovement {
         _scale: Option<f64>,
     ) -> Array1<f64> {
         let pt = ArrayView::from_shape((1, x.len()), x).unwrap();
-        match obj_model.predict(&pt) {
-            Ok(p) => match obj_model.predict_var(&pt) {
-                Ok(s) => {
-                    if s[0] < f64::EPSILON {
-                        Array1::zeros(pt.len())
-                    } else {
-                        let pred = p[0];
-                        let diff_y = fmin - pred;
-                        let k = sigma_weight.unwrap_or(1.0);
-                        let sigma = s[0].sqrt();
-                        let arg = (fmin - pred) / (k * sigma);
-                        let y_prime = obj_model.predict_gradients(&pt).unwrap();
-                        let y_prime = y_prime.row(0);
-                        let sig_2_prime = obj_model.predict_var_gradients(&pt).unwrap();
-
-                        let sig_2_prime = sig_2_prime.row(0);
-                        let sig_prime = sig_2_prime.mapv(|v| k * v / (2. * sigma));
-                        let arg_prime = y_prime.mapv(|v| v / (-k * sigma))
-                            - diff_y.to_owned() * sig_prime.mapv(|v| v / (k * sigma * k * sigma));
-                        let factor = k * sigma * (-arg / SQRT_2PI) * (-(arg * arg) / 2.).exp();
-
-                        let arg1 = y_prime.mapv(|v| v * (-norm_cdf(arg)));
-                        let arg2 = diff_y * norm_pdf(arg) * arg_prime.to_owned();
-                        let arg3 = sig_prime.to_owned() * norm_pdf(arg);
-                        let arg4 = factor * arg_prime;
-                        arg1 + arg2 + arg3 + arg4
-                    }
+        match obj_model.predict_valvar(&pt) {
+            Ok((p, s)) => {
+                if s[0] < f64::EPSILON {
+                    Array1::zeros(pt.len())
+                } else {
+                    let pred = p[0];
+                    let diff_y = fmin - pred;
+                    let k = sigma_weight.unwrap_or(1.0);
+                    let sigma = s[0].sqrt();
+                    let arg = (fmin - pred) / (k * sigma);
+
+                    let (y_prime, var_prime) = obj_model.predict_valvar_gradients(&pt).unwrap();
+                    let y_prime = y_prime.row(0);
+                    let sig_2_prime = var_prime.row(0);
+                    let sig_prime = sig_2_prime.mapv(|v| k * v / (2. * sigma));
+                    let arg_prime = y_prime.mapv(|v| v / (-k * sigma))
+                        - diff_y.to_owned() * sig_prime.mapv(|v| v / (k * sigma * k * sigma));
+                    let factor = k * sigma * (-arg / SQRT_2PI) * (-(arg * arg) / 2.).exp();
+
+                    let arg1 = y_prime.mapv(|v| v * (-norm_cdf(arg)));
+                    let arg2 = diff_y * norm_pdf(arg) * arg_prime.to_owned();
+                    let arg3 = sig_prime.to_owned() * norm_pdf(arg);
+                    let arg4 = factor * arg_prime;
+                    arg1 + arg2 + arg3 + arg4
                 }
-                _ => Array1::zeros(pt.len()),
-            },
+            }
             _ => Array1::zeros(pt.len()),
         }
     }
@@ -120,20 +113,17 @@ impl InfillCriterion for LogExpectedImprovement {
     ) -> f64 {
         let pt = ArrayView::from_shape((1, x.len()), x).unwrap();
 
-        match obj_model.predict(&pt) {
-            Ok(p) => match obj_model.predict_var(&pt) {
-                Ok(s) => {
-                    if s[0] < f64::EPSILON {
-                        f64::MIN
-                    } else {
-                        let pred = p[0];
-                        let sigma = s[0].sqrt();
-                        let u = (fmin - pred) / sigma;
-                        log_ei_helper(u) + sigma.ln()
-                    }
+        match obj_model.predict_valvar(&pt) {
+            Ok((p, s)) => {
+                if s[0] < f64::EPSILON {
+                    f64::MIN
+                } else {
+                    let pred = p[0];
+                    let sigma = s[0].sqrt();
+                    let u = (fmin - pred) / sigma;
+                    log_ei_helper(u) + sigma.ln()
                 }
-                _ => f64::MIN,
-            },
+            }
             _ => f64::MIN,
         }
     }
@@ -150,35 +140,31 @@ impl InfillCriterion for LogExpectedImprovement {
     ) -> Array1<f64> {
         let pt = ArrayView::from_shape((1, x.len()), x).unwrap();
 
-        match obj_model.predict(&pt) {
-            Ok(p) => match obj_model.predict_var(&pt) {
-                Ok(s) => {
-                    if s[0] < f64::EPSILON {
-                        Array1::from_elem(pt.len(), f64::MIN)
-                    } else {
-                        let pred = p[0];
-                        let diff_y = fmin - pred;
-                        let sigma = s[0].sqrt();
-                        let arg = diff_y / sigma;
-
-                        let y_prime = obj_model.predict_gradients(&pt).unwrap();
-                        let y_prime = y_prime.row(0);
-                        let sig_2_prime = obj_model.predict_var_gradients(&pt).unwrap();
-                        let sig_2_prime = sig_2_prime.row(0);
-                        let sig_prime = sig_2_prime.mapv(|v| v / (2. * sigma));
-
-                        let arg_prime = y_prime.mapv(|v| v / (-sigma))
-                            - diff_y.to_owned() * sig_prime.mapv(|v| v / (sigma * sigma));
-
-                        let dhelper = d_log_ei_helper(arg);
-                        let arg1 = arg_prime.mapv(|v| dhelper * v);
-
-                        let arg2 = sig_prime / sigma;
-                        arg1 + arg2
-                    }
+        match obj_model.predict_valvar(&pt) {
+            Ok((p, s)) => {
+                if s[0] < f64::EPSILON {
+                    Array1::from_elem(pt.len(), f64::MIN)
+                } else {
+                    let pred = p[0];
+                    let diff_y = fmin - pred;
+                    let sigma = s[0].sqrt();
+                    let arg = diff_y / sigma;
+
+                    let (y_prime, var_prime) = obj_model.predict_valvar_gradients(&pt).unwrap();
+                    let y_prime = y_prime.row(0);
+                    let sig_2_prime = var_prime.row(0);
+                    let sig_prime = sig_2_prime.mapv(|v| v / (2. * sigma));
+
+                    let arg_prime = y_prime.mapv(|v| v / (-sigma))
+                        - diff_y.to_owned() * sig_prime.mapv(|v| v / (sigma * sigma));
+
+                    let dhelper = d_log_ei_helper(arg);
+                    let arg1 = arg_prime.mapv(|v| dhelper * v);
+
+                    let arg2 = sig_prime / sigma;
+                    arg1 + arg2
                }
-                _ => Array1::from_elem(pt.len(), f64::MIN),
-            },
+            }
             _ => Array1::from_elem(pt.len(), f64::MIN),
         }
     }
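// --- Editor's illustration (not part of the patch) ---------------------------
// The closed-form criterion above is unchanged: EI(x) = sigma * (u * Phi(u) + phi(u))
// with u = (fmin - pred) / sigma; the refactoring only replaces the nested
// predict/predict_var calls with a single predict_valvar call. A minimal sketch,
// assuming the crate-internal `norm_cdf`/`norm_pdf` helpers used in ei.rs:
fn ei_sketch(obj_model: &dyn MixtureGpSurrogate, x: &[f64], fmin: f64) -> f64 {
    let pt = ArrayView::from_shape((1, x.len()), x).unwrap();
    match obj_model.predict_valvar(&pt) {
        // One surrogate evaluation now yields both the mean p and the variance s
        Ok((p, s)) if s[0] >= f64::EPSILON => {
            let sigma = s[0].sqrt();
            let u = (fmin - p[0]) / sigma;
            sigma * (u * norm_cdf(u) + norm_pdf(u))
        }
        _ => 0.0,
    }
}
// -----------------------------------------------------------------------------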
diff --git a/crates/ego/src/gpmix/mixint.rs b/crates/ego/src/gpmix/mixint.rs
index da89cdf2..4aac9503 100644
--- a/crates/ego/src/gpmix/mixint.rs
+++ b/crates/ego/src/gpmix/mixint.rs
@@ -606,6 +606,19 @@ impl GpSurrogate for MixintGpMixture {
         self.moe.predict_var(&xcast)
     }
 
+    fn predict_valvar(
+        &self,
+        x: &ArrayView2<f64>,
+    ) -> egobox_moe::Result<(Array1<f64>, Array1<f64>)> {
+        let mut xcast = if self.work_in_folded_space {
+            unfold_with_enum_mask(&self.xtypes, x)
+        } else {
+            x.to_owned()
+        };
+        cast_to_discrete_values_mut(&self.xtypes, &mut xcast);
+        self.moe.predict_valvar(&xcast)
+    }
+
     /// Save Moe model in given file.
     #[cfg(feature = "persistent")]
     fn save(&self, path: &str, format: GpFileFormat) -> egobox_moe::Result<()> {
@@ -660,6 +673,19 @@ impl GpSurrogateExt for MixintGpMixture {
         self.moe.predict_var_gradients(&xcast)
     }
 
+    fn predict_valvar_gradients(
+        &self,
+        x: &ArrayView2<f64>,
+    ) -> egobox_moe::Result<(Array2<f64>, Array2<f64>)> {
+        let mut xcast = if self.work_in_folded_space {
+            unfold_with_enum_mask(&self.xtypes, x)
+        } else {
+            x.to_owned()
+        };
+        cast_to_discrete_values_mut(&self.xtypes, &mut xcast);
+        self.moe.predict_valvar_gradients(&xcast)
+    }
+
     fn sample(&self, x: &ArrayView2<f64>, n_traj: usize) -> egobox_moe::Result<Array2<f64>> {
         let mut xcast = if self.work_in_folded_space {
             unfold_with_enum_mask(&self.xtypes, x)
diff --git a/crates/ego/src/solver/coego.rs b/crates/ego/src/solver/coego.rs
index 902a1ad4..5e9b02bf 100644
--- a/crates/ego/src/solver/coego.rs
+++ b/crates/ego/src/solver/coego.rs
@@ -198,14 +198,17 @@ where
     ) -> Result<Array1<f64>> {
         let mut res: Vec<f64> = vec![];
         let x = &x.view().insert_axis(Axis(0));
-        let sigma = obj_model.predict_var(&x.view()).unwrap()[0].sqrt();
+
+        let (pred, var) = obj_model.predict_valvar(x)?;
+        let sigma = var[0].sqrt();
         // Use lower trust bound for a minimization
-        let pred = obj_model.predict(x)?[0] - CSTR_DOUBT * sigma;
+        let pred = pred[0] - CSTR_DOUBT * sigma;
         res.push(pred);
         for cstr_model in cstr_models {
-            let sigma = cstr_model.predict_var(&x.view()).unwrap()[0].sqrt();
+            let (pred, var) = cstr_model.predict_valvar(x)?;
+            let sigma = var[0].sqrt();
             // Use upper trust bound
-            res.push(cstr_model.predict(x)?[0] + CSTR_DOUBT * sigma);
+            res.push(pred[0] + CSTR_DOUBT * sigma);
         }
         Ok(Array1::from_vec(res))
     }
diff --git a/crates/ego/src/solver/solver_computations.rs b/crates/ego/src/solver/solver_computations.rs
index ac724239..0797c5a2 100644
--- a/crates/ego/src/solver/solver_computations.rs
+++ b/crates/ego/src/solver/solver_computations.rs
@@ -223,18 +223,19 @@ where
         active: &[usize],
     ) -> f64 {
         let x = Array::from_shape_vec((1, x.len()), x.to_vec()).unwrap();
-        let sigma = cstr_model.predict_var(&x.view()).unwrap()[0].sqrt();
-        let cstr_val = cstr_model.predict(&x.view()).unwrap()[0];
+
+        let (pred, var) = cstr_model.predict_valvar(&x.view()).unwrap();
+        let sigma = var[0].sqrt();
+        let cstr_val = pred[0];
         if let Some(grad) = gradient {
+            let (pred_grad, var_grad) = cstr_model.predict_valvar_gradients(&x.view()).unwrap();
             let sigma_prime = if sigma < f64::EPSILON {
                 0.
             } else {
-                cstr_model.predict_var_gradients(&x.view()).unwrap()[[0, 0]] / (2. * sigma)
+                var_grad[[0, 0]] / (2. * sigma)
            };
-            let grd = cstr_model
-                .predict_gradients(&x.view())
-                .unwrap()
+            let grd = pred_grad
                 .row(0)
                 .mapv(|v| (v + CSTR_DOUBT * sigma_prime) / scale_cstr)
                 .to_vec();
diff --git a/crates/ego/src/utils/cstr_pof.rs b/crates/ego/src/utils/cstr_pof.rs
index 75cd1714..159439cd 100644
--- a/crates/ego/src/utils/cstr_pof.rs
+++ b/crates/ego/src/utils/cstr_pof.rs
@@ -8,20 +8,17 @@ use ndarray::{Array1, ArrayView};
 /// a constraint function wrt the tolerance (ie cstr <= cstr_tol)
 fn pof(x: &[f64], cstr_model: &dyn MixtureGpSurrogate, cstr_tol: f64) -> f64 {
     let pt = ArrayView::from_shape((1, x.len()), x).unwrap();
-    match cstr_model.predict(&pt) {
-        Ok(p) => match cstr_model.predict_var(&pt) {
-            Ok(s) => {
-                if s[0] < f64::EPSILON {
-                    0.0
-                } else {
-                    let pred = p[0];
-                    let sigma = s[0].sqrt();
-                    let args0 = (cstr_tol - pred) / sigma;
-                    norm_cdf(args0)
-                }
+    match cstr_model.predict_valvar(&pt) {
+        Ok((p, s)) => {
+            if s[0] < f64::EPSILON {
+                0.0
+            } else {
+                let pred = p[0];
+                let sigma = s[0].sqrt();
+                let args0 = (cstr_tol - pred) / sigma;
+                norm_cdf(args0)
             }
-            _ => 0.0,
-        },
+        }
         _ => 0.0,
     }
 }
@@ -30,27 +27,23 @@ fn pof(x: &[f64], cstr_model: &dyn MixtureGpSurrogate, cstr_tol: f64) -> f64 {
 /// constraint surrogate model.
 fn pof_grad(x: &[f64], cstr_model: &dyn MixtureGpSurrogate, cstr_tol: f64) -> Array1<f64> {
     let pt = ArrayView::from_shape((1, x.len()), x).unwrap();
-    match cstr_model.predict(&pt) {
-        Ok(p) => match cstr_model.predict_var(&pt) {
-            Ok(s) => {
-                if s[0] < f64::EPSILON {
-                    Array1::zeros(pt.len())
-                } else {
-                    let pred = p[0];
-                    let sigma = s[0].sqrt();
-                    let arg = (cstr_tol - pred) / sigma;
-                    let y_prime = cstr_model.predict_gradients(&pt).unwrap();
-                    let y_prime = y_prime.row(0);
-                    let sig_2_prime = cstr_model.predict_var_gradients(&pt).unwrap();
-                    let sig_2_prime = sig_2_prime.row(0);
-                    let sig_prime = sig_2_prime.mapv(|v| v / (2. * sigma));
-                    let arg_prime = y_prime.mapv(|v| v / (-sigma))
-                        + sig_prime.mapv(|v| v * pred / (sigma * sigma));
-                    norm_pdf(arg) * arg_prime.to_owned()
-                }
+    match cstr_model.predict_valvar(&pt) {
+        Ok((p, s)) => {
+            if s[0] < f64::EPSILON {
+                Array1::zeros(pt.len())
+            } else {
+                let pred = p[0];
+                let sigma = s[0].sqrt();
+                let arg = (cstr_tol - pred) / sigma;
+                let (y_prime, var_prime) = cstr_model.predict_valvar_gradients(&pt).unwrap();
+                let y_prime = y_prime.row(0);
+                let sig_2_prime = var_prime.row(0);
+                let sig_prime = sig_2_prime.mapv(|v| v / (2. * sigma));
+                let arg_prime =
+                    y_prime.mapv(|v| v / (-sigma)) + sig_prime.mapv(|v| v * pred / (sigma * sigma));
+                norm_pdf(arg) * arg_prime.to_owned()
             }
-            _ => Array1::zeros(pt.len()),
-        },
+        }
         _ => Array1::zeros(pt.len()),
     }
 }
diff --git a/crates/gp/Cargo.toml b/crates/gp/Cargo.toml
index 4c46aa14..7865457a 100644
--- a/crates/gp/Cargo.toml
+++ b/crates/gp/Cargo.toml
@@ -61,3 +61,7 @@ argmin_testfunctions.workspace = true
 [[bench]]
 name = "gp"
 harness = false
+
+[[bench]]
+name = "corr"
+harness = false
diff --git a/crates/gp/benches/corr.rs b/crates/gp/benches/corr.rs
new file mode 100644
index 00000000..5b3981d4
--- /dev/null
+++ b/crates/gp/benches/corr.rs
@@ -0,0 +1,101 @@
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+use egobox_gp::DiffMatrix;
+use egobox_gp::correlation_models::{CorrelationModel, Matern32Corr, Matern52Corr};
+use ndarray::{Array1, Array2};
+use ndarray_rand::RandomExt;
+use ndarray_rand::rand_distr::Uniform;
+
+fn bench_matern32_value(c: &mut Criterion) {
+    let mut group = c.benchmark_group("matern32_value");
+
+    let dim = 100;
+    let xt = Array2::from_shape_fn((1, dim), |(i, j)| (i as f64 * 0.1 + j as f64 * 0.05) % 1.0);
+    let dm = DiffMatrix::new(&xt);
+    let theta = Array1::from_elem(dim, f64::sqrt(0.2));
+    let weights = Array2::ones((dim, dim));
+
+    let corr = Matern32Corr::default();
+    group.bench_function(BenchmarkId::from_parameter(dim), |b| {
+        b.iter(|| corr.value(&dm.d, &theta, &weights));
+    });
+    group.finish();
+}
+
+fn bench_matern52_value(c: &mut Criterion) {
+    let mut group = c.benchmark_group("matern52_value");
+
+    let dim = 100;
+    let xt = Array2::from_shape_fn((1, dim), |(i, j)| (i as f64 * 0.1 + j as f64 * 0.05) % 1.0);
+    let dm = DiffMatrix::new(&xt);
+    let theta = Array1::from_elem(dim, f64::sqrt(0.2));
+    let weights = Array2::ones((dim, dim));
+
+    let corr = Matern52Corr::default();
+    group.bench_function(BenchmarkId::from_parameter(dim), |b| {
+        b.iter(|| corr.value(&dm.d, &theta, &weights));
+    });
+    group.finish();
+}
+
+fn bench_matern32_jacobian(c: &mut Criterion) {
+    let mut group = c.benchmark_group("matern32_jacobian");
+
+    let dim = 100;
+    let uniform = Uniform::new(0., 1.);
+    let mut rng = ndarray_rand::rand::thread_rng();
+    let x = Array1::random_using((dim,), uniform, &mut rng);
+    let xtrain = Array2::from_shape_fn((1, dim), |(i, j)| (i as f64 * 0.1 + j as f64 * 0.05) % 1.0);
+    let theta = Array1::from_elem(dim, f64::sqrt(0.2));
+    let weights = Array2::ones((dim, dim));
+
+    let corr = Matern32Corr::default();
+
+    group.bench_function(BenchmarkId::from_parameter(dim), |b| {
+        b.iter(|| corr.jacobian(&x, &xtrain, &theta, &weights));
+    });
+    group.finish();
+}
+
+fn bench_matern52_jacobian(c: &mut Criterion) {
+    let mut group = c.benchmark_group("matern52_jacobian");
+
+    let dim = 100;
+    let uniform = Uniform::new(0., 1.);
+    let mut rng = ndarray_rand::rand::thread_rng();
+    let x = Array1::random_using((dim,), uniform, &mut rng);
+    let xtrain = Array2::from_shape_fn((1, dim), |(i, j)| (i as f64 * 0.1 + j as f64 * 0.05) % 1.0);
+    let theta = Array1::from_elem(dim, f64::sqrt(0.2));
+    let weights = Array2::ones((dim, dim));
+
+    let corr = Matern52Corr::default();
+
+    group.bench_function(BenchmarkId::from_parameter(dim), |b| {
+        b.iter(|| corr.jacobian(&x, &xtrain, &theta, &weights));
+    });
+    group.finish();
+}
+
+fn bench_diff_matrix(c: &mut Criterion) {
+    let mut group = c.benchmark_group("diff_matrix");
+
+    let dim = 100;
+    let nt = 100;
+    let uniform = Uniform::new(0., 1.);
+    let mut rng = ndarray_rand::rand::thread_rng();
+    let x = Array2::random_using((nt, dim), uniform, &mut rng);
+
+    group.bench_function(format!("{}x{}", nt, dim), |b| {
+        b.iter(|| DiffMatrix::new(&x));
+    });
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_matern32_value,
+    bench_matern52_value,
+    bench_matern32_jacobian,
+    bench_matern52_jacobian,
+    bench_diff_matrix,
+);
+criterion_main!(benches);
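// --- Editor's note (not part of the patch) -----------------------------------
// The new benches can be run in isolation, assuming the usual package name of
// crates/gp on crates.io:
//
//     cargo bench -p egobox-gp --bench corr
// -----------------------------------------------------------------------------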
diff --git a/crates/gp/src/algorithm.rs b/crates/gp/src/algorithm.rs
index c638ba81..c98527f2 100644
--- a/crates/gp/src/algorithm.rs
+++ b/crates/gp/src/algorithm.rs
@@ -2,7 +2,7 @@ use crate::errors::{GpError, Result};
 use crate::mean_models::*;
 use crate::optimization::{CobylaParams, optimize_params, prepare_multistart};
 use crate::parameters::{GpParams, GpValidParams};
-use crate::utils::{DistanceMatrix, NormalizedData, pairwise_differences};
+use crate::utils::{DiffMatrix, NormalizedData, pairwise_differences};
 use crate::{ThetaTuning, correlation_models::*};
 
 use linfa::dataset::{WithLapack, WithoutLapack};
@@ -265,7 +265,9 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GaussianProcess<F, Mean, Corr>
     /// Predict variance values at n given `x` points of nx components specified as a (n, nx) matrix.
     /// Returns n variance values as (n,) column vector.
     pub fn predict_var(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Result<Array1<F>> {
-        let (rt, u, _) = self._compute_rt_u(x);
+        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
+        let corr = self._compute_correlation(&xnorm);
+        let (rt, u) = self._compute_rt_u(&xnorm, &corr);
         let mut mse = Array::ones(rt.ncols()) - rt.mapv(|v| v * v).sum_axis(Axis(0))
             + u.mapv(|v: F| v * v).sum_axis(Axis(0));
 
@@ -276,9 +278,39 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GaussianProcess<F, Mean, Corr>
         Ok(mse.mapv(|v| if v < F::zero() { F::zero() } else { F::cast(v) }))
     }
 
+    /// Predict both output values and variance at n given `x` points of nx components
+    pub fn predict_valvar(
+        &self,
+        x: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> Result<(Array1<F>, Array1<F>)> {
+        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
+        // Compute the mean term at x
+        let f = self.params.mean.value(&xnorm);
+        // Compute the correlation term at x
+        let corr = self._compute_correlation(&xnorm);
+        // Scaled predictor
+        let y_ = &f.dot(&self.inner_params.beta) + &corr.dot(&self.inner_params.gamma);
+        // Predictor
+        let yp = (&y_ * &self.yt_norm.std + &self.yt_norm.mean).remove_axis(Axis(1));
+
+        let (rt, u) = self._compute_rt_u(&xnorm, &corr);
+
+        let mut mse = Array::ones(rt.ncols()) - rt.mapv(|v| v * v).sum_axis(Axis(0))
+            + u.mapv(|v: F| v * v).sum_axis(Axis(0));
+        mse.mapv_inplace(|v| self.inner_params.sigma2 * v);
+
+        // Mean Squared Error might be slightly negative depending on
+        // machine precision: set to zero in that case
+        let vmse = mse.mapv(|v| if v < F::zero() { F::zero() } else { F::cast(v) });
+
+        Ok((yp, vmse))
+    }
+
     /// Compute covariance matrix given x points specified as a (n, nx) matrix
     fn _compute_covariance(&self, x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> Array2<F> {
-        let (rt, u, xnorm) = self._compute_rt_u(x);
+        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
+        let corr = self._compute_correlation(&xnorm);
+        let (rt, u) = self._compute_rt_u(&xnorm, &corr);
 
         let cross_dx = pairwise_differences(&xnorm, &xnorm);
         let k = self.params.corr.value(&cross_dx, &self.theta, &self.w_star);
@@ -297,10 +329,9 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GaussianProcess<F, Mean, Corr>
     /// This method factorizes computations done to get variances and covariance matrix
     fn _compute_rt_u(
         &self,
-        x: &ArrayBase<impl Data<Elem = F>, Ix2>,
-    ) -> (Array2<F>, Array2<F>, Array2<F>) {
-        let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
-        let corr = self._compute_correlation(&xnorm);
+        xnorm: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        corr: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> (Array2<F>, Array2<F>) {
         let inners = &self.inner_params;
 
         let corr_t = corr.t().to_owned();
@@ -318,7 +349,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GaussianProcess<F, Mean, Corr>
             .solve_triangular(&corr_t, UPLO::Lower)
             .unwrap();
 
-        let rhs = inners.ft.t().dot(&rt) - self.params.mean.value(&xnorm).t();
+        let rhs = inners.ft.t().dot(&rt) - self.params.mean.value(xnorm).t();
         #[cfg(feature = "blas")]
         let u = inners
             .ft_qr_r
@@ -334,7 +365,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GaussianProcess<F, Mean, Corr>
             .t()
             .solve_triangular(&rhs, UPLO::Lower)
             .unwrap();
-        (rt, u, xnorm)
+        (rt, u)
     }
 
     /// Compute correlation matrix given x points specified as a (n, nx) matrix
@@ -489,7 +520,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GaussianProcess<F, Mean, Corr>
     /// Predict gradient at a given x point
     /// Note: output is one dimensional, named jacobian as result is given as a one-column matrix
-    pub fn predict_jacobian(&self, x: &ArrayBase<impl Data<Elem = F>, Ix1>) -> Array2<F> {
+    fn predict_jacobian(&self, x: &ArrayBase<impl Data<Elem = F>, Ix1>) -> Array2<F> {
         let xx = x.to_owned().insert_axis(Axis(0));
         let mut jac = Array2::zeros((xx.ncols(), 1));
 
@@ -527,15 +558,13 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GaussianProcess<F, Mean, Corr>
     ) -> Array1<F> {
         let x = &(x.to_owned().insert_axis(Axis(0)));
         let xnorm = (x - &self.xt_norm.mean) / &self.xt_norm.std;
-        let dx = pairwise_differences(&xnorm, &self.xt_norm.data);
 
         let sigma2 = self.inner_params.sigma2;
         let r_chol = &self.inner_params.r_chol;
 
-        let r = self.params.corr.value(&dx, &self.theta, &self.w_star);
-        let dr =
+        let (r, dr) =
             self.params
                 .corr
-                .jacobian(&xnorm.row(0), &self.xt_norm.data, &self.theta, &self.w_star);
+                .valjac(&xnorm.row(0), &self.xt_norm.data, &self.theta, &self.w_star);
 
         // rho1 = Rc^-1 . r(x, X)
         let rho1 = r_chol.solve_triangular(&r, UPLO::Lower).unwrap();
@@ -677,6 +706,24 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> GaussianProcess<F, Mean, Corr>
             .for_each(|mut der, x| der.assign(&self.predict_var_gradients_single(&x)));
         derivs
     }
+
+    /// Predict both value and variance gradients at a set of points `x` specified as a (n, nx) matrix
+    /// where each point has nx components.
+    pub fn predict_valvar_gradients(
+        &self,
+        x: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> (Array2<F>, Array2<F>) {
+        let mut val_derivs = Array::zeros((x.nrows(), x.ncols()));
+        let mut var_derivs = Array::zeros((x.nrows(), x.ncols()));
+        Zip::from(val_derivs.rows_mut())
+            .and(var_derivs.rows_mut())
+            .and(x.rows())
+            .for_each(|mut val_der, mut var_der, x| {
+                val_der.assign(&self.predict_jacobian(&x).column(0));
+                var_der.assign(&self.predict_var_gradients_single(&x));
+            });
+        (val_derivs, var_derivs)
+    }
 }
 
 impl<F: Float, D: Data<Elem = F>, Mean: RegressionModel<F>, Corr: CorrelationModel<F>> PredictInplace<ArrayBase<D, Ix2>, Array1<F>>
@@ -806,7 +853,7 @@ impl<F: Float, Mean: RegressionModel<F>, Corr: CorrelationModel<F>, D: Data<Elem = F>>
 fn reduced_likelihood<F: Float, Corr: CorrelationModel<F>, D: Data<Elem = F>>(
     fx: &ArrayBase<impl Data<Elem = F>, Ix2>,
     rxx: ArrayBase<impl Data<Elem = F>, Ix2>,
-    x_distances: &DistanceMatrix<F>,
+    x_distances: &DiffMatrix<F>,
     ytrain: &NormalizedData<F>,
     nugget: F,
 ) -> Result<(F, GpInnerParams<F>)> {
@@ -1013,7 +1060,7 @@ fn reduced_likelihood(
     fx: &ArrayBase<impl Data<Elem = F>, Ix2>,
     rxx: ArrayBase<impl Data<Elem = F>, Ix2>,
-    x_distances: &DistanceMatrix<F>,
+    x_distances: &DiffMatrix<F>,
     ytrain: &NormalizedData<F>,
     nugget: F,
 ) -> Result<(F, GpInnerParams<F>)> {
diff --git a/crates/gp/src/correlation_models.rs b/crates/gp/src/correlation_models.rs
index 7ffb61ee..3ab5aabc 100644
--- a/crates/gp/src/correlation_models.rs
+++ b/crates/gp/src/correlation_models.rs
@@ -40,6 +40,16 @@ pub trait CorrelationModel<F: Float>: Clone + Copy + Default + fmt::Display + Sync + Send {
         weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
     ) -> Array2<F>;
 
+    /// Compute both the correlation function matrix `r(x, x')` and its jacobian at given `x`
+    /// given a set of `xtrain` training samples, `theta` parameters, and `weights`.
+    fn valjac(
+        &self,
+        x: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        xtrain: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> (Array2<F>, Array2<F>);
+
     /// Returns the theta influence factors for the correlation model.
     /// See
     fn theta_influence_factors(&self) -> (F, F) {
@@ -111,6 +121,25 @@ impl<F: Float> CorrelationModel<F> for SquaredExponentialCorr {
         d * &dtheta_w * &r
     }
 
+    fn valjac(
+        &self,
+        x: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        xtrain: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> (Array2<F>, Array2<F>) {
+        let d = differences(x, xtrain);
+        let r = self.value(&d, theta, weights);
+
+        let dtheta_w = (theta * weights)
+            .mapv(|v| v.powf(F::cast(2)))
+            .sum_axis(Axis(1))
+            .mapv(|v| F::cast(-v));
+
+        let jr = d * &dtheta_w * &r;
+        (r, jr)
+    }
+
     fn theta_influence_factors(&self) -> (F, F) {
         (F::cast(0.29), F::cast(1.96))
     }
@@ -184,6 +213,25 @@ impl<F: Float> CorrelationModel<F> for AbsoluteExponentialCorr {
         &dtheta_w * &r
     }
 
+    fn valjac(
+        &self,
+        x: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        xtrain: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> (Array2<F>, Array2<F>) {
+        let d = differences(x, xtrain);
+        let r = self.value(&d, theta, weights);
+        let sign_d = d.mapv(|v| v.signum());
+
+        let dtheta_w = sign_d
+            * (theta * weights.mapv(|v| v.abs()))
+                .sum_axis(Axis(1))
+                .mapv(|v| F::cast(-1.) * v);
+        let jr = &dtheta_w * &r;
+        (r, jr)
+    }
+
     fn theta_influence_factors(&self) -> (F, F) {
         (F::cast(0.15), F::cast(3.76))
     }
@@ -222,32 +270,6 @@ impl TryFrom<CorrelationSpec> for Matern32Corr {
     }
 }
 
-impl Matern32Corr {
-    fn compute_r_factors<F: Float>(
-        &self,
-        d: &ArrayBase<impl Data<Elem = F>, Ix2>,
-        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
-        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
-    ) -> (Array1<F>, Array1<F>) {
-        let sqrt3 = F::cast(3.).sqrt();
-        let theta_w = theta * weights.mapv(|v| v.abs());
-
-        let mut a = Array1::ones(d.nrows());
-        Zip::from(&mut a).and(d.rows()).for_each(|a_i, d_i| {
-            Zip::from(&d_i)
-                .and(theta_w.rows())
-                .for_each(|d_ij, theta_w_j| {
-                    *a_i *= theta_w_j
-                        .mapv(|v| F::one() + sqrt3 * v * d_ij.abs())
-                        .product();
-                });
-        });
-        let d_theta_w = d.mapv(|v| v.abs()).dot(&theta_w);
-        let b = d_theta_w.sum_axis(Axis(1)).mapv(|v| F::exp(-sqrt3 * v));
-        (a, b)
-    }
-}
-
 impl<F: Float> CorrelationModel<F> for Matern32Corr {
     /// d      h
     /// prod prod (1 + sqrt(3) * theta_l * |d_j . weight_j|) exp( - sqrt(3) * theta_l * |d_j . weight_j| )
     /// j=1    l=1
     fn value(
         &self,
         d: &ArrayBase<impl Data<Elem = F>, Ix2>,
         theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
         weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
     ) -> Array2<F> {
-        let (a, b) = self.compute_r_factors(d, theta, weights);
+        let (a, b) = self._compute_r_factors(d, theta, weights);
         let r = a * b;
         r.into_shape_with_order((d.nrows(), 1)).unwrap()
     }
@@ -270,19 +292,83 @@ impl<F: Float> CorrelationModel<F> for Matern32Corr {
     fn jacobian(
         &self,
         x: &ArrayBase<impl Data<Elem = F>, Ix1>,
         xtrain: &ArrayBase<impl Data<Elem = F>, Ix2>,
         theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
         weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
     ) -> Array2<F> {
-        let sqrt3 = F::cast(3.).sqrt();
         let d = differences(x, xtrain);
-        let (a, b) = self.compute_r_factors(&d, theta, weights);
+        let (a, b) = self._compute_r_factors(&d, theta, weights);
+        self._jac_helper(&d, &a, &b, xtrain, theta, weights)
+    }
+
+    fn valjac(
+        &self,
+        x: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        xtrain: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> (Array2<F>, Array2<F>) {
+        let d = differences(x, xtrain);
+        let (a, b) = self._compute_r_factors(&d, theta, weights);
+        let r = &a * &b;
+        let jr = self._jac_helper(&d, &a, &b, xtrain, theta, weights);
+        (r.into_shape_with_order((d.nrows(), 1)).unwrap(), jr)
+    }
+
+    fn theta_influence_factors(&self) -> (F, F) {
+        (F::cast(0.21), F::cast(2.74))
+    }
+}
+
+impl fmt::Display for Matern32Corr {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Matern32")
+    }
+}
+
+impl Matern32Corr {
+    fn _compute_r_factors<F: Float>(
+        &self,
+        d: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> (Array1<F>, Array1<F>) {
+        let sqrt3 = F::cast(3.).sqrt();
+        let theta_w = theta * weights.mapv(|v| v.abs());
+
+        let abs_d = d.mapv(|v| v.abs());
+        let d_theta_w = abs_d.dot(&theta_w);
+
+        let mut a = Array1::ones(d.nrows());
+        Zip::from(&mut a)
+            .and(abs_d.rows())
+            .for_each(|a_i, abs_d_i| {
+                Zip::from(abs_d_i)
+                    .and(theta_w.rows())
+                    .for_each(|abs_d_ij, theta_w_j| {
+                        *a_i *= theta_w_j
+                            .mapv(|v| F::one() + sqrt3 * v * *abs_d_ij)
+                            .product();
+                    });
+            });
+
+        let b = d_theta_w.sum_axis(Axis(1)).mapv(|v| F::exp(-sqrt3 * v));
+        (a, b)
+    }
+
+    fn _jac_helper<F: Float>(
+        &self,
+        d: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        a: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        b: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        xtrain: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> Array2<F> {
+        let sqrt3 = F::cast(3.).sqrt();
         let theta_w = weights.mapv(|v| v.abs()).dot(theta);
         let sign_d = d.mapv(|v| v.signum());
-
         let mut db = Array2::<F>::zeros((xtrain.nrows(), xtrain.ncols()));
         let abs_d = d.mapv(|v| v.abs());
-
         Zip::from(db.rows_mut())
-            .and(&a)
-            .and(&b)
+            .and(a)
+            .and(b)
             .and(sign_d.rows())
             .for_each(|mut db_i, ai, bi, si| {
                 Zip::from(&mut db_i)
@@ -292,7 +378,6 @@ impl<F: Float> CorrelationModel<F> for Matern32Corr {
                         *db_ij = -sqrt3 * *theta_wj * *sij * *bi * *ai;
                     });
             });
-
         let theta_w = theta * weights.mapv(|v| v.abs());
         let mut da = Array2::<F>::zeros((xtrain.nrows(), xtrain.ncols()));
         Zip::from(da.rows_mut())
@@ -319,23 +404,12 @@ impl<F: Float> CorrelationModel<F> for Matern32Corr {
                     });
                 });
             });
-
-        let da = einsum("i,ij->ij", &[&b, &da])
+        let da = einsum("i,ij->ij", &[b, &da])
             .unwrap()
             .into_shape_with_order((xtrain.nrows(), xtrain.ncols()))
             .unwrap();
         db + da
     }
-
-    fn theta_influence_factors(&self) -> (F, F) {
-        (F::cast(0.21), F::cast(2.74))
-    }
-}
-
-impl fmt::Display for Matern32Corr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "Matern32")
-    }
-}
 }
 
 /// Matern 5/2 correlation model
@@ -365,6 +439,60 @@ impl TryFrom<CorrelationSpec> for Matern52Corr {
     }
 }
 
+impl<F: Float> CorrelationModel<F> for Matern52Corr {
+    /// d      h
+    /// prod prod (1 + sqrt(5) * theta_l * |d_j . weight_j| + (5./3.) * theta_l^2 * |d_j . weight_j|^2) exp( - sqrt(5) * theta_l * |d_j . weight_j| )
+    /// j=1    l=1
+    fn value(
+        &self,
+        d: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> Array2<F> {
+        let (a, b) = self.compute_r_factors(d, theta, weights);
+        let r = a * b;
+        r.into_shape_with_order((d.nrows(), 1)).unwrap()
+    }
+
+    fn jacobian(
+        &self,
+        x: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        xtrain: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> Array2<F> {
+        let d = differences(x, xtrain);
+        let (a, b) = self.compute_r_factors(&d, theta, weights);
+
+        self._jac_helper(&d, &a, &b, xtrain, theta, weights)
+    }
+
+    fn valjac(
+        &self,
+        x: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        xtrain: &ArrayBase<impl Data<Elem = F>, Ix2>,
+        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
+    ) -> (Array2<F>, Array2<F>) {
+        let d = differences(x, xtrain);
+        let (a, b) = self.compute_r_factors(&d, theta, weights);
+        let r = &a * &b;
+        let jr = self._jac_helper(&d, &a, &b, xtrain, theta, weights);
+
+        (r.into_shape_with_order((d.nrows(), 1)).unwrap(), jr)
+    }
+
+    fn theta_influence_factors(&self) -> (F, F) {
+        (F::cast(0.23), F::cast(2.44))
+    }
+}
+
+impl fmt::Display for Matern52Corr {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Matern52")
+    }
+}
+
 impl Matern52Corr {
     fn compute_r_factors<F: Float>(
         &self,
@@ -393,26 +521,12 @@ impl Matern52Corr {
         let b = d_theta_w.sum_axis(Axis(1)).mapv(|v| F::exp(-sqrt5 * v));
         (a, b)
     }
-}
 
-impl<F: Float> CorrelationModel<F> for Matern52Corr {
-    /// d      h
-    /// prod prod (1 + sqrt(5) * theta_l * |d_j . weight_j| + (5./3.) * theta_l^2 * |d_j . weight_j|^2) exp( - sqrt(5) * theta_l * |d_j . weight_j| )
-    /// j=1    l=1
-    fn value(
+    fn _jac_helper<F: Float>(
         &self,
         d: &ArrayBase<impl Data<Elem = F>, Ix2>,
-        theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
-        weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
-    ) -> Array2<F> {
-        let (a, b) = self.compute_r_factors(d, theta, weights);
-        let r = a * b;
-        r.into_shape_with_order((d.nrows(), 1)).unwrap()
-    }
-
-    fn jacobian(
-        &self,
-        x: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        a: &ArrayBase<impl Data<Elem = F>, Ix1>,
+        b: &ArrayBase<impl Data<Elem = F>, Ix1>,
         xtrain: &ArrayBase<impl Data<Elem = F>, Ix2>,
         theta: &ArrayBase<impl Data<Elem = F>, Ix1>,
         weights: &ArrayBase<impl Data<Elem = F>, Ix2>,
@@ -420,18 +534,14 @@ impl Matern52Corr {
     ) -> Array2<F> {
         let sqrt5 = F::cast(5).sqrt();
         let div5_3 = F::cast(5. / 3.);
         let div10_3 = F::cast(2.) * div5_3;
-        let d = differences(x, xtrain);
-        let (a, b) = self.compute_r_factors(&d, theta, weights);
 
         let theta_w = weights.mapv(|v| v.abs()).dot(theta);
         let sign_d = d.mapv(|v| v.signum());
-
         let mut db = Array2::<F>::zeros((xtrain.nrows(), xtrain.ncols()));
         let abs_d = d.mapv(|v| v.abs());
-
         Zip::from(db.rows_mut())
-            .and(&a)
-            .and(&b)
+            .and(a)
+            .and(b)
             .and(sign_d.rows())
             .for_each(|mut db_i, ai, bi, si| {
                 Zip::from(&mut db_i)
@@ -441,7 +551,6 @@ impl Matern52Corr {
                         *db_ij = -sqrt5 * *theta_wj * *sij * *bi * *ai;
                     });
             });
-
         let theta_w = theta * weights.mapv(|v| v.abs());
         let mut da = Array2::<F>::zeros((xtrain.nrows(), xtrain.ncols()));
         Zip::from(da.rows_mut())
@@ -469,28 +578,18 @@ impl Matern52Corr {
                     },
                 );
             });
-        let da = einsum("i,ij->ij", &[&b, &da])
+        let da = einsum("i,ij->ij", &[b, &da])
             .unwrap()
             .into_shape_with_order((xtrain.nrows(), xtrain.ncols()))
             .unwrap();
         db + da
     }
-
-    fn theta_influence_factors(&self) -> (F, F) {
-        (F::cast(0.23), F::cast(2.44))
-    }
-}
-
-impl fmt::Display for Matern52Corr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "Matern52")
-    }
-}
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::utils::{DistanceMatrix, NormalizedData};
+    use crate::utils::{DiffMatrix, NormalizedData};
     use approx::assert_abs_diff_eq;
     use ndarray::{arr1, array};
     use paste::paste;
@@ -498,7 +597,7 @@ mod tests {
     #[test]
     fn test_squared_exponential() {
         let xt = array![[4.5], [1.2], [2.0], [3.0], [4.0]];
-        let dm = DistanceMatrix::new(&xt);
+        let dm = DiffMatrix::new(&xt);
         let res =
             SquaredExponentialCorr::default().value(&dm.d, &arr1(&[f64::sqrt(0.2)]), &array![[1.]]);
         let expected = array![
@@ -519,7 +618,7 @@ mod tests {
     #[test]
     fn test_squared_exponential_2d() {
         let xt = array![[0., 1.], [2., 3.], [4., 5.]];
-        let dm = DistanceMatrix::new(&xt);
+        let dm = DiffMatrix::new(&xt);
         dbg!(&dm);
         let res = SquaredExponentialCorr::default().value(
             &dm.d,
@@ -533,7 +632,7 @@ mod tests {
     #[test]
     fn test_matern32_2d() {
         let xt = array![[0., 1.], [2., 3.], [4., 5.]];
-        let dm = DistanceMatrix::new(&xt);
+        let dm = DiffMatrix::new(&xt);
         dbg!(&dm);
         let res =
             Matern32Corr::default().value(&dm.d, &arr1(&[1., 2.]), &array![[1., 0.], [0., 1.]]);
@@ -619,7 +718,7 @@ mod tests {
     #[test]
     fn test_matern52_2d() {
         let xt = array![[0., 1.], [2., 3.], [4., 5.]];
-        let dm = DistanceMatrix::new(&xt);
+        let dm = DiffMatrix::new(&xt);
         let res =
             Matern52Corr::default().value(&dm.d, &arr1(&[1., 2.]), &array![[1., 0.], [0., 1.]]);
         let expected = array![[6.62391590e-04], [1.02117882e-08], [6.62391590e-04]];
diff --git a/crates/gp/src/lib.rs b/crates/gp/src/lib.rs
index 505f898b..fe238bc9 100644
--- a/crates/gp/src/lib.rs
+++ b/crates/gp/src/lib.rs
@@ -30,3 +30,4 @@ pub use errors::*;
 pub use parameters::*;
 pub use sparse_algorithm::*;
 pub use sparse_parameters::*;
+pub use utils::DiffMatrix;
diff --git a/crates/gp/src/utils.rs b/crates/gp/src/utils.rs
index 94a240da..734a8f7f 100644
--- a/crates/gp/src/utils.rs
+++ b/crates/gp/src/utils.rs
@@ -53,49 +53,50 @@ pub fn normalize(
     (xnorm, x_mean, x_std)
 }
 
+/// A structure to retain absolute differences computation used to compute covariance matrix
 #[derive(Debug)]
-pub struct DistanceMatrix<F: Float> {
+pub struct DiffMatrix<F: Float> {
+    /// Differences as a ((n_obs * (n_obs - 1)) / 2, nx) array
     pub d: Array2<F>,
+    /// Indices of the differences in the original data array
     pub d_indices: Array2<usize>,
+    /// Number of observations
     pub n_obs: usize,
 }
 
-impl<F: Float> DistanceMatrix<F> {
-    pub fn new(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> DistanceMatrix<F> {
-        let (d, d_indices) = Self::_cross_distances(x);
+impl<F: Float> DiffMatrix<F> {
+    /// Compute differences between pairs of points given as a (n_obs, nx) array
+    pub fn new(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> DiffMatrix<F> {
+        let (d, d_indices) = Self::_cross_diff(x);
         let n_obs = x.nrows();
-        DistanceMatrix {
-            d: d.to_owned(),
-            d_indices: d_indices.to_owned(),
+        DiffMatrix {
+            d,
+            d_indices,
             n_obs,
         }
     }
 
-    fn _cross_distances(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> (Array2<F>, Array2<usize>) {
+    fn _cross_diff(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> (Array2<F>, Array2<usize>) {
         let n_obs = x.nrows();
-        let n_features = x.ncols();
+        let nx = x.ncols();
         let n_non_zero_cross_dist = n_obs * (n_obs - 1) / 2;
         let mut indices = Array2::<usize>::zeros((n_non_zero_cross_dist, 2));
-        let mut d = Array2::zeros((n_non_zero_cross_dist, n_features));
-        let mut ll_1 = 0;
+        let mut d = Array2::zeros((n_non_zero_cross_dist, nx));
+        let mut idx = 0;
         for k in 0..(n_obs - 1) {
-            let ll_0 = ll_1;
-            ll_1 = ll_0 + n_obs - k - 1;
-            indices
-                .slice_mut(s![ll_0..ll_1, 0..1])
-                .assign(&Array2::<usize>::from_elem((n_obs - k - 1, 1), k));
-            let init_values = ((k + 1)..n_obs).collect();
-            indices
-                .slice_mut(s![ll_0..ll_1, 1..2])
-                .assign(&Array2::from_shape_vec((n_obs - k - 1, 1), init_values).unwrap());
-
-            let diff = &x
-                .slice(s![k..(k + 1), ..])
-                .broadcast((n_obs - k - 1, n_features))
-                .unwrap()
-                - &x.slice(s![k + 1..n_obs, ..]);
-            d.slice_mut(s![ll_0..ll_1, ..]).assign(&diff);
+            let idx0 = idx;
+            let offset = n_obs - k - 1;
+            idx = idx0 + offset;
+
+            for i in (k + 1)..n_obs {
+                let r = idx0 + i - k - 1;
+                indices[[r, 0]] = k;
+                indices[[r, 1]] = i;
+            }
+
+            let diff = &x.slice(s![k, ..]) - &x.slice(s![k + 1..n_obs, ..]);
+            d.slice_mut(s![idx0..idx, ..]).assign(&diff);
         }
 
         d = d.mapv(|v| v.abs());
@@ -207,7 +208,7 @@ mod tests {
     }
 
     #[test]
-    fn test_cross_distance_matrix() {
+    fn test_diff_matrix() {
        let xt = array![[0.5], [1.2], [2.0], [3.0], [4.0]];
         let expected = (
             array![
@@ -235,7 +236,7 @@ mod tests {
                 [3, 4]
             ],
         );
-        let dm = DistanceMatrix::new(&xt);
+        let dm = DiffMatrix::new(&xt);
         assert_eq!(expected.0, dm.d);
         assert_eq!(expected.1, dm.d_indices);
     }
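// --- Editor's illustration (not part of the patch) ---------------------------
// What DiffMatrix computes, on a tiny example: `d` stacks the absolute pairwise
// differences |x_k - x_i| for all pairs k < i, and `d_indices` records each pair.
use egobox_gp::DiffMatrix;
use ndarray::array;

fn diff_matrix_demo() {
    let x = array![[1.0], [3.0], [6.0]];
    let dm = DiffMatrix::new(&x);
    // 3 points -> 3 * 2 / 2 = 3 pairwise differences
    assert_eq!(dm.d, array![[2.0], [5.0], [3.0]]); // |1-3|, |1-6|, |3-6|
    assert_eq!(dm.d_indices, array![[0, 1], [0, 2], [1, 2]]);
    assert_eq!(dm.n_obs, 3);
}
// -----------------------------------------------------------------------------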
diff --git a/crates/moe/src/algorithm.rs b/crates/moe/src/algorithm.rs
index c8881a39..6356238f 100644
--- a/crates/moe/src/algorithm.rs
+++ b/crates/moe/src/algorithm.rs
@@ -499,6 +499,13 @@ impl GpSurrogate for GpMixture {
         }
     }
 
+    fn predict_valvar(&self, x: &ArrayView2<f64>) -> Result<(Array1<f64>, Array1<f64>)> {
+        match self.recombination {
+            Recombination::Hard => self.predict_valvar_hard(x),
+            Recombination::Smooth(_) => self.predict_valvar_smooth(x),
+        }
+    }
+
     /// Save Moe model in given file.
     #[cfg(feature = "persistent")]
     fn save(&self, path: &str, format: GpFileFormat) -> Result<()> {
@@ -533,6 +540,13 @@ impl GpSurrogateExt for GpMixture {
         }
     }
 
+    fn predict_valvar_gradients(&self, x: &ArrayView2<f64>) -> Result<(Array2<f64>, Array2<f64>)> {
+        match self.recombination {
+            Recombination::Hard => self.predict_valvar_gradients_hard(x),
+            Recombination::Smooth(_) => self.predict_valvar_gradients_smooth(x),
+        }
+    }
+
     fn sample(&self, x: &ArrayView2<f64>, n_traj: usize) -> Result<Array2<f64>> {
         if self.n_clusters() != 1 {
             return Err(MoeError::SampleError(format!(
@@ -666,7 +680,7 @@ impl GpMixture {
                 let p = probas.column(i);
                 gp.predict_var(&x.view()).unwrap() * p * p
             })
-            .fold(Array1::zeros(x.nrows()), |acc, pred| acc + pred);
+            .fold(Array1::zeros(x.nrows()), |acc, var| acc + var);
         Ok(preds)
     }
 
@@ -768,6 +782,96 @@ impl GpMixture {
         Ok(drv)
     }
 
+    /// Predict outputs and variances at a set of points `x` specified as (n, nx) matrix.
+    /// Gaussian Mixture is used to get the probability of the point to belong to one cluster
+    /// or another (i.e. responsibilities).
+    /// The smooth recombination of each cluster expert responsibility is used to get the result.
+    pub fn predict_valvar_smooth(
+        &self,
+        x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
+    ) -> Result<(Array1<f64>, Array1<f64>)> {
+        let probas = self.gmx.predict_probas(x);
+        let valvar: (Array1<f64>, Array1<f64>) = self
+            .experts
+            .iter()
+            .enumerate()
+            .map(|(i, gp)| {
+                let p = probas.column(i);
+                let (pred, var) = gp.predict_valvar(&x.view()).unwrap();
+                (pred * p, var * p * p)
+            })
+            .fold(
+                (Array1::zeros((x.nrows(),)), Array1::zeros((x.nrows(),))),
+                |acc, (pred, var)| (acc.0 + pred, acc.1 + var),
+            );
+
+        Ok(valvar)
+    }
+
+    fn predict_valvar_gradients_smooth(
+        &self,
+        x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
+    ) -> Result<(Array2<f64>, Array2<f64>)> {
+        let probas = self.gmx.predict_probas(x);
+        let probas_drv = self.gmx.predict_probas_derivatives(x);
+
+        let mut val_drv = Array2::<f64>::zeros((x.nrows(), x.ncols()));
+        let mut var_drv = Array2::<f64>::zeros((x.nrows(), x.ncols()));
+
+        Zip::from(val_drv.rows_mut())
+            .and(var_drv.rows_mut())
+            .and(x.rows())
+            .and(probas.rows())
+            .and(probas_drv.outer_iter())
+            .for_each(|mut val_y, mut var_y, xi, p, pprime| {
+                let xii = xi.insert_axis(Axis(0));
+                let (preds, vars): (Vec<f64>, Vec<f64>) = self
+                    .experts
+                    .iter()
+                    .map(|gp| {
+                        let (pred, var) = gp.predict_valvar(&xii).unwrap();
+                        (pred[0], var[0])
+                    })
+                    .unzip();
+                let preds: Array2<f64> = Array1::from(preds).insert_axis(Axis(1));
+                let vars: Array2<f64> = Array1::from(vars).insert_axis(Axis(1));
+                let (drvs, var_drvs): (Vec<Array1<f64>>, Vec<Array1<f64>>) = self
+                    .experts
+                    .iter()
+                    .map(|gp| {
+                        let (predg, varg) = gp.predict_valvar_gradients(&xii).unwrap();
+                        (predg.row(0).to_owned(), varg.row(0).to_owned())
+                    })
+                    .unzip();
+
+                let mut preds_drv = Array2::zeros((self.experts.len(), xi.len()));
+                let mut vars_drv = Array2::zeros((self.experts.len(), xi.len()));
+                Zip::indexed(preds_drv.rows_mut()).for_each(|i, mut jc| jc.assign(&drvs[i]));
+                Zip::indexed(vars_drv.rows_mut()).for_each(|i, mut jc| jc.assign(&var_drvs[i]));
+
+                let mut val_term1 = Array2::zeros((self.experts.len(), xi.len()));
+                Zip::from(val_term1.rows_mut())
+                    .and(&p)
+                    .and(preds_drv.rows())
+                    .for_each(|mut t, p, der| t.assign(&(der.to_owned().mapv(|v| v * p))));
+                let val_term1 = val_term1.sum_axis(Axis(0));
+                let val_term2 = pprime.to_owned() * preds;
+                let val_term2 = val_term2.sum_axis(Axis(0));
+                val_y.assign(&(val_term1 + val_term2));
+
+                let mut var_term1 = Array2::zeros((self.experts.len(), xi.len()));
+                Zip::from(var_term1.rows_mut())
+                    .and(&p)
+                    .and(vars_drv.rows())
+                    .for_each(|mut t, p, der| t.assign(&(der.to_owned().mapv(|v| v * p * p))));
+                let var_term1 = var_term1.sum_axis(Axis(0));
+                let var_term2 = (p.to_owned() * pprime * vars).mapv(|v| 2. * v);
+                let var_term2 = var_term2.sum_axis(Axis(0));
+                var_y.assign(&(var_term1 + var_term2));
+            });
+        Ok((val_drv, var_drv))
+    }
+
     /// Predict outputs at a set of points `x` specified as (n, nx) matrix.
     /// Gaussian Mixture is used to get the cluster where the point belongs (highest responsability)
     /// Then the expert of the cluster is used to predict the output value.
@@ -805,6 +909,31 @@ impl GpMixture {
         Ok(variances)
     }
 
+    /// Predict outputs and variances at a set of points `x` specified as (n, nx) matrix.
+    /// Gaussian Mixture is used to get the cluster where the point belongs (highest responsibility).
+    /// The expert of the cluster is used to predict both the output and variance values.
+    pub fn predict_valvar_hard(
+        &self,
+        x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
+    ) -> Result<(Array1<f64>, Array1<f64>)> {
+        let clustering = self.gmx.predict(x);
+        trace!("Clustering {clustering:?}");
+        let mut preds = Array1::zeros((x.nrows(),));
+        let mut variances = Array1::zeros(x.nrows());
+        Zip::from(&mut preds)
+            .and(&mut variances)
+            .and(x.rows())
+            .and(&clustering)
+            .for_each(|y, v, x, &c| {
+                let (pred, var) = self.experts[c]
+                    .predict_valvar(&x.insert_axis(Axis(0)))
+                    .unwrap();
+                *y = pred[0];
+                *v = var[0];
+            });
+        Ok((preds, variances))
+    }
+
     /// Predict derivatives of the output at a set of points `x` specified as (n, nx) matrix.
     /// Gaussian Mixture is used to get the cluster where the point belongs (highest responsability)
     /// The expert of the cluster is used to predict variance value.
@@ -851,6 +980,30 @@ impl GpMixture {
         Ok(vardrv)
     }
 
+    /// Predict derivatives of the outputs and variances at a set of points `x` specified as (n, nx) matrix.
+    /// Gaussian Mixture is used to get the cluster where the point belongs (highest responsibility).
+    /// The expert of the cluster is used to predict both the output and variance derivatives.
+    pub fn predict_valvar_gradients_hard(
+        &self,
+        x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
+    ) -> Result<(Array2<f64>, Array2<f64>)> {
+        let mut val_drv = Array2::<f64>::zeros((x.nrows(), x.ncols()));
+        let mut var_drv = Array2::<f64>::zeros((x.nrows(), x.ncols()));
+        let clustering = self.gmx.predict(x);
+        Zip::from(val_drv.rows_mut())
+            .and(var_drv.rows_mut())
+            .and(x.rows())
+            .and(&clustering)
+            .for_each(|mut val_y, mut var_y, xi, &c| {
+                let x = xi.to_owned().insert_axis(Axis(0));
+                let (x_val_drv, x_var_drv) =
+                    self.experts[c].predict_valvar_gradients(&x.view()).unwrap();
+                val_y.assign(&x_val_drv.row(0));
+                var_y.assign(&x_var_drv.row(0));
+            });
+        Ok((val_drv, var_drv))
+    }
+
     /// Sample `n_traj` trajectories at a set of points `x` specified as (n, nx) matrix.
     /// using the expert `ith` of the mixture.
     /// Returns the samples as a (n, n_traj) matrix where the ith row
@@ -873,6 +1026,14 @@ impl GpMixture {
         <GpMixture as GpSurrogate>::predict_var(self, &x.view())
     }
 
+    /// Predict outputs and variances at a set of points `x` specified as (n, nx) matrix.
+    pub fn predict_valvar(
+        &self,
+        x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
+    ) -> Result<(Array1<f64>, Array1<f64>)> {
+        <GpMixture as GpSurrogate>::predict_valvar(self, &x.view())
+    }
+
     /// Predict derivatives of the output at a set of points `x` specified as (n, nx) matrix.
     pub fn predict_gradients(
         &self,
@@ -889,6 +1050,14 @@ impl GpMixture {
         <GpMixture as GpSurrogateExt>::predict_var_gradients(self, &x.view())
     }
 
+    /// Predict derivatives of the outputs and variances at a set of points `x` specified as (n, nx) matrix.
+    pub fn predict_valvar_gradients(
+        &self,
+        x: &ArrayBase<impl Data<Elem = f64>, Ix2>,
+    ) -> Result<(Array2<f64>, Array2<f64>)> {
+        <GpMixture as GpSurrogateExt>::predict_valvar_gradients(self, &x.view())
+    }
+
     /// Sample `n_traj` trajectories at a set of points `x` specified as (n, nx) matrix.
     /// Returns the samples as a (n, n_traj) matrix where the ith row
     /// contain the samples of the output at the ith point.
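// --- Editor's illustration (not part of the patch) ---------------------------
// Caller-side view of the new paired predictions; `xt`/`yt` are assumed
// training data, and the builder calls mirror the test below.
use egobox_moe::{GpMixture, Recombination};
use linfa::{Dataset, traits::Fit};
use ndarray::{Array1, Array2, array};

fn valvar_demo(xt: Array2<f64>, yt: Array1<f64>) {
    let moe = GpMixture::params()
        .recombination(Recombination::Smooth(None))
        .fit(&Dataset::new(xt, yt))
        .expect("MOE fitted");
    let x = array![[0.5, 0.5]];
    // One call instead of predict(&x) + predict_var(&x)
    let (pred, var) = moe.predict_valvar(&x).unwrap();
    // One call instead of predict_gradients(&x) + predict_var_gradients(&x)
    let (dpred, dvar) = moe.predict_valvar_gradients(&x).unwrap();
    println!("y={pred} s2={var} dy={dpred} ds2={dvar}");
}
// -----------------------------------------------------------------------------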
@@ -1333,6 +1502,66 @@ mod tests {
         }
     }
 
+    /// Test prediction valvar derivatives against derivatives and variance derivatives
+    #[test]
+    fn test_valvar_predictions() {
+        let rng = Xoshiro256Plus::seed_from_u64(0);
+        let xt = egobox_doe::FullFactorial::new(&array![[-1., 1.], [-1., 1.]]).sample(100);
+        let yt = rosenb(&xt).remove_axis(Axis(1));
+
+        for corr in [
+            CorrelationSpec::SQUAREDEXPONENTIAL,
+            CorrelationSpec::MATERN32,
+            CorrelationSpec::MATERN52,
+        ] {
+            println!("Test valvar derivatives with correlation {corr:?}");
+            for recomb in [
+                Recombination::Hard,
+                Recombination::Smooth(Some(0.5)),
+                Recombination::Smooth(None),
+            ] {
+                println!("Testing valvar derivatives with recomb={recomb:?}");
+
+                let moe = GpMixture::params()
+                    .n_clusters(NbClusters::fixed(2))
+                    .regression_spec(RegressionSpec::CONSTANT)
+                    .correlation_spec(corr)
+                    .recombination(recomb)
+                    .with_rng(rng.clone())
+                    .fit(&Dataset::new(xt.to_owned(), yt.to_owned()))
+                    .expect("MOE fitted");
+
+                let mut rng = Xoshiro256Plus::seed_from_u64(42);
+                for _ in 0..10 {
+                    let x = Array::random_using((2,), Uniform::new(0., 1.), &mut rng);
+                    let xa: f64 = x[0];
+                    let xb: f64 = x[1];
+                    let e = 1e-4;
+
+                    let x = array![
+                        [xa, xb],
+                        [xa + e, xb],
+                        [xa - e, xb],
+                        [xa, xb + e],
+                        [xa, xb - e]
+                    ];
+                    let (y_pred, v_pred) = moe.predict_valvar(&x).unwrap();
+                    let (y_deriv, v_deriv) = moe.predict_valvar_gradients(&x).unwrap();
+
+                    let pred = moe.predict(&x).unwrap();
+                    let var = moe.predict_var(&x).unwrap();
+                    assert_abs_diff_eq!(y_pred, pred, epsilon = 1e-12);
+                    assert_abs_diff_eq!(v_pred, var, epsilon = 1e-12);
+
+                    let deriv = moe.predict_gradients(&x).unwrap();
+                    let vardrv = moe.predict_var_gradients(&x).unwrap();
+                    assert_abs_diff_eq!(y_deriv, deriv, epsilon = 1e-12);
+                    assert_abs_diff_eq!(v_deriv, vardrv, epsilon = 1e-12);
+                }
+            }
+        }
+    }
+
     fn assert_rel_or_abs_error(y_deriv: f64, fdiff: f64) {
         println!("analytic deriv = {y_deriv}, fdiff = {fdiff}");
         if fdiff.abs() < 1e-2 {
diff --git a/crates/moe/src/metrics.rs b/crates/moe/src/metrics.rs
index 1d1467de..b36398b1 100644
--- a/crates/moe/src/metrics.rs
+++ b/crates/moe/src/metrics.rs
@@ -67,8 +67,7 @@ where
         let model: O = params
             .fit(&train)
             .expect("cross-validation: sub model fitted");
-        let pred = model.predict(&valid.records().view()).unwrap();
-        let var = model.predict_var(&valid.records().view()).unwrap();
+        let (pred, var) = model.predict_valvar(&valid.records().view()).unwrap();
         varss += ((valid.targets() - &pred).mapv(|v| v * v) / var).sum();
         n += valid.nsamples();
     }
@@ -152,8 +151,8 @@ fn iae_alpha(
     let x = valid.records();
     let y = valid.targets();
 
-    let pred = model.predict(&x.view()).unwrap();
-    let sigma = model.predict_var(&x.view()).unwrap().sqrt();
+    let (pred, var) = model.predict_valvar(&x.view()).unwrap();
+    let sigma = var.sqrt();
 
     let n_test = x.nrows();
     let n_alpha = alphas.len();
diff --git a/crates/moe/src/surrogates.rs b/crates/moe/src/surrogates.rs
index 3398a9eb..7b963a3c 100644
--- a/crates/moe/src/surrogates.rs
+++ b/crates/moe/src/surrogates.rs
@@ -56,6 +56,8 @@ pub trait GpSurrogate: std::fmt::Display + Sync + Send {
     fn predict(&self, x: &ArrayView2<f64>) -> Result<Array1<f64>>;
     /// Predict variance values at n points given as (n, xdim) matrix.
     fn predict_var(&self, x: &ArrayView2<f64>) -> Result<Array1<f64>>;
+    /// Predict both output values and variances at n given `x` points of nx components
+    fn predict_valvar(&self, x: &ArrayView2<f64>) -> Result<(Array1<f64>, Array1<f64>)>;
     /// Save model in given file.
#[cfg(feature = "persistent")] fn save(&self, path: &str, format: GpFileFormat) -> Result<()>; @@ -70,6 +72,10 @@ pub trait GpSurrogateExt { /// Predict derivatives of the variance at n points and return (n, xdim) matrix /// where each column is the partial derivatives wrt the ith component fn predict_var_gradients(&self, x: &ArrayView2) -> Result>; + /// Predict both output values and derivatives of the variance at n points + /// return ((n, ), (n, xdim)) where first array is the predicted values + /// and second array is the derivatives of the variance wrt each input dimension + fn predict_valvar_gradients(&self, x: &ArrayView2) -> Result<(Array2, Array2)>; /// Sample trajectories fn sample(&self, x: &ArrayView2, n_traj: usize) -> Result>; } @@ -167,6 +173,9 @@ macro_rules! declare_surrogate { fn predict_var(&self, x: &ArrayView2) -> Result> { Ok(self.0.predict_var(x)?) } + fn predict_valvar(&self, x: &ArrayView2) -> Result<(Array1, Array1)> { + Ok(self.0.predict_valvar(x)?) + } #[cfg(feature = "persistent")] fn save(&self, path: &str, format: GpFileFormat) -> Result<()> { @@ -193,6 +202,9 @@ macro_rules! declare_surrogate { fn predict_var_gradients(&self, x: &ArrayView2) -> Result> { Ok(self.0.predict_var_gradients(x)) } + fn predict_valvar_gradients(&self, x: &ArrayView2) -> Result<(Array2, Array2)> { + Ok(self.0.predict_valvar_gradients(x)) + } fn sample(&self, x: &ArrayView2, n_traj: usize) -> Result> { Ok(self.0.sample(x, n_traj)) } @@ -329,6 +341,9 @@ macro_rules! declare_sgp_surrogate { fn predict_var(&self, x: &ArrayView2) -> Result> { Ok(self.0.predict_var(x)?) } + fn predict_valvar(&self, x: &ArrayView2) -> Result<(Array1, Array1)> { + Ok((self.0.predict(x)?, self.0.predict_var(x)?)) + } #[cfg(feature = "persistent")] fn save(&self, path: &str, format: GpFileFormat) -> Result<()> { @@ -353,6 +368,9 @@ macro_rules! declare_sgp_surrogate { fn predict_var_gradients(&self, x: &ArrayView2) -> Result> { Ok(self.0.predict_var_gradients(x)) } + fn predict_valvar_gradients(&self, x: &ArrayView2) -> Result<(Array2, Array2)> { + Ok((self.0.predict_gradients(x), self.0.predict_var_gradients(x))) + } fn sample(&self, x: &ArrayView2, n_traj: usize) -> Result> { Ok(self.0.sample(x, n_traj)) }