diff --git a/src/scalar/golden_ratio.rs b/src/scalar/golden_ratio.rs
index 3d13673..134f3ad 100644
--- a/src/scalar/golden_ratio.rs
+++ b/src/scalar/golden_ratio.rs
@@ -1,19 +1,19 @@
 //! Golden ratio is to minimization what bisection is to root finding.
-//! GoldenRatio searches within an interval for a local minimum. At 
-//! every iteration, the interval is decreased in size by a constant 
+//! GoldenRatio searches within an interval for a local minimum. At
+//! every iteration, the interval is decreased in size by a constant
 //! factor, until the desired precision is obtained.
-//! 
-//! This algorithm is guaranteed to converge on a local minimum in a 
+//!
+//! This algorithm is guaranteed to converge on a local minimum in a
 //! finite amount of steps under very light smoothness criteria for the
 //! target function.
-//! 
+//!
 //! In an iteration, the target function is calculated at 4 points:
 //!        +---------+----+---------+
 //! iter 1 a         b    c         d
 //! The interval for the next iteration is chosen to be [a,c] if b
     /// Bigger is more precise.
     #[builder(default = "1000")]
@@ -42,26 +42,30 @@ pub struct GoldenRatio {
 const RATIO: f64 = 2.618033988749895; // 1.5 + 0.5*5f64.sqrt();
 
 impl GoldenRatio {
-    /// Search for the minimum of `func` around `x0`.
     /// The search region is expanded in both directions until an expansion
     /// leads to an increase in the function value at the bound of the region.
-    /// 
-    /// Currently `minimize` makes, in some specific cases, at most around 2000 
+    ///
+    /// Currently `minimize` makes, in some specific cases, at most around 2000
     /// additional calls to `func` to find a bracketing interval. For more details
    /// see https://github.com/to266/optimize/issues/11
-    pub fn minimize<F>(&self, func: F, x0: f64) -> f64
-    where F: Fn(f64) -> f64 {
-        let left = x0 + self.explore(&func, x0, true);
-        let right = x0 + self.explore(&func, x0, false);
+    pub fn minimize<F>(&self, mut func: F, x0: f64) -> f64
+    where
+        F: FnMut(f64) -> f64,
+    {
+        let left = x0 + self.explore(&mut func, x0, true);
+        let right = x0 + self.explore(&mut func, x0, false);
         self.minimize_bracket(func, left, right)
     }
 
     /// increase step until func stops decreasing, then return step
-    fn explore<F>(&self, func: F, x0: f64, explore_left: bool) -> f64
-    where F: Fn(f64) -> f64 {
-        let mut step = if explore_left {-1.} else {1.} *
-            f64::powi(2., f64::log2(f64::EPSILON + x0.abs()) as i32) * f64::EPSILON;
+    fn explore<F>(&self, mut func: F, x0: f64, explore_left: bool) -> f64
+    where
+        F: FnMut(f64) -> f64,
+    {
+        let mut step = if explore_left { -1. } else { 1. }
+            * f64::powi(2., f64::log2(f64::EPSILON + x0.abs()) as i32)
+            * f64::EPSILON;
 
         let mut fprev = func(x0);
         let mut fstepped = func(x0 + step);
@@ -70,13 +74,15 @@ impl GoldenRatio {
             step *= 2.0;
             fprev = fstepped;
             fstepped = func(x0 + step);
-        } 
+        }
         step
     }
 
     /// Search for the minimum of `func` between `left` and `right`.
-    pub fn minimize_bracket<F>(&self, func: F, left: f64, right: f64) -> f64
-    where F: Fn(f64) -> f64 {
+    pub fn minimize_bracket<F>(&self, mut func: F, left: f64, right: f64) -> f64
+    where
+        F: FnMut(f64) -> f64,
+    {
         let mut min = left.max(f64::MIN);
         let mut max = right.min(f64::MAX);
         let mut iter = 0;
@@ -86,7 +92,7 @@ impl GoldenRatio {
         let mut f_b = func(x_b);
         let mut f_c = func(x_c);
 
-        while (max-min).abs() > self.xtol && iter < self.max_iter {
+        while (max - min).abs() > self.xtol && iter < self.max_iter {
             iter += 1;
             if f_b < f_c {
                 max = x_c;
@@ -108,7 +114,6 @@ impl GoldenRatio {
     }
 }
 
-
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -118,12 +123,13 @@ mod tests {
         let minimizer = GoldenRatioBuilder::default()
             .xtol(1e-7)
             .max_iter(1000)
-            .build().unwrap();
-        let f = |x: f64| (x-0.2).powi(2);
+            .build()
+            .unwrap();
+        let f = |x: f64| (x - 0.2).powi(2);
 
         let res = minimizer.minimize_bracket(&f, -1.0, 1.0);
         println!("res: {}", res);
-        assert!( (res - 0.2).abs() <= 1e-7);
+        assert!((res - 0.2).abs() <= 1e-7);
     }
 
     #[test]
@@ -131,12 +137,13 @@ mod tests {
         let minimizer = GoldenRatioBuilder::default()
             .xtol(1e-7)
             .max_iter(1000)
-            .build().unwrap();
-        let f = |x: f64| (x-0.2).powi(2);
+            .build()
+            .unwrap();
+        let f = |x: f64| (x - 0.2).powi(2);
 
         let res = minimizer.minimize(&f, 10.0);
         println!("res: {}", res);
-        assert!( (res - 0.2).abs() <= 1e-7);
+        assert!((res - 0.2).abs() <= 1e-7);
     }
 
     #[test]
@@ -144,11 +151,12 @@ mod tests {
         let minimizer = GoldenRatioBuilder::default()
             .xtol(1e-7)
             .max_iter(1000)
-            .build().unwrap();
+            .build()
+            .unwrap();
         let f = |x: f64| x;
 
         let res = minimizer.minimize(&f, 10.0);
         println!("res: {}", res);
-        assert!( res.is_infinite() );
+        assert!(res.is_infinite());
     }
-}
\ No newline at end of file
+}
diff --git a/src/vector/l_bfgs.rs b/src/vector/l_bfgs.rs
new file mode 100644
index 0000000..71adebf
--- /dev/null
+++ b/src/vector/l_bfgs.rs
@@ -0,0 +1,233 @@
+use ndarray::prelude::*;
+use std::collections::VecDeque;
+
+/// Limited-memory BFGS Quasi-Newton optimizer.
+///
+/// This implementation uses the two-loop recursion to
+/// calculate the quasi-inverse-hessian, as formulated in
+///
+/// Jorge Nocedal. Updating Quasi-Newton Matrices With Limited Storage.
+/// MATHEMATICS OF COMPUTATION, VOLUME 35, NUMBER 151 JULY 1980, PAGES 773-782
+///
+/// This optimizer is known to be particularly suited for optimizing
+/// high-dimensional convex functions.
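+///
+/// Instead of a dense inverse hessian, only the `m` most recent update pairs
+/// `s_k = x_{k+1} - x_k` and `y_k = g_{k+1} - g_k` (together with the scalar
+/// `rho_k = 1 / y_k.dot(s_k)`) are stored, and the direction `H * g` is
+/// rebuilt from them at every iteration by the two-loop recursion.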
+///
+/// # Examples
+///
+/// ### With an exact gradient
+///
+/// ```
+/// # extern crate ndarray;
+/// use ndarray::prelude::*;
+///
+/// # extern crate optimize;
+/// use optimize::vector::LBFGSBuilder;
+///
+/// # fn main() {
+/// // Use the builder pattern to create a minimizer object
+/// // with default values for the parameters
+/// let min = LBFGSBuilder::default().build().unwrap();
+///
+/// // The function that is to be minimized, and an exact gradient function
+/// let center = arr1(&[0.9, 1.3, 0.5]);
+/// let f = |x: ArrayView1<f64>| (&x - &center).mapv(|xi| xi.powi(4)).scalar_sum();
+/// let g = |x: ArrayView1<f64>| 4.0 * (&x - &center).mapv(|xi| xi.powi(3));
+/// let x0 = Array1::ones(center.len());
+///
+/// // do the actual minimization
+/// let xmin = min.minimize(&f, &g, x0.view());
+///
+/// println!("{:?}", xmin);
+/// assert!(xmin.all_close(&center, 1e-4))
+/// # }
+/// ```
+///
+/// ### Or with a finite-difference approximation
+///
+/// ```
+/// # extern crate ndarray;
+/// # use ndarray::prelude::*;
+/// # extern crate optimize;
+/// # use optimize::vector::LBFGSBuilder;
+///
+/// # fn main() {
+/// // Use the builder pattern to create a minimizer object
+/// // with default values for the parameters
+/// let min = LBFGSBuilder::default().build().unwrap();
+///
+/// // The function that is to be minimized
+/// let center = arr1(&[0.9, 1.3, 0.5]);
+/// let f = |x: ArrayView1<f64>| (&x - &center).mapv(|xi| xi.powi(4)).scalar_sum();
+/// let x0 = Array1::ones(center.len());
+///
+/// // do the actual minimization
+/// let xmin = min.minimize_approx_grad(&f, x0.view());
+///
+/// println!("{:?}", xmin);
+/// assert!(xmin.all_close(&center, 1e-4))
+/// # }
+/// ```
+#[derive(Builder, Debug)]
+pub struct LBFGS {
+    /// The tolerance on the gradient used to terminate the optimization.
+    /// Smaller is more precise.
+    #[builder(default = "1e-12")]
+    pub gtol: f64,
+
+    /// The maximum number of iterations. The gradient is evaluated once per iteration.
+    /// Larger is more precise.
+    #[builder(default = "1500")]
+    pub max_iter: usize,
+
+    /// The number of datapoints to use to estimate the inverse hessian.
+    /// Larger is more precise. If `m` is larger than `x0.len()`, then
+    /// `x0.len()` is used.
+    #[builder(default = "5")]
+    pub m: usize,
+
+    /// The maximum step to be taken in the direction determined by the Quasi-Newton
+    /// method.
+    #[builder(default = "2.0")]
+    pub max_step: f64,
+
+    /// The tolerance on x used to terminate the line search.
+    /// Smaller is more precise.
+    #[builder(default = "1e-8")]
+    pub xtol: f64,
+
+    /// The maximum number of function evaluations.
+    #[builder(default = "1500")]
+    pub max_feval: usize,
+}
+
+impl LBFGS {
+    /// Minimize `func` starting in `x0` using a finite difference approximation
+    /// for the gradient. For more control over the approximation of the gradient,
+    /// you can use `minimize` with your own approximation.
+    pub fn minimize_approx_grad<F>(&self, func: F, x0: ArrayView1<f64>) -> Array1<f64>
+    where
+        F: Fn(ArrayView1<f64>) -> f64,
+    {
+        let eps = Array1::ones(x0.len()) * 1e-9;
+        let eps_view = eps.view();
+        let grad = |x: ArrayView1<f64>| ::utils::approx_fprime(x, &func, eps_view);
+        self.minimize(&func, &grad, x0)
+    }
+
+    /// Minimize `func` starting in `x0` using `grad` as the gradient of `func`.
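+    ///
+    /// Iteration stops when the L1 norm of the gradient falls below `gtol`, when
+    /// `max_iter` iterations or `max_feval` function evaluations have been spent,
+    /// or when the curvature estimate `1 / s.dot(y)` becomes NaN.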
+    pub fn minimize<F, G>(&self, func: F, grad: G, x0: ArrayView1<f64>) -> Array1<f64>
+    where
+        F: Fn(ArrayView1<f64>) -> f64,
+        G: Fn(ArrayView1<f64>) -> Array1<f64>,
+    {
+        let mut iter = 0;
+        let mut feval_count = 0;
+        let m = x0.len().min(self.m).max(1);
+
+        let mut hist = VecDeque::with_capacity(m);
+
+        let mut x = x0.to_owned();
+        let mut g = grad(x.view());
+
+        loop {
+            let dir = self.quasi_update(&g, &hist);
+            let a = {
+                let min = ::scalar::GoldenRatioBuilder::default()
+                    .xtol(self.xtol)
+                    .build()
+                    .unwrap();
+                let f = |a: f64| {
+                    feval_count += 1;
+                    func((&x + &(a * &dir)).view())
+                };
+                min.minimize_bracket(f, -self.max_step, 0.0)
+            };
+            let x_new = &x + &(a * &dir);
+            let g_new = grad(x_new.view());
+
+            let s = &x_new - &x;
+            let y = &g_new - &g;
+            let r = 1f64 / s.dot(&y);
+
+            iter += 1;
+
+            if r.is_nan()
+                || iter > self.max_iter
+                || feval_count > self.max_feval
+                || g_new.mapv(f64::abs).scalar_sum() < self.gtol
+            {
+                break;
+            }
+
+            while hist.len() >= m {
+                hist.pop_front();
+            }
+            hist.push_back((s, y, r));
+
+            x = x_new;
+            g = g_new;
+        }
+
+        x
+    }
+
+    /// Calculate the Quasi-Newton direction H*g efficiently, where g
+    /// is the gradient and H is the *inverse* hessian.
+    #[inline]
+    fn quasi_update(
+        &self,
+        grad: &Array1<f64>,
+        hist: &VecDeque<(Array1<f64>, Array1<f64>, f64)>,
+    ) -> Array1<f64> {
+        let mut q = grad.to_owned();
+        let mut a = Vec::with_capacity(hist.len());
+
+        for (si, yi, ri) in hist.iter().rev() {
+            let ai = ri * si.dot(&q);
+            q.scaled_add(-ai, &yi);
+            a.push(ai);
+        }
+
+        // q = {
+        //     // H_0 * q
+        //     let (ref s, ref y, _) = hist[hist.len() - 1];
+        //     y * (s.dot(&q) / y.dot(y))
+        // };
+
+        for ((si, yi, ri), ai) in hist.iter().zip(a.iter().rev()) {
+            let bi = ri * yi.dot(&q);
+            q.scaled_add(ai - bi, &si);
+        }
+        q
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn minimize() {
+        let center = arr1(&[0.9, 1.3, 0.5]);
+        let min = LBFGSBuilder::default().build().unwrap();
+        let f = |x: ArrayView1<f64>| (&x - &center).mapv(|xi| -(-xi * xi).exp()).scalar_sum();
+        let g = |x: ArrayView1<f64>| {
+            -2.0 * (&x - &center).mapv(|xi| -(-xi * xi).exp()) * &(&x - &center)
+        };
+        let x0 = Array1::ones(center.len());
+        let xmin = min.minimize(&f, &g, x0.view());
+        println!("{:?}", xmin);
+        assert!(xmin.all_close(&center, 1e-5))
+    }
+
+    #[test]
+    fn minimize_approx() {
+        let center = arr1(&[0.9, 1.3, 0.5]);
+        let min = LBFGSBuilder::default().build().unwrap();
+        let f = |x: ArrayView1<f64>| (&x - &center).mapv(|xi| -(-xi * xi).exp()).scalar_sum();
+        let x0 = Array1::ones(center.len());
+        let xmin = min.minimize_approx_grad(&f, x0.view());
+        println!("{:?}", xmin);
+        assert!(xmin.all_close(&center, 1e-5))
+    }
+}
diff --git a/src/vector/mod.rs b/src/vector/mod.rs
index 93975fa..b750137 100644
--- a/src/vector/mod.rs
+++ b/src/vector/mod.rs
@@ -1,5 +1,10 @@
-//! This module contains algorithms that search for local minima of functions along multiple dimensions.
+//! Algorithms that search for local minima of functions along multiple dimensions.
 
 mod nelder_mead;
 
-pub use self::nelder_mead::NelderMeadBuilder;
\ No newline at end of file
+pub use self::nelder_mead::NelderMead;
+pub use self::nelder_mead::NelderMeadBuilder;
+
+mod l_bfgs;
+pub use self::l_bfgs::LBFGSBuilder;
+pub use self::l_bfgs::LBFGS;
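
A minimal out-of-tree sketch for trying the new optimizer, assuming the re-exports added in `src/vector/mod.rs` above and the builder defaults; the quadratic target and its dimensions are illustrative only:

```
extern crate ndarray;
extern crate optimize;

use ndarray::prelude::*;
use optimize::vector::LBFGSBuilder;

fn main() {
    // Build the optimizer with its default parameters.
    let min = LBFGSBuilder::default().build().unwrap();

    // A simple convex target: squared distance to a fixed point.
    let target = arr1(&[1.0, -2.0, 3.0]);
    let f = |x: ArrayView1<f64>| (&x - &target).mapv(|xi| xi.powi(2)).scalar_sum();

    // Let the optimizer approximate the gradient by finite differences.
    let x0 = Array1::zeros(target.len());
    let xmin = min.minimize_approx_grad(&f, x0.view());

    println!("{:?}", xmin);
    assert!(xmin.all_close(&target, 1e-4));
}
```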