-
-
Notifications
You must be signed in to change notification settings - Fork 1k
/
LaplacianInferenceMethodWithLBFGS.h
237 lines (202 loc) · 8.52 KB
/
LaplacianInferenceMethodWithLBFGS.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2014 Wu Lin
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*
* Code adapted from Gaussian Process Machine Learning Toolbox
* http://www.gaussianprocess.org/gpml/code/matlab/doc/
*/
#ifndef CLAPLACIANINFERENCEMETHODWITHLBFGS_H_
#define CLAPLACIANINFERENCEMETHODWITHLBFGS_H_
#include <shogun/lib/config.h>
#ifdef HAVE_EIGEN3
#include <shogun/machine/gp/LaplacianInferenceMethod.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/optimization/lbfgs/lbfgs.h>
namespace shogun
{
/** @brief The Laplace approximation inference method with LBFGS class.
*
* This inference method approximates the posterior likelihood function by using
* Laplace's method. Here, we compute a Gaussian approximation to the posterior
* via a Taylor expansion around the maximum of the posterior likelihood
* function. We use the Limited-memory BFGS method to obtain the maximum of likelihood.
* Note that due to the Laplace approximation, the time complexity of the class is still O(n^3),
* where n is the number of training data points.
* However, in the optimization step we use the L-BFGS method, whose time complexity
* is O(n*m), to replace the Newton method, whose time complexity is O(n^3).
* Here L-BFGS only uses the last m (m<<n) function/gradient pairs to find the optimal point.
*
* For more details, see Nocedal, Jorge, and Stephen J. Wright.
* "Numerical Optimization 2nd." (2006), Pages 177-180.
*
* This specific implementation was based on the idea
* from Murphy, Kevin P. "Machine learning: a probabilistic perspective." (2012), Pages 251-252.
*/
class CLaplacianInferenceMethodWithLBFGS: public CLaplacianInferenceMethod
{
public:
/** Default constructor */
CLaplacianInferenceMethodWithLBFGS();
/** Constructor
*
* @param kernel covariance function
* @param features features to use in inference
* @param mean mean function
* @param labels labels of the features
* @param model likelihood model to use
*/
CLaplacianInferenceMethodWithLBFGS(CKernel* kernel,
CFeatures* features,
CMeanFunction* mean,
CLabels* labels,
CLikelihoodModel* model);
virtual ~CLaplacianInferenceMethodWithLBFGS();
/** Returns the name of the inference method
*
* @return name LaplacianInferenceMethodWithLBFGS
*/
virtual const char* get_name() const
{return "LaplacianInferenceMethodWithLBFGS";}
/** Set L-BFGS parameters.
* For details please see shogun/optimization/lbfgs/lbfgs.h
* @param m The number of corrections to approximate the inverse Hessian matrix.
* Default value is 100.
* @param max_linesearch The maximum number of trials to do line search for each L-BFGS update.
* Default value is 1000.
* @param linesearch The line search algorithm.
* Default value is using the Moré-Thuente line search
* @param max_iterations The maximum number of iterations for L-BFGS update.
* Default value is 1000.
* @param delta Delta for convergence test based on the change of function value.
* Default value is 0.
* @param past Distance for delta-based convergence test.
* Default value is 0.
* @param epsilon Epsilon for convergence test based on the change of gradient.
* Default value is 1e-5
* @param enable_newton_if_fail If the L-BFGS optimizer fails, should we use the Newton method.
* Default value is true.
* @param min_step The minimum step of the line search.
* The default value is 1e-20
* @param max_step The maximum step of the line search.
* The default value is 1e+20
* @param ftol A parameter used in the Armijo condition.
* Default value is 1e-4
* @param wolfe A parameter used in the curvature condition.
* Default value is 0.9
* @param gtol A parameter used in the Moré-Thuente line search to control the accuracy.
* Default value is 0.9
* @param xtol The machine precision for floating-point values.
* Default value is 1e-16.
* @param orthantwise_c Coefficient for the L1 norm of variables.
* This parameter should be set to zero for standard minimization problems.
* Setting this parameter to a positive value activates the
* Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method. Default value is 0.
* @param orthantwise_start Start index for computing the L1 norm of the variables.
* This parameter is valid only for the OWL-QN method. Default value is 0.
* @param orthantwise_end End index for computing the L1 norm of the variables.
* Default value is 1.
*/
virtual void set_lbfgs_parameters(int m = 100,
int max_linesearch = 1000,
int linesearch = LBFGS_LINESEARCH_DEFAULT,
int max_iterations = 1000,
float64_t delta = 0.0,
int past = 0,
float64_t epsilon = 1e-5,
bool enable_newton_if_fail = true,
float64_t min_step = 1e-20,
float64_t max_step = 1e+20,
float64_t ftol = 1e-4,
float64_t wolfe = 0.9,
float64_t gtol = 0.9,
float64_t xtol = 1e-16,
float64_t orthantwise_c = 0.0,
int orthantwise_start = 0,
int orthantwise_end = 1);
protected:
/** Update alpha using the L-BFGS method (overrides the Newton update of the base class) */
virtual void update_alpha();
private:
/** Mean vector used to compute the function value and gradient during the L-BFGS update
* (non-owning usage cannot be determined from this header — see the .cpp for lifetime) */
SGVector<float64_t> * m_mean_f;
/** Should we fall back to the original Newton method
* if the L-BFGS method fails
*/
bool m_enable_newton_if_fail;
/** The number of corrections to approximate the inverse Hessian matrix. */
int m_m;
/** The maximum number of trials to do line search for each L-BFGS update. */
int m_max_linesearch;
/** The line search algorithm. */
int m_linesearch;
/** The maximum number of iterations for L-BFGS update. */
int m_max_iterations;
/** Delta for convergence test based on the change of function value. */
float64_t m_delta;
/** Distance for delta-based convergence test. */
int m_past;
/** Epsilon for convergence test based on the change of gradient. */
float64_t m_epsilon;
/** The minimum step of the line search. */
float64_t m_min_step;
/** The maximum step of the line search. */
float64_t m_max_step;
/** A parameter used in the Armijo condition. */
float64_t m_ftol;
/** A parameter used in the curvature condition. */
float64_t m_wolfe;
/** A parameter used in the Moré-Thuente line search to control the accuracy. */
float64_t m_gtol;
/** The machine precision for floating-point values. */
float64_t m_xtol;
/** Coefficient for the L1 norm of variables (OWL-QN). */
float64_t m_orthantwise_c;
/** Start index for computing the L1 norm of the variables (OWL-QN only). */
int m_orthantwise_start;
/** End index for computing the L1 norm of the variables. */
int m_orthantwise_end;
/** Initialize members to their default values (called by the constructors). */
void init();
/** Helper function passed as a callback to the L-BFGS API.
* Note that this function must be static (the L-BFGS C API takes a plain
* function pointer plus the object as void*) and is kept private.
*
* @param obj the CLaplacianInferenceMethodWithLBFGS instance, passed as void*
* @param alpha current variable values
* @param gradient output buffer for the gradient at alpha
* @param dim dimension of alpha/gradient
* @param step current line-search step
* @return the objective (psi) value at alpha
*/
static float64_t evaluate(void *obj,
const float64_t *alpha,
float64_t *gradient,
const int dim,
const float64_t step);
/** Compute the gradient of the objective given the current alpha. */
void get_gradient_wrt_alpha(Eigen::Map<Eigen::VectorXd>* alpha,
Eigen::Map<Eigen::VectorXd>* gradient);
/** Compute the objective (psi) value given the current alpha. */
void get_psi_wrt_alpha(Eigen::Map<Eigen::VectorXd>* alpha,
float64_t* psi);
};
} /* namespace shogun */
#endif /* HAVE_EIGEN3 */
#endif /* CLAPLACIANINFERENCEMETHODWITHLBFGS_H_ */