autodiff gaussian width parameter #4782
shogun/kernel/GaussianKernel.cpp
@@ -6,12 +6,15 @@
  * Tonmoy Saikia, Sergey Lisitsyn, Matt Aasted, Sanuj Sharma
  */

+#include <Eigen/Core>
+#include <unsupported/Eigen/AutoDiff>
 #include <shogun/lib/common.h>
 #include <shogun/kernel/GaussianKernel.h>
 #include <shogun/features/DotFeatures.h>
 #include <shogun/distance/EuclideanDistance.h>
 #include <shogun/mathematics/Math.h>

 using namespace shogun;

 CGaussianKernel::CGaussianKernel() : CShiftInvariantKernel()
@@ -88,21 +91,42 @@ void CGaussianKernel::set_width(float64_t w)
     m_log_width = std::log(w / 2.0) / 2.0;
 }

+auto CGaussianKernel::kernel_function(int32_t idx_a, int32_t idx_b) const
+{
+    // this could be written as Eigen::Matrix<float64_t, n_differentiable_params, 1>;
+    using EigenScalar = Eigen::Matrix<float64_t, 1, 1>;
+    Eigen::AutoDiffScalar<EigenScalar> eigen_log_width(m_log_width);
+    // resize(n_differentiable_params)
+    eigen_log_width.derivatives().resize(1);
+    // this could be written as
+    // eigen_log_width.derivatives() = EigenScalar::Unit(1,i);
+    // where i is the idx of the adjoint
+    eigen_log_width.derivatives() = EigenScalar::Unit(1,0);
+    auto el = CShiftInvariantKernel::distance(idx_a, idx_b);
+    return exp(-el / (exp(eigen_log_width * 2.0) * 2.0));
+}
+
 SGMatrix<float64_t> CGaussianKernel::get_parameter_gradient(const TParameter* param, index_t index)
 {
     using std::exp;

     require(lhs, "Left hand side features must be set!");
-    require(rhs, "Rightt hand side features must be set!");
+    require(rhs, "Right hand side features must be set!");

     if (!strcmp(param->m_name, "log_width"))
     {
         SGMatrix<float64_t> derivative=SGMatrix<float64_t>(num_lhs, num_rhs);

         for (int k=0; k<num_rhs; k++)
         {
             #pragma omp parallel for
             for (int j=0; j<num_lhs; j++)
             {
-                float64_t element=distance(j, k);
-                derivative(j, k) = std::exp(-element) * element * 2.0;
+                // this could be written as Eigen::Matrix<float64_t, n_differentiable_params, 1>;
+                using EigenScalar = Eigen::Matrix<float64_t, 1, 1>;
+                Eigen::AutoDiffScalar<EigenScalar> kernel = kernel_function(j, k);
+                // 0 is the index of the width parameter
+                derivative(j, k) = kernel.derivatives()(0);
             }
         }
         return derivative;

Review comment (on the new derivative(j, k) line): this seems pretty automatable to me (at least say for scalar valued kernels with scalar parameters)
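A standalone sketch of the mechanism used above, assuming only Eigen with its unsupported AutoDiff module (the numeric test values are arbitrary and purely illustrative). It mirrors kernel_function() outside of shogun and checks the automatic derivative against the closed-form gradient the previous loop computed, exp(-d) * d * 2, which holds when d is the squared distance already divided by the kernel width (that is how compute() used distance() before this change).

```cpp
#include <Eigen/Core>
#include <unsupported/Eigen/AutoDiff>

#include <cmath>
#include <iostream>

int main()
{
    // One differentiable parameter, so the derivative vector is 1x1.
    using EigenScalar = Eigen::Matrix<double, 1, 1>;

    const double sq_dist = 1.7;    // ||x - y||^2, an arbitrary test value
    const double log_width = 0.3;  // plays the role of m_log_width

    // Seed the adjoint: d(log_width)/d(log_width) = 1 at index 0.
    Eigen::AutoDiffScalar<EigenScalar> eigen_log_width(log_width);
    eigen_log_width.derivatives() = EigenScalar::Unit(1, 0);

    // Same expression as kernel_function(): k = exp(-||x - y||^2 / w) with w = 2 * exp(2 * log_width).
    Eigen::AutoDiffScalar<EigenScalar> k =
        exp(-sq_dist / (exp(eigen_log_width * 2.0) * 2.0));

    // Closed-form gradient used by the previous implementation:
    // dk/d(log_width) = exp(-d) * d * 2 with d = ||x - y||^2 / w.
    const double w = 2.0 * std::exp(2.0 * log_width);
    const double d = sq_dist / w;
    const double analytic = std::exp(-d) * d * 2.0;

    std::cout << "value:    " << k.value() << "\n"
              << "autodiff: " << k.derivatives()(0) << "\n"
              << "analytic: " << analytic << "\n";  // autodiff and analytic should agree
    return 0;
}
```

Seeding derivatives() with Unit(1, 0) before evaluating is what makes the single entry of k.derivatives() the partial derivative with respect to log_width.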
@@ -116,9 +140,11 @@ SGMatrix<float64_t> CGaussianKernel::get_parameter_gradient(const TParameter* pa

 float64_t CGaussianKernel::compute(int32_t idx_a, int32_t idx_b)
 {
-    float64_t result=distance(idx_a, idx_b);
-    return std::exp(-result);
-}
+    // this could be written as Eigen::Matrix<float64_t, n_differentiable_params, 1>;
+    using EigenScalar = Eigen::Matrix<float64_t, 1, 1>;
+    Eigen::AutoDiffScalar<EigenScalar> kernel = kernel_function(idx_a, idx_b);
+    return kernel.value();
+}

 void CGaussianKernel::load_serializable_post() noexcept(false)
 {
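The comments repeated above ("this could be written as Eigen::Matrix<float64_t, n_differentiable_params, 1>") hint at how the pattern scales to several hyperparameters: widen the derivative vector and seed each parameter with its own unit adjoint. Below is a hedged sketch under that assumption; the second log_scale parameter and the combined kernel expression are invented for illustration and are not part of this PR.

```cpp
#include <Eigen/Core>
#include <unsupported/Eigen/AutoDiff>

#include <iostream>

int main()
{
    constexpr int n_differentiable_params = 2;
    using EigenScalar = Eigen::Matrix<double, n_differentiable_params, 1>;
    using ADScalar = Eigen::AutoDiffScalar<EigenScalar>;

    const double sq_dist = 1.7;  // ||x - y||^2, an arbitrary test value

    // Each hyperparameter gets its own adjoint index via Unit(n, i).
    ADScalar log_width(0.3);
    ADScalar log_scale(-0.2);
    log_width.derivatives() = EigenScalar::Unit(n_differentiable_params, 0);
    log_scale.derivatives() = EigenScalar::Unit(n_differentiable_params, 1);

    // Invented two-parameter kernel:
    // k = exp(2 * log_scale) * exp(-||x - y||^2 / (2 * exp(2 * log_width)))
    ADScalar amplitude = exp(log_scale * 2.0);
    ADScalar gauss = exp(-sq_dist / (exp(log_width * 2.0) * 2.0));
    ADScalar k = amplitude * gauss;

    // One evaluation yields the value and both partial derivatives.
    std::cout << "k               = " << k.value() << "\n"
              << "dk/d(log_width) = " << k.derivatives()(0) << "\n"
              << "dk/d(log_scale) = " << k.derivatives()(1) << "\n";
    return 0;
}
```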
shogun/kernel/GaussianKernel.h

@@ -123,6 +123,15 @@ class CGaussianKernel: public CShiftInvariantKernel
         return std::exp(m_log_width * 2.0) * 2.0;
     }

+#ifndef SWIG
+    /**
+     * Returns a lazily evaluated Eigen expression template
+     * that tracks the gradient of differentiable hyperparameters
+     *
+     * @return expression template of kernel function
+     */
+    auto kernel_function(int32_t idx_a, int32_t idx_b) const;
+#endif
     /** return derivative with respect to specified parameter
      *
      * @param param the parameter

Review comment (on the "lazily evaluated" wording): If it is already evaluated, then it won't be lazily evaluated (at least not from the point of view of deferring evaluation until after this method has been executed). What about dropping the "lazily evaluated" part, or refactoring it to "to be lazily evaluated" if you really want to keep that part?

Reply: yes, good point, i'll change that
Review comment: So I guess a next step could be to start thinking about getting rid of this explicit code, and rather automatically offering this derivative through registering something in the ctors.

Reply: I think for that we should maybe have a base class for classes that have parameters we can take the derivative w.r.t. This class registers the gradient parameters in some vector, and then we can get the index from there. Basically, when we do watch_param(...) this would add the variable to such a vector if it has the GRADIENT flag.
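A purely hypothetical sketch of the registration idea discussed above; none of these names (HasGradientParams, ParamFlag, this watch_param signature) are shogun APIs. It only illustrates how parameters registered with a GRADIENT flag could provide both n_differentiable_params and the adjoint index that kernel_function() currently hard-codes as 0.

```cpp
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

// Flag marking a registered parameter as differentiable.
enum class ParamFlag { NONE, GRADIENT };

class HasGradientParams
{
public:
    // A watch_param(...)-style registration hook, called from a kernel's ctor.
    void watch_param(const std::string& name, double* value, ParamFlag flag)
    {
        if (flag == ParamFlag::GRADIENT)
            m_gradient_params.push_back({name, value});
    }

    // Size of the derivative vector handed to the autodiff scalar
    // (what the comments above call n_differentiable_params).
    std::size_t n_differentiable_params() const
    {
        return m_gradient_params.size();
    }

    // Adjoint index of a parameter, i.e. which entry of derivatives() to seed and read.
    std::size_t gradient_index(const std::string& name) const
    {
        for (std::size_t i = 0; i < m_gradient_params.size(); ++i)
            if (m_gradient_params[i].name == name)
                return i;
        throw std::invalid_argument("unknown gradient parameter: " + name);
    }

private:
    struct Entry
    {
        std::string name;
        double* value;
    };
    std::vector<Entry> m_gradient_params;
};

int main()
{
    double log_width = 0.3;
    HasGradientParams kernel;
    kernel.watch_param("log_width", &log_width, ParamFlag::GRADIENT);
    // A kernel_function() built on top of this could size its derivative vector with
    // n_differentiable_params() and seed Unit(n, gradient_index("log_width")).
    return 0;
}
```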