move width in GaussianKernel to log_width
yorkerlin committed May 7, 2015
1 parent 3f9ee7a commit 9ca1285
Showing 14 changed files with 80 additions and 72 deletions.
22 changes: 17 additions & 5 deletions src/shogun/kernel/GaussianKernel.cpp
@@ -16,9 +16,21 @@
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

void CGaussianKernel::set_width(float64_t w)
{
REQUIRE(w>0, "width (%f) must be positive\n",w);
m_log_width=CMath::log(w/2.0)/2.0;
}

float64_t CGaussianKernel::get_width() const
{
return CMath::exp(m_log_width*2.0)*2.0;
}

CGaussianKernel::CGaussianKernel() : CDotKernel()
{
init();
@@ -63,7 +75,7 @@ CSGObject *CGaussianKernel::shallow_copy() const
// this assert is to avoid any subclass of CGaussianKernel accidentally being
// called with the implementation here
ASSERT(typeid(*this) == typeid(CGaussianKernel))
CGaussianKernel *ker = new CGaussianKernel(cache_size, width);
CGaussianKernel *ker = new CGaussianKernel(cache_size, get_width());
if (lhs)
{
ker->init(lhs, rhs);
@@ -150,15 +162,15 @@ SGMatrix<float64_t> CGaussianKernel::get_parameter_gradient(
{
REQUIRE(lhs && rhs, "Features not set!\n")

if (!strcmp(param->m_name, "width"))
if (!strcmp(param->m_name, "log_width"))
{
SGMatrix<float64_t> derivative=SGMatrix<float64_t>(num_lhs, num_rhs);

for (int j=0; j<num_lhs; j++)
for (int k=0; k<num_rhs; k++)
{
float64_t element=distance(j,k);
derivative(j,k)=exp(-element)*element/width;
derivative(j,k)=exp(-element)*element*2.0;
}

return derivative;
@@ -176,11 +188,11 @@ void CGaussianKernel::init()
set_compact_enabled(false);
sq_lhs=NULL;
sq_rhs=NULL;
SG_ADD(&width, "width", "Kernel width", MS_AVAILABLE, GRADIENT_AVAILABLE);
SG_ADD(&m_log_width, "log_width", "Kernel width in log domain", MS_AVAILABLE, GRADIENT_AVAILABLE);
SG_ADD(&m_compact, "compact", "Compact enabled option", MS_AVAILABLE);
}

float64_t CGaussianKernel::distance(int32_t idx_a, int32_t idx_b)
{
return (sq_lhs[idx_a]+sq_rhs[idx_b]-2*CDotKernel::compute(idx_a,idx_b))/width;
return (sq_lhs[idx_a]+sq_rhs[idx_b]-2*CDotKernel::compute(idx_a,idx_b))/get_width();
}
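
Note on the reparameterization: set_width() stores m_log_width = log(w/2)/2, so get_width() returns w = 2*exp(2*m_log_width). With the usual Gaussian-kernel convention w = 2*ell^2, m_log_width is exactly log(ell), i.e. the hyperparameter GPML works with. The *2.0 in get_parameter_gradient above is the chain-rule factor: dK/d(log_width) = dK/dw * dw/d(log_width), and dw/d(log_width) = 2w, which turns the old exp(-element)*element/width into exp(-element)*element*2.0. A minimal standalone sketch (not part of this commit; all names are illustrative) that checks both identities numerically:

#include <cassert>
#include <cmath>
#include <cstdio>

int main()
{
    const double width = 2.5;

    // forward map, as in set_width(): log_width = log(w/2)/2
    double log_width = std::log(width/2.0)/2.0;

    // inverse map, as in get_width(): w = 2*exp(2*log_width)
    assert(std::fabs(std::exp(log_width*2.0)*2.0 - width) < 1e-12);

    // kernel entry k = exp(-d/w) for a fixed squared distance d
    const double d = 1.7;
    auto k = [&](double lw) { return std::exp(-d/(2.0*std::exp(2.0*lw))); };

    // chain rule: dk/d(log_width) = exp(-d/w) * (d/w) * 2
    double analytic = std::exp(-d/width)*(d/width)*2.0;
    double h = 1e-6;
    double numeric = (k(log_width + h) - k(log_width - h))/(2.0*h);
    std::printf("analytic=%.10f numeric=%.10f\n", analytic, numeric);
    assert(std::fabs(analytic - numeric) < 1e-6);
    return 0;
}

Working in the log domain also keeps the width positive for any unconstrained optimizer step, which is why the REQUIRE(w>0) check is only needed at the set_width() boundary.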
6 changes: 3 additions & 3 deletions src/shogun/kernel/GaussianKernel.h
@@ -106,13 +106,13 @@ class CGaussianKernel: public CDotKernel
*
* @param w kernel width
*/
virtual void set_width(float64_t w) { width=w; }
virtual void set_width(float64_t w);

/** return the kernel's width
*
* @return kernel width
*/
virtual float64_t get_width() const { return width; }
virtual float64_t get_width() const;

/** set the compact option
*
@@ -189,7 +189,7 @@ class CGaussianKernel: public CDotKernel

protected:
/** width */
float64_t width;
float64_t m_log_width;
/** squared left-hand side */
float64_t* sq_lhs;
/** squared right-hand side */
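
For callers nothing changes at the API level: set_width()/get_width() still trade in the linear-domain width; only the storage moves to the log domain. A hedged usage sketch (assuming the usual Shogun environment; constructor arguments as in the shallow_copy() call above):

#include <shogun/kernel/GaussianKernel.h>
using namespace shogun;

void width_round_trip()
{
    // cache size, width -- as in new CGaussianKernel(cache_size, get_width())
    CGaussianKernel* k = new CGaussianKernel(10, 4.0);
    k->set_width(9.0);            // stored internally as log(9.0/2)/2
    float64_t w = k->get_width(); // ~9.0, up to log/exp rounding
    (void)w;
    SG_UNREF(k);
}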
6 changes: 3 additions & 3 deletions src/shogun/kernel/GaussianShiftKernel.cpp
@@ -56,19 +56,19 @@ float64_t CGaussianShiftKernel::compute(int32_t idx_a, int32_t idx_b)
float64_t sum=0.0 ;
for (int32_t i=0; i<alen; i++)
sum+=(avec[i]-bvec[i])*(avec[i]-bvec[i]);
result += exp(-sum/width) ;
result += exp(-sum/get_width()) ;

for (int32_t shift = shift_step, s=1; shift<max_shift; shift+=shift_step, s++)
{
sum=0.0 ;
for (int32_t i=0; i<alen-shift; i++)
sum+=(avec[i+shift]-bvec[i])*(avec[i+shift]-bvec[i]);
result += exp(-sum/width)/(2*s) ;
result += exp(-sum/get_width())/(2*s) ;

sum=0.0 ;
for (int32_t i=0; i<alen-shift; i++)
sum+=(avec[i]-bvec[i+shift])*(avec[i]-bvec[i+shift]);
result += exp(-sum/width)/(2*s) ;
result += exp(-sum/get_width())/(2*s) ;
}

((CDenseFeatures<float64_t>*) lhs)->free_feature_vector(avec, idx_a, afree);
10 changes: 5 additions & 5 deletions tests/unit/kernel/CombinedKernel_unittest.cc
@@ -88,9 +88,9 @@ TEST(CombinedKernelTest,serialization)
CGaussianKernel* k1 = (CGaussianKernel*) combined_read->get_kernel(1);
CGaussianKernel* k2 = (CGaussianKernel*) combined_read->get_kernel(2);

EXPECT_EQ(k0->get_width(), 4);
EXPECT_EQ(k1->get_width(), 3);
EXPECT_EQ(k2->get_width(), 9);
EXPECT_NEAR(k0->get_width(), 4, 1e-9);
EXPECT_NEAR(k1->get_width(), 3, 1e-9);
EXPECT_NEAR(k2->get_width(), 9, 1e-9);

SG_UNREF(k0);
SG_UNREF(k1);
@@ -168,7 +168,7 @@ TEST(CombinedKernelTest,combination)
{
CGaussianKernel* c_subkernel =
dynamic_cast<CGaussianKernel* >(c_kernel->get_kernel(k_idx));
EXPECT_EQ(c_subkernel->get_width(), combs2[i++][j]);
EXPECT_NEAR(c_subkernel->get_width(), combs2[i++][j], 1e-9);
SG_UNREF(c_subkernel);
}
++j;
@@ -207,7 +207,7 @@ TEST(CombinedKernelTest,combination)
{
CGaussianKernel* c_subkernel =
dynamic_cast<CGaussianKernel* >(c_kernel->get_kernel(k_idx));
EXPECT_EQ(c_subkernel->get_width(), combs[i++][j]);
EXPECT_NEAR(c_subkernel->get_width(), combs[i++][j], 1e-9);
SG_UNREF(c_subkernel);
}
++j;
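
The EXPECT_EQ assertions are relaxed to EXPECT_NEAR because get_width() now round-trips the stored value through log and exp, which is not guaranteed to be bit-exact. A standalone illustration (not test code):

#include <cmath>
#include <cstdio>

int main()
{
    for (double w : {4.0, 3.0, 9.0})
    {
        double log_width = std::log(w/2.0)/2.0;    // set_width()
        double back = std::exp(log_width*2.0)*2.0; // get_width()
        std::printf("w=%g back=%.17g diff=%g\n", w, back, back - w);
    }
    return 0;
}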
16 changes: 8 additions & 8 deletions tests/unit/kernel/GaussianARDKernel_unittest.cc
@@ -178,16 +178,16 @@ TEST(GaussianARDKernel_scalar,get_parameter_gradient)
kernel2->init(features_train, latent_features_train);

TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param2=kernel2->m_gradient_parameters->get_parameter("width");
TParameter* width_param2=kernel2->m_gradient_parameters->get_parameter("log_width");

SGMatrix<float64_t> mat=kernel->get_parameter_gradient(width_param);
SGMatrix<float64_t> mat2=kernel2->get_parameter_gradient(width_param2);
for(int32_t i=0;i<mat.num_rows;i++)
{
for(int32_t j=0;j<mat.num_cols;j++)
{
abs_tolerance=CMath::get_abs_tolerance(mat2(i,j),rel_tolerance);
EXPECT_NEAR(mat(i,j),mat2(i,j),abs_tolerance);
abs_tolerance=CMath::get_abs_tolerance(mat2(i,j)/(ell2*ell2*4.0),rel_tolerance);
EXPECT_NEAR(mat(i,j),mat2(i,j)/(ell2*ell2*4.0),abs_tolerance);
}
}

@@ -200,8 +200,8 @@ TEST(GaussianARDKernel_scalar,get_parameter_gradient)
{
for(int32_t j=0;j<mat.num_cols;j++)
{
abs_tolerance=CMath::get_abs_tolerance(mat2(i,j),rel_tolerance);
EXPECT_NEAR(mat(i,j),mat2(i,j),abs_tolerance);
abs_tolerance=CMath::get_abs_tolerance(mat2(i,j)/(ell2*ell2*4.0),rel_tolerance);
EXPECT_NEAR(mat(i,j),mat2(i,j)/(ell2*ell2*4.0),abs_tolerance);
}
}

@@ -710,14 +710,14 @@ TEST(GaussianARDKernel,get_parameter_gradient_diagonal)
kernel2->init(features_train, latent_features_train);

TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param2=kernel2->m_gradient_parameters->get_parameter("width");
TParameter* width_param2=kernel2->m_gradient_parameters->get_parameter("log_width");

SGVector<float64_t> vec=kernel->get_parameter_gradient_diagonal(width_param);
SGVector<float64_t> vec2=kernel2->get_parameter_gradient_diagonal(width_param2);
for(int32_t j=0;j<vec.vlen;j++)
{
abs_tolerance=CMath::get_abs_tolerance(vec2[j],rel_tolerance);
EXPECT_NEAR(vec[j],vec2[j],abs_tolerance);
EXPECT_NEAR(vec[j],vec2[j]/(ell2*ell2*4.0),abs_tolerance);
}

kernel->init(features_train, features_train);
@@ -728,7 +728,7 @@ TEST(GaussianARDKernel,get_parameter_gradient_diagonal)
for(int32_t j=0;j<vec.vlen;j++)
{
abs_tolerance=CMath::get_abs_tolerance(vec2[j],rel_tolerance);
EXPECT_NEAR(vec[j],vec2[j],abs_tolerance);
EXPECT_NEAR(vec[j],vec2[j]/(ell2*ell2*4.0),abs_tolerance);
}

// cleanup
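
The new /(ell2*ell2*4.0) normalization is the chain-rule conversion between the two gradients. Assuming kernel2 (the CGaussianKernel) was constructed with width w = 2*ell2^2, where ell2 is the test's length-scale variable, the factor follows from

    dK/d(log_width) = dK/dw * dw/d(log_width),
    w = 2*exp(2*log_width)  =>  dw/d(log_width) = 2w = 4*ell2^2,

so dividing mat2 and vec2 by 4*ell2^2 recovers the plain dK/dw that the GaussianARD kernel's "width" parameter still reports, which is what the old test compared against directly.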
4 changes: 2 additions & 2 deletions tests/unit/machine/gp/EPInferenceMethod_unittest.cc
@@ -258,10 +258,10 @@ TEST(EPInferenceMethod,get_marginal_likelihood_derivatives_probit_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");

float64_t dnlZ_ell=4*ell*ell*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=1.5*(gradient->get_element(scale_param))[0];

// comparison of partial derivatives of negative marginal likelihood with
8 changes: 2 additions & 6 deletions tests/unit/machine/gp/ExactInferenceMethod_unittest.cc
@@ -243,15 +243,11 @@ TEST(ExactInferenceMethod,get_negative_log_marginal_likelihood_derivatives)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");
TParameter* sigma_param=lik->m_gradient_parameters->get_parameter("sigma");

// in GPML package: dK(i,j)/dell = sf2 * exp(-(x(i) - x(j))^2/(2*ell^2)) *
// (x(i) - x(j))^2 / (ell^2), but in SHOGUN we compute: dK(i,j)/dw = sf2 *
// exp(-(x(i) - x(j))^2/w) * (x(i) - x(j))^2 / (w^2), so if w = 2 * ell^2,
// then dK(i,j)/dell = 4 * ell^2 * dK(i,j)/dw.
float64_t dnlZ_ell=4*ell*ell*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=(gradient->get_element(scale_param))[0];
float64_t dnlZ_lik=(gradient->get_element(sigma_param))[0];

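
The comment removed above documented the old conversion to GPML's parameterization: with w = 2*ell^2, dnlZ/d(log ell) = 4*ell^2 * dnlZ/dw. That conversion is now unnecessary, because

    m_log_width = log(w/2)/2 = log(ell^2)/2 = log(ell),

so the gradient Shogun reports for log_width is already the derivative with respect to GPML's log(ell) hyperparameter, and the raw gradient element can be compared against the GPML reference values directly. The same simplification is why the 4*ell*ell and 4.0 prefactors disappear from dnlZ_ell in the EP, FITC, and KL* inference tests throughout this commit.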
8 changes: 4 additions & 4 deletions tests/unit/machine/gp/FITCInferenceMethod_unittest.cc
@@ -542,11 +542,11 @@ TEST(FITCInferenceMethod,get_marginal_likelihood_derivatives)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");
TParameter* sigma_param=lik->m_gradient_parameters->get_parameter("sigma");

float64_t dnlZ_ell=4*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=(gradient->get_element(scale_param))[0];
float64_t dnlZ_lik=(gradient->get_element(sigma_param))[0];

@@ -632,11 +632,11 @@ TEST(FITCInferenceMethod,get_marginal_likelihood_derivatives_sparse)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");
TParameter* sigma_param=lik->m_gradient_parameters->get_parameter("sigma");

float64_t dnlZ_ell=4*CMath::sq(ell)*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=2.5*(gradient->get_element(scale_param))[0];
float64_t dnlZ_lik=(gradient->get_element(sigma_param))[0];

12 changes: 6 additions & 6 deletions tests/unit/machine/gp/KLApproxDiagonalInferenceMethod_unittest.cc
@@ -1171,12 +1171,12 @@ TEST(KLApproxDiagonalInferenceMethod,get_marginal_likelihood_derivatives_t_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");
TParameter* sigma_param=lik->m_gradient_parameters->get_parameter("sigma");
TParameter* df_param=lik->m_gradient_parameters->get_parameter("df");

float64_t dnlZ_ell=4*ell*ell*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_df=(gradient->get_element(df_param))[0];
float64_t dnlZ_sigma=(gradient->get_element(sigma_param))[0];
float64_t dnlZ_sf2=(gradient->get_element(scale_param))[0];
@@ -1261,10 +1261,10 @@ TEST(KLApproxDiagonalInferenceMethod,get_marginal_likelihood_derivatives_logit_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");

float64_t dnlZ_ell=4.0*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=(gradient->get_element(scale_param))[0];

//Reference result is generated from the Matlab code, which can be found at
@@ -1340,10 +1340,10 @@ TEST(KLApproxDiagonalInferenceMethod,get_marginal_likelihood_derivatives_probit_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");

float64_t dnlZ_ell=4*ell*ell*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=(gradient->get_element(scale_param))[0];

//Reference result is generated from the Matlab code, which can be found at
12 changes: 6 additions & 6 deletions tests/unit/machine/gp/KLCholeskyInferenceMethod_unittest.cc
@@ -1170,12 +1170,12 @@ TEST(KLCholeskyInferenceMethod,get_marginal_likelihood_derivatives_t_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");
TParameter* sigma_param=lik->m_gradient_parameters->get_parameter("sigma");
TParameter* df_param=lik->m_gradient_parameters->get_parameter("df");

float64_t dnlZ_ell=4*ell*ell*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_df=(gradient->get_element(df_param))[0];
float64_t dnlZ_sigma=(gradient->get_element(sigma_param))[0];
float64_t dnlZ_sf2=(gradient->get_element(scale_param))[0];
@@ -1263,10 +1263,10 @@ TEST(KLCholeskyInferenceMethod,get_marginal_likelihood_derivatives_logit_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");

float64_t dnlZ_ell=4.0*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=(gradient->get_element(scale_param))[0];

//Reference result is generated from the Matlab code, which can be found at
@@ -1342,10 +1342,10 @@ TEST(KLCholeskyInferenceMethod,get_marginal_likelihood_derivatives_probit_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");

float64_t dnlZ_ell=4*ell*ell*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=(gradient->get_element(scale_param))[0];

//Reference result is generated from the Matlab code, which can be found at
12 changes: 6 additions & 6 deletions tests/unit/machine/gp/KLCovarianceInfernceMethod_unittest.cc
@@ -1160,12 +1160,12 @@ TEST(KLCovarianceInferenceMethod,get_marginal_likelihood_derivatives_t_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");
TParameter* sigma_param=lik->m_gradient_parameters->get_parameter("sigma");
TParameter* df_param=lik->m_gradient_parameters->get_parameter("df");

float64_t dnlZ_ell=4*ell*ell*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_df=(gradient->get_element(df_param))[0];
float64_t dnlZ_sigma=(gradient->get_element(sigma_param))[0];
float64_t dnlZ_sf2=1.0*(gradient->get_element(scale_param))[0];
@@ -1251,10 +1251,10 @@ TEST(KLCovarianceInferenceMethod,get_marginal_likelihood_derivatives_logit_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");

float64_t dnlZ_ell=4*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=1.0*(gradient->get_element(scale_param))[0];

//Reference result is generated from the Matlab code, which can be found at
@@ -1329,10 +1329,10 @@ TEST(KLCovarianceInferenceMethod,get_marginal_likelihood_derivatives_probit_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");

float64_t dnlZ_ell=4*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=1.0*(gradient->get_element(scale_param))[0];

//Reference result is generated from the Matlab code, which can be found at
4 changes: 2 additions & 2 deletions tests/unit/machine/gp/KLDualInferenceMethod_unittest.cc
@@ -471,10 +471,10 @@ TEST(KLDualInferenceMethod,get_marginal_likelihood_derivatives_logit_likelihood)
inf->get_negative_log_marginal_likelihood_derivatives(parameter_dictionary);

// get parameters to compute derivatives
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("width");
TParameter* width_param=kernel->m_gradient_parameters->get_parameter("log_width");
TParameter* scale_param=inf->m_gradient_parameters->get_parameter("scale");

float64_t dnlZ_ell=4*ell*ell*(gradient->get_element(width_param))[0];
float64_t dnlZ_ell=(gradient->get_element(width_param))[0];
float64_t dnlZ_sf2=scale*(gradient->get_element(scale_param))[0];

//Reference result is generated from the Matlab code, which can be found at
