Skip to content

Commit

Permalink
Added Hessian to KernelExpFamily
Browse files Browse the repository at this point in the history
  • Loading branch information
sorig committed Jul 6, 2016
1 parent c0023a6 commit b45bdc8
Show file tree
Hide file tree
Showing 13 changed files with 279 additions and 21 deletions.
11 changes: 10 additions & 1 deletion src/shogun/distributions/kernel_exp_family/KernelExpFamily.cpp
Expand Up @@ -86,7 +86,7 @@ float64_t CKernelExpFamily::log_pdf(SGVector<float64_t> x)
{
auto D = m_impl->get_num_dimensions();
REQUIRE(x.vector, "Given data point cannot be empty\n");
REQUIRE(x.vector, "Dimension of given data point (%d) must match the estimator's (%d)\n", x.vlen, D);
REQUIRE(x.vlen==D, "Dimension of given data point (%d) must match the estimator's (%d)\n", x.vlen, D);

return m_impl->log_pdf(x);
}
Expand All @@ -100,6 +100,15 @@ SGVector<float64_t> CKernelExpFamily::grad(SGVector<float64_t> x)
return m_impl->grad(x);
}

SGMatrix<float64_t> CKernelExpFamily::hessian(SGVector<float64_t> x)
{
	// Validate the query point, then delegate to the implementation object.
	auto num_dims = m_impl->get_num_dimensions();
	REQUIRE(x.vector, "Given data point cannot be empty\n");
	REQUIRE(x.vlen==num_dims, "Dimension of given data point (%d) must match the estimator's (%d)\n", x.vlen, num_dims);

	return m_impl->hessian(x);
}

SGVector<float64_t> CKernelExpFamily::log_pdf_multiple(SGMatrix<float64_t> X)
{
auto D = m_impl->get_num_dimensions();
Expand Down
Expand Up @@ -55,6 +55,7 @@ class CKernelExpFamily : public CSGObject
virtual float64_t log_pdf(SGVector<float64_t> x);
virtual SGVector<float64_t> log_pdf_multiple(SGMatrix<float64_t> X);
virtual SGVector<float64_t> grad(SGVector<float64_t> x);
virtual SGMatrix<float64_t> hessian(SGVector<float64_t> x);

virtual const char* get_name() const { return "KernelExpFamily"; }

Expand Down
12 changes: 9 additions & 3 deletions src/shogun/distributions/kernel_exp_family/impl/Base.cpp
Expand Up @@ -112,20 +112,26 @@ SGVector<float64_t> Base::log_pdf(const SGMatrix<float64_t> X)
SGVector<float64_t> result(N_test);
#pragma omp parallel for
for (auto i=0; i<N_test; ++i)
result[i] = log_pdf(i);
result[i] = ((const Base*)this)->log_pdf(i);

return result;
}

float64_t Base::log_pdf(SGVector<float64_t> x)
{
	// Evaluate the estimator's log-pdf at a single point: stage x as the
	// current test data, then evaluate at test index 0.
	set_test_data(x);
	// Call through a const `this` so overload resolution picks the const
	// log_pdf(index_t) evaluator rather than recursing into this overload.
	// (A stale duplicate `return log_pdf(0);` preceding this line made the
	// intended statement unreachable; it has been removed. static_cast
	// replaces the C-style cast.)
	return static_cast<const Base*>(this)->log_pdf(0);
}

SGVector<float64_t> Base::grad(SGVector<float64_t> x)
{
	// Evaluate the gradient at a single point: stage x as the current test
	// data, then evaluate at test index 0.
	set_test_data(x);
	// Call through a const `this` so overload resolution picks the const
	// grad(index_t) evaluator rather than recursing into this overload.
	// (A stale duplicate `return grad(0);` preceding this line made the
	// intended statement unreachable; it has been removed. static_cast
	// replaces the C-style cast.)
	return static_cast<const Base*>(this)->grad(0);
}

SGMatrix<float64_t> Base::hessian(SGVector<float64_t> x)
{
	// Evaluate the hessian at a single point: stage x as the current test
	// data, then evaluate at test index 0.
	set_test_data(x);
	// Named cast (static_cast) instead of the C-style cast; the const `this`
	// selects the const hessian(index_t) overload rather than recursing here.
	return static_cast<const Base*>(this)->hessian(0);
}

2 changes: 2 additions & 0 deletions src/shogun/distributions/kernel_exp_family/impl/Base.h
Expand Up @@ -66,6 +66,7 @@ public :
SGVector<float64_t> log_pdf(SGMatrix<float64_t> X);

SGVector<float64_t> grad(SGVector<float64_t> x);
SGMatrix<float64_t> hessian(SGVector<float64_t> x);

SGVector<float64_t> get_alpha_beta() const { return m_alpha_beta; }

Expand All @@ -76,6 +77,7 @@ public :
virtual std::pair<SGMatrix<float64_t>, SGVector<float64_t>> build_system() const=0;
virtual float64_t log_pdf(index_t idx_test) const=0;
virtual SGVector<float64_t> grad(index_t idx_test) const=0;
virtual SGMatrix<float64_t> hessian(index_t idx_test) const=0;

protected:
virtual void solve_and_store(const SGMatrix<float64_t>& A, const SGVector<float64_t>& b);
Expand Down
48 changes: 48 additions & 0 deletions src/shogun/distributions/kernel_exp_family/impl/Full.cpp
Expand Up @@ -189,3 +189,51 @@ SGVector<float64_t> Full::grad(index_t idx_test) const
eigen_xi_grad *= m_alpha_beta[0] / N;
return xi_grad + beta_sum_grad;
}

SGMatrix<float64_t> Full::hessian(index_t idx_test) const
{
	// Hessian of the fitted model at test point idx_test. Two DxD terms are
	// accumulated over all N training points: a xi term (later scaled by
	// m_alpha_beta[0]/N) and a beta-weighted term; the result is their sum.
	auto N = get_num_lhs();
	auto D = get_num_dimensions();

	SGMatrix<float64_t> xi_hessian(D, D);
	SGMatrix<float64_t> beta_sum_hessian(D, D);
	SGVector<float64_t> ones(D);

	// Eigen views over the Shogun buffers (no copies; writes go through).
	Map<MatrixXd> eigen_xi_hessian(xi_hessian.matrix, D, D);
	Map<MatrixXd> eigen_beta_sum_hessian(beta_sum_hessian.matrix, D, D);
	Map<VectorXd> eigen_ones(ones.vector, D);

	eigen_xi_hessian = MatrixXd::Zero(D, D);
	eigen_beta_sum_hessian = MatrixXd::Zero(D, D);
	eigen_ones = VectorXd::Ones(D);

	// Entire alpha-beta vector: alpha at [0], then N blocks of D betas.
	Map<VectorXd> eigen_alpha_beta(m_alpha_beta.vector, N*D+1);
	// Beta segment vector: re-used buffer holding training point a's betas.
	SGVector<float64_t> beta_a(D);
	Map<VectorXd> eigen_beta_a(beta_a.vector, D);

	for (auto a=0; a<N; a++)
	{
		// Arguments are opposite order of Python code but sign flip is not
		// needed since the function is symmetric
		auto xi_hess_summ = m_kernel->dx_i_dx_j_dx_k_dx_k_dot_vec(a, idx_test, ones);
		Map<MatrixXd> eigen_xi_hess_summ(xi_hess_summ.matrix, D, D);
		eigen_xi_hessian += eigen_xi_hess_summ;

		// Betas of training point a start at offset 1+a*D (after alpha).
		eigen_beta_a = eigen_alpha_beta.segment(1+a*D, D);

		// Note sign flip because arguments are opposite order of Python code
		auto beta_hess_summ = m_kernel->dx_i_dx_j_dx_k_dot_vec(a, idx_test, beta_a);
		Map<MatrixXd> eigen_beta_hess_summ(beta_hess_summ.matrix, D, D);
		eigen_beta_sum_hessian -= eigen_beta_hess_summ;
	}

	// Scale the xi term by alpha/N (alpha is the first alpha-beta entry).
	eigen_xi_hessian.array() *= m_alpha_beta[0] / N;

	SGMatrix<float64_t> result(D, D);
	Map<MatrixXd> eigen_result(result.matrix, D, D);
	eigen_result = eigen_xi_hessian + eigen_beta_sum_hessian;

	return result;
}
2 changes: 2 additions & 0 deletions src/shogun/distributions/kernel_exp_family/impl/Full.h
Expand Up @@ -83,12 +83,14 @@ public :
virtual std::pair<SGMatrix<float64_t>, SGVector<float64_t>> build_system() const;
virtual float64_t log_pdf(index_t idx_test) const;
virtual SGVector<float64_t> grad(index_t idx_test) const;
virtual SGMatrix<float64_t> hessian(index_t idx_test) const;

float64_t compute_xi_norm_2() const;
SGVector<float64_t> compute_h() const;

using Base::log_pdf;
using Base::grad;
using Base::hessian;
};
};

Expand Down
8 changes: 8 additions & 0 deletions src/shogun/distributions/kernel_exp_family/impl/Nystrom.cpp
Expand Up @@ -269,6 +269,14 @@ SGVector<float64_t> Nystrom::grad(index_t idx_test) const
return xi_grad;
}

SGMatrix<float64_t> Nystrom::hessian(index_t idx_test) const
{
	// Not available under the Nystrom approximation: the REQUIRE(false, ...)
	// check always fails and reports the error through Shogun's runtime
	// check machinery, so execution is not expected to proceed past it.
	REQUIRE(false, "The hessian is not implemented for the Nystrom approximation\n");
	auto D = get_num_dimensions();

	// Dummy matrix; present only to satisfy the return type.
	return SGMatrix<float64_t>(D, D);
}

SGMatrix<float64_t> Nystrom::pinv_self_adjoint(const SGMatrix<float64_t>& A)
{
// based on the snippet from
Expand Down
2 changes: 2 additions & 0 deletions src/shogun/distributions/kernel_exp_family/impl/Nystrom.h
Expand Up @@ -67,9 +67,11 @@ public :

virtual float64_t log_pdf(index_t idx_test) const;
virtual SGVector<float64_t> grad(index_t idx_test) const;
virtual SGMatrix<float64_t> hessian(index_t idx_test) const;

using Base::log_pdf;
using Base::grad;
using Base::hessian;

static SGMatrix<float64_t> pinv_self_adjoint(const SGMatrix<float64_t>& A);

Expand Down
2 changes: 2 additions & 0 deletions src/shogun/distributions/kernel_exp_family/impl/kernel/Base.h
Expand Up @@ -70,6 +70,8 @@ public :
virtual SGVector<float64_t> dx_dx(index_t a, index_t idx_b) const=0;
virtual SGMatrix<float64_t> dx_i_dx_i_dx_j(index_t a, index_t idx_b) const=0;
virtual SGMatrix<float64_t> dx_i_dx_j(index_t a, index_t idx_b) const=0;
virtual SGMatrix<float64_t> dx_i_dx_j_dx_k_dot_vec(index_t idx_a, index_t idx_b, const SGVector<float64_t>& vec) const=0;
virtual SGMatrix<float64_t> dx_i_dx_j_dx_k_dx_k_dot_vec(index_t idx_a, index_t idx_b, const SGVector<float64_t>& vec) const=0;

// old develop code
virtual SGMatrix<float64_t> dx_dx_dy_dy(index_t idx_a, index_t idx_b) const=0;
Expand Down
Expand Up @@ -307,6 +307,68 @@ SGVector<float64_t> Gaussian::dx(index_t idx_a, index_t idx_b) const
return gradient;
}

SGMatrix<float64_t> Gaussian::dx_i_dx_j_dx_k_dot_vec(index_t idx_a, index_t idx_b, const SGVector<float64_t>& vec) const
{
	// Contraction of the third kernel derivative d/dx_i d/dx_j d/dx_k of
	// k(x_a, x_b) with vec over the k index, returned as a DxD matrix.
	auto D = get_num_dimensions();

	SGVector<float64_t> diff = difference(idx_a, idx_b);
	Map<VectorXd> eigen_diff(diff.vector, D);

	SGMatrix<float64_t> result(D, D);
	Map<MatrixXd> eigen_result(result.matrix, D, D);

	Map<VectorXd> eigen_vec(vec.vector, D);

	// <diff, vec>: shared scalar weight for several terms below.
	auto weighted_sum = eigen_diff.dot(eigen_vec);
	eigen_result = eigen_diff * (eigen_diff.transpose() * weighted_sum);

	auto k = kernel(idx_a, idx_b);

	// Hoist the bandwidth factors: std::pow with small integer exponents is
	// needlessly expensive and was recomputed for every term.
	const float64_t factor = 2.0/m_sigma;
	const float64_t k_factor2 = k * factor * factor;   // k * (2/sigma)^2
	const float64_t k_factor3 = k_factor2 * factor;    // k * (2/sigma)^3

	eigen_result *= k_factor3;
	eigen_result.diagonal().array() -= k_factor2 * weighted_sum;

	// Rank-one cross term, evaluated once into a concrete matrix so the
	// transpose below does not re-evaluate the product.
	MatrixXd diff_vec = eigen_vec * eigen_diff.transpose();
	eigen_result -= k_factor2 * diff_vec;
	eigen_result -= k_factor2 * diff_vec.transpose();

	return result;
}

SGMatrix<float64_t> Gaussian::dx_i_dx_j_dx_k_dx_k_dot_vec(index_t idx_a, index_t idx_b, const SGVector<float64_t>& vec) const
{
	// Contraction of the fourth kernel derivative d/dx_i d/dx_j d/dx_k d/dx_k
	// of k(x_a, x_b) with vec, returned as a DxD matrix.
	auto D = get_num_dimensions();

	SGVector<float64_t> diff = difference(idx_a, idx_b);
	Map<VectorXd> eigen_diff = Map<VectorXd>(diff.vector, D);

	SGMatrix<float64_t> result(D, D);
	Map<MatrixXd> eigen_result(result.matrix, D, D);

	Map<VectorXd> eigen_vec(vec.vector, D);

	// <diff^2, vec>: scalar weight shared by several terms below.
	auto weighted_sq_distances = eigen_diff.array().pow(2).matrix().dot(eigen_vec);
	// Evaluate the outer product into a concrete matrix. The previous code
	// held it in an `auto` Eigen expression template, so every use
	// re-evaluated the product and in-place modification of it silently had
	// no effect (the cause of the removed "TODO: No effect??" debug block).
	MatrixXd pairwise_distances = eigen_diff * eigen_diff.transpose();
	auto k = kernel(idx_a, idx_b);

	eigen_result = k * pow(2.0/m_sigma, 4) * pairwise_distances * weighted_sq_distances;
	eigen_result -= k * pow(2.0/m_sigma, 3) * eigen_vec.sum() * pairwise_distances;

	// Column-/row-wise rescalings of the outer product by vec.
	eigen_result -= k * (16.0/pow(m_sigma, 3)) * (pairwise_distances.array().colwise() * eigen_vec.array()).matrix();
	eigen_result -= k * (16.0/pow(m_sigma, 3)) * (pairwise_distances.array().rowwise() * eigen_vec.array().transpose()).matrix();

	// Diagonal corrections.
	eigen_result.diagonal().array() -= k * pow(2.0/m_sigma, 3) * weighted_sq_distances;
	eigen_result.diagonal().array() += k * eigen_vec.sum() * pow(2.0/m_sigma, 2);
	eigen_result.diagonal() += k * (8.0/pow(m_sigma, 2)) * eigen_vec;

	return result;
}


float64_t Gaussian::difference_component(index_t idx_a, index_t idx_b, index_t i) const
{
Expand Down
Expand Up @@ -68,6 +68,8 @@ public :
virtual SGMatrix<float64_t> dx_i_dx_i_dx_j(index_t a, index_t idx_b) const;
virtual SGMatrix<float64_t> dx_i_dx_j(index_t a, index_t idx_b) const;
virtual SGMatrix<float64_t> dx_dx_dy_dy(index_t idx_a, index_t idx_b) const;
virtual SGMatrix<float64_t> dx_i_dx_j_dx_k_dot_vec(index_t idx_a, index_t idx_b, const SGVector<float64_t>& vec) const;
virtual SGMatrix<float64_t> dx_i_dx_j_dx_k_dx_k_dot_vec(index_t idx_a, index_t idx_b, const SGVector<float64_t>& vec) const;

// nystrom parts
virtual float64_t difference_component(index_t idx_a, index_t idx_b, index_t i) const;
Expand Down
44 changes: 42 additions & 2 deletions tests/unit/distribution/kernel_exp_family/impl/Full_unittest.cc
Expand Up @@ -166,8 +166,8 @@ TEST(kernel_exp_family_impl_Full, fit_kernel_Gaussian)

// from kernel_exp_family Python implementation
float64_t reference_x[] = {-0.99999999999999989, 0.00228091, 0.00342023,
0.00406425, 0.0092514 ,
-0.00646103, -0.01294499};
0.00406425, 0.0092514 ,
-0.00646103, -0.01294499};

for (auto i=0; i<ND+1; i++)
EXPECT_NEAR(x[i], reference_x[i], 1e-5);
Expand Down Expand Up @@ -236,3 +236,43 @@ TEST(kernel_exp_family_impl_Full, grad_kernel_Gaussian)
for (auto i=0; i<D; i++)
EXPECT_NEAR(grad[i], reference2[i], 1e-8);
}

TEST(kernel_exp_family_impl_Full, hessian_kernel_Gaussian)
{
	// Three 2D training points, stored column-wise.
	index_t N=3;
	index_t D=2;
	SGMatrix<float64_t> X(D,N);
	X(0,0)=0; X(1,0)=1;
	X(0,1)=2; X(1,1)=4;
	X(0,2)=3; X(1,2)=6;

	float64_t sigma = 2;
	float64_t lambda = 2;
	auto kernel = new kernel::Gaussian(sigma);
	Full est(X, kernel, lambda);
	est.fit();

	// First query point.
	SGVector<float64_t> x(D);
	x[0] = 1;
	x[1] = 1;
	auto result = est.hessian(x);

	// Reference values from the kernel_exp_family Python implementation.
	float64_t reference[] = {0.20518773, -0.01275602,
			-0.01275602, -0.33620648};
	for (auto i=0; i<D*D; i++)
		EXPECT_NEAR(result[i], reference[i], 1e-8);

	// Second query point, evaluated with the same fitted estimator.
	x[0] = -1;
	x[1] = 0;
	result = est.hessian(x);

	float64_t reference2[] = {0.12205638, 0.24511196,
			0.24511196, 0.12173557};
	for (auto i=0; i<D*D; i++)
		EXPECT_NEAR(result[i], reference2[i], 1e-8);
}

0 comments on commit b45bdc8

Please sign in to comment.