added nystrom grad

shogun-toolbox · Sep 22, 2016 · 126a0a6 · 126a0a6
1 parent 9df490f
commit 126a0a6
Show file tree

Hide file tree

Showing 6 changed files with 230 additions and 36 deletions.
diff --git a/src/shogun/distributions/kernel_exp_family/KernelExpFamily.cpp b/src/shogun/distributions/kernel_exp_family/KernelExpFamily.cpp
@@ -43,7 +43,7 @@ CKernelExpFamily::CKernelExpFamily() : CSGObject()
 }
 
 CKernelExpFamily::CKernelExpFamily(SGMatrix<float64_t> data,
-			float64_t sigma, float64_t lambda) : CSGObject()
+			float64_t sigma, float64_t lambda, float memory_limit_gib) : CSGObject()
 {
 	REQUIRE(data.matrix, "Given observations cannot be empty\n");
 	REQUIRE(data.num_rows>0, "Dimension of given observations (%d) must be positive.\n", data.num_rows);
@@ -52,6 +52,19 @@ CKernelExpFamily::CKernelExpFamily(SGMatrix<float64_t> data,
 	REQUIRE(lambda>0, "Given lambda (%f) must be positive.\n", lambda);
 
 	m_impl = new KernelExpFamilyImpl(data, sigma, lambda);
+
+	auto N =  m_impl->get_num_data();
+	auto D = m_impl->get_num_dimensions();
+	auto ND = D*N;
+	auto system_size = (ND+1)*(ND+1) + (ND+1);
+	auto memory_required_gib = system_size * sizeof(float64_t) / 8.0 / 1024.0 / 1024.0 / 1024.0;
+	if (memory_required_gib > memory_limit_gib)
+	{
+		SG_ERROR("The problem's size (N=%d, D=%d) will at least use %f GiB of computer memory, "
+				"which is above the set limit (%f). "
+				"In order to remove this error, increase this limit in the constructor.\n",
+				N, D, memory_required_gib, memory_limit_gib);
+	}
 }
 
 CKernelExpFamily::~CKernelExpFamily()

diff --git a/src/shogun/distributions/kernel_exp_family/KernelExpFamily.h b/src/shogun/distributions/kernel_exp_family/KernelExpFamily.h
@@ -45,7 +45,7 @@ class CKernelExpFamily : public CSGObject
 public:
 	CKernelExpFamily();
 	CKernelExpFamily(SGMatrix<float64_t> data,
-			float64_t sigma, float64_t lambda);
+			float64_t sigma, float64_t lambda, float memory_limit_gib=4.0);
 	virtual ~CKernelExpFamily();
 
 	void fit();

diff --git a/src/shogun/distributions/kernel_exp_family/impl/KernelExpFamilyImpl.h b/src/shogun/distributions/kernel_exp_family/impl/KernelExpFamilyImpl.h
@@ -40,7 +40,7 @@
 
 /* TODO
  *
- *   The kernel can be optional later on. For now we can just fix it (too many
+ * -  The kernel can be optional later on. For now we can just fix it (too many
  *   derivatives to be implemented for now.). We can make it a simple class where
  *   people who want other kernels can overload methods in.
  *   NOTE: Some functions should should take a reference to the return type as
@@ -56,6 +56,7 @@
  *   a lot of cache misses and how we can avoid that. Investigate how it scales
  *   with multiple cores. Investigate how much memory it uses for larger datasets.
  * - Profile memory usage of full vs Nystrom.
+ * - Nystrom can be slower than full (N=1000, D=5). Why is that?
  *
  *   Optimizations:
  * - See the various TODOs in the code

diff --git a/src/shogun/distributions/kernel_exp_family/impl/KernelExpFamilyNystromImpl.cpp b/src/shogun/distributions/kernel_exp_family/impl/KernelExpFamilyNystromImpl.cpp
@@ -61,7 +61,7 @@ KernelExpFamilyNystromImpl::KernelExpFamilyNystromImpl(SGMatrix<float64_t> data,
 	CMath::qsort(m_inds.vector, num_rkhs_basis);
 }
 
-float64_t KernelExpFamilyNystromImpl::kernel_hessian_i_j(index_t idx_a, index_t idx_b, index_t i, index_t j)
+float64_t KernelExpFamilyNystromImpl::kernel_hessian_component(index_t idx_a, index_t idx_b, index_t i, index_t j)
 {
 	auto D = get_num_dimensions();
 
@@ -104,7 +104,7 @@ float64_t KernelExpFamilyNystromImpl::compute_lower_right_submatrix_element(inde
 	auto b = bj.first;
 	auto j = bj.second;
 
-	auto G_a_b_i_j = kernel_hessian_i_j(a, b, i, j);
+	auto G_a_b_i_j = kernel_hessian_component(a, b, i, j);
 
 	float64_t G_sum = 0;
 	// TODO check parallel with accumulating on the G_sum
@@ -113,8 +113,8 @@ float64_t KernelExpFamilyNystromImpl::compute_lower_right_submatrix_element(inde
 		for (auto idx_d=0; idx_d<D; idx_d++)
 		{
 			// TODO find case when G1=G2 and dont re-compute
-			auto G1 = kernel_hessian_i_j(a, idx_n, i, idx_d);
-			auto G2 = kernel_hessian_i_j(idx_n, b, idx_d, j);
+			auto G1 = kernel_hessian_component(a, idx_n, i, idx_d);
+			auto G2 = kernel_hessian_component(idx_n, b, idx_d, j);
 			G_sum += G1*G2;
 		}
 
@@ -145,7 +145,7 @@ SGVector<float64_t> KernelExpFamilyNystromImpl::compute_first_row_no_storing()
 			auto bj = idx_to_ai(ind2);
 			auto b = bj.first;
 			auto j = bj.second;
-			auto entry = kernel_hessian_i_j(a, b, i, j);
+			auto entry = kernel_hessian_component(a, b, i, j);
 			result[ind1] += h[ind2] * entry;
 		}
 
@@ -253,7 +253,7 @@ SGMatrix<float64_t> KernelExpFamilyNystromImpl::pinv(const SGMatrix<float64_t>&
 	return A_pinv;
 }
 
-float64_t KernelExpFamilyNystromImpl::kernel_dx_i(const SGVector<float64_t>& a, index_t idx_b, index_t i)
+float64_t KernelExpFamilyNystromImpl::kernel_dx_component(const SGVector<float64_t>& a, index_t idx_b, index_t i)
 {
 	auto D = get_num_dimensions();
 	Map<VectorXd> eigen_a(a.vector, D);
@@ -266,7 +266,7 @@ float64_t KernelExpFamilyNystromImpl::kernel_dx_i(const SGVector<float64_t>& a,
 	return 2*k*diff[i]/m_sigma;
 }
 
-float64_t KernelExpFamilyNystromImpl::kernel_dx_dx_i(const SGVector<float64_t>& a, index_t idx_b, index_t i)
+float64_t KernelExpFamilyNystromImpl::kernel_dx_dx_component(const SGVector<float64_t>& a, index_t idx_b, index_t i)
 {
 	auto D = get_num_dimensions();
 	Map<VectorXd> eigen_a(a.vector, D);
@@ -277,6 +277,43 @@ float64_t KernelExpFamilyNystromImpl::kernel_dx_dx_i(const SGVector<float64_t>&
 
 	return k*(sq_diff[i]*pow(2.0/m_sigma, 2) -2.0/m_sigma);
 }
+SGVector<float64_t> KernelExpFamilyNystromImpl::kernel_dx_i_dx_i_dx_j_component(const SGVector<float64_t>& a, index_t idx_b, index_t i)
+{
+	auto D = get_num_dimensions();
+	Map<VectorXd> eigen_a(a.vector, D);
+	Map<VectorXd> eigen_b(m_data.get_column_vector(idx_b), D);
+	auto diff = eigen_b-eigen_a;
+	auto sq_diff = diff.array().pow(2).matrix();
+	auto k=CMath::exp(-sq_diff.sum() / m_sigma);
+
+	SGVector<float64_t> result(D);
+	Map<VectorXd> eigen_result(result.vector, D);
+
+	eigen_result = sq_diff[i]*diff.transpose();
+	eigen_result *= k* pow(2.0/m_sigma, 3);
+	eigen_result -= k * diff.transpose() * pow(2.0/m_sigma, 2);
+	eigen_result[i] -= 2* k * diff[i] * pow(2.0/m_sigma, 2);
+
+	return result;
+}
+
+SGVector<float64_t> KernelExpFamilyNystromImpl::kernel_dx_i_dx_j_component(const SGVector<float64_t>& a, index_t idx_b, index_t i)
+{
+	auto D = get_num_dimensions();
+	Map<VectorXd> eigen_a(a.vector, D);
+	Map<VectorXd> eigen_b(m_data.get_column_vector(idx_b), D);
+	auto diff = eigen_b-eigen_a;
+	auto k=CMath::exp(-diff.array().pow(2).sum() / m_sigma);
+
+	SGVector<float64_t> result(D);
+	Map<VectorXd> eigen_result(result.vector, D);
+
+	eigen_result = diff[i]*diff;
+	eigen_result *= k * pow(2.0/m_sigma, 2);
+	eigen_result[i] -= k * 2.0/m_sigma;
+
+	return result;
+}
 
 index_t KernelExpFamilyNystromImpl::get_num_rkhs_basis()
 {
@@ -297,12 +334,46 @@ float64_t KernelExpFamilyNystromImpl::log_pdf(const SGVector<float64_t>& x)
 		auto a = ai.first;
 		auto i = ai.second;
 
-		auto grad_x_xa_i = kernel_dx_i(x, a, i);
-		auto xi_grad_i = kernel_dx_dx_i(x, a, i);
+		auto grad_x_xa_i = kernel_dx_component(x, a, i);
+		auto xi_grad_i = kernel_dx_dx_component(x, a, i);
 
 		xi += xi_grad_i / N;
 		beta_sum += grad_x_xa_i * m_alpha_beta[1+ind_idx];
 	}
 
 	return m_alpha_beta[0]*xi + beta_sum;
 }
+
+SGVector<float64_t> KernelExpFamilyNystromImpl::grad(const SGVector<float64_t>& x)
+{
+	auto N = get_num_data();
+	auto D = get_num_dimensions();
+	auto m = get_num_rkhs_basis();
+
+	SGVector<float64_t> xi_grad(D);
+	SGVector<float64_t> beta_sum_grad(D);
+	Map<VectorXd> eigen_xi_grad(xi_grad.vector, D);
+	Map<VectorXd> eigen_beta_sum_grad(beta_sum_grad.vector, D);
+	eigen_xi_grad = VectorXd::Zero(D);
+	eigen_beta_sum_grad.array() = VectorXd::Zero(D);
+
+	for (auto ind_idx=0; ind_idx<m; ind_idx++)
+	{
+		auto ai = idx_to_ai(m_inds[ind_idx]);
+		auto a = ai.first;
+		auto i = ai.second;
+
+		auto xi_gradient_mat_component = kernel_dx_i_dx_i_dx_j_component(x, a, i);
+		Map<VectorXd> eigen_xi_gradient_mat_component(xi_gradient_mat_component.vector, D);
+		auto left_arg_hessian_component = kernel_dx_i_dx_j_component(x, a, i);
+		Map<VectorXd> eigen_left_arg_hessian_component(left_arg_hessian_component.vector, D);
+
+		eigen_xi_grad += eigen_xi_gradient_mat_component;
+		eigen_beta_sum_grad += eigen_left_arg_hessian_component * m_alpha_beta[1+ind_idx];
+	}
+
+	// re-use memory
+	eigen_xi_grad *= m_alpha_beta[0] / N;
+	eigen_xi_grad += eigen_beta_sum_grad;
+	return xi_grad;
+}
diff --git a/src/shogun/distributions/kernel_exp_family/impl/KernelExpFamilyNystromImpl.h b/src/shogun/distributions/kernel_exp_family/impl/KernelExpFamilyNystromImpl.h
@@ -50,17 +50,20 @@ public :
 			SGVector<index_t> inds);
 
 	// for training
-	float64_t kernel_hessian_i_j(index_t idx_a, index_t idx_b, index_t i, index_t j);
+	float64_t kernel_hessian_component(index_t idx_a, index_t idx_b, index_t i, index_t j);
 
 	// for new data
-	float64_t kernel_dx_i(const SGVector<float64_t>& a, index_t idx_b, index_t i);
-	float64_t kernel_dx_dx_i(const SGVector<float64_t>& a, index_t idx_b, index_t i);
+	float64_t kernel_dx_component(const SGVector<float64_t>& a, index_t idx_b, index_t i);
+	float64_t kernel_dx_dx_component(const SGVector<float64_t>& a, index_t idx_b, index_t i);
+	SGVector<float64_t> kernel_dx_i_dx_i_dx_j_component(const SGVector<float64_t>& a, index_t idx_b, index_t i);
+	SGVector<float64_t> kernel_dx_i_dx_j_component(const SGVector<float64_t>& a, index_t idx_b, index_t i);
 
 	float64_t compute_lower_right_submatrix_element(index_t row_idx, index_t col_idx);
 	SGVector<float64_t> compute_first_row_no_storing();
 	std::pair<SGMatrix<float64_t>, SGVector<float64_t>> build_system();
 	void fit();
 	float64_t  log_pdf(const SGVector<float64_t>& x);
+	SGVector<float64_t> grad(const SGVector<float64_t>& x);
 
 	std::pair<index_t, index_t> idx_to_ai(const index_t& idx);
 	static SGMatrix<float64_t> pinv(const SGMatrix<float64_t>& A);