Simplify LRR via using CDenseFeatures interface for cov, gram, sum
karlnapf committed Jul 31, 2018
1 parent aac28bd commit 587aa7d
Showing 30 changed files with 310 additions and 118 deletions.
2 changes: 1 addition & 1 deletion data
@@ -2,15 +2,21 @@
Linear Ridge Regression
=======================

A linear ridge regression model can be defined as :math:`y_i = \bf{w}^\top\bf{x_i} + b` where :math:`y_i` is the predicted value, :math:`\bf{x_i}` is a feature vector, :math:`\bf{w}` is the weight vector, and :math:`b` is a bias term.
A linear ridge regression model can be defined as :math:`y_i = \bf{w}^\top\bf{x_i}` where :math:`y_i` is the predicted value, :math:`\bf{x_i}` is a feature vector, and :math:`\bf{w}` is the weight vector.
We aim to find the linear function that best explains the data, i.e. minimizes the squared loss plus an :math:`L_2` regularization term. One can show that the solution can be written as:

.. math::

    {\bf w}=\left(\tau I_{D}+XX^{\top}\right)^{-1}Xy

where :math:`X=\left[{\bf x}_{1},\dots,{\bf x}_{N}\right]\in\mathbb{R}^{D\times N}` is the training data matrix, containing :math:`N` training samples of dimension :math:`D`, :math:`y=[y_{1},\dots,y_{N}]^{\top}\in\mathbb{R}^{N}` are the labels, and :math:`\tau>0` scales the regularization term.

The bias term is computed as :math:`b=\frac{1}{N}\sum_{i=1}^{N}y_{i}-{\bf w}\cdot\bar{\mathbf{x}}`, where :math:`\bar{\mathbf{x}}=\frac{1}{N}\sum_{i=1}^{N}{\bf x}_{i}`.
Alternatively, if :math:`D>N`, the solution can be written as

.. math::

    {\bf w}=X\left(\tau I_{N}+X^{\top}X\right)^{-1}y

In practice, an additional bias :math:`b=\frac{1}{N}\sum_{i=1}^{N}y_{i}-{\bf w}\cdot\bar{\mathbf{x}}`, where :math:`\bar{\mathbf{x}}=\frac{1}{N}\sum_{i=1}^{N}{\bf x}_{i}`, can also be included, which effectively centers the data :math:`X` before computing the solution.
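
For intuition, the equivalence of the two forms can be checked with a small standalone snippet (plain Eigen, not part of this commit; all names are illustrative)::

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        const int D = 3, N = 5;
        const double tau = 0.1;
        Eigen::MatrixXd X = Eigen::MatrixXd::Random(D, N); // one sample per column
        Eigen::VectorXd y = Eigen::VectorXd::Random(N);    // labels

        // primal form: w = (tau*I_D + X*X^T)^{-1} * X * y
        Eigen::VectorXd w_primal =
            (tau * Eigen::MatrixXd::Identity(D, D) + X * X.transpose())
                .ldlt().solve(X * y);

        // dual form: w = X * (tau*I_N + X^T*X)^{-1} * y, cheaper when D > N
        Eigen::VectorXd w_dual =
            X * (tau * Eigen::MatrixXd::Identity(N, N) + X.transpose() * X)
                    .ldlt().solve(y);

        // both give the same w up to numerical error
        std::cout << (w_primal - w_dual).norm() << std::endl;
    }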

For the special case when :math:`\tau = 0`, a wrapper class :sgclass:`CLeastSquaresRegression` is available.

@@ -34,6 +40,10 @@ After training, we can extract :math:`{\bf w}` and the bias.

.. sgexample:: linear_ridge_regression.sg:extract_w

We could also have trained without the bias term and set it manually afterwards.

.. sgexample:: linear_ridge_regression.sg:manual_bias

Finally, we can evaluate the mean squared error using :sgclass:`CMeanSquaredError`.

.. sgexample:: linear_ridge_regression.sg:evaluate_error
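
For reference, :sgclass:`CMeanSquaredError` computes

.. math::

    \textrm{MSE}=\frac{1}{N}\sum_{i=1}^{N}\left(y_{i}-\hat{y}_{i}\right)^{2}

where :math:`\hat{y}_{i}` are the predicted labels and :math:`y_{i}` the ground truth.
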
11 changes: 10 additions & 1 deletion examples/meta/src/regression/linear_ridge_regression.sg
@@ -24,10 +24,19 @@ real b = lrr.get_real("bias")
RealVector w = lrr.get_real_vector("w")
#[!extract_w]

#![manual_bias]
Machine lrr2 = machine("LinearRidgeRegression", tau=0.001, labels=labels_train, use_bias=False)
lrr2.train(features_train)
real my_bias = 0.1
lrr2.put("bias", my_bias)
Labels labels_predict2 = lrr2.apply(features_test)
#![manual_bias]

#![evaluate_error]
Evaluation eval = evaluation("MeanSquaredError")
real mse = eval.evaluate(labels_predict, labels_test)
#![evaluate_error]

# integration testing variables
RealVector output = labels_test.get_real_vector("labels")
RealVector output = labels_predict.get_real_vector("labels")
RealVector output2 = labels_predict2.get_real_vector("labels")
6 changes: 0 additions & 6 deletions src/interfaces/swig/Regression.i
@@ -8,9 +8,6 @@
%rename(Regression) CRegression;
%rename(KernelRidgeRegression) CKernelRidgeRegression;
%rename(KRRNystrom) CKRRNystrom;
%rename(LinearRidgeRegression) CLinearRidgeRegression;
%rename(LeastSquaresRegression) CLeastSquaresRegression;
%rename(LeastAngleRegression) CLeastAngleRegression;
%rename(LibSVR) CLibSVR;
%rename(LibLinearRegression) CLibLinearRegression;
%rename(MKL) CMKL;
@@ -25,9 +22,6 @@
%include <shogun/regression/Regression.h>
%include <shogun/regression/KernelRidgeRegression.h>
%include <shogun/regression/KRRNystrom.h>
%include <shogun/regression/LinearRidgeRegression.h>
%include <shogun/regression/LeastSquaresRegression.h>
%include <shogun/regression/LeastAngleRegression.h>
%include <shogun/regression/svr/LibSVR.h>
%include <shogun/regression/svr/LibLinearRegression.h>
%include <shogun/classifier/mkl/MKL.h>
3 changes: 0 additions & 3 deletions src/interfaces/swig/Regression_includes.i
@@ -5,9 +5,6 @@
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/regression/KernelRidgeRegression.h>
#include <shogun/regression/KRRNystrom.h>
#include <shogun/regression/LinearRidgeRegression.h>
#include <shogun/regression/LeastSquaresRegression.h>
#include <shogun/regression/LeastAngleRegression.h>
#include <shogun/classifier/svm/SVM.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/regression/svr/LibSVR.h>
2 changes: 2 additions & 0 deletions src/interfaces/swig/factory.i
@@ -4,6 +4,8 @@
%include <shogun/util/factory.h>

%template(features) shogun::features<float64_t>;
%template(labels) shogun::labels<float64_t>;


%newobject shogun::string_features(CFile*, EAlphabet alpha = DNA, EPrimitiveType primitive_type = PT_CHAR);
%newobject shogun::transformer(const std::string&);
2 changes: 1 addition & 1 deletion src/shogun/classifier/LDA.h
@@ -174,6 +174,7 @@ class CLDA : public CDenseRealDispatch<CLDA, CLinearMachine>
/** @return object name */
virtual const char* get_name() const { return "LDA"; }

protected:
/** train LDA classifier
*
* @param data training data (parameter can be avoided if distance or
@@ -186,7 +187,6 @@ class CLDA : public CDenseRealDispatch<CLDA, CLinearMachine>
std::is_floating_point<ST>::value>>
bool train_machine_templated(CDenseFeatures<ST>* data);

protected:
/**
* Train the machine with the svd-based solver (@see CFisherLDA).
* @param features training data
60 changes: 60 additions & 0 deletions src/shogun/features/DenseFeatures.cpp
@@ -19,6 +19,19 @@
#include <algorithm>
#include <string.h>

#define ASSERT_FLOATING_POINT \
switch (get_feature_type()) \
{ \
case F_SHORTREAL: \
case F_DREAL: \
case F_LONGREAL: \
break; \
default: \
REQUIRE( \
false, "Only defined for %s with real type, not for %s.\n", \
get_name(), demangled_type<ST>().c_str()); \
}

namespace shogun {

template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size)
Expand Down Expand Up @@ -1001,6 +1014,53 @@ template< class ST > CDenseFeatures< ST >* CDenseFeatures< ST >::obtain_from_gen
return (CDenseFeatures< ST >*) base_features;
}

template <typename ST>
SGVector<ST> CDenseFeatures<ST>::sum() const
{
// TODO optimize non batch mode, but get_feature_vector is non const :(
SGVector<ST> result = linalg::rowwise_sum(get_feature_matrix());
return result;
}

template <typename ST>
SGVector<ST> CDenseFeatures<ST>::mean() const
{
ASSERT_FLOATING_POINT

auto result = sum();
ST scale = ((ST)1.0) / get_num_vectors();
linalg::scale(result, result, scale);
return result;
}

template <typename ST>
SGMatrix<ST> CDenseFeatures<ST>::cov() const
{
// TODO optimize non batch mode, but get_feature_vector is non const :(
auto mat = get_feature_matrix();
return linalg::matrix_prod(mat, mat, false, true);
}

template <typename ST>
SGMatrix<ST> CDenseFeatures<ST>::gram() const
{
// TODO optimize non batch mode, but get_feature_vector is non const :(
auto mat = get_feature_matrix();
return linalg::matrix_prod(mat, mat, true, false);
}

template <typename ST>
SGVector<ST> CDenseFeatures<ST>::dot(const SGVector<ST>& other) const
{
REQUIRE(
get_num_vectors() == other.size(), "Number of feature vectors (%d) "
"must match provided vector's size "
"(%d).\n",
get_num_vectors(), other.size());
// TODO optimize non batch mode, but get_feature_vector is non const :(
return linalg::matrix_prod(get_feature_matrix(), other, false);
}

template class CDenseFeatures<bool>;
template class CDenseFeatures<char>;
template class CDenseFeatures<int8_t>;
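
As a side note, what the new methods compute for a D x N feature matrix (one feature vector per column, matching CDenseFeatures' layout) can be sketched in plain Eigen (not the Shogun API; illustrative only):

#include <Eigen/Dense>

int main()
{
    const int D = 3, N = 5;
    Eigen::MatrixXd X = Eigen::MatrixXd::Random(D, N); // one feature vector per column

    Eigen::VectorXd sum = X.rowwise().sum();  // sum():  length-D sum of all vectors
    Eigen::VectorXd mean = sum / N;           // mean(): empirical mean
    Eigen::MatrixXd cov = X * X.transpose();  // cov():  D x D, uncentered, un-normalized
    Eigen::MatrixXd gram = X.transpose() * X; // gram(): N x N matrix of pairwise dot products
    Eigen::VectorXd alpha = Eigen::VectorXd::Random(N);
    Eigen::VectorXd d = X * alpha;            // dot(alpha): feature matrix times a vector
}

With these building blocks, a ridge solver only needs cov() (or gram() when D > N) plus a linear solve, which is presumably the simplification the commit title refers to.
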
48 changes: 44 additions & 4 deletions src/shogun/features/DenseFeatures.h
@@ -11,14 +11,14 @@

#include <shogun/lib/config.h>

#include <shogun/lib/common.h>
#include <shogun/lib/Cache.h>
#include <shogun/io/File.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/io/File.h>
#include <shogun/lib/Cache.h>
#include <shogun/lib/DataType.h>

#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/common.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>

namespace shogun {
template<class ST> class CStringFeatures;
Expand Down Expand Up @@ -303,6 +303,46 @@ template<class ST> class CDenseFeatures: public CDotFeatures
virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df,
int32_t vec_idx2);

/** Computes the sum of all feature vectors
* @return Sum of all feature vectors
*/
SGVector<ST> sum() const;

/** Computes the empirical mean of all feature vectors
* @return Mean of all feature vectors
*/
SGVector<ST> mean() const;

/** Computes the \f$D \times D\f$ (uncentered, un-normalized) covariance matrix
*
*\f[
* X X^\top
* \f]
*
* where \f$X\f$ is the \f$DxN\f$ dimensional feature matrix with \f$N\f$
* feature vectors of dimension \f$D\f$.
*/
SGMatrix<ST> cov() const;
/** Computes the \f$N \times N\f$ (uncentered, un-normalized) Gram matrix of
* pairwise dot products, that is
*
*\f[
* X^\top X
* \f]
*
* where \f$X\f$ is the \f$DxN\f$ dimensional feature matrix with \f$N\f$
* feature vectors of dimension \f$D\f$.
*/
SGMatrix<ST> gram() const;

/** Computes the dot product of the feature matrix with a given vector.
*
* @param other Vector to compute dot products with, size must match number
* of feature vectors
* @return Vector with as many entries as feature dimensions
*/
SGVector<ST> dot(const SGVector<ST>& other) const;

/** compute dot product between vector1 and a dense vector
*
* possible with subset
9 changes: 9 additions & 0 deletions src/shogun/lib/SGVector.h
@@ -123,6 +123,15 @@

/** Wraps an Eigen3 row vector around the data of this matrix */
operator EigenRowVectorXtMap() const;

/** @return a (copied) typed vector with same content */
template <class X>
SGVector<X> as() const
{
SGVector<X> v(vlen);
// copy this vector's elements into v, converting them to type X
std::copy(vector, vector + vlen, v.begin());
return v;
}
#endif // SWIG

/** Set vector to a constant
4 changes: 3 additions & 1 deletion src/shogun/machine/LinearMachine.cpp
@@ -37,7 +37,9 @@ void CLinearMachine::init()

SG_ADD(&m_w, "w", "Parameter vector w.", MS_NOT_AVAILABLE);
SG_ADD(&bias, "bias", "Bias b.", MS_NOT_AVAILABLE);
SG_ADD(&features, "features", "Feature object.", MS_NOT_AVAILABLE);
SG_ADD(
(CFeatures**)&features, "features", "Feature object.",
MS_NOT_AVAILABLE);
}


25 changes: 6 additions & 19 deletions src/shogun/regression/LeastSquaresRegression.h
@@ -18,25 +18,12 @@

namespace shogun
{
/** @brief class to perform Least Squares Regression
*
* Internally it is solved via minimizing the following system
*
* \f[
* \frac{1}{2}\left(\sum_{i=1}^N(y_i-{\bf w}\cdot {\bf x}_i)^2\right)
* \f]
*
* which boils down to solving the linear system
*
* \f[
* {\bf w} = \left(\sum_{i=1}^N{\bf x}_i{\bf x}_i^T\right)^{-1}\left(\sum_{i=1}^N y_i{\bf x}_i\right)
* \f]
* where x are the training examples and y the vector of labels.
*
* The expressed solution is a linear method with bias 0 (cf. CLinearMachine).
*/
class CLeastSquaresRegression : public CLinearRidgeRegression
{
/** @brief class to perform Least Squares Regression
*
* Same as CLinearRidgeRegression, but without a regularization term.
*/
class CLeastSquaresRegression : public CLinearRidgeRegression
{
public:
/** problem type */
MACHINE_PROBLEM_TYPE(PT_REGRESSION);
Expand Down
