Skip to content

Commit

Permalink
correct bias computation in ridge regression
Browse files Browse the repository at this point in the history
  • Loading branch information
karlnapf committed Jul 21, 2018
1 parent 4786b02 commit ab88ee7
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 17 deletions.
4 changes: 4 additions & 0 deletions src/shogun/features/DenseFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
return new CDenseFeatures<ST>(*this);
}

{
return result;
}

template<class ST> CDenseFeatures<ST>::~CDenseFeatures()
{
free_features();
Expand Down
14 changes: 14 additions & 0 deletions src/shogun/features/DenseFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <shogun/features/DotFeatures.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/lib/DataType.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>

#include <shogun/lib/SGMatrix.h>

Expand Down Expand Up @@ -303,6 +304,19 @@ template<class ST> class CDenseFeatures: public CDotFeatures
virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df,
int32_t vec_idx2);

/** Computes the average feature vector.
 *
 * Column-wise sums the full feature matrix and divides the sum by the
 * number of feature vectors.
 *
 * Restricted to arithmetic feature types ST via the enable_if default
 * template argument; calling this for a non-arithmetic ST fails to
 * compile.
 *
 * NOTE(review): the scale factor 1.0/get_num_vectors() is applied to an
 * SGVector<ST>; for integral ST this presumably rounds/truncates inside
 * linalg::scale — confirm against the linalg implementation.
 * NOTE(review): assumes get_num_vectors() > 0, otherwise this divides by
 * zero — TODO confirm callers guarantee a non-empty feature set.
 *
 * @return Average feature vector
 */
template <typename X = typename std::enable_if_t<std::is_arithmetic<ST>::value>>
SGVector<ST> mean() const
{
// TODO use DotIterator, but that is for non const :(
// TODO optimize for non-batch mode, get_feature_vector non const :(
SGVector<ST> result = linalg::colwise_sum(get_feature_matrix());
linalg::scale(result, result, 1.0/get_num_vectors());
return result;
}

/** compute dot product between vector1 and a dense vector
*
* possible with subset
Expand Down
5 changes: 0 additions & 5 deletions src/shogun/labels/RegressionLabels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,6 @@ CLabels* CRegressionLabels::shallow_subset_copy()
return shallow_copy_labels;
}

float64_t CRegressionLabels::compute_bias() const
{
	// The bias is simply the arithmetic mean of all labels; get_labels()
	// already honours any active subset, so no extra handling is needed here.
	const auto all_labels = get_labels();
	return linalg::mean(all_labels);
}

namespace shogun
{
Some<CRegressionLabels> regression_labels(CLabels* orig)
Expand Down
6 changes: 0 additions & 6 deletions src/shogun/labels/RegressionLabels.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,6 @@ class CRegressionLabels : public CDenseLabels
*/
CRegressionLabels(CFile* loader);

/** Computes the label bias, that is, the average of all labels.
* Works with subsets.
*
* @return bias*/
float64_t compute_bias() const;

/** get label type
*
* @return label type real
Expand Down
4 changes: 3 additions & 1 deletion src/shogun/regression/LinearRidgeRegression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,11 @@ bool CLinearRidgeRegression::train_machine(CFeatures* data)

auto decomposition = linalg::cholesky_factor(kernel_matrix);
y = linalg::cholesky_solver(decomposition, y);
auto lab = regression_labels(m_labels)->get_labels();
auto intercept = linalg::mean(y) - linalg::dot(y, feats->mean<float64_t>());
set_bias(intercept);

set_w(y);
set_bias(labels->compute_bias());
return true;
}

Expand Down
7 changes: 2 additions & 5 deletions src/shogun/regression/LinearRidgeRegression.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,18 @@ namespace shogun
/** @brief Class LinearRidgeRegression implements Ridge Regression - a regularized least square
* method for classification and regression.
*
* RR is closely related to Fisher's Linear Discriminant (cf. LDA).
*
* Internally, it is solved via minimizing the following system
*
* \f[
* \frac{1}{2}\left(\sum_{i=1}^N(y_i-{\bf w}\cdot {\bf x}_i)^2 + \tau||{\bf w}||^2\right)
* \frac{1}{2}\left(\sum_{i=1}^N(y_i-({\bf w}\cdot {\bf x}_i+b))^2 + \tau||{\bf w}||^2\right)
* \f]
*
* which boils down to solving a linear system
*
* \f[
* {\bf w} = \left(\tau {\bf I}+ \sum_{i=1}^N{\bf x}_i{\bf x}_i^T\right)^{-1}\left(\sum_{i=1}^N y_i{\bf x}_i\right)
* b = \frac{1}{N}\sum_{i=1}^N y_i - {\bf w}\cdot \frac{1}{N}\sum_{i=1}^N {\bf x}_i
* \f]
*
* The expressed solution is a linear method with bias b (cf. CLinearMachine).
*/
class CLinearRidgeRegression : public CLinearMachine
{
Expand Down

0 comments on commit ab88ee7

Please sign in to comment.