Remove nonsensical CLinearMachine::compute_bias #4381

Merged
merged 5 commits on Jul 23, 2018
@@ -2,14 +2,15 @@
Linear Ridge Regression
=======================

A linear ridge regression model can be defined as :math:`y_i = \bf{w}^\top\bf{x_i}` where :math:`y_i` is the predicted value, :math:`\bf{x_i}` is a feature vector and :math:`\bf{w}` is the weight vector. We aim to find the linear function that best explains the data, i.e. minimizes the squared loss plus a :math:`L_2` regularization term. One can show the solution can be written as:
A linear ridge regression model can be defined as :math:`y_i = \bf{w}^\top\bf{x_i} + b` where :math:`y_i` is the predicted value, :math:`\bf{x_i}` is a feature vector, :math:`\bf{w}` is the weight vector, and :math:`b` is a bias term.
We aim to find the linear function that best explains the data, i.e. minimizes the squared loss plus an :math:`L_2` regularization term. One can show that the solution can be written as:

.. math::
{\bf w} = \left(\tau {\bf I}+ \sum_{i=1}^N{\bf x}_i{\bf x}_i^\top\right)^{-1}\left(\sum_{i=1}^N y_i{\bf x}_i\right)
{\bf w}=\left(\tau I_{D}+XX^{\top}\right)^{-1}Xy

where :math:`N` is the number of training samples and :math:`\tau>0` scales the regularization term.
where :math:`X=\left[{\bf x}_{1},\dots,{\bf x}_{N}\right]\in\mathbb{R}^{D\times N}` is the training data matrix, containing :math:`N` training samples of dimension :math:`D`, :math:`y=[y_{1},\dots,y_{N}]^{\top}\in\mathbb{R}^{N}` are the labels, and :math:`\tau>0` scales the regularization term.

A bias term, which is the squared empirical error, can also be calculated.
The bias term is computed as :math:`b=\frac{1}{N}\sum_{i=1}^{N}y_{i}-{\bf w}\cdot\bar{\mathbf{x}}`, where :math:`\bar{\mathbf{x}}=\frac{1}{N}\sum_{i=1}^{N}{\bf x}_{i}`.
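
For a concrete feel of the two formulas above, here is a self-contained sketch using plain Eigen (not the shogun API; the data values are made up for illustration):

.. code-block:: cpp

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        const int D = 2, N = 5;
        const double tau = 0.001;

        // X is D x N, one training sample per column.
        Eigen::MatrixXd X(D, N);
        X << 1, 2, 3, 4, 5,
             2, 1, 4, 3, 6;
        Eigen::VectorXd y(N);
        y << 3.1, 2.9, 7.2, 6.8, 11.1;

        // w = (tau * I_D + X X^T)^{-1} X y, solved via Cholesky (LLT).
        Eigen::MatrixXd A =
            tau * Eigen::MatrixXd::Identity(D, D) + X * X.transpose();
        Eigen::VectorXd w = A.llt().solve(X * y);

        // b = mean(y) - w . x_bar, with x_bar the mean training sample.
        double b = y.mean() - w.dot(X.rowwise().mean());

        std::cout << "w = " << w.transpose() << ", b = " << b << "\n";
    }
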

For the special case when :math:`\tau = 0`, a wrapper class :sgclass:`CLeastSquaresRegression` is available.

@@ -25,19 +26,11 @@ We create an instance of :sgclass:`CLinearRidgeRegression` classifier, passing i

.. sgexample:: linear_ridge_regression.sg:create_instance

Then we train the regression model and apply it to test data to get the predicted :sgclass:`CRegressionLabels` and bias.
Then we train the regression model and apply it to test data to get the predicted :sgclass:`CRegressionLabels`.

.. sgexample:: linear_ridge_regression.sg:train_and_apply

Optionally the bias can be disabled to avoid redundant computation.

.. sgexample:: linear_ridge_regression.sg:disable_bias

Imagine, we know the bias term. We can set it as

.. sgexample:: linear_ridge_regression.sg:set_bias_manually

After training, we can extract :math:`{\bf w}`.
After training, we can extract :math:`{\bf w}` and the bias.

.. sgexample:: linear_ridge_regression.sg:extract_w

examples/meta/src/regression/linear_ridge_regression.sg (10 changes: 1 addition & 9 deletions)
@@ -17,18 +17,10 @@ Machine lrr = machine("LinearRidgeRegression", tau=0.001, labels=labels_train)
#![train_and_apply]
lrr.train(features_train)
Labels labels_predict = lrr.apply(features_test)
real b = lrr.get_real("bias")
#![train_and_apply]

#![disable_bias]
lrr.put("compute_bias", False)
#![disable_bias]

#![set_bias_manually]
lrr.put("bias", b)
#![set_bias_manually]

#![extract_w]
real b = lrr.get_real("bias")
RealVector w = lrr.get_real_vector("w")
#![extract_w]
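
For readers on the C++ API, a rough equivalent of the meta example above could look as follows. This is a sketch against the shogun 6.x API; the helper function and variable names are illustrative, not part of the example file:

#include <shogun/base/some.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/regression/LinearRidgeRegression.h>

using namespace shogun;

// Illustrative helper: train, predict on test data, read back bias and w.
void run_ridge(CDenseFeatures<float64_t>* features_train,
               CDenseFeatures<float64_t>* features_test,
               CRegressionLabels* labels_train)
{
	auto lrr = some<CLinearRidgeRegression>(0.001, features_train, labels_train);
	lrr->train();

	auto labels_predict = lrr->apply_regression(features_test);

	// After training, both the bias and w are available on the machine.
	float64_t b = lrr->get_bias();
	SGVector<float64_t> w = lrr->get_w();

	SG_UNREF(labels_predict);
}
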

src/shogun/classifier/LDA.cpp (5 changes: 2 additions & 3 deletions)
@@ -19,8 +19,7 @@
using namespace Eigen;
using namespace shogun;

CLDA::CLDA(float64_t gamma, ELDAMethod method, bool bdc_svd)
: CLinearMachine(false)
CLDA::CLDA(float64_t gamma, ELDAMethod method, bool bdc_svd) : CLinearMachine()
{
init();
m_method=method;
@@ -31,7 +30,7 @@ CLDA::CLDA(float64_t gamma, ELDAMethod method, bool bdc_svd)
CLDA::CLDA(
float64_t gamma, CDenseFeatures<float64_t>* traindat, CLabels* trainlab,
ELDAMethod method, bool bdc_svd)
: CLinearMachine(false), m_gamma(gamma)
: CLinearMachine(), m_gamma(gamma)
{
init();
set_features(traindat);
src/shogun/classifier/svm/LibLinear.cpp (4 changes: 1 addition & 3 deletions)
@@ -45,12 +45,10 @@ CLibLinear::CLibLinear(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
void CLibLinear::init()
{
set_liblinear_solver_type(L2R_L1LOSS_SVC_DUAL);
set_bias_enabled(false);
set_bias_enabled(true);
set_C(1, 1);
set_max_iterations();
set_epsilon(1e-5);
/** Prevent default bias computation*/
set_compute_bias(false);

SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
src/shogun/classifier/svm/NewtonSVM.cpp (2 changes: 0 additions & 2 deletions)
@@ -27,7 +27,6 @@ CNewtonSVM::CNewtonSVM() : CIterativeMachine<CLinearMachine>()
prec = 1e-6;
C = 1;
t = 0;
set_compute_bias(true);
}

CNewtonSVM::CNewtonSVM(
@@ -41,7 +40,6 @@ CNewtonSVM::CNewtonSVM(
t = 0;
set_features(traindat);
set_labels(trainlab);
set_compute_bias(true);
}


src/shogun/labels/RegressionLabels.cpp (1 change: 1 addition & 0 deletions)
@@ -1,5 +1,6 @@
#include <shogun/labels/DenseLabels.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>
Collaborator: Not necessary or was implicit include?

Member Author: unnecessary

using namespace shogun;

src/shogun/machine/LinearMachine.cpp (63 changes: 5 additions & 58 deletions)
@@ -11,42 +11,33 @@
#include <shogun/labels/Labels.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/machine/LinearMachine.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

CLinearMachine::CLinearMachine(): CMachine()
{
init();
}

CLinearMachine::CLinearMachine(bool comput_bias): CMachine()
{
init();

set_compute_bias(comput_bias);
}

CLinearMachine::CLinearMachine(CLinearMachine* machine) : CMachine()
{
init();
REQUIRE(machine, "No machine provided.\n");

set_w(machine->get_w().clone());
auto w = machine->get_w();
auto w_clone = w.clone();
set_w(w_clone);
set_bias(machine->get_bias());
set_compute_bias(machine->get_compute_bias());
}

void CLinearMachine::init()
{
bias = 0;
features = NULL;
m_compute_bias = true;

SG_ADD(&m_w, "w", "Parameter vector w.", MS_NOT_AVAILABLE);
SG_ADD(&bias, "bias", "Bias b.", MS_NOT_AVAILABLE);
SG_ADD((CSGObject**) &features, "features", "Feature object.", MS_NOT_AVAILABLE);
SG_ADD(&m_compute_bias, "compute_bias", "Whether bias is computed.", MS_NOT_AVAILABLE);
SG_ADD(&features, "features", "Feature object.", MS_NOT_AVAILABLE);
}


@@ -114,16 +105,6 @@ float64_t CLinearMachine::get_bias()
return bias;
}

void CLinearMachine::set_compute_bias(bool comput_bias)
{
m_compute_bias = comput_bias;
}

bool CLinearMachine::get_compute_bias()
{
return m_compute_bias;
}

void CLinearMachine::set_features(CDotFeatures* feat)
{
SG_REF(feat);
@@ -140,37 +121,3 @@ CDotFeatures* CLinearMachine::get_features()
void CLinearMachine::store_model_features()
{
}

void CLinearMachine::compute_bias(CFeatures* data)
{
REQUIRE(m_labels,"No labels set\n");

if (!data)
data=features;

REQUIRE(data,"No features provided and no featured previously set\n");

REQUIRE(m_labels->get_num_labels() == data->get_num_vectors(),
"Number of training vectors (%d) does not match number of labels (%d)\n",
m_labels->get_num_labels(), data->get_num_vectors());

SGVector<float64_t> outputs = apply_get_outputs(data);

int32_t num_vec=data->get_num_vectors();

Map<VectorXd> eigen_outputs(outputs,num_vec);
auto my_labels = m_labels->as<CDenseLabels>()->get_labels();
Map<VectorXd> eigen_labels(my_labels,num_vec);

set_bias((eigen_labels - eigen_outputs).mean()) ;
}


bool CLinearMachine::train(CFeatures* data)
{
bool result = CMachine::train(data);
if (m_compute_bias)
compute_bias(data);

return result;
}
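
As an editorial aside on why the removed hook was redundant at best: it set the bias to the mean residual of the already-trained model, but a least-squares fit that estimates its own intercept has zero mean residual, so the correction was a no-op there, while for margin-based classifiers it shifted the outputs by a quantity with no meaning for the loss being optimized. In LaTeX, the removed rule and the standard normal-equation fact it collides with:

b \leftarrow \frac{1}{N}\sum_{i=1}^{N}\left(y_i - \hat{y}_i\right),
\qquad\text{while for least squares with an intercept}\qquad
\sum_{i=1}^{N}\left(y_i - \hat{y}_i\right) = 0.
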
src/shogun/machine/LinearMachine.h (39 changes: 2 additions & 37 deletions)
@@ -65,25 +65,12 @@ class CLinearMachine : public CMachine
/** default constructor */
CLinearMachine();

/** Constructor
*
* @param compute_bias new m_compute_bias
* Determines if bias_compution is considered or not
*/
CLinearMachine(bool compute_bias);

/** destructor */
virtual ~CLinearMachine();

/** copy constructor */
CLinearMachine(CLinearMachine* machine);

/** Train machine
*
* @return whether training was successful
*/
virtual bool train(CFeatures* data=NULL);

/** get w
*
* @return weight vector
@@ -108,19 +95,6 @@
*/
virtual float64_t get_bias();

/** Set m_compute_bias
*
* Determines if bias compution is considered or not
* @param compute_bias new m_compute_bias
*/
virtual void set_compute_bias(bool compute_bias);

/** Get compute bias
*
* @return compute_bias
*/
virtual bool get_compute_bias();

/** set features
*
* @param feat features to set
@@ -174,28 +148,19 @@
*/
virtual void store_model_features();

/** Computes the added bias. The bias is computed
* as the mean error between the predictions and
* the true labels.
*/
void compute_bias(CFeatures* data);

private:

void init();

private:

protected:
/** w */
SGVector<float64_t> m_w;

protected:
/** bias */
float64_t bias;

/** features */
CDotFeatures* features;
/** If true, bias is computed in train method */
bool m_compute_bias;
};
}
#endif
src/shogun/regression/LinearRidgeRegression.cpp (16 changes: 11 additions & 5 deletions)
@@ -1,8 +1,8 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Soeren Sonnenburg, Sergey Lisitsyn, Chiyuan Zhang, Viktor Gal,
* Abhinav Rai, Youssef Emad El-Din
* Authors: Soeren Sonnenburg, Sergey Lisitsyn, Chiyuan Zhang, Viktor Gal,
* Abhinav Rai, Youssef Emad El-Din, Heiko Strathmann
*/
#include <shogun/lib/config.h>

@@ -71,13 +71,19 @@ bool CLinearRidgeRegression::train_machine(CFeatures* data)
linalg::matrix_prod(feats_matrix, feats_matrix, kernel_matrix, false, true);
linalg::add_diag(kernel_matrix, tau_vector);

auto labels = ((CRegressionLabels*)m_labels)->get_labels();
Collaborator: Minor: fix indentation of this block to rest of the function?

Member Author: will do autoformat once this works, thx
linalg::matrix_prod(feats_matrix, labels, y);
auto labels = regression_labels(m_labels);
auto lab = labels->get_labels();
linalg::matrix_prod(feats_matrix, lab, y);

auto decomposition = linalg::cholesky_factor(kernel_matrix);
y = linalg::cholesky_solver(decomposition, y);

set_w(y);

// b = mean(y) - w . x_bar, where x_bar is the mean of the N training samples
// (columns of the D x N feats_matrix); y holds w after the Cholesky solve.
auto x_bar = linalg::rowwise_sum(feats_matrix);
linalg::scale(x_bar, x_bar, 1.0 / ((float64_t)feats_matrix.num_cols));
auto intercept = linalg::mean(lab) - linalg::dot(y, x_bar);
set_bias(intercept);

return true;
}

Expand Down
src/shogun/regression/LinearRidgeRegression.h (51 changes: 28 additions & 23 deletions)
@@ -1,8 +1,8 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Sergey Lisitsyn, Soeren Sonnenburg, Evan Shelhamer,
* Fernando Iglesias, Youssef Emad El-Din
* Authors: Sergey Lisitsyn, Soeren Sonnenburg, Evan Shelhamer,
* Fernando Iglesias, Youssef Emad El-Din, Heiko Strathmann
*/

#ifndef _LINEARRIDGEREGRESSION_H__
@@ -16,27 +16,32 @@

namespace shogun
{
/** @brief Class LinearRidgeRegression implements Ridge Regression - a regularized least square
* method for classification and regression.
*
* RR is closely related to Fishers Linear Discriminant (cf. LDA).
*
* Internally, it is solved via minimizing the following system
*
* \f[
* \frac{1}{2}\left(\sum_{i=1}^N(y_i-{\bf w}\cdot {\bf x}_i)^2 + \tau||{\bf w}||^2\right)
* \f]
*
* which boils down to solving a linear system
*
* \f[
* {\bf w} = \left(\tau {\bf I}+ \sum_{i=1}^N{\bf x}_i{\bf x}_i^T\right)^{-1}\left(\sum_{i=1}^N y_i{\bf x}_i\right)
* \f]
*
* The expressed solution is a linear method with bias b (cf. CLinearMachine).
*/
class CLinearRidgeRegression : public CLinearMachine
{
	/** @brief Class LinearRidgeRegression implements Ridge Regression - a
	 * regularized least squares method for classification and regression.
	 *
	 * Internally, it is solved via minimizing the following system
	 *
	 * \f[
	 * \frac{1}{2}\left(\sum_{i=1}^N(y_i-({\bf w}\cdot {\bf x}_i+b))^2 +
	 * \tau||{\bf w}||^2\right)
	 * \f]
	 *
	 * Define \f$X=\left[{\bf x}_{1},\dots,{\bf x}_{N}\right]\in\mathbb{R}^{D\times N}\f$
	 * and \f$y=[y_{1},\dots,y_{N}]^{\top}\in\mathbb{R}^{N}\f$. Then the
	 * solution boils down to solving the linear system
	 *
	 * \f[
	 * {\bf w}=\left(\tau I_{D}+XX^{\top}\right)^{-1}Xy
	 * \f]
	 *
	 * and \f$b=\frac{1}{N}\sum_{i=1}^{N}y_{i}-{\bf w}\cdot\bar{\mathbf{x}}\f$
	 * for \f$\bar{\mathbf{x}}=\frac{1}{N}\sum_{i=1}^{N}{\bf x}_{i}\f$.
	 */
class CLinearRidgeRegression : public CLinearMachine
{
public:
/** problem type */
MACHINE_PROBLEM_TYPE(PT_REGRESSION);