Skip to content

Commit

Permalink
correct bias computation in ridge regression
Browse files Browse the repository at this point in the history
  • Loading branch information
karlnapf committed Jul 21, 2018
1 parent 4786b02 commit ab88ee7
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 17 deletions.
4 changes: 4 additions & 0 deletions src/shogun/features/DenseFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
return new CDenseFeatures<ST>(*this);
}

{
return result;
}

template<class ST> CDenseFeatures<ST>::~CDenseFeatures()
{
free_features();
Expand Down
14 changes: 14 additions & 0 deletions src/shogun/features/DenseFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <shogun/features/DotFeatures.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/lib/DataType.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>

#include <shogun/lib/SGMatrix.h>

Expand Down Expand Up @@ -303,6 +304,19 @@ template<class ST> class CDenseFeatures: public CDotFeatures
virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df,
int32_t vec_idx2);

/** Computes the average feature vector.
 *
 * Column-wise sums the full feature matrix and divides the sum by the
 * number of feature vectors.
 *
 * Restricted to arithmetic feature types ST via the enable_if default
 * template argument; calling this for a non-arithmetic ST fails to
 * compile.
 *
 * NOTE(review): the scale factor 1.0/get_num_vectors() is applied to an
 * SGVector<ST>; for integral ST this presumably rounds/truncates inside
 * linalg::scale — confirm against the linalg implementation.
 * NOTE(review): assumes get_num_vectors() > 0, otherwise this divides by
 * zero — TODO confirm callers guarantee a non-empty feature set.
 *
 * @return Average feature vector
 */
template <typename X = typename std::enable_if_t<std::is_arithmetic<ST>::value>>
SGVector<ST> mean() const
{
// TODO use DotIterator, but that is for non const :(
// TODO optimize for non-batch mode, get_feature_vector non const :(
SGVector<ST> result = linalg::colwise_sum(get_feature_matrix());
linalg::scale(result, result, 1.0/get_num_vectors());
return result;
}

/** compute dot product between vector1 and a dense vector
*
* possible with subset
Expand Down
5 changes: 0 additions & 5 deletions src/shogun/labels/RegressionLabels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,6 @@ CLabels* CRegressionLabels::shallow_subset_copy()
return shallow_copy_labels;
}

float64_t CRegressionLabels::compute_bias() const
{
	// The bias is simply the arithmetic mean of all labels; get_labels()
	// already honours any active subset, so no extra handling is needed here.
	const auto all_labels = get_labels();
	return linalg::mean(all_labels);
}

namespace shogun
{
Some<CRegressionLabels> regression_labels(CLabels* orig)
Expand Down
6 changes: 0 additions & 6 deletions src/shogun/labels/RegressionLabels.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,6 @@ class CRegressionLabels : public CDenseLabels
*/
CRegressionLabels(CFile* loader);

/** Computes the label bias, that is, the average of all labels.
* Works with subsets.
*
* @return bias*/
float64_t compute_bias() const;

/** get label type
*
* @return label type real
Expand Down
4 changes: 3 additions & 1 deletion src/shogun/regression/LinearRidgeRegression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,11 @@ bool CLinearRidgeRegression::train_machine(CFeatures* data)

auto decomposition = linalg::cholesky_factor(kernel_matrix);
y = linalg::cholesky_solver(decomposition, y);
auto lab = regression_labels(m_labels)->get_labels();
auto intercept = linalg::mean(y) - linalg::dot(y, feats->mean<float64_t>());
set_bias(intercept);

set_w(y);
set_bias(labels->compute_bias());
return true;
}

Expand Down
7 changes: 2 additions & 5 deletions src/shogun/regression/LinearRidgeRegression.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,18 @@ namespace shogun
/** @brief Class LinearRidgeRegression implements Ridge Regression - a regularized least square
* method for classification and regression.
*
* RR is closely related to Fisher's Linear Discriminant (cf. LDA).
*
* Internally, it is solved via minimizing the following system
*
* \f[
* \frac{1}{2}\left(\sum_{i=1}^N(y_i-{\bf w}\cdot {\bf x}_i)^2 + \tau||{\bf w}||^2\right)
* \frac{1}{2}\left(\sum_{i=1}^N(y_i-({\bf w}\cdot {\bf x}_i+b))^2 + \tau||{\bf w}||^2\right)
* \f]
*
* which boils down to solving a linear system
*
* \f[
* {\bf w} = \left(\tau {\bf I}+ \sum_{i=1}^N{\bf x}_i{\bf x}_i^T\right)^{-1}\left(\sum_{i=1}^N y_i{\bf x}_i\right)
* b = \frac{1}{N}\sum_{i=1}^N y_i - {\bf w}\cdot \frac{1}{N}\sum_{i=1}^N {\bf x}_i
* \f]
*
* The expressed solution is a linear method with bias b (cf. CLinearMachine).
*/
class CLinearRidgeRegression : public CLinearMachine
{
Expand Down

0 comments on commit ab88ee7

Please sign in to comment.