Simplify LRR via using CDenseFeatures interface for cov, gram, sum
make bias computation optional
karlnapf committed Jul 23, 2018
1 parent 1be1dbb commit b7c5e58
Showing 11 changed files with 228 additions and 31 deletions.
@@ -10,6 +10,10 @@ We aim to find the linear function that best explains the data, i.e. minimizes t
where :math:`X=\left[{\bf x}_{1},\dots{\bf x}_{N}\right]\in\mathbb{R}^{D\times N}` is the training data matrix, containing :math:`N` training samples of dimension :math:`D`, :math:`y=[y_{1},\dots,y_{N}]^{\top}\in\mathbb{R}^{N}` are the labels, and :math:`\tau>0` scales the regularization term.

Alternatively, if :math:`D>N`, the solution can be written as

.. math::

    {\bf w}=X\left(\tau I_{N}+X^{\top}X\right)^{-1}y

The bias term is computed as :math:`b=\frac{1}{N}\sum_{i=1}^{N}y_{i}-{\bf w}\cdot\bar{\mathbf{x}}`, where :math:`\bar{\mathbf{x}}=\frac{1}{N}\sum_{i=1}^{N}{\bf x}_{i}`.

For the special case when :math:`\tau = 0`, a wrapper class :sgclass:`CLeastSquaresRegression` is available.
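
For reference, the two closed-form expressions for :math:`{\bf w}` agree via the push-through identity,

.. math::

    X\left(\tau I_{N}+X^{\top}X\right)=\left(\tau I_{D}+XX^{\top}\right)X
    \quad\Longrightarrow\quad
    \left(\tau I_{D}+XX^{\top}\right)^{-1}X=X\left(\tau I_{N}+X^{\top}X\right)^{-1},

so both yield the same weights when applied to :math:`y`.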
@@ -34,6 +38,10 @@ After training, we can extract :math:`{\bf w}` and the bias.

.. sgexample:: linear_ridge_regression.sg:extract_w

We could also have trained without the bias term and set it manually afterwards.

.. sgexample:: linear_ridge_regression.sg:manual_bias

Finally, we can evaluate the :sgclass:`CMeanSquaredError`.

.. sgexample:: linear_ridge_regression.sg:evaluate_error
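
For reference, the mean squared error computed here is

.. math::

    \text{MSE}=\frac{1}{N}\sum_{i=1}^{N}\left(\hat{y}_{i}-y_{i}\right)^{2},

where :math:`\hat{y}_{i}` are the predicted labels on the test data.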
11 changes: 10 additions & 1 deletion examples/meta/src/regression/linear_ridge_regression.sg
@@ -24,10 +24,19 @@ real b = lrr.get_real("bias")
RealVector w = lrr.get_real_vector("w")
#![extract_w]

#![manual_bias]
Machine lrr2 = machine("LinearRidgeRegression", tau=0.001, labels=labels_train, use_bias=False)
lrr2.train(features_train)
real my_bias = 0.1
lrr2.put("bias", my_bias)
Labels labels_predict2 = lrr2.apply(features_test)
#![manual_bias]

#![evaluate_error]
Evaluation eval = evaluation("MeanSquaredError")
real mse = eval.evaluate(labels_predict, labels_test)
#![evaluate_error]

# integration testing variables
RealVector output = labels_test.get_real_vector("labels")
RealVector output = labels_predict.get_real_vector("labels")
RealVector output2 = labels_predict2.get_real_vector("labels")
36 changes: 36 additions & 0 deletions src/shogun/features/DenseFeatures.cpp
@@ -1001,6 +1001,42 @@ template< class ST > CDenseFeatures< ST >* CDenseFeatures< ST >::obtain_from_gen
return (CDenseFeatures< ST >*) base_features;
}

template <typename ST>
SGVector<ST> CDenseFeatures<ST>::sum() const
{
// TODO optimize non batch mode, but get_feature_vector is non const :(
SGVector<ST> result = linalg::rowwise_sum(get_feature_matrix());
return result;
}

template <typename ST>
SGMatrix<ST> CDenseFeatures<ST>::cov() const
{
// TODO optimize non batch mode, but get_feature_vector is non const :(
auto mat = get_feature_matrix();
return linalg::matrix_prod(mat, mat, false, true);
}

template <typename ST>
SGMatrix<ST> CDenseFeatures<ST>::gram() const
{
// TODO optimize non batch mode, but get_feature_vector is non const :(
auto mat = get_feature_matrix();
return linalg::matrix_prod(mat, mat, true, false);
}

template <typename ST>
SGVector<ST> CDenseFeatures<ST>::dot(const SGVector<ST>& other) const
{
REQUIRE(
get_num_vectors() == other.size(), "Number of feature vectors (%d) "
"must match provided vector's size "
"(%d).\n",
get_num_vectors(), other.size());
// TODO optimize non batch mode, but get_feature_vector is non const :(
return linalg::matrix_prod(get_feature_matrix(), other, false);
}

template class CDenseFeatures<bool>;
template class CDenseFeatures<char>;
template class CDenseFeatures<int8_t>;
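
A minimal C++ usage sketch of the new helpers (illustrative only: the data values, variable names, and the wrapper function are made up; the CDenseFeatures methods are the ones added in this commit):

#include <shogun/features/DenseFeatures.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>

using namespace shogun;

void dense_features_helpers_sketch()
{
	// D = 2 feature dimensions, N = 3 feature vectors, stored column-wise
	SGMatrix<float64_t> X(2, 3);
	X(0, 0) = 1; X(0, 1) = 2; X(0, 2) = 3;
	X(1, 0) = 4; X(1, 1) = 5; X(1, 2) = 6;
	auto feats = new CDenseFeatures<float64_t>(X);

	SGVector<float64_t> s = feats->sum();   // length D: sum over all feature vectors
	SGMatrix<float64_t> C = feats->cov();   // D x D: X * X^T (uncentered, un-normalized)
	SGMatrix<float64_t> G = feats->gram();  // N x N: X^T * X

	SGVector<float64_t> y(3);
	y[0] = 1; y[1] = 0; y[2] = -1;
	SGVector<float64_t> Xy = feats->dot(y); // length D: X * y

	SG_UNREF(feats);
}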
42 changes: 38 additions & 4 deletions src/shogun/features/DenseFeatures.h
@@ -11,14 +11,14 @@

#include <shogun/lib/config.h>

#include <shogun/lib/common.h>
#include <shogun/lib/Cache.h>
#include <shogun/io/File.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/io/File.h>
#include <shogun/lib/Cache.h>
#include <shogun/lib/DataType.h>

#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/common.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>

namespace shogun {
template<class ST> class CStringFeatures;
@@ -303,6 +303,40 @@ template<class ST> class CDenseFeatures: public CDotFeatures
virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df,
int32_t vec_idx2);

/** Computes the sum of all feature vectors
* @return Sum of all feature vectors
*/
SGVector<ST> sum() const;

/** Computes the \f$DxD\f$ (uncentered, un-normalized) covariance matrix
*
*\f[
* X X^\top
* \f]
*
* where \f$X\f$ is the \f$DxN\f$ dimensional feature matrix with \f$N\f$
* feature vectors of dimension \f$D\f$.
*/
SGMatrix<ST> cov() const;
/** Computes the \f$NxN\f$ Gram matrix of pairwise dot products, that is
*
*\f[
* X^\top X
* \f]
*
* where \f$X\f$ is the \f$DxN\f$ dimensional feature matrix with \f$N\f$
* feature vectors of dimension \f$D\f$.
*/
SGMatrix<ST> gram() const;

/** Computes the dot product of the feature matrix with a given vector.
*
* @param other Vector to compute dot products with, size must match number
* of feature vectors
* @return Vector with as many entries as there are feature dimensions
*/
SGVector<ST> dot(const SGVector<ST>& other) const;

/** compute dot product between vector1 and a dense vector
*
* possible with subset
14 changes: 14 additions & 0 deletions src/shogun/mathematics/linalg/LinalgBackendBase.h
@@ -140,6 +140,20 @@ namespace shogun
DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ADD_DIAG, SGMatrix);
#undef BACKEND_GENERIC_ADD_DIAG

/**
* Wrapper method of adding a constant ridge to the diagonal: A.diagonal = A.diagonal + beta.
*
* @see linalg::add_ridge
*/
#define BACKEND_GENERIC_ADD_RIDGE(Type, Container) \
virtual void add_ridge(SGMatrix<Type>& A, Type beta) const \
{ \
SG_SNOTIMPLEMENTED; \
return; \
}
DEFINE_FOR_ALL_PTYPE(BACKEND_GENERIC_ADD_RIDGE, SGMatrix);
#undef BACKEND_GENERIC_ADD_RIDGE

/**
* Wrapper method of add vector to each column of matrix.
*
10 changes: 10 additions & 0 deletions src/shogun/mathematics/linalg/LinalgBackendEigen.h
@@ -73,6 +73,12 @@ namespace shogun
DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_ADD_DIAG, SGMatrix)
#undef BACKEND_GENERIC_ADD_DIAG

/** Implementation of @see LinalgBackendBase::add_ridge */
#define BACKEND_GENERIC_ADD_RIDGE(Type, Container) \
virtual void add_ridge(SGMatrix<Type>& A, Type beta) const;
DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_ADD_RIDGE, SGMatrix)
#undef BACKEND_GENERIC_ADD_RIDGE

/** Implementation of @see LinalgBackendBase::add_vector */
#define BACKEND_GENERIC_ADD(Type, Container) \
virtual void add_vector( \
@@ -443,6 +449,10 @@ namespace shogun
void add_diag_impl(
SGMatrix<T>& A, const SGVector<T>& b, T alpha, T beta) const;

/** Eigen3 add ridge (scalar to diagonal) method */
template <typename T>
void add_ridge_impl(SGMatrix<T>& A, T beta) const;

/** Eigen3 add vector to each column of matrix method */
template <typename T>
void add_vector_impl(
15 changes: 15 additions & 0 deletions src/shogun/mathematics/linalg/LinalgNamespace.h
@@ -583,6 +583,21 @@ namespace shogun
infer_backend(A, SGMatrix<T>(b))->add_diag(A, b, alpha, beta);
}

/**
* Performs the operation A.diagonal = A.diagonal + beta.
* The matrix is not required to be square.
*
* @param A The matrix
* @param beta Constant to be added to each diagonal entry
*/
template <typename T>
void add_ridge(SGMatrix<T>& A, T beta)
{
auto diagonal_len = CMath::min(A.num_cols, A.num_rows);
REQUIRE(diagonal_len > 0, "Matrix can't be empty.\n");
infer_backend(A)->add_ridge(A, beta);
}
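
A minimal usage sketch of linalg::add_ridge (illustrative only; values and the wrapper function are made up):

#include <shogun/lib/SGMatrix.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>

using namespace shogun;

void add_ridge_sketch()
{
	SGMatrix<float64_t> A(2, 2);
	A(0, 0) = 4.0; A(0, 1) = 1.0;
	A(1, 0) = 1.0; A(1, 1) = 3.0;

	// adds 0.1 to every diagonal entry: A(0,0) -> 4.1, A(1,1) -> 3.1
	linalg::add_ridge(A, 0.1);
}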

/**
* Performs the operation result = alpha * A.col(i) + beta * b,
* for each column of A.
16 changes: 16 additions & 0 deletions src/shogun/mathematics/linalg/backend/eigen/BasicOps.cpp
@@ -30,6 +30,7 @@
* Authors: 2016 Pan Deng, Soumyajit De, Heiko Strathmann, Viktor Gal
*/

#include <shogun/base/range.h>
#include <shogun/mathematics/linalg/LinalgBackendEigen.h>
#include <shogun/mathematics/linalg/LinalgMacros.h>

@@ -67,6 +68,14 @@ DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_ADD_COL_VEC, SGMatrix)
DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_ADD_DIAG, SGMatrix)
#undef BACKEND_GENERIC_ADD_DIAG

#define BACKEND_GENERIC_ADD_RIDGE(Type, Container) \
void LinalgBackendEigen::add_ridge(SGMatrix<Type>& A, Type beta) const \
{ \
add_ridge_impl(A, beta); \
}
DEFINE_FOR_NUMERIC_PTYPE(BACKEND_GENERIC_ADD_RIDGE, SGMatrix)
#undef BACKEND_GENERIC_ADD_RIDGE

#define BACKEND_GENERIC_PINVH(Type, Container) \
void LinalgBackendEigen::pinvh( \
const SGMatrix<Type>& A, SGMatrix<Type>& result) const \
@@ -226,6 +235,13 @@ void LinalgBackendEigen::add_diag_impl(
A_eig.diagonal() = alpha * A_eig.diagonal() + beta * b_eig;
}

template <typename T>
void LinalgBackendEigen::add_ridge_impl(SGMatrix<T>& A, T beta) const
{
for (auto i : range(std::min(A.num_rows, A.num_cols)))
A(i, i) += beta;
}

template <typename T>
void LinalgBackendEigen::pinvh_impl(
const SGMatrix<T>& A, SGMatrix<T>& result) const
72 changes: 46 additions & 26 deletions src/shogun/regression/LinearRidgeRegression.cpp
@@ -32,8 +32,34 @@ CLinearRidgeRegression::CLinearRidgeRegression(float64_t tau, CDenseFeatures<flo
void CLinearRidgeRegression::init()
{
set_tau(1e-6);
m_use_bias = true;

SG_ADD(&m_tau, "tau", "Regularization parameter", MS_AVAILABLE);
SG_ADD(
&m_use_bias, "use_bias", "Whether or not to fit an offset term",
MS_NOT_AVAILABLE);
}

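// Solves (X X^T + tau I_D) w = X y via a D x D Cholesky factorization;
// cheaper when N >= D.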
template <typename T>
SGVector<T> solve_using_covariance(
const CDenseFeatures<T>* feats, const SGVector<T>& y, T tau)
{
SGMatrix<T> cov = feats->cov();
linalg::add_ridge(cov, tau);
auto L = linalg::cholesky_factor(cov);
auto Xy = feats->dot(y);
return linalg::cholesky_solver(L, Xy);
}

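// Computes w = X (X^T X + tau I_N)^{-1} y via an N x N Cholesky factorization;
// cheaper when D > N.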
template <typename T>
SGVector<T>
solve_using_gram(const CDenseFeatures<T>* feats, const SGVector<T>& y, T tau)
{
SGMatrix<T> gram = feats->gram();
linalg::add_ridge(gram, tau);
auto L = linalg::cholesky_factor(gram);
auto b = linalg::cholesky_solver(L, y);
return feats->dot(b);
}

bool CLinearRidgeRegression::train_machine(CFeatures* data)
@@ -57,32 +83,26 @@ bool CLinearRidgeRegression::train_machine(CFeatures* data)
"Expected Real Features (%d) but got (%d)\n",
F_DREAL, data->get_feature_type());

CDenseFeatures<float64_t>* feats=(CDenseFeatures<float64_t>*) data;
int32_t num_feat=feats->get_num_features();

SGMatrix<float64_t> kernel_matrix(num_feat,num_feat);
SGMatrix<float64_t> feats_matrix(feats->get_feature_matrix());
SGVector<float64_t> y(num_feat);
SGVector<float64_t> tau_vector(num_feat);

tau_vector.zero();
tau_vector.add(m_tau);

linalg::matrix_prod(feats_matrix, feats_matrix, kernel_matrix, false, true);
linalg::add_diag(kernel_matrix, tau_vector);

auto labels = regression_labels(m_labels);
auto lab = regression_labels(m_labels)->get_labels();
linalg::matrix_prod(feats_matrix, lab, y);

auto decomposition = linalg::cholesky_factor(kernel_matrix);
y = linalg::cholesky_solver(decomposition, y);
set_w(y);

auto x_bar = linalg::colwise_sum(feats_matrix);
linalg::scale(x_bar, x_bar, 1.0 / ((float64_t)x_bar.size()));
auto intercept = linalg::mean(lab) - linalg::dot(lab, x_bar);
set_bias(intercept);
auto feats = data->as<CDenseFeatures<float64_t>>();
auto y = regression_labels(m_labels)->get_labels();
auto N = feats->get_num_vectors();
auto D = feats->get_num_features();

SGVector<float64_t> w;
if (N >= D)
w = solve_using_covariance<float64_t>(feats, y, m_tau);
else
w = solve_using_gram<float64_t>(feats, y, m_tau);

set_w(w);

if (m_use_bias)
{
auto x_bar = feats->sum();
linalg::scale(x_bar, x_bar, ((float64_t)1.0) / N);
auto intercept = linalg::mean(y) - linalg::dot(w, x_bar);
set_bias(intercept);
}

return true;
}
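
An end-to-end C++ usage sketch of the updated class (illustrative only: data values and the wrapper function are made up; constructor and accessors are the existing public API):

#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/regression/LinearRidgeRegression.h>

using namespace shogun;

void ridge_regression_sketch()
{
	// D = 1, N = 4, with y = 2 * x + 1 and zero-mean x
	SGMatrix<float64_t> X(1, 4);
	X(0, 0) = -1.5; X(0, 1) = -0.5; X(0, 2) = 0.5; X(0, 3) = 1.5;
	SGVector<float64_t> y(4);
	y[0] = -2; y[1] = 0; y[2] = 2; y[3] = 4;

	auto feats = new CDenseFeatures<float64_t>(X);
	auto labels = new CRegressionLabels(y);

	auto lrr = new CLinearRidgeRegression(1e-6, feats, labels);
	lrr->train(feats);                     // N >= D, so the covariance path is used

	SGVector<float64_t> w = lrr->get_w();  // approximately 2
	float64_t b = lrr->get_bias();         // approximately 1

	SG_UNREF(lrr);
}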
10 changes: 10 additions & 0 deletions src/shogun/regression/LinearRidgeRegression.h
@@ -39,6 +39,13 @@ namespace shogun
* and \f$b=\frac{1}{N}\sum_{i=1}^{N}y_{i}-{\bf w}\cdot\bar{\mathbf{x}}\f$
* for
* \f$\bar{\mathbf{x}}=\frac{1}{N}\sum_{i=1}^{N}{\bf x}_{i}\f$.
*
* If the dimension of the data \f$D\f$ is larger than the number of data
* points \f$N\f$, the weights are instead computed via the Gram matrix as
*
* \f[
* {\bf w}=X\left(\tau I_{N}+X^{\top}X\right)^{-1}y,
* \f]
*
* where \f$X=\left[{\bf x}_{1},\dots,{\bf x}_{N}\right]\in\mathbb{R}^{D\times N}\f$
* is the feature matrix.
*/
class CLinearRidgeRegression : public CLinearMachine
{
@@ -107,6 +114,9 @@
protected:
/** regularization parameter tau */
float64_t m_tau;

/** Whether or not to compute an offset term */
bool m_use_bias;
};
}
#endif // _LINEARRIDGEREGRESSION_H__
@@ -225,6 +225,31 @@ TEST(LinalgBackendEigen, add_diag)
EXPECT_THROW(add_diag(A3, b3), ShogunException);
}

TEST(LinalgBackendEigen, add_ridge)
{
SGMatrix<float64_t> A1(2, 3);

A1(0, 0) = 1;
A1(0, 1) = 2;
A1(0, 2) = 3;
A1(1, 0) = 4;
A1(1, 1) = 5;
A1(1, 2) = 6;

add_ridge(A1, 1.0);

EXPECT_NEAR(A1(0, 0), 2, 1e-15);
EXPECT_NEAR(A1(0, 1), 2, 1e-15);
EXPECT_NEAR(A1(0, 2), 3, 1e-15);
EXPECT_NEAR(A1(1, 0), 4, 1e-15);
EXPECT_NEAR(A1(1, 1), 6, 1e-15);
EXPECT_NEAR(A1(1, 2), 6, 1e-15);

// test error cases
SGMatrix<float64_t> A2;
EXPECT_THROW(add_ridge(A2, 1.0), ShogunException);
}

TEST(LinalgBackendEigen, add_vector)
{
const float64_t alpha = 0.7;
