-
-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Simplify LRR via using CDenseFeatures interface for cov, gram, sum #4384
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
+1 −1 | testsuite/meta/evaluation/cross_validation.dat | |
+9 −5 | testsuite/meta/regression/linear_ridge_regression.dat |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,19 @@ | |
#include <algorithm> | ||
#include <string.h> | ||
|
||
// Runtime guard for member functions that are only defined for real-valued | ||
// feature types. Does nothing when get_feature_type() is F_SHORTREAL, F_DREAL | ||
// or F_LONGREAL; for any other type it triggers REQUIRE(false, ...) with a | ||
// message naming the class (get_name()) and the demangled template type ST. | ||
// Expands to a bare switch statement, so it is written without a trailing | ||
// semicolon and must be used where get_feature_type(), get_name() and ST are | ||
// in scope (i.e. inside a CDenseFeatures<ST> member function). | ||
#define ASSERT_FLOATING_POINT \ | ||
switch (get_feature_type()) \ | ||
{ \ | ||
case F_SHORTREAL: \ | ||
case F_DREAL: \ | ||
case F_LONGREAL: \ | ||
break; \ | ||
default: \ | ||
REQUIRE( \ | ||
false, "Only defined for %s with real type, not for %s.\n", \ | ||
get_name(), demangled_type<ST>().c_str()); \ | ||
} | ||
|
||
namespace shogun { | ||
|
||
template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size) | ||
|
@@ -1001,6 +1014,53 @@ template< class ST > CDenseFeatures< ST >* CDenseFeatures< ST >::obtain_from_gen | |
return (CDenseFeatures< ST >*) base_features; | ||
} | ||
|
||
template <typename ST> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be even cooler if we moved those methods to |
||
SGVector<ST> CDenseFeatures<ST>::sum() const
{
	// Sum of all feature vectors, obtained as the row-wise sum of the
	// feature matrix.
	// TODO optimize non batch mode, but get_feature_vector is non const :(
	SGVector<ST> total = linalg::rowwise_sum(get_feature_matrix());
	return total;
}
|
||
template <typename ST> | ||
SGVector<ST> CDenseFeatures<ST>::mean() const | ||
{ | ||
ASSERT_FLOATING_POINT | ||
|
||
auto result = sum(); | ||
ST scale = ((ST)1.0) / get_num_vectors(); | ||
linalg::scale(result, result, scale); | ||
return result; | ||
} | ||
|
||
template <typename ST> | ||
SGMatrix<ST> CDenseFeatures<ST>::cov() const | ||
{ | ||
// TODO optimize non batch mode, but get_feature_vector is non const :( | ||
auto mat = get_feature_matrix(); | ||
return linalg::matrix_prod(mat, mat, false, true); | ||
} | ||
|
||
template <typename ST> | ||
SGMatrix<ST> CDenseFeatures<ST>::gram() const | ||
{ | ||
// TODO optimize non batch mode, but get_feature_vector is non const :( | ||
auto mat = get_feature_matrix(); | ||
return linalg::matrix_prod(mat, mat, true, false); | ||
} | ||
|
||
template <typename ST> | ||
SGVector<ST> CDenseFeatures<ST>::dot(const SGVector<ST>& other) const | ||
{ | ||
REQUIRE( | ||
get_num_vectors() == other.size(), "Number of feature vectors (%d) " | ||
"must match provided vector's size " | ||
"(%d).\n", | ||
get_num_features(), other.size()); | ||
// TODO optimize non batch mode, but get_feature_vector is non const :( | ||
return linalg::matrix_prod(get_feature_matrix(), other, false); | ||
} | ||
|
||
template class CDenseFeatures<bool>; | ||
template class CDenseFeatures<char>; | ||
template class CDenseFeatures<int8_t>; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,14 +11,13 @@ | |
|
||
#include <shogun/lib/config.h> | ||
|
||
#include <shogun/lib/common.h> | ||
#include <shogun/lib/Cache.h> | ||
#include <shogun/io/File.h> | ||
#include <shogun/features/DotFeatures.h> | ||
#include <shogun/features/StringFeatures.h> | ||
#include <shogun/io/File.h> | ||
#include <shogun/lib/Cache.h> | ||
#include <shogun/lib/DataType.h> | ||
|
||
#include <shogun/lib/SGMatrix.h> | ||
#include <shogun/lib/common.h> | ||
|
||
namespace shogun { | ||
template<class ST> class CStringFeatures; | ||
|
@@ -303,6 +302,46 @@ template<class ST> class CDenseFeatures: public CDotFeatures | |
virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, | ||
int32_t vec_idx2); | ||
|
||
/** Computes the sum of all feature vectors | ||
* @return Sum of all feature vectors | ||
*/ | ||
SGVector<ST> sum() const; | ||
|
||
/** Computes the empirical mean of all feature vectors | ||
* @return Mean of all feature vectors | ||
*/ | ||
SGVector<ST> mean() const; | ||
|
||
/** Computes the \f$DxD\f$ (uncentered, un-normalized) covariance matrix | ||
* | ||
*\f[ | ||
* X X^\top | ||
* \f] | ||
* | ||
* where \f$X\f$ is the \f$DxN\f$ dimensional feature matrix with \f$N\f$ | ||
* feature vectors of dimension \f$D\f$. | ||
*/ | ||
SGMatrix<ST> cov() const; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense to call it noncentered_cov or something similar that makes the fact that is not centered explicit? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since I first met @lisitsyn I am in favour of short function names. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think having a function called covariance that does not compute the covariance can lead to confusion :-P There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well it computes the covariance, just for a certain case of input. |
||
/** Computes the \f$NxN\f$ (uncentered, un-normalized) gram matrix of | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Completely minor: missing newline. |
||
* pairwise dot products, that is | ||
* | ||
*\f[ | ||
* X^\top X | ||
* \f] | ||
* | ||
* where \f$X\f$ is the \f$DxN\f$ dimensional feature matrix with \f$N\f$ | ||
* feature vectors of dimension \f$D\f$. | ||
*/ | ||
SGMatrix<ST> gram() const; | ||
|
||
/** Computes the dot product of the feature matrix with a given vector. | ||
* | ||
* @param other Vector to compute dot products with, size must match number | ||
* of feature vectors | ||
* @return Vector with as many entries as feature dimensions | ||
*/ | ||
SGVector<ST> dot(const SGVector<ST>& other) const; | ||
|
||
/** compute dot product between vector1 and a dense vector | ||
* | ||
* possible with subset | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@lisitsyn @iglesias @vigsterkr our template style for features doesn't allow me to use type traits to define member functions of templated features, as all template types are instantiated. This is why I added this runtime check for floating point numbers. We can remove that once the features are cleaned up a bit. For now it just stops callers from doing nonsense (computing the mean on bools or ints).