Skip to content

Commit

Permalink
Cleanup and refactor PCA and FisherLDA
Browse files Browse the repository at this point in the history
Implement apply in PCA and FisherLDA
Inherit directly from dense preproc
  • Loading branch information
vinx13 authored and vigsterkr committed Jul 12, 2018
1 parent c06258f commit c109238
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 69 deletions.
3 changes: 2 additions & 1 deletion src/shogun/metric/LMNNImpl.cpp
Expand Up @@ -366,7 +366,8 @@ SGMatrix<float64_t> CLMNNImpl::compute_pca_transform(CDenseFeatures<float64_t>*
CPruneVarSubMean* mean_substractor =
new CPruneVarSubMean(false); // false to avoid variance normalization
mean_substractor->fit(cloned_features);
mean_substractor->apply_to_feature_matrix(cloned_features);
cloned_features = mean_substractor->apply(cloned_features)
->as<CDenseFeatures<float64_t>>();

// Obtain the linear transform applying PCA
CPCA* pca = new CPCA();
Expand Down
23 changes: 9 additions & 14 deletions src/shogun/preprocessor/FisherLDA.cpp
Expand Up @@ -40,7 +40,6 @@
#include <shogun/mathematics/eigen3.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
#include <shogun/preprocessor/FisherLDA.h>
#include <shogun/solver/LDACanVarSolver.h>
#include <shogun/solver/LDASolver.h>
Expand All @@ -52,7 +51,7 @@ using namespace shogun;
CFisherLDA::CFisherLDA(
int32_t num_dimensions, EFLDAMethod method, float64_t thresh,
float64_t gamma, bool bdc_svd)
: CDimensionReductionPreprocessor()
: CDensePreprocessor<float64_t>()
{
initialize_parameters();
m_num_dim = num_dimensions;
Expand Down Expand Up @@ -198,13 +197,10 @@ void CFisherLDA::cleanup()
m_eigenvalues_vector=SGVector<float64_t>();
}

SGMatrix<float64_t> CFisherLDA::apply_to_feature_matrix(CFeatures*features)
SGMatrix<float64_t> CFisherLDA::apply_to_matrix(SGMatrix<float64_t> matrix)
{
auto simple_features = features->as<CDenseFeatures<float64_t>>();
auto m = simple_features->get_feature_matrix();

int32_t num_vectors=m.num_cols;
int32_t num_features=m.num_rows;
auto num_vectors = matrix.num_cols;
auto num_features = matrix.num_rows;

SG_INFO("Transforming feature matrix\n")
Map<MatrixXd> transform_matrix(
Expand All @@ -213,7 +209,7 @@ SGMatrix<float64_t> CFisherLDA::apply_to_feature_matrix(CFeatures*features)

SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features)

Map<MatrixXd> feature_matrix (m.matrix, num_features, num_vectors);
Map<MatrixXd> feature_matrix(matrix.matrix, num_features, num_vectors);

feature_matrix.block(0, 0, m_num_dim, num_vectors) =
transform_matrix.transpose() * feature_matrix;
Expand All @@ -222,12 +218,11 @@ SGMatrix<float64_t> CFisherLDA::apply_to_feature_matrix(CFeatures*features)
for (int32_t col=0; col<num_vectors; col++)
{
for (int32_t row=0; row<m_num_dim; row++)
m[col*m_num_dim+row]=feature_matrix(row, col);
matrix[col * m_num_dim + row] = feature_matrix(row, col);
}
m.num_rows=m_num_dim;
m.num_cols=num_vectors;
simple_features->set_feature_matrix(m);
return m;
matrix.num_rows = m_num_dim;
matrix.num_cols = num_vectors;
return matrix;
}

SGVector<float64_t> CFisherLDA::apply_to_feature_vector(SGVector<float64_t> vector)
Expand Down
22 changes: 11 additions & 11 deletions src/shogun/preprocessor/FisherLDA.h
Expand Up @@ -38,8 +38,7 @@

#include <shogun/features/Features.h>
#include <shogun/labels/Labels.h>
#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
#include <shogun/preprocessor/Preprocessor.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <vector>

namespace shogun
Expand Down Expand Up @@ -88,8 +87,8 @@ namespace shogun
* <em>::AUTO_FLDA</em> : Automagically, the appropriate method is selected based on
* whether D>N (chooses ::CANVAR_FLDA) or D<N(chooses ::CLASSIC_FLDA)
*/
class CFisherLDA: public CDimensionReductionPreprocessor
{
class CFisherLDA : public CDensePreprocessor<float64_t>
{
public:
/** standard constructor
* @param num_dimensions number of dimensions to retain
Expand Down Expand Up @@ -123,13 +122,6 @@ class CFisherLDA: public CDimensionReductionPreprocessor
/** cleanup */
virtual void cleanup();

/** apply preprocessor to feature matrix
* @param features on which the learned tranformation has to be applied.
* Sometimes it is also referred as projecting the given features.
* @return processed feature matrix with reduced dimensions.
*/
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/** apply preprocessor to feature vector
* @param vector features on which the learned transformation has to be applied.
* @return processed feature vector with reduced dimensions.
Expand Down Expand Up @@ -159,6 +151,14 @@ class CFisherLDA: public CDimensionReductionPreprocessor
void initialize_parameters();

protected:
/** apply preprocessor to feature matrix
* @param matrix on which the learned transformation has to be applied.
* Sometimes this is also referred to as projecting the given feature
* matrix.
* @return processed feature matrix with reduced dimensions.
*/
virtual SGMatrix<float64_t> apply_to_matrix(SGMatrix<float64_t> matrix);

/**
* Train the preprocessor with the canonical variates method.
* @param features training data.
Expand Down
81 changes: 46 additions & 35 deletions src/shogun/preprocessor/PCA.cpp
Expand Up @@ -16,8 +16,10 @@
using namespace shogun;
using namespace Eigen;

CPCA::CPCA(bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method, EPCAMemoryMode mem_mode)
: CDimensionReductionPreprocessor()
CPCA::CPCA(
bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method,
EPCAMemoryMode mem_mode)
: CDensePreprocessor<float64_t>()
{
init();
m_whitening = do_whitening;
Expand All @@ -28,7 +30,7 @@ CPCA::CPCA(bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method
}

CPCA::CPCA(EPCAMethod method, bool do_whitening, EPCAMemoryMode mem_mode)
: CDimensionReductionPreprocessor()
: CDensePreprocessor<float64_t>()
{
init();
m_whitening = do_whitening;
Expand Down Expand Up @@ -68,6 +70,9 @@ void CPCA::init()
"Method used for PCA calculation", MS_NOT_AVAILABLE);
SG_ADD(&m_eigenvalue_zero_tolerance, "eigenvalue_zero_tolerance", "zero tolerance"
" for determining zero eigenvalues during whitening to avoid numerical issues", MS_NOT_AVAILABLE);
SG_ADD(
&m_target_dim, "target_dim", "target dimensionality of preprocessor",
MS_AVAILABLE);
}

CPCA::~CPCA()
Expand Down Expand Up @@ -281,57 +286,52 @@ void CPCA::cleanup()
m_initialized = false;
}

SGMatrix<float64_t> CPCA::apply_to_feature_matrix(CFeatures* features)
SGMatrix<float64_t> CPCA::apply_to_matrix(SGMatrix<float64_t> matrix)
{
ASSERT(m_initialized)
ASSERT(features != NULL)
SGMatrix<float64_t> m = features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();
int32_t num_vectors = m.num_cols;
int32_t num_features = m.num_rows;

auto num_vectors = matrix.num_cols;
auto num_features = matrix.num_rows;

SG_INFO("Transforming feature matrix\n")
Map<MatrixXd> transform_matrix(m_transformation_matrix.matrix,
m_transformation_matrix.num_rows, m_transformation_matrix.num_cols);

if (m_mem_mode == MEM_IN_PLACE)
{
if (m.matrix)
{
SG_INFO("Preprocessing feature matrix\n")
Map<MatrixXd> feature_matrix(m.matrix, num_features, num_vectors);
VectorXd data_mean = feature_matrix.rowwise().sum()/(float64_t) num_vectors;
feature_matrix = feature_matrix.colwise()-data_mean;
SG_INFO("Preprocessing feature matrix\n")
Map<MatrixXd> feature_matrix(matrix.matrix, num_features, num_vectors);
VectorXd data_mean =
feature_matrix.rowwise().sum() / (float64_t)num_vectors;
feature_matrix = feature_matrix.colwise() - data_mean;

feature_matrix.block(0,0,num_dim,num_vectors) =
transform_matrix.transpose()*feature_matrix;
feature_matrix.block(0, 0, num_dim, num_vectors) =
transform_matrix.transpose() * feature_matrix;

SG_INFO("Form matrix of target dimension\n")
for (int32_t col=0; col<num_vectors; col++)
{
for (int32_t row=0; row<num_dim; row++)
m.matrix[col*num_dim+row] = feature_matrix(row,col);
}
m.num_rows = num_dim;
m.num_cols = num_vectors;
SG_INFO("Form matrix of target dimension\n")
for (int32_t col = 0; col < num_vectors; col++)
{
for (int32_t row = 0; row < num_dim; row++)
matrix.matrix[col * num_dim + row] = feature_matrix(row, col);
}
matrix.num_rows = num_dim;
matrix.num_cols = num_vectors;

((CDenseFeatures<float64_t>*) features)->set_feature_matrix(m);
return m;
return matrix;
}
else
{
SGMatrix<float64_t> ret(num_dim, num_vectors);
Map<MatrixXd> ret_matrix(ret.matrix, num_dim, num_vectors);
if (m.matrix)
{
SG_INFO("Preprocessing feature matrix\n")
Map<MatrixXd> feature_matrix(m.matrix, num_features, num_vectors);
VectorXd data_mean = feature_matrix.rowwise().sum()/(float64_t) num_vectors;
feature_matrix = feature_matrix.colwise()-data_mean;

ret_matrix = transform_matrix.transpose()*feature_matrix;
}
((CDenseFeatures<float64_t>*) features)->set_feature_matrix(ret);
SG_INFO("Preprocessing feature matrix\n")
Map<MatrixXd> feature_matrix(matrix.matrix, num_features, num_vectors);
VectorXd data_mean =
feature_matrix.rowwise().sum() / (float64_t)num_vectors;
feature_matrix = feature_matrix.colwise() - data_mean;

ret_matrix = transform_matrix.transpose() * feature_matrix;

return ret;
}
}
Expand Down Expand Up @@ -387,3 +387,14 @@ float64_t CPCA::get_eigenvalue_zero_tolerance() const
{
return m_eigenvalue_zero_tolerance;
}

/** Set the number of dimensions the preprocessor projects onto.
 * @param dim target dimensionality; must be strictly positive
 */
void CPCA::set_target_dim(int32_t dim)
{
	// Guard against nonsensical (zero or negative) dimensionalities.
	ASSERT(dim > 0)
	this->m_target_dim = dim;
}

/** @return the currently configured target dimensionality */
int32_t CPCA::get_target_dim() const
{
	// Plain accessor; no side effects.
	return this->m_target_dim;
}
26 changes: 18 additions & 8 deletions src/shogun/preprocessor/PCA.h
Expand Up @@ -11,9 +11,9 @@

#include <shogun/lib/config.h>

#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
#include <shogun/features/Features.h>
#include <shogun/lib/common.h>
#include <shogun/preprocessor/DensePreprocessor.h>

namespace shogun
{
Expand Down Expand Up @@ -104,7 +104,7 @@ enum EPCAMemoryMode
*
* Note that vectors/matrices don't have to have zero mean as it is subtracted within the class.
*/
class CPCA: public CDimensionReductionPreprocessor
class CPCA : public CDensePreprocessor<float64_t>
{
public:

Expand Down Expand Up @@ -135,12 +135,6 @@ class CPCA: public CDimensionReductionPreprocessor
/** cleanup */
virtual void cleanup();

/** apply preprocessor to feature matrix
* @param features features
* @return processed feature matrix
*/
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/** apply preprocessor to feature vector
* @param vector feature vector
* @return processed feature vector
Expand Down Expand Up @@ -184,10 +178,23 @@ class CPCA: public CDimensionReductionPreprocessor
*/
float64_t get_eigenvalue_zero_tolerance() const;

/** setter for target dimension
* @param dim target dimension
*/
void set_target_dim(int32_t dim);

/** getter for target dimension
* @return target dimension
*/
int32_t get_target_dim() const;

protected:

void init();

virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t>) override;

protected:

/** transformation matrix */
Expand Down Expand Up @@ -218,6 +225,9 @@ class CPCA: public CDimensionReductionPreprocessor
*/
float64_t m_eigenvalue_zero_tolerance;

/** target dimension */
int32_t m_target_dim;

private:
/** Computes the transformation matrix using an eigenvalue decomposition. */
void init_with_evd(const SGMatrix<float64_t>& feature_matrix, int32_t max_dim_allowed);
Expand Down

0 comments on commit c109238

Please sign in to comment.