Cleanup pca preprocessors and rename init to fit
vinx13 authored and vigsterkr committed Jun 8, 2018
1 parent cb0f95c commit 1ea60e1
Showing 9 changed files with 94 additions and 115 deletions.
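In caller terms, the rename replaces a bool-returning init() with a void fit() that validates its inputs via REQUIRE and cleans up any previous state before refitting. Below is a minimal caller-side sketch, assuming the Shogun 6.x-era API visible in these diffs; the toy data and setup boilerplate are illustrative, not from the commit:

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/preprocessor/PCA.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// Toy data: 3 features x 4 vectors, column-major as SGMatrix stores it.
	SGMatrix<float64_t> data(3, 4);
	for (index_t i = 0; i < 12; i++)
		data.matrix[i] = static_cast<float64_t>(i * i);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(data);
	SG_REF(features);

	CPCA* pca = new CPCA();
	SG_REF(pca);
	pca->set_target_dim(2);

	// Before this commit: bool ok = pca->init(features);
	// After it: fit() returns void; bad preconditions fail loudly through
	// REQUIRE instead of a return code, and refitting cleans up first.
	pca->fit(features);
	SGMatrix<float64_t> projected = pca->apply_to_feature_matrix(features);

	SG_UNREF(pca);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}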
2 changes: 1 addition & 1 deletion src/shogun/metric/LMNNImpl.cpp
@@ -371,7 +371,7 @@ SGMatrix<float64_t> CLMNNImpl::compute_pca_transform(CDenseFeatures<float64_t>*
     // Obtain the linear transform applying PCA
     CPCA* pca = new CPCA();
     pca->set_target_dim(cloned_features->get_num_features());
-    pca->init(cloned_features);
+    pca->fit(cloned_features);
     SGMatrix<float64_t> pca_transform = pca->get_transformation_matrix();
 
     SG_UNREF(pca);
5 changes: 0 additions & 5 deletions src/shogun/preprocessor/DimensionReductionPreprocessor.cpp
@@ -55,11 +55,6 @@ SGMatrix<float64_t> CDimensionReductionPreprocessor::apply_to_feature_matrix(CFe
     }
 }
 
-bool CDimensionReductionPreprocessor::init(CFeatures* data)
-{
-    return true;
-}
-
 void CDimensionReductionPreprocessor::cleanup()
 {
 
6 changes: 0 additions & 6 deletions src/shogun/preprocessor/DimensionReductionPreprocessor.h
@@ -43,12 +43,6 @@ class CDimensionReductionPreprocessor: public CDensePreprocessor<float64_t>
     /** destructor */
     virtual ~CDimensionReductionPreprocessor();
 
-    /** init
-     * set true by default, should be defined if dimension reduction
-     * preprocessor is using some initialization
-     */
-    virtual bool init(CFeatures* data);
-
     /** cleanup
      * set empty by default, should be defined if dimension reduction
      * preprocessor should free some resources
97 changes: 48 additions & 49 deletions src/shogun/preprocessor/KernelPCA.cpp
@@ -63,63 +63,62 @@ CKernelPCA::~CKernelPCA()
     SG_UNREF(m_init_features);
 }
 
-bool CKernelPCA::init(CFeatures* features)
+void CKernelPCA::fit(CFeatures* features)
 {
-    if (!m_initialized && m_kernel)
-    {
-        SG_REF(features);
-        m_init_features = features;
-
-        m_kernel->init(features,features);
-        SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();
-        m_kernel->cleanup();
-        int32_t n = kernel_matrix.num_cols;
-        int32_t m = kernel_matrix.num_rows;
-        ASSERT(n==m)
-        if (m_target_dim > n)
-        {
-            SG_SWARNING(
-                "Target dimension (%d) is not a valid value, it must be"
-                "less or equal than the number of vectors."
-                "Setting it to maximum allowed size (%d).",
-                m_target_dim, n);
-            m_target_dim = n;
-        }
+    REQUIRE(m_kernel, "Kernel not set\n");
+
-        SGVector<float64_t> bias_tmp = linalg::rowwise_sum(kernel_matrix);
-        linalg::scale(bias_tmp, bias_tmp, -1.0 / n);
-        float64_t s = linalg::sum(bias_tmp) / n;
-        linalg::add_scalar(bias_tmp, -s);
+    if (m_initialized)
+        cleanup();
+
-        linalg::center_matrix(kernel_matrix);
+    SG_REF(features);
+    m_init_features = features;
 
+    m_kernel->init(features, features);
+    SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();
+    m_kernel->cleanup();
+    int32_t n = kernel_matrix.num_cols;
+    int32_t m = kernel_matrix.num_rows;
+    ASSERT(n == m)
+    if (m_target_dim > n)
+    {
+        SG_SWARNING(
+            "Target dimension (%d) is not a valid value, it must be"
+            "less or equal than the number of vectors."
+            "Setting it to maximum allowed size (%d).",
+            m_target_dim, n);
+        m_target_dim = n;
+    }
 
-        SGVector<float64_t> eigenvalues(m_target_dim);
-        SGMatrix<float64_t> eigenvectors(kernel_matrix.num_rows, m_target_dim);
-        linalg::eigen_solver_symmetric(
-            kernel_matrix, eigenvalues, eigenvectors, m_target_dim);
+    SGVector<float64_t> bias_tmp = linalg::rowwise_sum(kernel_matrix);
+    linalg::scale(bias_tmp, bias_tmp, -1.0 / n);
+    float64_t s = linalg::sum(bias_tmp) / n;
+    linalg::add_scalar(bias_tmp, -s);
 
-        m_transformation_matrix =
-            SGMatrix<float64_t>(kernel_matrix.num_rows, m_target_dim);
-        // eigenvalues are in increasing order
-        for (int32_t i = 0; i < m_target_dim; i++)
-        {
-            //normalize and trap divide by zero and negative eigenvalues
-            auto idx = m_target_dim - i - 1;
-            auto vec = eigenvectors.get_column(idx);
-            linalg::scale(
-                vec, vec, 1.0 / std::sqrt(std::max(std::numeric_limits<float64_t>::epsilon(), eigenvalues[idx])));
-            m_transformation_matrix.set_column(i, vec);
-        }
+    linalg::center_matrix(kernel_matrix);
 
-        m_bias_vector = SGVector<float64_t>(m_target_dim);
-        linalg::matrix_prod(
-            m_transformation_matrix, bias_tmp, m_bias_vector, true);
+    SGVector<float64_t> eigenvalues(m_target_dim);
+    SGMatrix<float64_t> eigenvectors(kernel_matrix.num_rows, m_target_dim);
+    linalg::eigen_solver_symmetric(
+        kernel_matrix, eigenvalues, eigenvectors, m_target_dim);
 
-        m_initialized=true;
-        SG_INFO("Done\n")
-        return true;
+    m_transformation_matrix =
+        SGMatrix<float64_t>(kernel_matrix.num_rows, m_target_dim);
+    // eigenvalues are in increasing order
+    for (int32_t i = 0; i < m_target_dim; i++)
+    {
+        // normalize and trap divide by zero and negative eigenvalues
+        auto idx = m_target_dim - i - 1;
+        auto vec = eigenvectors.get_column(idx);
+        linalg::scale(
+            vec, vec, 1.0 / std::sqrt(std::max(std::numeric_limits<float64_t>::epsilon(), eigenvalues[idx])));
+        m_transformation_matrix.set_column(i, vec);
    }
-    return false;
 
+    m_bias_vector = SGVector<float64_t>(m_target_dim);
+    linalg::matrix_prod(m_transformation_matrix, bias_tmp, m_bias_vector, true);
+
+    m_initialized = true;
+    SG_INFO("Done\n")
 }
 
 SGMatrix<float64_t> CKernelPCA::apply_to_feature_matrix(CFeatures* features)
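A note on the linear algebra in fit() above, for readers tracing the code: bias_tmp precomputes the training-side terms of the standard kernel-matrix double-centering, so that the apply_* methods can center kernel evaluations against new points later. With K the n x n training kernel matrix and 1 the all-ones n-vector, this is the usual kernel PCA construction (my reading of the code; the commit itself does not document it):

% Double-centering performed by linalg::center_matrix:
\tilde{K} = K
  - \tfrac{1}{n}\,\mathbf{1}\mathbf{1}^{\top} K
  - \tfrac{1}{n}\,K\mathbf{1}\mathbf{1}^{\top}
  + \tfrac{1}{n^{2}}\,(\mathbf{1}^{\top} K \mathbf{1})\,\mathbf{1}\mathbf{1}^{\top}

% bias_tmp, built from the row sums of K before centering, collects the
% two terms above that do not depend on the new point:
b = -\tfrac{1}{n}\,K\mathbf{1} + \tfrac{1}{n^{2}}(\mathbf{1}^{\top} K \mathbf{1})\,\mathbf{1},
\qquad \text{m\_bias\_vector} = W^{\top} b

Here W is m_transformation_matrix (the scaled eigenvectors of the centered kernel matrix), matching the final matrix_prod call in the function.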
4 changes: 2 additions & 2 deletions src/shogun/preprocessor/KernelPCA.h
@@ -42,8 +42,8 @@ class CKernelPCA: public CDimensionReductionPreprocessor
 
     virtual ~CKernelPCA();
 
-    /// initialize preprocessor from features
-    virtual bool init(CFeatures* features);
+    virtual void fit(CFeatures* features);
 
     /// cleanup
     virtual void cleanup();
 
66 changes: 30 additions & 36 deletions src/shogun/preprocessor/PCA.cpp
@@ -74,52 +74,46 @@ CPCA::~CPCA()
 {
 }
 
-bool CPCA::init(CFeatures* features)
+void CPCA::fit(CFeatures* features)
 {
-    if (!m_initialized)
-    {
-        REQUIRE(features->get_feature_class()==C_DENSE, "PCA only works with dense features")
-        REQUIRE(features->get_feature_type()==F_DREAL, "PCA only works with real features")
-
-        SGMatrix<float64_t> feature_matrix = ((CDenseFeatures<float64_t>*)features)
-            ->get_feature_matrix();
-        int32_t num_vectors = feature_matrix.num_cols;
-        int32_t num_features = feature_matrix.num_rows;
-        SG_INFO(
-            "num_examples: %d num_features: %d\n", num_vectors, num_features)
+    if (m_initialized)
+        cleanup();
 
-        // max target dim allowed
-        int32_t max_dim_allowed = CMath::min(num_vectors, num_features);
-        num_dim=0;
+    auto feature_matrix =
+        features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();
+    int32_t num_vectors = feature_matrix.num_cols;
+    int32_t num_features = feature_matrix.num_rows;
+    SG_INFO("num_examples: %d num_features: %d\n", num_vectors, num_features)
 
-        REQUIRE(m_target_dim<=max_dim_allowed,
-            "target dimension should be less or equal to than minimum of N and D")
+    // max target dim allowed
+    int32_t max_dim_allowed = CMath::min(num_vectors, num_features);
+    num_dim = 0;
 
-        // center data
-        Map<MatrixXd> fmatrix(feature_matrix.matrix, num_features, num_vectors);
+    REQUIRE(
+        m_target_dim <= max_dim_allowed,
+        "target dimension should be less or equal to than minimum of N and D")
 
-        m_mean_vector = SGVector<float64_t>(num_features);
-        Map<VectorXd> data_mean(m_mean_vector.vector, num_features);
-        data_mean = fmatrix.rowwise().sum()/(float64_t) num_vectors;
-        fmatrix = fmatrix.colwise()-data_mean;
+    // center data
+    Map<MatrixXd> fmatrix(feature_matrix.matrix, num_features, num_vectors);
 
-        m_eigenvalues_vector = SGVector<float64_t>(max_dim_allowed);
+    m_mean_vector = SGVector<float64_t>(num_features);
+    Map<VectorXd> data_mean(m_mean_vector.vector, num_features);
+    data_mean = fmatrix.rowwise().sum() / (float64_t)num_vectors;
+    fmatrix = fmatrix.colwise() - data_mean;
 
-        if (m_method == AUTO)
-            m_method = (num_vectors>num_features) ? EVD : SVD;
+    m_eigenvalues_vector = SGVector<float64_t>(max_dim_allowed);
 
-        if (m_method == EVD)
-            init_with_evd(feature_matrix, max_dim_allowed);
-        else
-            init_with_svd(feature_matrix, max_dim_allowed);
+    if (m_method == AUTO)
+        m_method = (num_vectors > num_features) ? EVD : SVD;
 
-        // restore feature matrix
-        fmatrix = fmatrix.colwise()+data_mean;
-        m_initialized = true;
-        return true;
-    }
+    if (m_method == EVD)
+        init_with_evd(feature_matrix, max_dim_allowed);
+    else
+        init_with_svd(feature_matrix, max_dim_allowed);
 
-    return false;
+    // restore feature matrix
+    fmatrix = fmatrix.colwise() + data_mean;
+    m_initialized = true;
 }
 
 void CPCA::init_with_evd(const SGMatrix<float64_t>& feature_matrix, int32_t max_dim_allowed)
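One remark on the AUTO branch retained above: picking EVD when num_vectors > num_features follows the usual cost argument (not stated in the diff) that the covariance matrix is then the smaller object to decompose, while SVD of the centered data matrix is preferable when features outnumber samples. Up to the normalization convention CPCA uses, both routes yield the same principal directions:

% EVD route: decompose the D x D covariance of the centered data X_c (D x N);
% cheap when N > D because C is only D x D.
C = \tfrac{1}{N-1}\, X_c X_c^{\top} = V \Lambda V^{\top}

% SVD route: decompose X_c directly; the left singular vectors equal V, and
X_c = U \Sigma W^{\top}, \qquad \Lambda = \tfrac{1}{N-1}\,\Sigma^{2}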
5 changes: 1 addition & 4 deletions src/shogun/preprocessor/PCA.h
Expand Up @@ -130,10 +130,7 @@ class CPCA: public CDimensionReductionPreprocessor
/** destructor */
virtual ~CPCA();

/** initialize preprocessor from features
* @param features
*/
virtual bool init(CFeatures* features);
virtual void fit(CFeatures* features);

/** cleanup */
virtual void cleanup();
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/preprocessor/KernelPCA_unittest.cc
@@ -51,7 +51,7 @@ TEST(KernelPCA, apply_to_feature_matrix)
     CKernelPCA* kpca = new CKernelPCA(kernel);
     SG_REF(kpca)
     kpca->set_target_dim(target_dim);
-    kpca->init(train_feats);
+    kpca->fit(train_feats);
 
     SGMatrix<float64_t> embedding = kpca->apply_to_feature_matrix(test_feats);
 
@@ -82,7 +82,7 @@ TEST(KernelPCA, apply_to_feature_vector)
     CKernelPCA* kpca = new CKernelPCA(kernel);
     SG_REF(kpca)
     kpca->set_target_dim(target_dim);
-    kpca->init(train_feats);
+    kpca->fit(train_feats);
 
     SGVector<float64_t> embedding = kpca->apply_to_feature_vector(test_vector);
 
20 changes: 10 additions & 10 deletions tests/unit/preprocessor/PCA_unittest.cc
@@ -54,7 +54,7 @@ TEST(PCA, PCA_N_greater_D_EVD)
     CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
     CPCA* pca=new CPCA(EVD);
     pca->set_target_dim(3);
-    pca->init(features);
+    pca->fit(features);
 
     SGMatrix<float64_t> transmat=pca->get_transformation_matrix();
     SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
@@ -118,7 +118,7 @@ TEST(PCA, PCA_N_equals_D_EVD)
     CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
     CPCA* pca=new CPCA(EVD);
     pca->set_target_dim(3);
-    pca->init(features);
+    pca->fit(features);
 
     SGMatrix<float64_t> transmat=pca->get_transformation_matrix();
     SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
@@ -182,7 +182,7 @@ TEST(PCA, PCA_N_less_D_EVD)
     CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
     CPCA* pca=new CPCA(EVD);
     pca->set_target_dim(2);
-    pca->init(features);
+    pca->fit(features);
 
     SGMatrix<float64_t> transmat=pca->get_transformation_matrix();
     SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
@@ -243,7 +243,7 @@ TEST(PCA, PCA_N_greater_D_SVD)
     CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
     CPCA* pca=new CPCA(SVD);
     pca->set_target_dim(3);
-    pca->init(features);
+    pca->fit(features);
 
     SGMatrix<float64_t> transmat=pca->get_transformation_matrix();
     SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
@@ -303,7 +303,7 @@ TEST(PCA, PCA_N_equals_D_SVD)
     CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
     CPCA* pca=new CPCA(SVD);
     pca->set_target_dim(3);
-    pca->init(features);
+    pca->fit(features);
 
     SGMatrix<float64_t> transmat=pca->get_transformation_matrix();
     SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
@@ -362,7 +362,7 @@ TEST(PCA, PCA_N_less_D_SVD)
     CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
     CPCA* pca=new CPCA(SVD);
     pca->set_target_dim(2);
-    pca->init(features);
+    pca->fit(features);
 
     SGMatrix<float64_t> transmat=pca->get_transformation_matrix();
     SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
@@ -414,7 +414,7 @@ TEST(PCA, PCA_MEM_IN_PLACE)
     CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
     CPCA* pca=new CPCA(AUTO,false,MEM_IN_PLACE);
     pca->set_target_dim(3);
-    pca->init(features);
+    pca->fit(features);
 
     SGMatrix<float64_t> transmat=pca->get_transformation_matrix();
     SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
@@ -480,7 +480,7 @@ TEST(PCA, PCA_apply_to_feature_vector_methodTest)
     CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
     CPCA* pca=new CPCA(SVD);
     pca->set_target_dim(2);
-    pca->init(features);
+    pca->fit(features);
 
     SGVector<float64_t> finalVector=pca->apply_to_feature_vector(inputVector);
 
@@ -515,7 +515,7 @@ TEST(PCA, PCA_WHITEN_SVD)
     CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
     CPCA* pca=new CPCA(SVD,true);
     pca->set_target_dim(3);
-    pca->init(features);
+    pca->fit(features);
 
     SGMatrix<float64_t> transmat=pca->get_transformation_matrix();
     SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
@@ -594,7 +594,7 @@ TEST(PCA, PCA_WHITEN_EVD)
     CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
     CPCA* pca=new CPCA(EVD,true);
     pca->set_target_dim(3);
-    pca->init(features);
+    pca->fit(features);
 
     SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
 
