From ad0991cbb5d6adaadf3deb194928725aeb1a153b Mon Sep 17 00:00:00 2001
From: Wuwei Lin <vincentl13x@gmail.com>
Date: Fri, 18 May 2018 21:59:15 +0800
Subject: [PATCH] Cleanup and refactor PCA and FisherLDA

Implement apply in PCA and FisherLDA
Inherit directly from dense preproc
---
 src/shogun/metric/LMNNImpl.cpp        |  3 +-
 src/shogun/preprocessor/FisherLDA.cpp | 23 +++-----
 src/shogun/preprocessor/FisherLDA.h   | 22 ++++----
 src/shogun/preprocessor/PCA.cpp       | 81 +++++++++++++++------------
 src/shogun/preprocessor/PCA.h         | 26 ++++++---
 5 files changed, 86 insertions(+), 69 deletions(-)
diff --git a/src/shogun/metric/LMNNImpl.cpp b/src/shogun/metric/LMNNImpl.cpp
index 07f3be5b4d1..9573117f1f3 100644
--- a/src/shogun/metric/LMNNImpl.cpp
+++ b/src/shogun/metric/LMNNImpl.cpp
@@ -366,7 +366,8 @@ SGMatrix<float64_t> CLMNNImpl::compute_pca_transform(CDenseFeatures<float64_t>*
 	CPruneVarSubMean* mean_substractor =
 			new CPruneVarSubMean(false); // false to avoid variance normalization
 	mean_substractor->fit(cloned_features);
-	mean_substractor->apply_to_feature_matrix(cloned_features);
+	cloned_features = mean_substractor->apply(cloned_features)
+	                      ->as<CDenseFeatures<float64_t>>();
 
 	// Obtain the linear transform applying PCA
 	CPCA* pca = new CPCA();
diff --git a/src/shogun/preprocessor/FisherLDA.cpp b/src/shogun/preprocessor/FisherLDA.cpp
index 24fe7bf8a1c..9537127093a 100644
--- a/src/shogun/preprocessor/FisherLDA.cpp
+++ b/src/shogun/preprocessor/FisherLDA.cpp
@@ -40,7 +40,6 @@
 #include <shogun/mathematics/eigen3.h>
 #include <shogun/mathematics/linalg/LinalgNamespace.h>
 #include <shogun/preprocessor/DensePreprocessor.h>
-#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
 #include <shogun/preprocessor/FisherLDA.h>
 #include <shogun/solver/LDACanVarSolver.h>
 #include <shogun/solver/LDASolver.h>
@@ -52,7 +51,7 @@ using namespace shogun;
 CFisherLDA::CFisherLDA(
     int32_t num_dimensions, EFLDAMethod method, float64_t thresh,
     float64_t gamma, bool bdc_svd)
-    : CDimensionReductionPreprocessor()
+    : CDensePreprocessor<float64_t>()
 {
 	initialize_parameters();
 	m_num_dim = num_dimensions;
@@ -198,13 +197,10 @@ void CFisherLDA::cleanup()
 	m_eigenvalues_vector=SGVector<float64_t>();
 }
 
-SGMatrix<float64_t> CFisherLDA::apply_to_feature_matrix(CFeatures*features)
+SGMatrix<float64_t> CFisherLDA::apply_to_matrix(SGMatrix<float64_t> matrix)
 {
-	auto simple_features = features->as<CDenseFeatures<float64_t>>();
-	auto m = simple_features->get_feature_matrix();
-
-	int32_t num_vectors=m.num_cols;
-	int32_t num_features=m.num_rows;
+	auto num_vectors = matrix.num_cols;
+	auto num_features = matrix.num_rows;
 
 	SG_INFO("Transforming feature matrix\n")
 	Map<MatrixXd> transform_matrix(
@@ -213,7 +209,7 @@ SGMatrix<float64_t> CFisherLDA::apply_to_feature_matrix(CFeatures*features)
 
 	SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features)
 
-	Map<MatrixXd> feature_matrix (m.matrix, num_features, num_vectors);
+	Map<MatrixXd> feature_matrix(matrix.matrix, num_features, num_vectors);
 
 	feature_matrix.block(0, 0, m_num_dim, num_vectors) =
 	    transform_matrix.transpose() * feature_matrix;
@@ -222,12 +218,11 @@ SGMatrix<float64_t> CFisherLDA::apply_to_feature_matrix(CFeatures*features)
 	for (int32_t col=0; col<num_vectors; col++)
 	{
 		for (int32_t row=0; row<m_num_dim; row++)
-			m[col*m_num_dim+row]=feature_matrix(row, col);
+			matrix[col * m_num_dim + row] = feature_matrix(row, col);
 	}
-	m.num_rows=m_num_dim;
-	m.num_cols=num_vectors;
-	simple_features->set_feature_matrix(m);
-	return m;
+	matrix.num_rows = m_num_dim;
+	matrix.num_cols = num_vectors;
+	return matrix;
 }
 
 SGVector<float64_t> CFisherLDA::apply_to_feature_vector(SGVector<float64_t> vector)
diff --git a/src/shogun/preprocessor/FisherLDA.h b/src/shogun/preprocessor/FisherLDA.h
index a377d1f1479..9870b75a5c8 100644
--- a/src/shogun/preprocessor/FisherLDA.h
+++ b/src/shogun/preprocessor/FisherLDA.h
@@ -38,8 +38,7 @@
 
 #include <shogun/features/Features.h>
 #include <shogun/labels/Labels.h>
-#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
-#include <shogun/preprocessor/Preprocessor.h>
+#include <shogun/preprocessor/DensePreprocessor.h>
 #include <vector>
 
 namespace shogun
@@ -88,8 +87,8 @@ namespace shogun
 	 * <em>::AUTO_FLDA</em> : Automagically, the appropriate method is selected based on
 	 * whether D>N (chooses ::CANVAR_FLDA) or D<N(chooses ::CLASSIC_FLDA)
 	 */
-class CFisherLDA: public CDimensionReductionPreprocessor
-{
+	class CFisherLDA : public CDensePreprocessor<float64_t>
+	{
 	public:
 		/** standard constructor
 		 * @param num_dimensions number of dimensions to retain
@@ -123,13 +122,6 @@ class CFisherLDA: public CDimensionReductionPreprocessor
 		/** cleanup */
 		virtual void cleanup();
 
-		/** apply preprocessor to feature matrix
-		 * @param features on which the learned tranformation has to be applied.
-		 * Sometimes it is also referred as projecting the given features.
-		 * @return processed feature matrix with reduced dimensions.
-		 */
-		virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);
-
 		/** apply preprocessor to feature vector
 		 * @param vector features on which the learned transformation has to be applied.
 		 * @return processed feature vector with reduced dimensions.
@@ -159,6 +151,14 @@ class CFisherLDA: public CDimensionReductionPreprocessor
 		void initialize_parameters();
 
 	protected:
+		/** apply preprocessor to feature matrix
+		 * @param matrix on which the learned tranformation has to be applied.
+		 * Sometimes it is also referred as projecting the given features
+		 * matrix.
+		 * @return processed feature matrix with reduced dimensions.
+		 */
+		virtual SGMatrix<float64_t> apply_to_matrix(SGMatrix<float64_t> matrix);
+
 		/**
 		 * Train the preprocessor with the canonical variates method.
 		 * @param features training data.
diff --git a/src/shogun/preprocessor/PCA.cpp b/src/shogun/preprocessor/PCA.cpp
index 2a137b20015..638fb0a66b9 100644
--- a/src/shogun/preprocessor/PCA.cpp
+++ b/src/shogun/preprocessor/PCA.cpp
@@ -16,8 +16,10 @@
 using namespace shogun;
 using namespace Eigen;
 
-CPCA::CPCA(bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method, EPCAMemoryMode mem_mode)
-: CDimensionReductionPreprocessor()
+CPCA::CPCA(
+    bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method,
+    EPCAMemoryMode mem_mode)
+    : CDensePreprocessor<float64_t>()
 {
 	init();
 	m_whitening = do_whitening;
@@ -28,7 +30,7 @@ CPCA::CPCA(bool do_whitening, EPCAMode mode, float64_t thresh, EPCAMethod method
 }
 
 CPCA::CPCA(EPCAMethod method, bool do_whitening, EPCAMemoryMode mem_mode)
-: CDimensionReductionPreprocessor()
+    : CDensePreprocessor<float64_t>()
 {
 	init();
 	m_whitening = do_whitening;
@@ -68,6 +70,9 @@ void CPCA::init()
 		"Method used for PCA calculation", MS_NOT_AVAILABLE);
 	SG_ADD(&m_eigenvalue_zero_tolerance, "eigenvalue_zero_tolerance", "zero tolerance"
 	" for determining zero eigenvalues during whitening to avoid numerical issues", MS_NOT_AVAILABLE);
+	SG_ADD(
+	    &m_target_dim, "target_dim", "target dimensionality of preprocessor",
+	    MS_AVAILABLE);
 }
 
 CPCA::~CPCA()
@@ -281,13 +286,12 @@ void CPCA::cleanup()
 	m_initialized = false;
 }
 
-SGMatrix<float64_t> CPCA::apply_to_feature_matrix(CFeatures* features)
+SGMatrix<float64_t> CPCA::apply_to_matrix(SGMatrix<float64_t> matrix)
 {
 	ASSERT(m_initialized)
-	ASSERT(features != NULL)
-	SGMatrix<float64_t> m = features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();
-	int32_t num_vectors = m.num_cols;
-	int32_t num_features = m.num_rows;
+
+	auto num_vectors = matrix.num_cols;
+	auto num_features = matrix.num_rows;
 
 	SG_INFO("Transforming feature matrix\n")
 	Map<MatrixXd> transform_matrix(m_transformation_matrix.matrix,
@@ -295,43 +299,39 @@ SGMatrix<float64_t> CPCA::apply_to_feature_matrix(CFeatures* features)
 
 	if (m_mem_mode == MEM_IN_PLACE)
 	{
-		if (m.matrix)
-		{
-			SG_INFO("Preprocessing feature matrix\n")
-			Map<MatrixXd> feature_matrix(m.matrix, num_features, num_vectors);
-			VectorXd data_mean = feature_matrix.rowwise().sum()/(float64_t) num_vectors;
-			feature_matrix = feature_matrix.colwise()-data_mean;
+		SG_INFO("Preprocessing feature matrix\n")
+		Map<MatrixXd> feature_matrix(matrix.matrix, num_features, num_vectors);
+		VectorXd data_mean =
+		    feature_matrix.rowwise().sum() / (float64_t)num_vectors;
+		feature_matrix = feature_matrix.colwise() - data_mean;
 
-			feature_matrix.block(0,0,num_dim,num_vectors) =
-					transform_matrix.transpose()*feature_matrix;
+		feature_matrix.block(0, 0, num_dim, num_vectors) =
+		    transform_matrix.transpose() * feature_matrix;
 
-			SG_INFO("Form matrix of target dimension\n")
-			for (int32_t col=0; col<num_vectors; col++)
-			{
-				for (int32_t row=0; row<num_dim; row++)
-					m.matrix[col*num_dim+row] = feature_matrix(row,col);
-			}
-			m.num_rows = num_dim;
-			m.num_cols = num_vectors;
+		SG_INFO("Form matrix of target dimension\n")
+		for (int32_t col = 0; col < num_vectors; col++)
+		{
+			for (int32_t row = 0; row < num_dim; row++)
+				matrix.matrix[col * num_dim + row] = feature_matrix(row, col);
 		}
+		matrix.num_rows = num_dim;
+		matrix.num_cols = num_vectors;
 
-		((CDenseFeatures<float64_t>*) features)->set_feature_matrix(m);
-		return m;
+		return matrix;
 	}
 	else
 	{
 		SGMatrix<float64_t> ret(num_dim, num_vectors);
 		Map<MatrixXd> ret_matrix(ret.matrix, num_dim, num_vectors);
-		if (m.matrix)
-		{
-			SG_INFO("Preprocessing feature matrix\n")
-			Map<MatrixXd> feature_matrix(m.matrix, num_features, num_vectors);
-			VectorXd data_mean = feature_matrix.rowwise().sum()/(float64_t) num_vectors;
-			feature_matrix = feature_matrix.colwise()-data_mean;
 
-			ret_matrix = transform_matrix.transpose()*feature_matrix;
-		}
-		((CDenseFeatures<float64_t>*) features)->set_feature_matrix(ret);
+		SG_INFO("Preprocessing feature matrix\n")
+		Map<MatrixXd> feature_matrix(matrix.matrix, num_features, num_vectors);
+		VectorXd data_mean =
+		    feature_matrix.rowwise().sum() / (float64_t)num_vectors;
+		feature_matrix = feature_matrix.colwise() - data_mean;
+
+		ret_matrix = transform_matrix.transpose() * feature_matrix;
+
 		return ret;
 	}
 }
@@ -387,3 +387,14 @@ float64_t CPCA::get_eigenvalue_zero_tolerance() const
 {
 	return m_eigenvalue_zero_tolerance;
 }
+
+void CPCA::set_target_dim(int32_t dim)
+{
+	ASSERT(dim > 0)
+	m_target_dim = dim;
+}
+
+int32_t CPCA::get_target_dim() const
+{
+	return m_target_dim;
+}
diff --git a/src/shogun/preprocessor/PCA.h b/src/shogun/preprocessor/PCA.h
index 38e195d5d71..ea6b8874cde 100644
--- a/src/shogun/preprocessor/PCA.h
+++ b/src/shogun/preprocessor/PCA.h
@@ -11,9 +11,9 @@
 
 #include <shogun/lib/config.h>
 
-#include <shogun/preprocessor/DimensionReductionPreprocessor.h>
 #include <shogun/features/Features.h>
 #include <shogun/lib/common.h>
+#include <shogun/preprocessor/DensePreprocessor.h>
 
 namespace shogun
 {
@@ -104,7 +104,7 @@ enum EPCAMemoryMode
  *
  * Note that vectors/matrices don't have to have zero mean as it is substracted within the class.
  */
-class CPCA: public CDimensionReductionPreprocessor
+class CPCA : public CDensePreprocessor<float64_t>
 {
 	public:
 
@@ -135,12 +135,6 @@ class CPCA: public CDimensionReductionPreprocessor
 		/** cleanup */
 		virtual void cleanup();
 
-		/** apply preprocessor to feature matrix
-		 * @param features features
-		 * @return processed feature matrix
-		 */
-		virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);
-
 		/** apply preprocessor to feature vector
 		 * @param vector feature vector
 		 * @return processed feature vector
@@ -184,10 +178,23 @@ class CPCA: public CDimensionReductionPreprocessor
 		 */
 		float64_t get_eigenvalue_zero_tolerance() const;
 
+		/** setter for target dimension
+		 * @param dim target dimension
+		 */
+		void set_target_dim(int32_t dim);
+
+		/** getter for target dimension
+		 * @return target dimension
+		 */
+		int32_t get_target_dim() const;
+
 	protected:
 
 		void init();
 
+		virtual SGMatrix<float64_t>
+		    apply_to_matrix(SGMatrix<float64_t>) override;
+
 	protected:
 
 		/** transformation matrix */
@@ -218,6 +225,9 @@ class CPCA: public CDimensionReductionPreprocessor
 		 */
 		float64_t m_eigenvalue_zero_tolerance;
 
+		/** target dimension */
+		int32_t m_target_dim;
+
 	private:
 		/** Computes the transformation matrix using an eigenvalue decomposition. */
 		void init_with_evd(const SGMatrix<float64_t>& feature_matrix, int32_t max_dim_allowed);