Skip to content

Commit

Permalink
Cleanup and implement apply_to_matrix in dense preproc subclasses
Browse files Browse the repository at this point in the history
  • Loading branch information
vinx13 authored and vigsterkr committed Jul 12, 2018
1 parent 8853809 commit c06258f
Show file tree
Hide file tree
Showing 16 changed files with 132 additions and 177 deletions.
18 changes: 8 additions & 10 deletions src/shogun/preprocessor/HomogeneousKernelMap.cpp
Expand Up @@ -5,8 +5,8 @@
* Evgeniy Andreev, Evan Shelhamer, Bjoern Esser
*/

#include <shogun/base/range.h>
#include <shogun/io/SGIO.h>

#include <shogun/mathematics/Math.h>
#include <shogun/preprocessor/HomogeneousKernelMap.h>

Expand Down Expand Up @@ -154,22 +154,20 @@ void CHomogeneousKernelMap::init()

}

// --- Diff hunk as rendered by the page: removed (OLD) and added (NEW) lines are
// --- interleaved with the +/- markers stripped. Annotations tag each side.
// OLD signature (removed): took a CFeatures* and extracted the dense matrix itself.
SGMatrix<float64_t> CHomogeneousKernelMap::apply_to_feature_matrix (CFeatures* features)
// NEW signature (added): operates directly on the matrix supplied by the
// CDensePreprocessor base class.
SGMatrix<float64_t>
CHomogeneousKernelMap::apply_to_matrix(SGMatrix<float64_t> matrix)
{
// OLD body (removed): dimension bookkeeping via CDenseFeatures accessors.
auto simple_features = features->as<CDenseFeatures<float64_t>>();
int32_t num_vectors = simple_features->get_num_vectors ();
int32_t num_features = simple_features->get_num_features ();
// NEW body (added): dimensions read straight off the SGMatrix
// (columns = vectors, rows = features).
auto num_vectors = matrix.num_cols;
auto num_features = matrix.num_rows;

// Output stacks (2*m_order+1) copies of the feature dimension per vector
// (common to both versions).
SGMatrix<float64_t> feature_matrix(num_features*(2*m_order+1),num_vectors);
// OLD loop header (removed):
for (int i = 0; i < num_vectors; ++i)
// NEW loop header (added): range-based iteration over vectors.
for (auto i : range(num_vectors))
{
// OLD (removed): fetched each vector through the features object.
SGVector<float64_t> transformed = apply_to_vector(simple_features->get_feature_vector(i));
// OLD inner loop (removed):
for (int j=0; j<transformed.vlen; j++)
// NEW (added): map each column of the input through apply_to_vector.
SGVector<float64_t> transformed = apply_to_vector(matrix.get_column(i));
// NEW inner loop (added):
for (auto j : range(transformed.vlen))
feature_matrix(j,i) = transformed[j];
}

// OLD epilogue (removed): wrote the result back into the features object.
// The NEW code just returns the freshly built matrix and leaves the input
// untouched.
simple_features->set_feature_matrix(feature_matrix);

return feature_matrix;
}

Expand Down
10 changes: 4 additions & 6 deletions src/shogun/preprocessor/HomogeneousKernelMap.h
Expand Up @@ -72,12 +72,6 @@ namespace shogun
/** cleanup */
virtual void cleanup();

/** applies to features
* @param features features
* @return feature matrix
*/
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/** applies to feature vector
* @param vector features vector
* @return transformed feature vector
Expand Down Expand Up @@ -135,6 +129,10 @@ namespace shogun
*/
float64_t get_period() const;

protected:
virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t> matrix) override;

private:
void init ();
void register_params ();
Expand Down
22 changes: 8 additions & 14 deletions src/shogun/preprocessor/LogPlusOne.cpp
Expand Up @@ -4,10 +4,11 @@
* Authors: Soeren Sonnenburg, Sergey Lisitsyn, Evgeniy Andreev
*/

#include <shogun/preprocessor/LogPlusOne.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/base/range.h>
#include <shogun/features/Features.h>
#include <shogun/mathematics/Math.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/preprocessor/LogPlusOne.h>

using namespace shogun;

Expand Down Expand Up @@ -42,21 +43,14 @@ bool CLogPlusOne::save(FILE* f)
return false;
}

// --- Diff hunk as rendered by the page: removed (OLD) and added (NEW) lines are
// --- interleaved with the +/- markers stripped. Annotations tag each side.
// OLD doxygen comment (removed along with the old entry point):
/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
// OLD signature (removed):
SGMatrix<float64_t> CLogPlusOne::apply_to_feature_matrix(CFeatures* features)
// NEW signature (added): applies log(x + 1) element-wise to the given matrix.
SGMatrix<float64_t> CLogPlusOne::apply_to_matrix(SGMatrix<float64_t> matrix)
{
// OLD (removed): pulled the dense matrix out of the features object.
auto feature_matrix =
features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();

// OLD loop header (removed):
for (int32_t i=0; i<feature_matrix.num_cols; i++)
// NEW loop header (added): i iterates over columns.
for (auto i : range(matrix.num_cols))
{
// OLD inner loop (removed): explicit column-major flat indexing i*num_rows + j.
for (int32_t j=0; j<feature_matrix.num_rows; j++)
feature_matrix.matrix[i * feature_matrix.num_rows + j] = std::log(
feature_matrix.matrix[i * feature_matrix.num_rows + j] + 1.0);
// NEW inner loop (added).
// NOTE(review): i ranges over num_cols and j over num_rows, yet they are
// passed as matrix(i, j). If SGMatrix::operator() takes (row, column),
// this swaps the indices relative to the old i*num_rows + j addressing
// and only coincides for square matrices — confirm against SGMatrix's
// indexing convention.
for (auto j : range(matrix.num_rows))
matrix(i, j) = std::log(matrix(i, j) + 1.0);
}
// OLD return (removed):
return feature_matrix;
// NEW return (added): the transform is in-place; the same matrix is returned.
return matrix;
}

/// apply preproc on single feature vector
Expand Down
9 changes: 4 additions & 5 deletions src/shogun/preprocessor/LogPlusOne.h
Expand Up @@ -44,11 +44,6 @@ class CLogPlusOne : public CDensePreprocessor<float64_t>
/// save preprocessor init-data to file
virtual bool save(FILE* f);

/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/// apply preproc on single feature vector
/// result in feature matrix
virtual SGVector<float64_t> apply_to_feature_vector(SGVector<float64_t> vector);
Expand All @@ -58,6 +53,10 @@ class CLogPlusOne : public CDensePreprocessor<float64_t>

/// return a type of preprocessor
virtual EPreprocessorType get_type() const { return P_LOGPLUSONE; }

protected:
virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t> matrix) override;
};
}
#endif
25 changes: 10 additions & 15 deletions src/shogun/preprocessor/NormOne.cpp
Expand Up @@ -5,11 +5,12 @@
* Viktor Gal
*/

#include <shogun/preprocessor/NormOne.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/base/range.h>
#include <shogun/features/Features.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>
#include <shogun/features/Features.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/preprocessor/NormOne.h>

using namespace shogun;

Expand Down Expand Up @@ -43,21 +44,15 @@ bool CNormOne::save(FILE* f)
return false;
}

// --- Diff hunk as rendered by the page: removed (OLD) and added (NEW) lines are
// --- interleaved with the +/- markers stripped. Annotations tag each side.
// OLD doxygen comment (removed):
/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
// OLD signature (removed):
SGMatrix<float64_t> CNormOne::apply_to_feature_matrix(CFeatures* features)
// NEW signature (added): scales every column of the matrix to unit norm.
SGMatrix<float64_t> CNormOne::apply_to_matrix(SGMatrix<float64_t> matrix)
{
// OLD (removed): extracted the dense matrix from the features object.
auto feature_matrix =
features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();

// OLD loop header (removed):
for (int32_t i=0; i<feature_matrix.num_cols; i++)
// NEW loop header (added):
for (auto i : range(matrix.num_cols))
{
// OLD body (removed): wrapped the raw column pointer (non-owning SGVector)
// and scaled by 1/sqrt(dot(v, v)).
SGVector<float64_t> vec(&(feature_matrix.matrix[i*feature_matrix.num_rows]), feature_matrix.num_rows, false);
float64_t norm = std::sqrt(linalg::dot(vec, vec));
SGVector<float64_t>::scale_vector(1.0/norm, vec, feature_matrix.num_rows);
// NEW body (added): uses linalg::norm + linalg::scale on the column.
// assumes get_column returns a non-owning view so the scale writes back
// into `matrix` — TODO confirm against SGMatrix::get_column.
// NOTE(review): an all-zero column gives norm == 0 and a division by
// zero — same behavior as the old code, but worth a guard.
auto vec = matrix.get_column(i);
auto norm = linalg::norm(vec);
linalg::scale(vec, vec, 1.0 / norm);
}
// OLD return (removed):
return feature_matrix;
// NEW return (added):
return matrix;
}

/// apply preproc on single feature vector
Expand Down
9 changes: 4 additions & 5 deletions src/shogun/preprocessor/NormOne.h
Expand Up @@ -44,11 +44,6 @@ class CNormOne : public CDensePreprocessor<float64_t>
/// save preprocessor init-data to file
virtual bool save(FILE* f);

/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/// apply preproc on single feature vector
/// result in feature matrix
virtual SGVector<float64_t> apply_to_feature_vector(SGVector<float64_t> vector);
Expand All @@ -58,6 +53,10 @@ class CNormOne : public CDensePreprocessor<float64_t>

/// return a type of preprocessor
virtual EPreprocessorType get_type() const { return P_NORMONE; }

protected:
virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t> matrix) override;
};
}
#endif
25 changes: 10 additions & 15 deletions src/shogun/preprocessor/PNorm.cpp
Expand Up @@ -5,10 +5,11 @@
* Sergey Lisitsyn
*/

#include <shogun/preprocessor/PNorm.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/mathematics/Math.h>
#include <shogun/features/Features.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>
#include <shogun/preprocessor/DensePreprocessor.h>
#include <shogun/preprocessor/PNorm.h>

#ifdef HAVE_LAPACK
#include <shogun/mathematics/lapack.h>
Expand Down Expand Up @@ -56,21 +57,15 @@ bool CPNorm::save (FILE* f)
return false;
}

// --- Diff hunk as rendered by the page: removed (OLD) and added (NEW) lines are
// --- interleaved with the +/- markers stripped. Annotations tag each side.
// OLD doxygen comment (removed):
/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
// OLD signature (removed):
SGMatrix<float64_t> CPNorm::apply_to_feature_matrix (CFeatures* features)
// NEW signature (added): normalizes every column by its p-norm.
SGMatrix<float64_t> CPNorm::apply_to_matrix(SGMatrix<float64_t> matrix)
{
// OLD (removed): extracted the dense matrix from the features object.
auto feature_matrix =
features->as<CDenseFeatures<float64_t>>()->get_feature_matrix();

// OLD loop header (removed):
for (int32_t i=0; i<feature_matrix.num_cols; i++)
// NEW loop header (added):
for (auto i : range(matrix.num_cols))
{
// OLD body (removed): raw column pointer, p-norm, then scale_vector.
float64_t* vec= &(feature_matrix.matrix[i*feature_matrix.num_rows]);
float64_t norm = get_pnorm (vec, feature_matrix.num_rows);
SGVector<float64_t>::scale_vector(1.0/norm, vec, feature_matrix.num_rows);
// NEW body (added): column view + linalg::scale.
// assumes get_column returns a non-owning view so the scale writes back
// into `matrix` — TODO confirm against SGMatrix::get_column.
// NOTE(review): norm == 0 (all-zero column) still divides by zero, as
// in the old code.
auto vec = matrix.get_column(i);
auto norm = get_pnorm(vec.vector, vec.vlen);
linalg::scale(vec, vec, 1.0 / norm);
}
// OLD return (removed):
return feature_matrix;
// NEW return (added):
return matrix;
}

/// apply preproc on single feature vector
Expand Down
9 changes: 4 additions & 5 deletions src/shogun/preprocessor/PNorm.h
Expand Up @@ -47,11 +47,6 @@ class CPNorm : public CDensePreprocessor<float64_t>
/// save preprocessor init-data to file
virtual bool save (FILE* f);

/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
virtual SGMatrix<float64_t> apply_to_feature_matrix (CFeatures* features);

/// apply preproc on single feature vector
/// result in feature matrix
virtual SGVector<float64_t> apply_to_feature_vector (SGVector<float64_t> vector);
Expand All @@ -74,6 +69,10 @@ class CPNorm : public CDensePreprocessor<float64_t>
*/
double get_pnorm () const;

protected:
virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t> matrix) override;

private:
void register_param ();
inline float64_t get_pnorm (float64_t* vec, int32_t vec_len) const;
Expand Down
40 changes: 16 additions & 24 deletions src/shogun/preprocessor/PruneVarSubMean.cpp
Expand Up @@ -51,12 +51,15 @@ void CPruneVarSubMean::fit(CFeatures* features)
for (auto i : range(num_examples))
{
for (auto j : range(num_features))
var[j] += CMath::sq(
m_mean[j] - feature_matrix.matrix[i * num_features + j]);
{
auto diff =
linalg::add(m_mean, feature_matrix.get_column(i), 1.0, -1.0);
var[j] += linalg::dot(diff, diff);
}
}

int32_t num_ok = 0;
int32_t* idx_ok = SG_MALLOC(int32_t, num_features);
auto idx_ok = SGVector<int32_t>(num_features);

for (auto j : range(num_features))
{
Expand All @@ -82,7 +85,6 @@ void CPruneVarSubMean::fit(CFeatures* features)
m_std[j] = std::sqrt(var[idx_ok[j]]);
}
m_num_idx = num_ok;
SG_FREE(idx_ok);
m_mean = new_mean;

m_initialized = true;
Expand All @@ -97,43 +99,33 @@ void CPruneVarSubMean::cleanup()
m_initialized = false;
}

// --- Diff hunk as rendered by the page: removed (OLD) and added (NEW) lines are
// --- interleaved with the +/- markers stripped. Annotations tag each side.
// OLD doxygen comment (removed):
/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
// OLD signature (removed):
SGMatrix<float64_t> CPruneVarSubMean::apply_to_feature_matrix(CFeatures* features)
// NEW signature (added): keeps only the m_num_idx selected features of each
// vector, subtracting the fitted mean (and optionally dividing by the std).
SGMatrix<float64_t>
CPruneVarSubMean::apply_to_matrix(SGMatrix<float64_t> matrix)
{
// fit() must have run first so m_idx / m_mean / m_std are populated.
ASSERT(m_initialized)

// OLD declarations (removed): filled in later by get_feature_matrix.
int32_t num_vectors=0;
int32_t num_features=0;
// NEW declaration (added):
int32_t num_vectors = matrix.num_cols;

// OLD (removed): fetched the matrix from the features object.
auto simple_features = features->as<CDenseFeatures<float64_t>>();
auto m = simple_features->get_feature_matrix();
// NEW (added): reinterpret the input buffer as m_num_idx x num_vectors.
// NOTE(review): `result` aliases matrix.data() with a DIFFERENT row count,
// so it only holds the right values if each output vector is compacted to
// the m_num_idx stride — see the v_src/v_dst note below; confirm intended.
SGMatrix<float64_t> result(matrix.data(), m_num_idx, num_vectors);

// OLD logging (removed):
SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features)
SG_INFO("Preprocessing feature matrix\n")
// OLD loop header (removed):
for (int32_t vec=0; vec<num_vectors; vec++)
// NEW loop header (added):
for (auto i : range(num_vectors))
{
// OLD (removed): src at the original num_features stride, dst at the
// compacted m_num_idx stride — an in-place compaction of the buffer.
float64_t* v_src=&m[num_features*vec];
float64_t* v_dst=&m[m_num_idx*vec];
// NEW (added).
// NOTE(review): v_src and v_dst are both get_column(i) of the SAME
// column, so unlike the old code nothing is compacted to the m_num_idx
// stride that `result` above assumes — and if get_column returns a copy
// rather than a view, the writes never reach `matrix` at all. Verify
// against SGMatrix::get_column.
auto v_src = matrix.get_column(i);
auto v_dst = matrix.get_column(i);

if (m_divide_by_std)
{
// OLD inner loop (removed):
for (int32_t feat=0; feat<m_num_idx; feat++)
// NEW inner loop (added): standardize the selected features.
for (auto feat : range(m_num_idx))
v_dst[feat]=(v_src[m_idx[feat]]-m_mean[feat])/m_std[feat];
}
else
{
// OLD inner loop (removed):
for (int32_t feat=0; feat<m_num_idx; feat++)
// NEW inner loop (added): only subtract the mean.
for (auto feat : range(m_num_idx))
v_dst[feat]=(v_src[m_idx[feat]]-m_mean[feat]);
}
}

// OLD epilogue (removed): shrank the features object in place and
// re-fetched its matrix.
simple_features->set_num_features(m_num_idx);
simple_features->get_feature_matrix(num_features, num_vectors);
SG_INFO("new Feature matrix: %ix%i\n", num_vectors, num_features)

return simple_features->get_feature_matrix();
// NEW return (added):
return result;
}

/// apply preproc on single feature vector
Expand Down
9 changes: 4 additions & 5 deletions src/shogun/preprocessor/PruneVarSubMean.h
Expand Up @@ -41,11 +41,6 @@ class CPruneVarSubMean : public CDensePreprocessor<float64_t>
/// cleanup
virtual void cleanup();

/// apply preproc on feature matrix
/// result in feature matrix
/// return pointer to feature_matrix, i.e. f->get_feature_matrix();
virtual SGMatrix<float64_t> apply_to_feature_matrix(CFeatures* features);

/// apply preproc on single feature vector
/// result in feature matrix
virtual SGVector<float64_t> apply_to_feature_vector(SGVector<float64_t> vector);
Expand All @@ -56,6 +51,10 @@ class CPruneVarSubMean : public CDensePreprocessor<float64_t>
/// return a type of preprocessor
virtual EPreprocessorType get_type() const { return P_PRUNEVARSUBMEAN; }

protected:
virtual SGMatrix<float64_t>
apply_to_matrix(SGMatrix<float64_t> matrix) override;

private:
void init();
void register_parameters();
Expand Down

0 comments on commit c06258f

Please sign in to comment.