Refactor (Fisher)LDA solvers into separate class.
Add accuracy option in LDA to use Jacobi-SVD or BDC-SVD.
Ignore eigenvector magnitude in LDA unit-test.
micmn authored and vigsterkr committed Jul 5, 2017
1 parent e29ddfb commit 00ec38a
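
The new accuracy option can be exercised as in the following minimal sketch, written against the constructor signature introduced by this commit; the feature and label setup is illustrative, not taken from the diff:

#include <shogun/classifier/LDA.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

void train_lda(CDenseFeatures<float64_t>* feats, CBinaryLabels* labels)
{
    // gamma = 0 (no regularization), automatic solver selection;
    // bdc_svd = false selects the slower but more accurate Jacobi-SVD
    // whenever the SVD path is taken.
    auto lda = new CLDA(0.0, feats, labels, AUTO_LDA, /*bdc_svd=*/false);
    lda->train();
    SG_UNREF(lda);
}

Leaving bdc_svd at its default of true keeps the faster BDC-SVD back-end.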
Showing 7 changed files with 588 additions and 327 deletions.
163 changes: 69 additions & 94 deletions src/shogun/classifier/LDA.cpp
@@ -11,50 +11,50 @@
#include <shogun/lib/config.h>

#include <shogun/classifier/LDA.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/Labels.h>
#include <shogun/lib/common.h>
#include <shogun/machine/LinearMachine.h>
#include <shogun/machine/Machine.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>
#include <shogun/preprocessor/FisherLDA.h>
#include <shogun/solver/LDACanVarSolver.h>
#include <shogun/solver/LDASolver.h>
#include <vector>

using namespace Eigen;
using namespace shogun;

CLDA::CLDA(float64_t gamma, ELDAMethod method)
:CLinearMachine()
CLDA::CLDA(float64_t gamma, ELDAMethod method, bool bdc_svd)
: CLinearMachine(false)
{
init();
m_method=method;
m_gamma=gamma;
m_bdc_svd = bdc_svd;
}

CLDA::CLDA(
float64_t gamma, CDenseFeatures<float64_t>* traindat, CLabels* trainlab,
ELDAMethod method)
: CLinearMachine(), m_gamma(gamma)
ELDAMethod method, bool bdc_svd)
: CLinearMachine(false), m_gamma(gamma)
{
init();
set_features(traindat);
set_labels(trainlab);
m_method=method;
m_gamma=gamma;
m_bdc_svd = bdc_svd;
}

void CLDA::init()
{
m_method=AUTO_LDA;
m_gamma=0;
m_bdc_svd = true;
SG_ADD(
(machine_int_t*)&m_method, "m_method",
"Method used for LDA calculation", MS_NOT_AVAILABLE);
SG_ADD(
(machine_int_t*)&m_gamma, "m_gamma", "Regularization parameter",
MS_NOT_AVAILABLE);
SG_ADD(&m_bdc_svd, "m_bdc_svd", "Use BDC-SVD algorithm", MS_NOT_AVAILABLE);
}

CLDA::~CLDA()
@@ -66,8 +66,7 @@ bool CLDA::train_machine(CFeatures *data)
REQUIRE(m_labels, "Labels for the given features are not specified!\n")
REQUIRE(
m_labels->get_label_type() == LT_BINARY,
"The labels should of type"
" CBinaryLabels! you provided %s \n",
"The labels should of type CBinaryLabels! Provided type is %s \n",
m_labels->get_name())

if(data)
@@ -82,114 +81,89 @@ bool CLDA::train_machine(CFeatures *data)
REQUIRE(data, "Features have not been provided.\n")
}

SGVector<int32_t>train_labels=((CBinaryLabels *)m_labels)->get_int_labels();
REQUIRE(train_labels.vector,"Provided Labels are empty!\n")
REQUIRE(
data->get_num_vectors() == m_labels->get_num_labels(),
"Number of training examples(%d) should be equal to number of labels "
"(%d)!\n",
data->get_num_vectors(), m_labels->get_num_labels());

REQUIRE(data->get_num_vectors() == train_labels.vlen,"Number of training examples(%d) should be "
"equal to number of labels (%d)!\n", data->get_num_vectors(), train_labels.vlen);
REQUIRE(
features->get_feature_class() == C_DENSE,
"LDA only works with dense features")

if(data->get_feature_type() == F_SHORTREAL)
return CLDA::train_machine_templated<float32_t>(train_labels, data);
return CLDA::train_machine_templated<float32_t>();
else if(data->get_feature_type() == F_DREAL)
return CLDA::train_machine_templated<float64_t>(train_labels, data);
return CLDA::train_machine_templated<float64_t>();
else if(data->get_feature_type() == F_LONGREAL)
return CLDA::train_machine_templated<floatmax_t>(train_labels, data);
return CLDA::train_machine_templated<floatmax_t>();

return false;
}

template <typename ST>
bool CLDA::train_machine_templated(SGVector<int32_t> train_labels, CFeatures *data)
bool CLDA::train_machine_templated()
{
SGMatrix<ST> feature_matrix =
((CDenseFeatures<ST>*)data)->get_feature_matrix();
index_t num_feat = feature_matrix.num_rows;
index_t num_vec = feature_matrix.num_cols;
index_t num_feat = ((CDenseFeatures<ST>*)features)->get_num_features();
index_t num_vec = features->get_num_vectors();

bool svd_more_efficient = (m_method == AUTO_LDA && num_vec <= num_feat);

if (m_method == SVD_LDA || svd_more_efficient)
return solver_svd(train_labels, data);
return solver_svd<ST>();
else
return solver_classic<ST>(train_labels, data);
return solver_classic<ST>();
}

bool CLDA::solver_svd(SGVector<int32_t> train_labels, CFeatures* data)
template <typename ST>
bool CLDA::solver_svd()
{
std::unique_ptr<CFisherLDA> lda(new CFisherLDA(CANVAR_FLDA));
std::unique_ptr<CMulticlassLabels> multiclass_labels(
new CMulticlassLabels(m_labels->get_num_labels()));
auto dense_feat = static_cast<CDenseFeatures<ST>*>(features);

// keep just one dimension to do binary classification
const index_t projection_dim = 1;
auto solver = std::unique_ptr<LDACanVarSolver<ST>>(
new LDACanVarSolver<ST>(
dense_feat,
new CMulticlassLabels(static_cast<CBinaryLabels*>(m_labels)),
projection_dim, m_gamma, m_bdc_svd));

SGVector<ST> w_st(solver->get_eigenvectors());

for (index_t i = 0; i < m_labels->get_num_labels(); ++i)
multiclass_labels->set_int_label(
i, (((CBinaryLabels*)m_labels)->get_int_label(i) == 1 ? 1 : 0));
auto class_mean = solver->get_class_mean();
ST m_neg = linalg::dot(w_st, class_mean[0]);
ST m_pos = linalg::dot(w_st, class_mean[1]);

// keep just the first dimension to do binary classification
lda->fit(data, multiclass_labels.get(), 1);
auto m = lda->get_transformation_matrix();
// change the sign of w if needed to get the correct labels
float64_t sign = (m_pos > m_neg) ? 1 : -1;

SGVector<float64_t> w(m);
SGVector<float64_t> w(dense_feat->get_num_features());
// copy w_st into w
for (index_t i = 0; i < w.size(); ++i)
w[i] = sign * w_st[i];
set_w(w);
set_bias(-linalg::dot(w, lda->get_mean()));

set_bias(-0.5 * sign * (m_neg + m_pos));

return true;
}

template <typename ST>
bool CLDA::solver_classic(SGVector<int32_t> train_labels, CFeatures* data)
bool CLDA::solver_classic()
{
SGMatrix<ST> feature_matrix =
((CDenseFeatures<ST>*)data)->get_feature_matrix();
index_t num_feat = feature_matrix.num_rows;
index_t num_vec = feature_matrix.num_cols;

std::vector<index_t> idx_neg;
std::vector<index_t> idx_pos;

for (index_t i = 0; i < train_labels.vlen; i++)
{
if (train_labels.vector[i] == -1)
idx_neg.push_back(i);
else if (train_labels.vector[i] == +1)
idx_pos.push_back(i);
}

SGMatrix<ST> matrix(feature_matrix);
SGVector<ST> mean_neg(num_feat);
SGVector<ST> mean_pos(num_feat);

linalg::zero(mean_neg);
linalg::zero(mean_pos);

// mean neg
for (auto i : idx_neg)
linalg::add_col_vec(matrix, i, mean_neg, mean_neg);
linalg::scale(mean_neg, mean_neg, 1 / (ST)idx_neg.size());

// get m(-ve) - mean(-ve)
for (auto i : idx_neg)
linalg::add_col_vec(matrix, i, mean_neg, matrix, (ST)1, (ST)-1);

// mean pos
for (auto i : idx_pos)
linalg::add_col_vec(matrix, i, mean_pos, mean_pos);
linalg::scale(mean_pos, mean_pos, 1 / (ST)idx_pos.size());

// get m(+ve) - mean(+ve)
for (auto i : idx_pos)
linalg::add_col_vec(matrix, i, mean_pos, matrix, (ST)1, (ST)-1);
auto dense_feat = static_cast<CDenseFeatures<ST>*>(features);
index_t num_feat = dense_feat->get_num_features();

// covariance matrix.
auto cov_mat = linalg::matrix_prod(matrix, matrix, false, true);
SGMatrix<ST> scatter_matrix(num_feat, num_feat);
linalg::scale(cov_mat, scatter_matrix, 1 / (ST)(num_vec - 1));
auto solver = std::unique_ptr<LDASolver<ST>>(
new LDASolver<ST>(
dense_feat,
new CMulticlassLabels(static_cast<CBinaryLabels*>(m_labels)),
m_gamma));

ST trace = linalg::trace(scatter_matrix);
SGMatrix<ST> id(num_feat, num_feat);
linalg::identity(id);
linalg::add(
scatter_matrix, id, scatter_matrix, (ST)(1.0 - m_gamma),
trace * ((ST)m_gamma) / num_feat);
auto class_mean = solver->get_class_mean();
auto class_count = solver->get_class_count();
SGMatrix<ST> scatter_matrix = solver->get_within_cov();

// the usual way
// we need to find a Basic Linear Solution of A.x=b for 'x'.
@@ -200,11 +174,12 @@ bool CLDA::solver_classic(SGVector<int32_t> train_labels, CFeatures* data)
// VectorXd x=w;
auto decomposition = linalg::cholesky_factor(scatter_matrix);
SGVector<ST> w_st = linalg::cholesky_solver(
decomposition, linalg::add(mean_pos, mean_neg, (ST)1, (ST)-1));
decomposition,
linalg::add(class_mean[1], class_mean[0], (ST)1, (ST)-1));

// get the weights w_neg(for -ve class) and w_pos(for +ve class)
auto w_neg = linalg::cholesky_solver(decomposition, mean_neg);
auto w_pos = linalg::cholesky_solver(decomposition, mean_pos);
auto w_neg = linalg::cholesky_solver(decomposition, class_mean[0]);
auto w_pos = linalg::cholesky_solver(decomposition, class_mean[1]);

SGVector<float64_t> w(num_feat);
// copy w_st into w
@@ -215,8 +190,8 @@ bool CLDA::solver_classic(SGVector<int32_t> train_labels, CFeatures* data)
// get the bias.
set_bias(
(float64_t)(
0.5 *
(linalg::dot(w_neg, mean_neg) - linalg::dot(w_pos, mean_pos))));
0.5 * (linalg::dot(w_neg, class_mean[0]) -
linalg::dot(w_pos, class_mean[1]))));

return true;
}
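
For reference, the refactored classic solver above computes the standard two-class LDA solution. With \(S_w\) the (gamma-regularized) within-class scatter returned by solver->get_within_cov() and \({\bf m_{-1}}, {\bf m_{+1}}\) the class means, the Cholesky solves implement:

\[
w = S_w^{-1}\,({\bf m_{+1}} - {\bf m_{-1}}), \qquad
b = \tfrac{1}{2}\left({\bf m_{-1}}^T S_w^{-1}\,{\bf m_{-1}} - {\bf m_{+1}}^T S_w^{-1}\,{\bf m_{+1}}\right)
\]

The same factorization is reused for all three solves (w, w_neg, w_pos), and b reproduces the set_bias() call in the code above.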
60 changes: 43 additions & 17 deletions src/shogun/classifier/LDA.h
@@ -32,7 +32,7 @@ enum ELDAMethod
/** Singular Value Decomposition based LDA.
*/
SVD_LDA = 20,
/** Fisher two class discrimiant based LDA.
/** Fisher two class discriminant based LDA.
*/
FLD_LDA = 30
};
@@ -55,7 +55,8 @@ template <class ST> class CDenseFeatures;
* is maximized, where
* \f[S_b := ({\bf m_{+1}} - {\bf m_{-1}})({\bf m_{+1}} - {\bf m_{-1}})^T \f]
* is the between class scatter matrix and
* \f[S_w := \sum_{c\in\{-1,+1\}}\sum_{{\bf x}\in X_{c}}({\bf x} - {\bf m_c})({\bf x} - {\bf m_c})^T \f]
* \f[S_w := \sum_{c\in\{-1,+1\}}\sum_{{\bf x}\in X_{c}}({\bf x} - {\bf
* m_c})({\bf x} - {\bf m_c})^T \f]
* is the within class scatter matrix with mean \f${\bf m_c} :=
* \frac{1}{N}\sum_{j=1}^N {\bf x_j^c}\f$ and \f$X_c:=\{x_1^c, \dots, x_N^c\}\f$
* the set of examples of class c.
@@ -66,10 +67,14 @@ template <class ST> class CDenseFeatures;
*
* <em>::SVD_LDA</em> : Singular Value Decomposition method.
* The above derivation of Fisher's LDA requires the invertibility of the within
* class matrix. However, this condition gets void when there are fewer data-points
* than dimensions. A solution is to require that \f${\bf W}\f$ lies only in the subspace
* spanned by the data. A basis of the data \f${\bf X}\f$ is found using the thin-SVD technique
* which returns an orthonormal non-square basis matrix \f${\bf Q}\f$. We then require the
* class matrix. However, this condition no longer holds when there are fewer
* data-points than dimensions. A solution is to require that \f${\bf W}\f$
* lies only in the subspace spanned by the data. A basis of the data
* \f${\bf X}\f$ is found using the thin-SVD technique, which returns an
* orthonormal non-square basis matrix \f${\bf Q}\f$. We then require the
* solution \f${\bf w}\f$ to be expressed in this basis.
* \f[{\bf W} := {\bf Q} {\bf{W^\prime}}\f]
* The between class Matrix is replaced with:
@@ -80,8 +85,13 @@ template <class ST> class CDenseFeatures;
* been projected down to the basis that spans the data.
* see: Bayesian Reasoning and Machine Learning, section 16.3.1.
*
* <em>::AUTO_LDA</em> : This mode automagically chooses one of the above modes for
* the users based on whether N > D (chooses ::FLD_LDA) or N < D(chooses ::SVD_LDA)
* <em>::AUTO_LDA</em> : This mode automagically chooses one of the above modes
* for the user based on whether N > D (chooses ::FLD_LDA) or N < D (chooses
* ::SVD_LDA).
* Note that even if N > D, FLD_LDA may fail if the covariance matrix is not
* invertible; in such a case one should use SVD_LDA.
* \sa CLinearMachine
* \sa http://en.wikipedia.org/wiki/Linear_discriminant_analysis
*/
@@ -94,19 +104,33 @@ class CLDA : public CLinearMachine
/** constructor
*
* @param gamma gamma
* @param method LDA using Fisher's algorithm or Singular Value Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default]
* @param method LDA using Fisher's algorithm or Singular Value
* Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default]
* @param bdc_svd when using the SVD solver, switch between the
* Bidiagonal Divide and Conquer algorithm (BDC-SVD) and
* Jacobi's algorithm; for the differences @see linalg::SVDAlgorithm.
* [default = BDC-SVD]
*/
CLDA(float64_t gamma=0, ELDAMethod method=AUTO_LDA);
CLDA(
float64_t gamma = 0, ELDAMethod method = AUTO_LDA,
bool bdc_svd = true);

/** constructor
*
* @param gamma gamma
* @param traindat training features
* @param trainlab labels for training features
* @param method LDA using Fisher's algorithm or Singular Value Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default]
* @param method LDA using Fisher's algorithm or Singular Value
* Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default]
* @param bdc_svd when using the SVD solver, switch between the
* Bidiagonal Divide and Conquer algorithm (BDC-SVD) and
* Jacobi's algorithm; for the differences @see linalg::SVDAlgorithm.
* [default = BDC-SVD]
*/
CLDA(float64_t gamma, CDenseFeatures<float64_t>* traindat, CLabels* trainlab, ELDAMethod method=AUTO_LDA);
CLDA(
float64_t gamma, CDenseFeatures<float64_t>* traindat,
CLabels* trainlab, ELDAMethod method = AUTO_LDA,
bool bdc_svd = true);
virtual ~CLDA();

/** set gamma
@@ -170,15 +194,15 @@ class CLDA : public CLinearMachine
* @see train_machine
*/
template <typename ST>
bool train_machine_templated(SGVector<int32_t> train_labels, CFeatures * features);
bool train_machine_templated();

/**
* Train the machine with the svd-based solver (@see CFisherLDA).
* \warning This method is currently untemplated.
* @param features training data
* @param labels labels for training data
*/
bool solver_svd(SGVector<int32_t> train_labels, CFeatures* data);
template <typename ST>
bool solver_svd();

/**
* Train the machine with the classic method based on the cholesky
Expand All @@ -187,7 +211,7 @@ class CLDA : public CLinearMachine
* @param labels labels for training data
*/
template <typename ST>
bool solver_classic(SGVector<int32_t> train_labels, CFeatures* data);
bool solver_classic();

protected:

@@ -197,6 +221,8 @@ class CLDA : public CLinearMachine
float64_t m_gamma;
/** LDA mode */
ELDAMethod m_method;
/** use bdc-svd algorithm */
bool m_bdc_svd;
};
}
#endif//ifndef
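
For context on the new flag, the following standalone sketch uses plain Eigen (which Shogun wraps; none of this is Shogun API) to contrast the two SVD back-ends that m_bdc_svd selects between:

#include <Eigen/Dense>
#include <iostream>

int main()
{
    Eigen::MatrixXd X = Eigen::MatrixXd::Random(200, 20);

    // Jacobi SVD: highest accuracy, but its cost grows quickly with
    // matrix size; preferable for small problems.
    Eigen::JacobiSVD<Eigen::MatrixXd> jacobi(
        X, Eigen::ComputeThinU | Eigen::ComputeThinV);

    // Bidiagonal Divide and Conquer SVD: much faster on large matrices,
    // slightly less accurate; the default here (bdc_svd = true).
    Eigen::BDCSVD<Eigen::MatrixXd> bdc(
        X, Eigen::ComputeThinU | Eigen::ComputeThinV);

    // On well-conditioned input the two agree to machine precision.
    std::cout << (jacobi.singularValues() - bdc.singularValues()).norm()
              << "\n";
    return 0;
}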
