
Commit

Refactor FisherLDA's and LDA's solvers and port them to use linalg.
micmn committed Jun 28, 2017
1 parent 7b8733a commit 3dd43d5
Showing 4 changed files with 426 additions and 350 deletions.
288 changes: 123 additions & 165 deletions src/shogun/classifier/LDA.cpp
@@ -10,14 +10,17 @@
*/
#include <shogun/lib/config.h>

#include <shogun/lib/common.h>
#include <shogun/machine/Machine.h>
#include <shogun/machine/LinearMachine.h>
#include <shogun/classifier/LDA.h>
#include <shogun/labels/Labels.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/Labels.h>
#include <shogun/lib/common.h>
#include <shogun/machine/LinearMachine.h>
#include <shogun/machine/Machine.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>
#include <shogun/preprocessor/FisherLDA.h>
#include <vector>

using namespace Eigen;
using namespace shogun;
@@ -30,9 +33,10 @@ CLDA::CLDA(float64_t gamma, ELDAMethod method)
m_gamma=gamma;
}

CLDA::CLDA(float64_t gamma, CDenseFeatures<float64_t> *traindat,
CLabels *trainlab, ELDAMethod method)
:CLinearMachine(), m_gamma(gamma)
CLDA::CLDA(
float64_t gamma, CDenseFeatures<float64_t>* traindat, CLabels* trainlab,
ELDAMethod method)
: CLinearMachine(), m_gamma(gamma)
{
init();
set_features(traindat);
@@ -45,10 +49,12 @@ void CLDA::init()
{
m_method=AUTO_LDA;
m_gamma=0;
SG_ADD((machine_int_t*) &m_method, "m_method",
"Method used for LDA calculation", MS_NOT_AVAILABLE);
SG_ADD((machine_int_t*) &m_gamma, "m_gamma",
"Regularization parameter", MS_NOT_AVAILABLE);
SG_ADD(
(machine_int_t*)&m_method, "m_method",
"Method used for LDA calculation", MS_NOT_AVAILABLE);
SG_ADD(
&m_gamma, "m_gamma", "Regularization parameter",
MS_NOT_AVAILABLE);
}

CLDA::~CLDA()
@@ -58,8 +64,11 @@ CLDA::~CLDA()
bool CLDA::train_machine(CFeatures *data)
{
REQUIRE(m_labels, "Labels for the given features are not specified!\n")
REQUIRE(m_labels->get_label_type()==LT_BINARY, "The labels should of type"
" CBinaryLabels! you provided %s \n",m_labels->get_name())
REQUIRE(
m_labels->get_label_type() == LT_BINARY,
"The labels should be of type CBinaryLabels! You provided %s.\n",
m_labels->get_name())

if(data)
{
@@ -91,174 +100,123 @@ bool CLDA::train_machine(CFeatures *data)

template <typename ST>
bool CLDA::train_machine_templated(SGVector<int32_t> train_labels, CFeatures *data)
{
SGMatrix<ST>feature_matrix=((CDenseFeatures<ST>*)features)
->get_feature_matrix();
int32_t num_feat=feature_matrix.num_rows;
int32_t num_vec=feature_matrix.num_cols;

SGVector<int32_t> classidx_neg(num_vec);
SGVector<int32_t> classidx_pos(num_vec);
{
SGMatrix<ST> feature_matrix =
((CDenseFeatures<ST>*)data)->get_feature_matrix();
index_t num_feat = feature_matrix.num_rows;
index_t num_vec = feature_matrix.num_cols;

int32_t i=0;
int32_t num_neg=0;
int32_t num_pos=0;
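// note: with at most as many samples as features the pooled covariance
// matrix is singular, so AUTO_LDA falls back to the SVD-based solver;
// otherwise the classic Cholesky-based solver is used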
bool lda_more_efficient = (m_method == AUTO_LDA && num_vec <= num_feat);

for(i=0; i<train_labels.vlen; i++)
{
if (train_labels.vector[i]==-1)
classidx_neg[num_neg++]=i;
if (m_method == SVD_LDA || lda_more_efficient)
return solver_svd(train_labels, data);
else
return solver_classic<ST>(train_labels, data);
}

else if(train_labels.vector[i]==+1)
classidx_pos[num_pos++]=i;
}
bool CLDA::solver_svd(SGVector<int32_t> train_labels, CFeatures* data)
{
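// CANVAR_FLDA selects FisherLDA's canonical-variates implementation,
// which stays well-defined even when the scatter matrix is singular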
std::unique_ptr<CFisherLDA> lda(new CFisherLDA(CANVAR_FLDA));
std::unique_ptr<CMulticlassLabels> multiclass_labels(
new CMulticlassLabels(m_labels->get_num_labels()));

SGVector<ST> w_st(num_feat);
w_st.zero();
typename SGMatrix<ST>::EigenMatrixXt fmatrix=typename SGMatrix<ST>::EigenMatrixXtMap(feature_matrix.matrix, num_feat, num_vec);
typename SGVector<ST>::EigenVectorXt mean_neg(num_feat);
mean_neg.setZero();
typename SGVector<ST>::EigenVectorXt mean_pos(num_feat);
mean_pos.setZero();
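// remap the binary labels {-1, +1} to the class indices {0, 1} that
// CMulticlassLabels uses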
for (index_t i = 0; i < m_labels->get_num_labels(); ++i)
multiclass_labels->set_int_label(
i, (((CBinaryLabels*)m_labels)->get_int_label(i) == 1 ? 1 : 0));

//mean neg
for(i=0; i<num_neg; i++)
mean_neg+=fmatrix.col(classidx_neg[i]);
mean_neg/=(ST)num_neg;
// keep just the first dimension to do binary classification
lda->fit(data, multiclass_labels.get(), 1);
auto m = lda->get_transformation_matrix();

// get m(-ve) - mean(-ve)
for(i=0; i<num_neg; i++)
fmatrix.col(classidx_neg[i])-=mean_neg;
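// with one target dimension the transformation matrix is a single
// column, which becomes the weight vector; the bias centers the
// projection on the mean computed by the preprocessor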
SGVector<float64_t> w(m);
set_w(w);
set_bias(-linalg::dot(w, lda->get_mean()));

//mean pos
for(i=0; i<num_pos; i++)
mean_pos+=fmatrix.col(classidx_pos[i]);
mean_pos/=(ST)num_pos;
return true;
}

// get m(+ve) - mean(+ve)
for(i=0; i<num_pos; i++)
fmatrix.col(classidx_pos[i])-=mean_pos;
template <typename ST>
bool CLDA::solver_classic(SGVector<int32_t> train_labels, CFeatures* data)
{
SGMatrix<ST> feature_matrix =
((CDenseFeatures<ST>*)data)->get_feature_matrix();
index_t num_feat = feature_matrix.num_rows;
index_t num_vec = feature_matrix.num_cols;

SGMatrix<ST>scatter_matrix(num_feat, num_feat);
typename SGMatrix<ST>::EigenMatrixXtMap scatter(scatter_matrix.matrix, num_feat, num_feat);
std::vector<index_t> idx_neg;
std::vector<index_t> idx_pos;

if (m_method == FLD_LDA || (m_method==AUTO_LDA && num_vec>num_feat))
for (index_t i = 0; i < train_labels.vlen; i++)
{
// covariance matrix.
typename SGMatrix<ST>::EigenMatrixXt cov_mat(num_feat, num_feat);
cov_mat=fmatrix*fmatrix.transpose();
scatter=cov_mat/(num_vec-1);
ST trace=scatter.trace();
ST s=1.0-((ST) m_gamma);
scatter*=s;
scatter.diagonal()+=Eigen::DenseBase<typename SGVector<ST>::EigenVectorXt>::Constant(num_feat, trace*((ST)m_gamma)/num_feat);

// the usual way
// we need to find a Basic Linear Solution of A.x=b for 'x'.
// Instead of crudely Inverting A, we go for solve() using Decompositions.
// where:
// MatrixXd A=scatter;
// VectorXd b=mean_pos-mean_neg;
// VectorXd x=w;
typename SGVector<ST>::EigenVectorXtMap x(w_st.vector, num_feat);
LLT<typename SGMatrix<ST>::EigenMatrixXt> decomposition(scatter);
x=decomposition.solve(mean_pos-mean_neg);

// get the weights w_neg(for -ve class) and w_pos(for +ve class)
typename SGVector<ST>::EigenVectorXt w_neg=decomposition.solve(mean_neg);
typename SGVector<ST>::EigenVectorXt w_pos=decomposition.solve(mean_pos);

// get the bias.
bias=((float64_t)(0.5*(w_neg.dot(mean_neg)-w_pos.dot(mean_pos))));
}
else
{
//for algorithmic detail, please refer to section 16.3.1. of Bayesian
//Reasoning and Machine Learning by David Barber.

//we will perform SVD here.
typename SGMatrix<ST>::EigenMatrixXtMap fmatrix1(feature_matrix.matrix, num_feat, num_vec);

// to hold the centered positive and negative class data
typename SGMatrix<ST>::EigenMatrixXt cen_pos(num_feat,num_pos);
typename SGMatrix<ST>::EigenMatrixXt cen_neg(num_feat,num_neg);

for(i=0; i<num_pos;i++)
cen_pos.col(i)=fmatrix.col(classidx_pos[i]);

for(i=0; i<num_neg;i++)
cen_neg.col(i)=fmatrix.col(classidx_neg[i]);

//+ve covariance matrix
#if EIGEN_WITH_OPERATOR_BUG
cen_pos=cen_pos*cen_pos.transpose();
cen_pos/=(ST)(num_pos-1);
#else
cen_pos=cen_pos*cen_pos.transpose()/((ST)(num_pos-1));
#endif

//-ve covariance matrix
#if EIGEN_WITH_OPERATOR_BUG
cen_neg=cen_neg*cen_neg.transpose();
cen_neg/=(ST)(num_neg-1);
#else
cen_neg=cen_neg*cen_neg.transpose()/((ST)(num_neg-1));
#endif

//within class matrix
typename SGMatrix<ST>::EigenMatrixXt Sw= num_pos*cen_pos+num_neg*cen_neg;
ST trace=Sw.trace();
ST s=1.0-((ST)m_gamma);
Sw *=s;
Sw.diagonal()+=Eigen::DenseBase<typename SGVector<ST>::EigenVectorXt>::Constant(num_feat, trace*((ST)m_gamma)/num_feat);

//total mean
typename SGVector<ST>::EigenVectorXt mean_total=(num_pos*mean_pos+num_neg*mean_neg)/(ST)num_vec;

//between class matrix
typename SGMatrix<ST>::EigenMatrixXt Sb(num_feat,2);
Sb.col(0)=sqrt(num_pos)*(mean_pos-mean_total);
Sb.col(1)=sqrt(num_neg)*(mean_neg-mean_total);

JacobiSVD<typename SGMatrix<ST>::EigenMatrixXt> svd(fmatrix1, ComputeThinU);

// basis to represent the solution
typename SGMatrix<ST>::EigenMatrixXt Q=svd.matrixU();
// modified between class scatter
Sb=Q.transpose()*(Sb*(Sb.transpose()))*Q;

// modified within class scatter
Sw=Q.transpose()*Sw*Q;

// to find SVD((inverse(Chol(Sw)))' * Sb * (inverse(Chol(Sw))))
//1.get Cw=Chol(Sw)
//find the decomposition of Cw'.
HouseholderQR<typename SGMatrix<ST>::EigenMatrixXt> decomposition(Sw.llt().matrixU().transpose());

//2.get P=inv(Cw')*Sb_new
//MatrixXd P=decomposition.solve(Sb);
//3. final value to be put in SVD will be therefore:
// final_ output =(inv(Cw')*(P'))'
JacobiSVD<typename SGMatrix<ST>::EigenMatrixXt> svd2(decomposition.solve((decomposition.solve(Sb))
.transpose()).transpose(), ComputeThinU);

// Since this is a linear classifier, with only binary classes,
// we need to keep only the 1st eigenvector.
Map<typename SGVector<ST>::EigenVectorXt> x(w_st.vector, num_feat);
x=Q*(svd2.matrixU().col(0));
// get the bias
bias=((float64_t)(x.transpose()*mean_total));
bias=bias*(-1);
if (train_labels.vector[i] == -1)
idx_neg.push_back(i);
else if (train_labels.vector[i] == +1)
idx_pos.push_back(i);
}

SGVector<float64_t> w(num_feat);
w.zero();
SGMatrix<ST> matrix(feature_matrix);
SGVector<ST> mean_neg(num_feat);
SGVector<ST> mean_pos(num_feat);

linalg::zero(mean_neg);
linalg::zero(mean_pos);

//copy w_st into w
for(i = 0; i < w.size(); ++i)
w[i] = (float64_t) w_st[i];
// mean neg
for (auto i : idx_neg)
linalg::add_col_vec(matrix, i, mean_neg, mean_neg);
linalg::scale(mean_neg, mean_neg, 1 / (ST)idx_neg.size());

// get m(-ve) - mean(-ve)
for (auto i : idx_neg)
linalg::add_col_vec(matrix, i, mean_neg, matrix, (ST)1, (ST)-1);
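// (add_col_vec with coefficients (1, -1) writes matrix.col(i) - mean_neg
// back into column i, i.e. it centers each negative sample in place)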

// mean pos
for (auto i : idx_pos)
linalg::add_col_vec(matrix, i, mean_pos, mean_pos);
linalg::scale(mean_pos, mean_pos, 1 / (ST)idx_pos.size());

// get m(+ve) - mean(+ve)
for (auto i : idx_pos)
linalg::add_col_vec(matrix, i, mean_pos, matrix, (ST)1, (ST)-1);

// pooled covariance matrix: S = X_c * X_c^T / (num_vec - 1), with X_c
// the mean-centered data.
auto cov_mat = linalg::matrix_prod(matrix, matrix, false, true);
SGMatrix<ST> scatter_matrix(num_feat, num_feat);
linalg::scale(cov_mat, scatter_matrix, 1 / (ST)(num_vec - 1));

ST trace = linalg::trace(scatter_matrix);
SGMatrix<ST> id(num_feat, num_feat);
linalg::identity(id);
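// shrinkage regularization:
// scatter <- (1 - gamma) * scatter + gamma * (trace / num_feat) * I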
linalg::add(
scatter_matrix, id, scatter_matrix, (ST)(1.0 - m_gamma),
trace * ((ST)m_gamma) / num_feat);

// the usual way: we need to find a basic linear solution of A.x = b
// for 'x', where A = scatter_matrix, b = mean_pos - mean_neg, x = w.
// Instead of crudely inverting A, we solve via a Cholesky
// decomposition (see cholesky_factor/cholesky_solver below).
auto decomposition = linalg::cholesky_factor(scatter_matrix);
SGVector<ST> w_st = linalg::cholesky_solver(
decomposition, linalg::add(mean_pos, mean_neg, (ST)1, (ST)-1));

// get the weights w_neg(for -ve class) and w_pos(for +ve class)
auto w_neg = linalg::cholesky_solver(decomposition, mean_neg);
auto w_pos = linalg::cholesky_solver(decomposition, mean_pos);

SGVector<float64_t> w(num_feat);
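// (element-wise cast: the templated solver may run in single precision,
// while CLinearMachine stores float64_t weights)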
// copy w_st into w
for (index_t i = 0; i < w.size(); ++i)
w[i] = (float64_t)w_st[i];
set_w(w);

// get the bias.
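// i.e. b = -(mu_+^T S^-1 mu_+ - mu_-^T S^-1 mu_-) / 2, the threshold
// midway between the two projected class means (equal priors assumed)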
set_bias(
(float64_t)(
0.5 *
(linalg::dot(w_neg, mean_neg) - linalg::dot(w_pos, mean_pos))));

return true;
}
17 changes: 17 additions & 0 deletions src/shogun/classifier/LDA.h
@@ -172,6 +172,23 @@ class CLDA : public CLinearMachine
template <typename ST>
bool train_machine_templated(SGVector<int32_t> train_labels, CFeatures * features);

/**
* Train the machine with the SVD-based solver (@see CFisherLDA).
* \warning This method is currently untemplated.
* @param train_labels labels of the training data
* @param data training data
*/
bool solver_svd(SGVector<int32_t> train_labels, CFeatures* data);

/**
* Train the machine with the classic solver based on the Cholesky
* decomposition of the covariance matrix.
* @param train_labels labels of the training data
* @param data training data
*/
template <typename ST>
bool solver_classic(SGVector<int32_t> train_labels, CFeatures* data);

protected:

void init();
