Refactor (Fisher)LDA solvers into separate class.
Add accuracy option in LDA to use Jacobi-SVD or BDC-SVD.
Ignore eigenvector magnitude in LDA unit-test.
micmn authored and vigsterkr committed Jul 5, 2017
1 parent e29ddfb commit 00ec38a
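
The new accuracy option can be exercised as in the following minimal sketch, written against the constructor signature introduced by this commit; the feature and label setup is illustrative, not taken from the diff:

#include <shogun/classifier/LDA.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

void train_lda(CDenseFeatures<float64_t>* feats, CBinaryLabels* labels)
{
    // gamma = 0 (no regularization), automatic solver selection;
    // bdc_svd = false selects the slower but more accurate Jacobi-SVD
    // whenever the SVD path is taken.
    auto lda = new CLDA(0.0, feats, labels, AUTO_LDA, /*bdc_svd=*/false);
    lda->train();
    SG_UNREF(lda);
}

Leaving bdc_svd at its default of true keeps the faster BDC-SVD back-end.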
Showing 7 changed files with 588 additions and 327 deletions.
163 changes: 69 additions & 94 deletions src/shogun/classifier/LDA.cpp
@@ -11,50 +11,50 @@
#include <shogun/lib/config.h>

#include <shogun/classifier/LDA.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/Labels.h>
#include <shogun/lib/common.h>
#include <shogun/machine/LinearMachine.h>
#include <shogun/machine/Machine.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/mathematics/linalg/LinalgNamespace.h>
#include <shogun/preprocessor/FisherLDA.h>
#include <shogun/solver/LDACanVarSolver.h>
#include <shogun/solver/LDASolver.h>
#include <vector>

using namespace Eigen;
using namespace shogun;

CLDA::CLDA(float64_t gamma, ELDAMethod method)
:CLinearMachine()
CLDA::CLDA(float64_t gamma, ELDAMethod method, bool bdc_svd)
: CLinearMachine(false)
{
init();
m_method=method;
m_gamma=gamma;
m_bdc_svd = bdc_svd;
}

CLDA::CLDA(
float64_t gamma, CDenseFeatures<float64_t>* traindat, CLabels* trainlab,
ELDAMethod method)
: CLinearMachine(), m_gamma(gamma)
ELDAMethod method, bool bdc_svd)
: CLinearMachine(false), m_gamma(gamma)
{
init();
set_features(traindat);
set_labels(trainlab);
m_method=method;
m_gamma=gamma;
m_bdc_svd = bdc_svd;
}

void CLDA::init()
{
m_method=AUTO_LDA;
m_gamma=0;
m_bdc_svd = true;
SG_ADD(
(machine_int_t*)&m_method, "m_method",
"Method used for LDA calculation", MS_NOT_AVAILABLE);
SG_ADD(
(machine_int_t*)&m_gamma, "m_gamma", "Regularization parameter",
MS_NOT_AVAILABLE);
SG_ADD(&m_bdc_svd, "m_bdc_svd", "Use BDC-SVD algorithm", MS_NOT_AVAILABLE);
}

CLDA::~CLDA()
@@ -66,8 +66,7 @@ bool CLDA::train_machine(CFeatures *data)
REQUIRE(m_labels, "Labels for the given features are not specified!\n")
REQUIRE(
m_labels->get_label_type() == LT_BINARY,
"The labels should of type"
" CBinaryLabels! you provided %s \n",
"The labels should of type CBinaryLabels! Provided type is %s \n",
m_labels->get_name())

if(data)
@@ -82,114 +81,89 @@ bool CLDA::train_machine(CFeatures *data)
REQUIRE(data, "Features have not been provided.\n")
}

SGVector<int32_t>train_labels=((CBinaryLabels *)m_labels)->get_int_labels();
REQUIRE(train_labels.vector,"Provided Labels are empty!\n")
REQUIRE(
data->get_num_vectors() == m_labels->get_num_labels(),
"Number of training examples(%d) should be equal to number of labels "
"(%d)!\n",
data->get_num_vectors(), m_labels->get_num_labels());

REQUIRE(data->get_num_vectors() == train_labels.vlen,"Number of training examples(%d) should be "
"equal to number of labels (%d)!\n", data->get_num_vectors(), train_labels.vlen);
REQUIRE(
features->get_feature_class() == C_DENSE,
"LDA only works with dense features")

if(data->get_feature_type() == F_SHORTREAL)
return CLDA::train_machine_templated<float32_t>(train_labels, data);
return CLDA::train_machine_templated<float32_t>();
else if(data->get_feature_type() == F_DREAL)
return CLDA::train_machine_templated<float64_t>(train_labels, data);
return CLDA::train_machine_templated<float64_t>();
else if(data->get_feature_type() == F_LONGREAL)
return CLDA::train_machine_templated<floatmax_t>(train_labels, data);
return CLDA::train_machine_templated<floatmax_t>();

return false;
}

template <typename ST>
bool CLDA::train_machine_templated(SGVector<int32_t> train_labels, CFeatures *data)
bool CLDA::train_machine_templated()
{
SGMatrix<ST> feature_matrix =
((CDenseFeatures<ST>*)data)->get_feature_matrix();
index_t num_feat = feature_matrix.num_rows;
index_t num_vec = feature_matrix.num_cols;
index_t num_feat = ((CDenseFeatures<ST>*)features)->get_num_features();
index_t num_vec = features->get_num_vectors();

bool svd_more_efficient = (m_method == AUTO_LDA && num_vec <= num_feat);

if (m_method == SVD_LDA || svd_more_efficient)
return solver_svd(train_labels, data);
return solver_svd<ST>();
else
return solver_classic<ST>(train_labels, data);
return solver_classic<ST>();
}

bool CLDA::solver_svd(SGVector<int32_t> train_labels, CFeatures* data)
template <typename ST>
bool CLDA::solver_svd()
{
std::unique_ptr<CFisherLDA> lda(new CFisherLDA(CANVAR_FLDA));
std::unique_ptr<CMulticlassLabels> multiclass_labels(
new CMulticlassLabels(m_labels->get_num_labels()));
auto dense_feat = static_cast<CDenseFeatures<ST>*>(features);

// keep just one dimension to do binary classification
const index_t projection_dim = 1;
auto solver = std::unique_ptr<LDACanVarSolver<ST>>(
new LDACanVarSolver<ST>(
dense_feat,
new CMulticlassLabels(static_cast<CBinaryLabels*>(m_labels)),
projection_dim, m_gamma, m_bdc_svd));

SGVector<ST> w_st(solver->get_eigenvectors());

for (index_t i = 0; i < m_labels->get_num_labels(); ++i)
multiclass_labels->set_int_label(
i, (((CBinaryLabels*)m_labels)->get_int_label(i) == 1 ? 1 : 0));
auto class_mean = solver->get_class_mean();
ST m_neg = linalg::dot(w_st, class_mean[0]);
ST m_pos = linalg::dot(w_st, class_mean[1]);

// keep just the first dimension to do binary classification
lda->fit(data, multiclass_labels.get(), 1);
auto m = lda->get_transformation_matrix();
// change the sign of w if needed to get the correct labels
float64_t sign = (m_pos > m_neg) ? 1 : -1;

SGVector<float64_t> w(m);
SGVector<float64_t> w(dense_feat->get_num_features());
// copy w_st into w
for (index_t i = 0; i < w.size(); ++i)
w[i] = sign * w_st[i];
set_w(w);
set_bias(-linalg::dot(w, lda->get_mean()));

set_bias(-0.5 * sign * (m_neg + m_pos));

return true;
}

template <typename ST>
bool CLDA::solver_classic(SGVector<int32_t> train_labels, CFeatures* data)
bool CLDA::solver_classic()
{
SGMatrix<ST> feature_matrix =
((CDenseFeatures<ST>*)data)->get_feature_matrix();
index_t num_feat = feature_matrix.num_rows;
index_t num_vec = feature_matrix.num_cols;

std::vector<index_t> idx_neg;
std::vector<index_t> idx_pos;

for (index_t i = 0; i < train_labels.vlen; i++)
{
if (train_labels.vector[i] == -1)
idx_neg.push_back(i);
else if (train_labels.vector[i] == +1)
idx_pos.push_back(i);
}

SGMatrix<ST> matrix(feature_matrix);
SGVector<ST> mean_neg(num_feat);
SGVector<ST> mean_pos(num_feat);

linalg::zero(mean_neg);
linalg::zero(mean_pos);

// mean neg
for (auto i : idx_neg)
linalg::add_col_vec(matrix, i, mean_neg, mean_neg);
linalg::scale(mean_neg, mean_neg, 1 / (ST)idx_neg.size());

// get m(-ve) - mean(-ve)
for (auto i : idx_neg)
linalg::add_col_vec(matrix, i, mean_neg, matrix, (ST)1, (ST)-1);

// mean pos
for (auto i : idx_pos)
linalg::add_col_vec(matrix, i, mean_pos, mean_pos);
linalg::scale(mean_pos, mean_pos, 1 / (ST)idx_pos.size());

// get m(+ve) - mean(+ve)
for (auto i : idx_pos)
linalg::add_col_vec(matrix, i, mean_pos, matrix, (ST)1, (ST)-1);
auto dense_feat = static_cast<CDenseFeatures<ST>*>(features);
index_t num_feat = dense_feat->get_num_features();

// covariance matrix.
auto cov_mat = linalg::matrix_prod(matrix, matrix, false, true);
SGMatrix<ST> scatter_matrix(num_feat, num_feat);
linalg::scale(cov_mat, scatter_matrix, 1 / (ST)(num_vec - 1));
auto solver = std::unique_ptr<LDASolver<ST>>(
new LDASolver<ST>(
dense_feat,
new CMulticlassLabels(static_cast<CBinaryLabels*>(m_labels)),
m_gamma));

ST trace = linalg::trace(scatter_matrix);
SGMatrix<ST> id(num_feat, num_feat);
linalg::identity(id);
linalg::add(
scatter_matrix, id, scatter_matrix, (ST)(1.0 - m_gamma),
trace * ((ST)m_gamma) / num_feat);
auto class_mean = solver->get_class_mean();
auto class_count = solver->get_class_count();
SGMatrix<ST> scatter_matrix = solver->get_within_cov();

// the usual way
// we need to find a Basic Linear Solution of A.x=b for 'x'.
@@ -200,11 +174,12 @@ bool CLDA::solver_classic(SGVector<int32_t> train_labels, CFeatures* data)
// VectorXd x=w;
auto decomposition = linalg::cholesky_factor(scatter_matrix);
SGVector<ST> w_st = linalg::cholesky_solver(
decomposition, linalg::add(mean_pos, mean_neg, (ST)1, (ST)-1));
decomposition,
linalg::add(class_mean[1], class_mean[0], (ST)1, (ST)-1));

// get the weights w_neg(for -ve class) and w_pos(for +ve class)
auto w_neg = linalg::cholesky_solver(decomposition, mean_neg);
auto w_pos = linalg::cholesky_solver(decomposition, mean_pos);
auto w_neg = linalg::cholesky_solver(decomposition, class_mean[0]);
auto w_pos = linalg::cholesky_solver(decomposition, class_mean[1]);

SGVector<float64_t> w(num_feat);
// copy w_st into w
@@ -215,8 +190,8 @@ bool CLDA::solver_classic(SGVector<int32_t> train_labels, CFeatures* data)
// get the bias.
set_bias(
(float64_t)(
0.5 *
(linalg::dot(w_neg, mean_neg) - linalg::dot(w_pos, mean_pos))));
0.5 * (linalg::dot(w_neg, class_mean[0]) -
linalg::dot(w_pos, class_mean[1]))));

return true;
}
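
For reference, the refactored classic solver above computes the standard two-class LDA solution. With \(S_w\) the (gamma-regularized) within-class scatter returned by solver->get_within_cov() and \({\bf m_{-1}}, {\bf m_{+1}}\) the class means, the Cholesky solves implement:

\[
w = S_w^{-1}\,({\bf m_{+1}} - {\bf m_{-1}}), \qquad
b = \tfrac{1}{2}\left({\bf m_{-1}}^T S_w^{-1}\,{\bf m_{-1}} - {\bf m_{+1}}^T S_w^{-1}\,{\bf m_{+1}}\right)
\]

The same factorization is reused for all three solves (w, w_neg, w_pos), and b reproduces the set_bias() call in the code above.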
60 changes: 43 additions & 17 deletions src/shogun/classifier/LDA.h
@@ -32,7 +32,7 @@ enum ELDAMethod
/** Singular Value Decomposition based LDA.
*/
SVD_LDA = 20,
/** Fisher two class discrimiant based LDA.
/** Fisher two class discriminant based LDA.
*/
FLD_LDA = 30
};
@@ -55,7 +55,8 @@ template <class ST> class CDenseFeatures;
* is maximized, where
* \f[S_b := ({\bf m_{+1}} - {\bf m_{-1}})({\bf m_{+1}} - {\bf m_{-1}})^T \f]
* is the between class scatter matrix and
* \f[S_w := \sum_{c\in\{-1,+1\}}\sum_{{\bf x}\in X_{c}}({\bf x} - {\bf m_c})({\bf x} - {\bf m_c})^T \f]
* \f[S_w := \sum_{c\in\{-1,+1\}}\sum_{{\bf x}\in X_{c}}({\bf x} - {\bf
* m_c})({\bf x} - {\bf m_c})^T \f]
* is the within class scatter matrix with mean \f${\bf m_c} :=
* \frac{1}{N}\sum_{j=1}^N {\bf x_j^c}\f$ and \f$X_c:=\{x_1^c, \dots, x_N^c\}\f$
* the set of examples of class c.
@@ -66,10 +67,14 @@ template <class ST> class CDenseFeatures;
*
* <em>::SVD_LDA</em> : Singular Value Decomposition method.
* The above derivation of Fisher's LDA requires the invertibility of the within
* class matrix. However, this condition gets void when there are fewer data-points
* than dimensions. A solution is to require that \f${\bf W}\f$ lies only in the subspace
* spanned by the data. A basis of the data \f${\bf X}\f$ is found using the thin-SVD technique
* which returns an orthonormal non-square basis matrix \f${\bf Q}\f$. We then require the
* class matrix. However, this condition no longer holds when there are fewer
* data-points than dimensions. A solution is to require that \f${\bf W}\f$
* lies only in the subspace spanned by the data. A basis of the data
* \f${\bf X}\f$ is found using the thin-SVD technique, which returns an
* orthonormal non-square basis matrix \f${\bf Q}\f$. We then require the
* solution \f${\bf w}\f$ to be expressed in this basis.
* \f[{\bf W} := {\bf Q} {\bf{W^\prime}}\f]
* The between class Matrix is replaced with:
@@ -80,8 +85,13 @@ template <class ST> class CDenseFeatures;
* been projected down to the basis that spans the data.
* see: Bayesian Reasoning and Machine Learning, section 16.3.1.
*
* <em>::AUTO_LDA</em> : This mode automagically chooses one of the above modes for
* the users based on whether N > D (chooses ::FLD_LDA) or N < D(chooses ::SVD_LDA)
* <em>::AUTO_LDA</em> : This mode automagically chooses one of the above modes
* for the user based on whether N > D (chooses ::FLD_LDA) or N < D (chooses
* ::SVD_LDA).
* Note that even if N > D, FLD_LDA may fail if the covariance matrix is not
* invertible; in such a case one should use SVD_LDA.
* \sa CLinearMachine
* \sa http://en.wikipedia.org/wiki/Linear_discriminant_analysis
*/
@@ -94,19 +104,33 @@ class CLDA : public CLinearMachine
/** constructor
*
* @param gamma gamma
* @param method LDA using Fisher's algorithm or Singular Value Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default]
* @param method LDA using Fisher's algorithm or Singular Value
* Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default]
* @param bdc_svd when using the SVD solver, switch between the
* Bidiagonal Divide and Conquer algorithm (BDC-SVD) and
* Jacobi's algorithm; for the differences @see linalg::SVDAlgorithm.
* [default = BDC-SVD]
*/
CLDA(float64_t gamma=0, ELDAMethod method=AUTO_LDA);
CLDA(
float64_t gamma = 0, ELDAMethod method = AUTO_LDA,
bool bdc_svd = true);

/** constructor
*
* @param gamma gamma
* @param traindat training features
* @param trainlab labels for training features
* @param method LDA using Fisher's algorithm or Singular Value Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default]
* @param method LDA using Fisher's algorithm or Singular Value
* Decomposition : ::FLD_LDA/::SVD_LDA/::AUTO_LDA[default]
* @param bdc_svd when using the SVD solver, switch between the
* Bidiagonal Divide and Conquer algorithm (BDC-SVD) and
* Jacobi's algorithm; for the differences @see linalg::SVDAlgorithm.
* [default = BDC-SVD]
*/
CLDA(float64_t gamma, CDenseFeatures<float64_t>* traindat, CLabels* trainlab, ELDAMethod method=AUTO_LDA);
CLDA(
float64_t gamma, CDenseFeatures<float64_t>* traindat,
CLabels* trainlab, ELDAMethod method = AUTO_LDA,
bool bdc_svd = true);
virtual ~CLDA();

/** set gamma
@@ -170,15 +194,15 @@ class CLDA : public CLinearMachine
* @see train_machine
*/
template <typename ST>
bool train_machine_templated(SGVector<int32_t> train_labels, CFeatures * features);
bool train_machine_templated();

/**
* Train the machine with the svd-based solver (@see CFisherLDA).
* \warning This method is currently untemplated.
* @param features training data
* @param labels labels for training data
*/
bool solver_svd(SGVector<int32_t> train_labels, CFeatures* data);
template <typename ST>
bool solver_svd();

/**
* Train the machine with the classic method based on the cholesky
Expand All @@ -187,7 +211,7 @@ class CLDA : public CLinearMachine
* @param labels labels for training data
*/
template <typename ST>
bool solver_classic(SGVector<int32_t> train_labels, CFeatures* data);
bool solver_classic();

protected:

@@ -197,6 +221,8 @@ class CLDA : public CLinearMachine
float64_t m_gamma;
/** LDA mode */
ELDAMethod m_method;
/** use bdc-svd algorithm */
bool m_bdc_svd;
};
}
#endif//ifndef
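
For context on the new flag, the following standalone sketch uses plain Eigen (which Shogun wraps; none of this is Shogun API) to contrast the two SVD back-ends that m_bdc_svd selects between:

#include <Eigen/Dense>
#include <iostream>

int main()
{
    Eigen::MatrixXd X = Eigen::MatrixXd::Random(200, 20);

    // Jacobi SVD: highest accuracy, but its cost grows quickly with
    // matrix size; preferable for small problems.
    Eigen::JacobiSVD<Eigen::MatrixXd> jacobi(
        X, Eigen::ComputeThinU | Eigen::ComputeThinV);

    // Bidiagonal Divide and Conquer SVD: much faster on large matrices,
    // slightly less accurate; the default here (bdc_svd = true).
    Eigen::BDCSVD<Eigen::MatrixXd> bdc(
        X, Eigen::ComputeThinU | Eigen::ComputeThinV);

    // On well-conditioned input the two agree to machine precision.
    std::cout << (jacobi.singularValues() - bdc.singularValues()).norm()
              << "\n";
    return 0;
}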
