Commit
fixed QDA with Eigen3 for merge, also added a unit test for QDA and fixed a bad tab in the MCLDA code
Kevin Hughes authored and Kevin Hughes committed May 24, 2013
1 parent 571ea98 commit 0c4934b
Showing 6 changed files with 135 additions and 87 deletions.
2 changes: 1 addition & 1 deletion examples/undocumented/libshogun/classifier_lda.cpp
@@ -23,7 +23,7 @@

using namespace shogun;

-#define NUM 50
+#define NUM 50
#define DIMS 2
#define CLASSES 2

4 changes: 3 additions & 1 deletion examples/undocumented/libshogun/classifier_qda.cpp
@@ -18,12 +18,13 @@

using namespace shogun;

-#define NUM 50
+#define NUM 50
#define DIMS 2
#define CLASSES 2

void test()
{
+#ifdef HAVE_EIGEN3
SGVector< float64_t > lab(CLASSES*NUM);
SGMatrix< float64_t > feat(DIMS, CLASSES*NUM);

@@ -51,6 +52,7 @@ void test()
// Free memory
SG_UNREF(output);
SG_UNREF(qda);
+#endif
}

int main(int argc, char ** argv)
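The collapsed middle of test() (between the two hunks above) is where lab and feat get filled before the QDA machine is trained. A minimal sketch of such a setup, using std::normal_distribution rather than Shogun's RNG — the seed and the 4.0*c class offset are illustrative assumptions, not code from this commit:

#include <random>

SGVector<float64_t> lab(CLASSES*NUM);
SGMatrix<float64_t> feat(DIMS, CLASSES*NUM);

std::mt19937 rng(17);
std::normal_distribution<double> noise(0.0, 1.0);
for (int c = 0; c < CLASSES; ++c)
    for (int n = 0; n < NUM; ++n)
    {
        int i = c*NUM + n;
        lab[i] = c;
        for (int d = 0; d < DIMS; ++d)
            feat(d, i) = noise(rng) + 4.0*c; // shift each class blob apart
    }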
167 changes: 86 additions & 81 deletions src/shogun/multiclass/QDA.cpp
@@ -10,15 +10,22 @@

#include <shogun/lib/common.h>

-#ifdef HAVE_LAPACK
+#ifdef HAVE_EIGEN3

#include <shogun/multiclass/QDA.h>
#include <shogun/machine/NativeMulticlassMachine.h>
#include <shogun/features/Features.h>
#include <shogun/labels/Labels.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/mathematics/Math.h>
-#include <shogun/mathematics/lapack.h>

+#include <shogun/mathematics/eigen3.h>

+using namespace Eigen;

+typedef Matrix< float64_t, Dynamic, Dynamic, ColMajor > EMatrix;
+typedef Matrix< float64_t, Dynamic, 1, ColMajor > EVector;
+typedef Array< float64_t, Dynamic, 1 > EArray;

using namespace shogun;

@@ -83,98 +90,104 @@ CMulticlassLabels* CQDA::apply_multiclass(CFeatures* data)

CDenseFeatures< float64_t >* rf = (CDenseFeatures< float64_t >*) m_features;

-SGMatrix< float64_t > X(num_vecs, m_dim);
-SGMatrix< float64_t > A(num_vecs, m_dim);
-SGVector< float64_t > norm2(num_vecs*m_num_classes);
-norm2.zero();
+EMatrix X(num_vecs, m_dim);
+EMatrix A(num_vecs, m_dim);
+EVector norm2(num_vecs*m_num_classes);
+norm2.setZero();

-int i, j, k, vlen;
+int32_t vlen;
bool vfree;
float64_t* vec;
-for ( k = 0 ; k < m_num_classes ; ++k )
+for (int k = 0; k < m_num_classes; k++)
{
// X = features - means
-for ( i = 0 ; i < num_vecs ; ++i )
+for (int i = 0; i < num_vecs; i++)
{
vec = rf->get_feature_vector(i, vlen, vfree);
ASSERT(vec)

-for ( j = 0 ; j < m_dim ; ++j )
-X[i + j*num_vecs] = vec[j] - m_means[k*m_dim + j];
+Eigen::Map< EVector > Evec(vec,vlen);
+Eigen::Map< EVector > Em_means_col(m_means.get_column_vector(k), m_dim);

-rf->free_feature_vector(vec, i, vfree);
+X.row(i) = Evec - Em_means_col;
+
+rf->free_feature_vector(vec, i, vfree);
}

-cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, num_vecs, m_dim,
-m_dim, 1.0, X.matrix, num_vecs, m_M.get_matrix(k), m_dim, 0.0,
-A.matrix, num_vecs);
+Eigen::Map< EMatrix > Em_M(m_M.get_matrix(k), m_dim, m_dim);
+A = X*Em_M;

-for ( i = 0 ; i < num_vecs ; ++i )
-for ( j = 0 ; j < m_dim ; ++j )
-norm2[i + k*num_vecs] += CMath::sq(A[i + j*num_vecs]);
+for (int i = 0; i < num_vecs; i++)
+norm2(i + k*num_vecs) = A.row(i).array().square().sum();

#ifdef DEBUG_QDA
-CMath::display_matrix(A.matrix, num_vecs, m_dim, "A");
+SG_PRINT("\n>>> Displaying A ...\n")
+SGMatrix< float64_t >::display_matrix(A.data(), num_vecs, m_dim);
#endif
}

-for ( i = 0 ; i < num_vecs ; ++i )
-for ( k = 0 ; k < m_num_classes ; ++k )
+for (int i = 0; i < num_vecs; i++)
+for (int k = 0; k < m_num_classes; k++)
{
norm2[i + k*num_vecs] += m_slog[k];
norm2[i + k*num_vecs] *= -0.5;
}

CMulticlassLabels* out = new CMulticlassLabels(num_vecs);

-for ( i = 0 ; i < num_vecs ; ++i )
-out->set_label(i, SGVector<float64_t>::arg_max(norm2.vector+i, num_vecs, m_num_classes));
+for (int i = 0 ; i < num_vecs; i++)
+out->set_label(i, SGVector<float64_t>::arg_max(norm2.data()+i, num_vecs, m_num_classes));

#ifdef DEBUG_QDA
-CMath::display_matrix(norm2.vector, num_vecs, m_num_classes, "norm2");
-CMath::display_vector(out->get_labels().vector, num_vecs, "Labels");
+SG_PRINT("\n>>> Displaying norm2 ...\n")
+SGMatrix< float64_t >::display_matrix(norm2.data(), num_vecs, m_num_classes);
+SG_PRINT("\n>>> Displaying out ...\n")
+SGVector< float64_t >::display_vector(out->get_labels().vector, num_vecs);
#endif

return out;
}
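The rewrite of apply_multiclass above leans on Eigen::Map, which wraps an existing float64_t buffer (here Shogun's feature vectors and the columns of m_means) as an Eigen vector or matrix without copying, so the old cblas calls on raw pointers become ordinary Eigen expressions. A standalone sketch of the pattern, independent of Shogun:

#include <Eigen/Dense>
#include <iostream>

int main()
{
    // A column-major 3x2 buffer: columns {1,2,3} and {4,5,6}.
    double buf[6] = {1, 2, 3, 4, 5, 6};

    Eigen::Map<Eigen::MatrixXd> M(buf, 3, 2); // a view, not a copy
    M.col(1) *= 2.0;                          // writes through to buf

    std::cout << buf[5] << std::endl;         // prints 12
    return 0;
}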

bool CQDA::train_machine(CFeatures* data)
{
-if ( !m_labels )
+if (!m_labels)
SG_ERROR("No labels allocated in QDA training\n")

if ( data )
{
-if ( !data->has_property(FP_DOT) )
+if (!data->has_property(FP_DOT))
SG_ERROR("Speficied features are not of type CDotFeatures\n")

set_features((CDotFeatures*) data);
}
-if ( !m_features )

+if (!m_features)
SG_ERROR("No features allocated in QDA training\n")

SGVector< int32_t > train_labels = ((CMulticlassLabels*) m_labels)->get_int_labels();
-if ( !train_labels.vector )

+if (!train_labels.vector)
SG_ERROR("No train_labels allocated in QDA training\n")

cleanup();

m_num_classes = ((CMulticlassLabels*) m_labels)->get_num_classes();
m_dim = m_features->get_dim_feature_space();
int32_t num_vec = m_features->get_num_vectors();
-if ( num_vec != train_labels.vlen )

+if (num_vec != train_labels.vlen)
SG_ERROR("Dimension mismatch between features and labels in QDA training")

-int32_t* class_idxs = SG_MALLOC(int32_t, num_vec*m_num_classes);
-// number of examples of each class
+int32_t* class_idxs = SG_MALLOC(int32_t, num_vec*m_num_classes); // number of examples of each class
int32_t* class_nums = SG_MALLOC(int32_t, m_num_classes);
memset(class_nums, 0, m_num_classes*sizeof(int32_t));
int32_t class_idx;
-int32_t i, j, k;
-for ( i = 0 ; i < train_labels.vlen ; ++i )

+for (int i = 0; i < train_labels.vlen; i++)
{
class_idx = train_labels.vector[i];

-if ( class_idx < 0 || class_idx >= m_num_classes )
+if (class_idx < 0 || class_idx >= m_num_classes)
{
SG_ERROR("found label out of {0, 1, 2, ..., num_classes-1}...")
return false;
@@ -185,16 +198,16 @@ bool CQDA::train_machine(CFeatures* data)
}
}

-for ( i = 0 ; i < m_num_classes ; ++i )
+for (int i = 0; i < m_num_classes; i++)
{
-if ( class_nums[i] <= 0 )
+if (class_nums[i] <= 0)
{
SG_ERROR("What? One class with no elements\n")
return false;
}
}

-if ( m_store_covs )
+if (m_store_covs)
{
// cov_dims will be free in m_covs.destroy_ndarray()
index_t * cov_dims = SG_MALLOC(index_t, 3);
@@ -221,63 +234,55 @@ bool CQDA::train_machine(CFeatures* data)
int32_t vlen;
bool vfree;
float64_t* vec;
-for ( k = 0 ; k < m_num_classes ; ++k )
+for (int k = 0; k < m_num_classes; k++)
{
-SGMatrix< float64_t > buffer(class_nums[k], m_dim);
-for ( i = 0 ; i < class_nums[k] ; ++i )
+EMatrix buffer(class_nums[k], m_dim);
+Eigen::Map< EVector > Em_means(m_means.get_column_vector(k), m_dim);
+for (int i = 0; i < class_nums[k]; i++)
{
vec = rf->get_feature_vector(class_idxs[k*num_vec + i], vlen, vfree);
ASSERT(vec)

-for ( j = 0 ; j < vlen ; ++j )
-{
-m_means[k*m_dim + j] += vec[j];
-buffer[i + j*class_nums[k]] = vec[j];
-}
+Eigen::Map< EVector > Evec(vec, vlen);
+Em_means += Evec;
+buffer.row(i) = Evec;

rf->free_feature_vector(vec, class_idxs[k*num_vec + i], vfree);
}

-for ( j = 0 ; j < m_dim ; ++j )
-m_means[k*m_dim + j] /= class_nums[k];
+Em_means /= class_nums[k];

-for ( i = 0 ; i < class_nums[k] ; ++i )
-for ( j = 0 ; j < m_dim ; ++j )
-buffer[i + j*class_nums[k]] -= m_means[k*m_dim + j];
+for (int i = 0; i < class_nums[k]; i++)
+buffer.row(i) -= Em_means;

-/* calling external lib, buffer = U * S * V^T, U is not interesting here */
-char jobu = 'N', jobvt = 'A';
-int m = class_nums[k], n = m_dim;
-int lda = m, ldu = m, ldvt = n;
-int info = -1;
+// SVD
float64_t * col = scalings.get_column_vector(k);
float64_t * rot_mat = rotations.get_matrix(k);

-wrap_dgesvd(jobu, jobvt, m, n, buffer.matrix, lda, col, NULL, ldu,
-rot_mat, ldvt, &info);
-ASSERT(info == 0)
-buffer=SGMatrix<float64_t>();
+Eigen::JacobiSVD<EMatrix> eSvd;
+eSvd.compute(buffer,Eigen::ComputeFullV);
+memcpy(col, eSvd.singularValues().data(), m_dim*sizeof(float64_t));
+memcpy(rot_mat, eSvd.matrixV().data(), m_dim*m_dim*sizeof(float64_t));

SGVector<float64_t>::vector_multiply(col, col, col, m_dim);
-SGVector<float64_t>::scale_vector(1.0/(m-1), col, m_dim);
+SGVector<float64_t>::scale_vector(1.0/(class_nums[k]-1), col, m_dim);
rotations.transpose_matrix(k);

-if ( m_store_covs )
+if (m_store_covs)
{
-SGMatrix< float64_t > M(n ,n);
+Eigen::Map< EMatrix > EM(SGVector<float64_t>::clone_vector(rot_mat, m_dim*m_dim), m_dim, m_dim);
+Eigen::Map< EArray > Escalings(scalings.get_column_vector(k), m_dim);
+for (int i = 0; i < m_dim; i++)
+EM.row(i) = ( (EM.row(i).array()) * Escalings ).matrix();

-M.matrix = SGVector<float64_t>::clone_vector(rot_mat, n*n);
-for ( i = 0 ; i < m_dim ; ++i )
-for ( j = 0 ; j < m_dim ; ++j )
-M[i + j*m_dim] *= scalings[k*m_dim + j];
+Eigen::Map< EMatrix > Em_covs(m_covs.get_matrix(k), m_dim, m_dim);
+Eigen::Map< EMatrix > Erot_mat(rot_mat, m_dim, m_dim);

-cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, n, n, n, 1.0,
-M.matrix, n, rot_mat, n, 0.0, m_covs.get_matrix(k), n);
+Em_covs = EM*Erot_mat;
}
}

/* Computation of terms required for classification */

SGVector< float32_t > sinvsqrt(m_dim);

// M_dims will be freed in m_M.destroy_ndarray()
@@ -291,16 +296,16 @@ bool CQDA::train_machine(CFeatures* data)
m_slog.zero();

index_t idx = 0;
-for ( k = 0 ; k < m_num_classes ; ++k )
+for (int k = 0; k < m_num_classes; k++)
{
-for ( j = 0 ; j < m_dim ; ++j )
+for (int j = 0; j < m_dim; j++)
{
sinvsqrt[j] = 1.0 / CMath::sqrt(scalings[k*m_dim + j]);
m_slog[k] += CMath::log(scalings[k*m_dim + j]);
}

-for ( i = 0 ; i < m_dim ; ++i )
-for ( j = 0 ; j < m_dim ; ++j )
+for (int i = 0; i < m_dim; i++)
+for (int j = 0; j < m_dim; j++)
{
idx = k*m_dim*m_dim + i + j*m_dim;
m_M[idx] = rotations[idx] * sinvsqrt[j];
@@ -311,21 +316,21 @@ bool CQDA::train_machine(CFeatures* data)
SG_PRINT(">>> QDA machine trained with %d classes\n", m_num_classes)

SG_PRINT("\n>>> Displaying means ...\n")
-CMath::display_matrix(m_means.matrix, m_dim, m_num_classes);
+SGMatrix< float64_t >::display_matrix(m_means.matrix, m_dim, m_num_classes);

SG_PRINT("\n>>> Displaying scalings ...\n")
-CMath::display_matrix(scalings.matrix, m_dim, m_num_classes);
+SGMatrix< float64_t >::display_matrix(scalings.matrix, m_dim, m_num_classes);

SG_PRINT("\n>>> Displaying rotations ... \n")
-for ( k = 0 ; k < m_num_classes ; ++k )
-CMath::display_matrix(rotations.get_matrix(k), m_dim, m_dim);
+for (int k = 0; k < m_num_classes; k++)
+SGMatrix< float64_t >::display_matrix(rotations.get_matrix(k), m_dim, m_dim);

SG_PRINT("\n>>> Displaying sinvsqrt ... \n")
sinvsqrt.display_vector();

SG_PRINT("\n>>> Diplaying m_M matrices ... \n")
-for ( k = 0 ; k < m_num_classes ; ++k )
-CMath::display_matrix(m_M.get_matrix(k), m_dim, m_dim);
+for (int k = 0; k < m_num_classes; k++)
+SGMatrix< float64_t >::display_matrix(m_M.get_matrix(k), m_dim, m_dim);

SG_PRINT("\n>>> Exit DEBUG_QDA\n")
#endif
@@ -335,4 +340,4 @@ bool CQDA::train_machine(CFeatures* data)
return true;
}

-#endif /* HAVE_LAPACK */
+#endif /* HAVE_EIGEN3 */
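In train_machine, Eigen::JacobiSVD now replaces wrap_dgesvd: buffer holds a class's centered samples, the right singular vectors V become the class rotation, and the squared singular values divided by class_nums[k]-1 become the scalings — exactly the eigendecomposition of the class sample covariance. A standalone sketch of that equivalence (the 4x2 data is made up for illustration):

#include <Eigen/Dense>
#include <iostream>

int main()
{
    Eigen::MatrixXd X(4, 2); // 4 samples, 2 dimensions
    X << 1,  2,
        -1,  0,
         2,  1,
        -2, -3;
    X.rowwise() -= X.colwise().mean(); // center, as QDA.cpp does per class

    Eigen::JacobiSVD<Eigen::MatrixXd> svd(X, Eigen::ComputeFullV);
    Eigen::VectorXd eigvals =
        (svd.singularValues().array().square() / (X.rows() - 1)).matrix();

    // Columns of V are the covariance eigenvectors; eigvals its eigenvalues.
    std::cout << eigvals << "\n" << svd.matrixV() << std::endl;
    return 0;
}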
4 changes: 2 additions & 2 deletions src/shogun/multiclass/QDA.h
@@ -13,7 +13,7 @@

#include <shogun/lib/config.h>

-#ifdef HAVE_LAPACK
+#ifdef HAVE_EIGEN3

#include <shogun/features/DotFeatures.h>
#include <shogun/features/DenseFeatures.h>
@@ -189,5 +189,5 @@ class CQDA : public CNativeMulticlassMachine
}; /* class QDA */
} /* namespace shogun */

-#endif /* HAVE_LAPACK */
+#endif /* HAVE_EIGEN3 */
#endif /* _QDA_H__ */
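For reference, the members this header's class carries (means, rotations, scalings, m_M, m_slog) together implement the Gaussian log-discriminant evaluated in apply_multiclass: a sample x scores -0.5*(||(x - mu_k)^T M_k||^2 + sum_j log s_kj) for class k, with M_k = R_k diag(1/sqrt(s_k)). A sketch of that score for one class — qda_score is a hypothetical helper for illustration, not Shogun API:

#include <Eigen/Dense>

// Hypothetical helper, not part of Shogun: per-class QDA log-discriminant,
// mirroring the norm2 and m_slog terms in QDA.cpp.
double qda_score(const Eigen::VectorXd& x, const Eigen::VectorXd& mean,
                 const Eigen::MatrixXd& rotation, const Eigen::VectorXd& scalings)
{
    // M_k = R_k * diag(1/sqrt(s_k)) whitens the class covariance.
    Eigen::MatrixXd M =
        rotation * scalings.array().sqrt().inverse().matrix().asDiagonal();
    double norm2 = ((x - mean).transpose() * M).squaredNorm();
    double slog  = scalings.array().log().sum(); // log|Sigma_k|, rotation is orthogonal
    return -0.5 * (norm2 + slog);
}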
4 changes: 2 additions & 2 deletions tests/unit/multiclass/MCLDA_unittest.cc
@@ -5,7 +5,7 @@
#include <gtest/gtest.h>
#ifdef HAVE_EIGEN3

-#define NUM 50
+#define NUM 50
#define DIMS 2
#define CLASSES 2

@@ -30,7 +30,7 @@ TEST(MCLDA, train_and_apply)

CMulticlassLabels* output=CLabelsFactory::to_multiclass(lda->apply());
SG_REF(output);

// Test
for ( index_t i = 0; i < CLASSES*NUM; ++i )
EXPECT_EQ(output->get_label(i), labels->get_label(i));
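The new QDA unit test itself (tests/unit/multiclass/QDA_unittest.cc, the sixth changed file) is not expanded on this page. By analogy with the MCLDA test above it presumably follows the same train-and-apply pattern; the sketch below is hypothetical — the includes, data setup, seed, and cluster offsets are assumptions, not the committed file:

#include <gtest/gtest.h>
#ifdef HAVE_EIGEN3
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/multiclass/QDA.h>
#include <random>

#define NUM 50
#define DIMS 2
#define CLASSES 2

using namespace shogun;

TEST(QDA, train_and_apply)
{
    SGVector<float64_t> lab(CLASSES*NUM);
    SGMatrix<float64_t> feat(DIMS, CLASSES*NUM);

    // Two well-separated Gaussian blobs (assumed toy data).
    std::mt19937 rng(17);
    std::normal_distribution<double> noise(0.0, 1.0);
    for (int i = 0; i < CLASSES*NUM; ++i)
    {
        int c = i / NUM;
        lab[i] = c;
        for (int d = 0; d < DIMS; ++d)
            feat(d, i) = 10.0*c + noise(rng);
    }

    CMulticlassLabels* labels = new CMulticlassLabels(lab);
    CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(feat);

    CQDA* qda = new CQDA(features, labels);
    SG_REF(qda);
    qda->train();

    CMulticlassLabels* output = CLabelsFactory::to_multiclass(qda->apply());
    SG_REF(output);

    for (index_t i = 0; i < CLASSES*NUM; ++i)
        EXPECT_EQ(output->get_label(i), labels->get_label(i));

    SG_UNREF(output);
    SG_UNREF(qda);
}
#endif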
