Skip to content

Commit

Permalink
PCA eigen-ized
Browse files Browse the repository at this point in the history
  • Loading branch information
mazumdarparijat committed Mar 3, 2014
1 parent ad04382 commit 5cae1ab
Show file tree
Hide file tree
Showing 2 changed files with 164 additions and 68 deletions.
85 changes: 29 additions & 56 deletions src/shogun/preprocessor/PCA.cpp
Expand Up @@ -71,72 +71,48 @@ bool CPCA::init(CFeatures* features)
{
if (!m_initialized)
{
// loop varibles
int32_t i,j,k;
// loop variable
int32_t i;

ASSERT(features->get_feature_class()==C_DENSE)
ASSERT(features->get_feature_type()==F_DREAL)

int32_t num_vectors=((CDenseFeatures<float64_t>*)features)->get_num_vectors();
int32_t num_features=((CDenseFeatures<float64_t>*)features)->get_num_features();
int32_t num_vectors = ((CDenseFeatures<float64_t>*)features)->get_num_vectors();
int32_t num_features = ((CDenseFeatures<float64_t>*)features)->get_num_features();
SG_INFO("num_examples: %ld num_features: %ld \n", num_vectors, num_features)

SGMatrix<float64_t> feature_matrix = ((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
m_mean_vector.vlen = num_features;
m_mean_vector.vector = SG_CALLOC(float64_t, num_features);

// sum
SGMatrix<float64_t> feature_matrix = ((CDenseFeatures<float64_t>*)features)->get_feature_matrix();
for (i=0; i<num_vectors; i++)
{
for (j=0; j<num_features; j++)
m_mean_vector.vector[j] += feature_matrix.matrix[i*num_features+j];
}

//divide
for (i=0; i<num_features; i++)
m_mean_vector.vector[i] /= num_vectors;

float64_t* cov = SG_CALLOC(float64_t, num_features*num_features);

float64_t* sub_mean = SG_MALLOC(float64_t, num_features);

for (i=0; i<num_vectors; i++)
{
for (k=0; k<num_features; k++)
sub_mean[k]=feature_matrix.matrix[i*num_features+k]-m_mean_vector.vector[k];

cblas_dger(CblasColMajor,
num_features,num_features,
1.0,sub_mean,1,
sub_mean,1,
cov, num_features);
}

SG_FREE(sub_mean);
// center data
Map<MatrixXd> fmatrix(feature_matrix.matrix, num_features, num_vectors);
Map<VectorXd> data_mean(m_mean_vector.vector, num_features);
data_mean = fmatrix.rowwise().sum()/(float64_t) num_vectors;
fmatrix = fmatrix.colwise()-data_mean;

for (i=0; i<num_features; i++)
{
for (j=0; j<num_features; j++)
cov[i*num_features+j]/=(num_vectors-1);
}
// covariance matrix
MatrixXd cov_mat(num_features, num_features);
cov_mat = fmatrix*fmatrix.transpose();
cov_mat /= (num_vectors-1);

SG_INFO("Computing Eigenvalues ... ")

m_eigenvalues_vector.vector = SGMatrix<float64_t>::compute_eigenvectors(cov,num_features,num_features);
// eigen value computed
SelfAdjointEigenSolver<MatrixXd> eigenSolve = SelfAdjointEigenSolver<MatrixXd>(cov_mat);
m_eigenvalues_vector.vector = SG_MALLOC(float64_t, num_features);
m_eigenvalues_vector.vlen = num_features;
num_dim=0;
Map<VectorXd> eigenValues(m_eigenvalues_vector.vector, num_features);
eigenValues = eigenSolve.eigenvalues();

num_dim=0;
if (m_mode == FIXED_NUMBER)
{
ASSERT(m_target_dim <= num_features)
num_dim = m_target_dim;
}
if (m_mode == VARIANCE_EXPLAINED)
{
float64_t eig_sum = 0;
for (i=0; i<num_features; i++)
eig_sum += m_eigenvalues_vector.vector[i];

float64_t eig_sum = eigenValues.sum();
float64_t com_sum = 0;
for (i=num_features-1; i>-1; i--)
{
Expand All @@ -160,22 +136,19 @@ bool CPCA::init(CFeatures* features)
SG_INFO("Done\nReducing from %i to %i features..", num_features, num_dim)

m_transformation_matrix = SGMatrix<float64_t>(num_features,num_dim);
Map<MatrixXd> transformMatrix(m_transformation_matrix.matrix, num_features, num_dim);
num_old_dim = num_features;

int32_t offs=0;
for (i=num_features-num_dim; i<num_features; i++)
// eigenvector matrix
transformMatrix = eigenSolve.eigenvectors().block(0, num_features-num_dim, num_features, num_dim);
if (m_whitening)
{
for (k=0; k<num_features; k++)
if (m_whitening)
m_transformation_matrix.matrix[offs+k*num_dim] =
cov[num_features*i+k]/sqrt(m_eigenvalues_vector.vector[i]);
else
m_transformation_matrix.matrix[offs+k*num_dim] =
cov[num_features*i+k];
offs++;
for (i=0; i<num_dim; i++)
transformMatrix.col(i) /= sqrt(eigenValues[i+num_features-num_dim]);
}

SG_FREE(cov);
// restore feature matrix
fmatrix = fmatrix.colwise()+data_mean;
m_initialized = true;
return true;
}
Expand Down
147 changes: 135 additions & 12 deletions tests/unit/preprocessor/PCA_unittest.cc
Expand Up @@ -11,6 +11,7 @@
#include <shogun/mathematics/Math.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>
#include <shogun/preprocessor/PCA.h>

using namespace shogun;
Expand All @@ -34,9 +35,9 @@ TEST(PCA, PCA_output_test_N_greaterthan_D)

EXPECT_EQ(1,returned_matrix.num_rows);
EXPECT_EQ(3,returned_matrix.num_cols);
EXPECT_NEAR(-1,returned_matrix(0,0),0.0000001);
EXPECT_NEAR(1,returned_matrix(0,0),0.0000001);
EXPECT_NEAR(0,returned_matrix(0,1),0.0000001);
EXPECT_NEAR(1,returned_matrix(0,2),0.0000001);
EXPECT_NEAR(-1,returned_matrix(0,2),0.0000001);

SG_UNREF(pca);
SG_UNREF(features);
Expand Down Expand Up @@ -85,8 +86,8 @@ TEST(PCA, PCA_output_test_N_equals_D)

EXPECT_EQ(1,returned_matrix.num_rows);
EXPECT_EQ(2,returned_matrix.num_cols);
EXPECT_NEAR(-0.5,returned_matrix(0,0),0.0000001);
EXPECT_NEAR(0.5,returned_matrix(0,1),0.0000001);
EXPECT_NEAR(0.5,returned_matrix(0,0),0.0000001);
EXPECT_NEAR(-0.5,returned_matrix(0,1),0.0000001);

SG_UNREF(pca);
SG_UNREF(features);
Expand All @@ -111,9 +112,9 @@ TEST(PCA, PCA_output_test_N_greaterthan_D_IN_PLACE)

EXPECT_EQ(1,returned_matrix.num_rows);
EXPECT_EQ(3,returned_matrix.num_cols);
EXPECT_NEAR(-1,returned_matrix(0,0),0.0000001);
EXPECT_NEAR(1,returned_matrix(0,0),0.0000001);
EXPECT_NEAR(0,returned_matrix(0,1),0.0000001);
EXPECT_NEAR(1,returned_matrix(0,2),0.0000001);
EXPECT_NEAR(-1,returned_matrix(0,2),0.0000001);

SG_UNREF(pca);
SG_UNREF(features);
Expand Down Expand Up @@ -160,11 +161,133 @@ TEST(PCA, PCA_output_test_N_equals_D_IN_PLACE)

SGMatrix<float64_t> returned_matrix=pca->apply_to_feature_matrix(features);

EXPECT_EQ(1,returned_matrix.num_rows);
EXPECT_EQ(2,returned_matrix.num_cols);
EXPECT_NEAR(-0.5,returned_matrix(0,0),0.0000001);
EXPECT_NEAR(0.5,returned_matrix(0,1),0.0000001);
EXPECT_EQ(1,returned_matrix.num_rows);
EXPECT_EQ(2,returned_matrix.num_cols);
EXPECT_NEAR(0.5,returned_matrix(0,0),0.0000001);
EXPECT_NEAR(-0.5,returned_matrix(0,1),0.0000001);

SG_UNREF(pca);
SG_UNREF(features);
SG_UNREF(pca);
SG_UNREF(features);
}

TEST(PCA, PCA_rigorous_test_N_greater_D)
{
SGMatrix<float64_t> data(3,5);
data(0,0)=2.908008030729362;
data(0,1)=-1.058180257987362;
data(0,2)=1.098424617888623;
data(0,3)=-2.051816299911149;
data(0,4)=-1.577057022799202;
data(1,0)=0.825218894228491;
data(1,1)=-0.468615581100624;
data(1,2)=-0.277871932787639;
data(1,3)=-0.353849997774433;
data(1,4)=0.507974650905946;
data(2,0)=1.378971977916614;
data(2,1)=-0.272469409250187;
data(2,2)=0.701541458163284;
data(2,3)=-0.823586525156853;
data(2,4)=0.281984063670556;

CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
CPCA* pca=new CPCA();
pca->set_target_dim(3);
pca->init(features);

SGMatrix<float64_t> transmat=pca->get_transformation_matrix();
SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
SGVector<float64_t> eigvec=pca->get_eigenvalues();

float64_t epsilon = 0.00000000000001;

EXPECT_NEAR(0.041855883987175,eigvec[0],epsilon);
EXPECT_NEAR(0.291219269837891,eigvec[1],epsilon);
EXPECT_NEAR(5.077526030285309,eigvec[2],epsilon);

EXPECT_NEAR(0.238820512479407,transmat(0,0),epsilon);
EXPECT_NEAR(-0.304406370622002,transmat(0,1),epsilon);
EXPECT_NEAR(0.922117955764778,transmat(0,2),epsilon);
EXPECT_NEAR(0.502124514814308,transmat(1,0),epsilon);
EXPECT_NEAR(0.851501730295596,transmat(1,1),epsilon);
EXPECT_NEAR(0.151048915673366,transmat(1,2),epsilon);
EXPECT_NEAR(-0.831165287076865,transmat(2,0),epsilon);
EXPECT_NEAR(0.426944451689378,transmat(2,1),epsilon);
EXPECT_NEAR(0.356205980761254,transmat(2,2),epsilon);

EXPECT_NEAR(0.182350122017013,finalmat(0,0),epsilon);
EXPECT_NEAR(-0.041902251203685,finalmat(0,1),epsilon);
EXPECT_NEAR(-0.240647729898028,finalmat(0,2),epsilon);
EXPECT_NEAR(0.236493108746648,finalmat(0,3),epsilon);
EXPECT_NEAR(-0.136293249661948,finalmat(0,4),epsilon);
EXPECT_NEAR(0.216971008375464,finalmat(1,0),epsilon);
EXPECT_NEAR(-0.382472041452699,finalmat(1,1),epsilon);
EXPECT_NEAR(-0.460689222275080,finalmat(1,2),epsilon);
EXPECT_NEAR(-0.217576202298234,finalmat(1,3),epsilon);
EXPECT_NEAR(0.843766457650550,finalmat(1,4),epsilon);
EXPECT_NEAR(3.325638119909419,finalmat(2,0),epsilon);
EXPECT_NEAR(-1.115340910605008,finalmat(2,1),epsilon);
EXPECT_NEAR(1.249063286478502,finalmat(2,2),epsilon);
EXPECT_NEAR(-2.210566542225781,finalmat(2,3),epsilon);
EXPECT_NEAR(-1.248793953557132,finalmat(2,4),epsilon);

SG_UNREF(pca);
SG_UNREF(features);
}

TEST(PCA, PCA_rigorous_test_N_less_D)
{
SGMatrix<float64_t> data(5,3);
data(0,0)=0.033479882244451;
data(0,1)=0.022889792751630;
data(0,2)=-0.979206305167302;
data(1,0)=-1.333677943428106;
data(1,1)=-0.261995434966092;
data(1,2)=-1.156401655664002;
data(2,0)=1.127492278341590;
data(2,1)=-1.750212368446790;
data(2,2)=-0.533557109315987;
data(3,0)=0.350179410603312;
data(3,1)=-0.285650971595330;
data(3,2)=-2.002635735883060;
data(4,0)=-0.299066030332982;
data(4,1)=-0.831366511567624;
data(4,2)=0.964229422631627;

CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);
CPCA* pca=new CPCA();
pca->set_target_dim(2);
pca->init(features);

SGMatrix<float64_t> transmat=pca->get_transformation_matrix();
SGMatrix<float64_t> finalmat=pca->apply_to_feature_matrix(features);
SGVector<float64_t> eigvec=pca->get_eigenvalues();

float64_t epsilon = 0.00000000000001;

EXPECT_NEAR(0,eigvec[0],epsilon);
EXPECT_NEAR(0,eigvec[1],epsilon);
EXPECT_NEAR(0,eigvec[2],epsilon);
EXPECT_NEAR(2.327794822241147,eigvec[3],epsilon);
EXPECT_NEAR(2.759160840481412,eigvec[4],epsilon);

EXPECT_NEAR(0.258049566055304,transmat(0,0),epsilon);
EXPECT_NEAR(0.257746561935451,transmat(0,1),epsilon);
EXPECT_NEAR(0.349092719192590,transmat(1,0),epsilon);
EXPECT_NEAR(-0.129544636386834,transmat(1,1),epsilon);
EXPECT_NEAR(-0.630860251575450,transmat(2,0),epsilon);
EXPECT_NEAR(0.648487498866225,transmat(2,1),epsilon);
EXPECT_NEAR(0.374280965623520,transmat(3,0),epsilon);
EXPECT_NEAR(0.647067522254220,transmat(3,1),epsilon);
EXPECT_NEAR(-0.522947221638548,transmat(4,0),epsilon);
EXPECT_NEAR(-0.278482463454826,transmat(4,1),epsilon);

EXPECT_NEAR(-0.511467003751085,finalmat(0,0),epsilon);
EXPECT_NEAR(1.715732114990145,finalmat(0,1),epsilon);
EXPECT_NEAR(-1.204265111239059,finalmat(0,2),epsilon);
EXPECT_NEAR(1.835430614937060,finalmat(1,0),epsilon);
EXPECT_NEAR(-0.435473994643473,finalmat(1,1),epsilon);
EXPECT_NEAR(-1.39995662029358,finalmat(1,2),epsilon);

SG_UNREF(pca);
SG_UNREF(features);
}

0 comments on commit 5cae1ab

Please sign in to comment.