Skip to content

Commit

Permalink
Add option to compute DotFeatures covariance matrix by matrix product…
Browse files Browse the repository at this point in the history
… and set it as default.

Add unit tests for DotFeatures::get_cov/compute_cov.
  • Loading branch information
micmn committed Jul 10, 2017
1 parent 5e5ebb0 commit a26f9f1
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 34 deletions.
86 changes: 54 additions & 32 deletions src/shogun/features/DotFeatures.cpp
Expand Up @@ -305,42 +305,52 @@ CDotFeatures::compute_mean(CDotFeatures* lhs, CDotFeatures* rhs)
return mean;
}

SGMatrix<float64_t> CDotFeatures::get_cov()
SGMatrix<float64_t> CDotFeatures::get_cov(bool copy_data_for_speed)
{
int32_t num=get_num_vectors();
int32_t dim=get_dim_feature_space();
ASSERT(num>0)
ASSERT(dim>0)

SGMatrix<float64_t> cov(dim, dim);
linalg::zero(cov);

SGVector<float64_t> mean = get_mean();
for (int i = 0; i < num; i++)

if (copy_data_for_speed)
{
SGVector<float64_t> v = get_computed_dot_feature_vector(i);
linalg::add(v, mean, v, 1.0, -1.0);
for (int m = 0; m < v.vlen; m++)
SGMatrix<float64_t> centered_data(dim, num);
for (int i = 0; i < num; i++)
{
for (int n = 0; n <= m ; n++)
{
(cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n];
}
SGVector<float64_t> v = get_computed_dot_feature_vector(i);
centered_data.set_column(i, linalg::add(v, mean, 1.0, -1.0));
}

cov = linalg::matrix_prod(centered_data, centered_data, false, true);
}
for (int m = 0; m < dim-1; m++)
else
{
for (int n = m+1; n < dim; n++)
linalg::zero(cov);
for (int i = 0; i < num; i++)
{
SGVector<float64_t> v = get_computed_dot_feature_vector(i);
linalg::add(v, mean, v, 1.0, -1.0);
for (int m = 0; m < v.vlen; m++)
linalg::add_col_vec(cov, m, v, cov, 1.0, v.vector[m]);
}
for (int m = 0; m < dim - 1; m++)
{
(cov.matrix)[m*dim+n] = (cov.matrix)[n*dim+m];
for (int n = m + 1; n < dim; n++)
{
(cov.matrix)[m * dim + n] = (cov.matrix)[n * dim + m];
}
}
}
linalg::scale(cov, cov, 1.0 / num);

return cov;
}

SGMatrix<float64_t> CDotFeatures::compute_cov(CDotFeatures* lhs, CDotFeatures* rhs)
SGMatrix<float64_t> CDotFeatures::compute_cov(
CDotFeatures* lhs, CDotFeatures* rhs, bool copy_data_for_speed)
{
CDotFeatures* feats[2];
feats[0]=lhs;
Expand All @@ -361,34 +371,46 @@ SGMatrix<float64_t> CDotFeatures::compute_cov(CDotFeatures* lhs, CDotFeatures* r
int32_t dim = dims[0];

SGMatrix<float64_t> cov(dim, dim);
linalg::zero(cov);

SGVector<float64_t> mean = compute_mean(lhs, rhs);

for (int i = 0; i < 2; i++)
if (copy_data_for_speed)
{
for (int j = 0; j < nums[i]; j++)
SGMatrix<float64_t> centered_data(dim, num);
for (int i = 0; i < num; i++)
{
SGVector<float64_t> v = feats[i]->get_computed_dot_feature_vector(j);
linalg::add(v, mean, v, 1.0, -1.0);
for (int m = 0; m < v.vlen; m++)
SGVector<float64_t> v =
i < nums[0] ? lhs->get_computed_dot_feature_vector(i)
: rhs->get_computed_dot_feature_vector(i - nums[0]);

centered_data.set_column(i, linalg::add(v, mean, 1.0, -1.0));
}

cov = linalg::matrix_prod(centered_data, centered_data, false, true);
}
else
{
linalg::zero(cov);
for (int i = 0; i < 2; i++)
{
for (int j = 0; j < nums[i]; j++)
{
for (int n = 0; n <= m; n++)
{
(cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n];
}
SGVector<float64_t> v =
feats[i]->get_computed_dot_feature_vector(j);
linalg::add(v, mean, v, 1.0, -1.0);
for (int m = 0; m < v.vlen; m++)
linalg::add_col_vec(cov, m, v, cov, 1.0, v.vector[m]);
}
}
}
linalg::scale(cov, cov, 1.0 / num);

for (int m = 0; m < dim-1; m++)
{
for (int n = m+1; n < dim; n++)
for (int m = 0; m < dim - 1; m++)
{
(cov.matrix[m*dim+n]) = (cov.matrix)[n*dim+m];
for (int n = m + 1; n < dim; n++)
{
(cov.matrix[m * dim + n]) = (cov.matrix)[n * dim + m];
}
}
}
linalg::scale(cov, cov, 1.0 / num);

return cov;
}
Expand Down
15 changes: 13 additions & 2 deletions src/shogun/features/DotFeatures.h
Expand Up @@ -218,15 +218,26 @@ class CDotFeatures : public CFeatures

/** get covariance
*
* @param copy_data_for_speed if true, the method stores explicitly
* the centered data matrix and the covariance is calculated by matrix
* product of the centered data with its transpose, this make it
* possible to take advantage of multithreaded matrix product,
* this may not be possible if the data doesn't fit into memory,
* in such case set this parameter to false to compute iteratively
* the covariance matrix without storing the centered data.
* [default = true]
* @return covariance
*/
virtual SGMatrix<float64_t> get_cov();
virtual SGMatrix<float64_t> get_cov(bool copy_data_for_speed = true);

/** compute the covariance of two CDotFeatures together
*
* @param copy_data_for_speed @see CDotFeatures::get_cov
* @return covariance
*/
static SGMatrix<float64_t> compute_cov(CDotFeatures* lhs, CDotFeatures* rhs);
static SGMatrix<float64_t> compute_cov(
CDotFeatures* lhs, CDotFeatures* rhs,
bool copy_data_for_speed = true);

private:
void init();
Expand Down
125 changes: 125 additions & 0 deletions tests/unit/features/DotFeatures_unittest.cc
@@ -0,0 +1,125 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
*/

#include <gtest/gtest.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/DotFeatures.h>

using namespace shogun;

class DotFeaturesTest : public ::testing::Test
{
protected:
virtual void SetUp()
{
SGMatrix<float64_t> data_a(dims, num_a);
data_a(0, 0) = 1.01611997;
data_a(1, 0) = 0.88935567;
data_a(2, 0) = -0.53592717;
data_a(0, 1) = 0.24132379;
data_a(1, 1) = 0.50475675;
data_a(2, 1) = 0.66029218;
data_a(0, 2) = 0.776238;
data_a(1, 2) = 0.19904003;
data_a(2, 2) = -0.60085628;
data_a(0, 3) = 0.86905328;
data_a(1, 3) = -1.22505732;
data_a(2, 3) = -1.12045593;
data_a(0, 4) = -0.60848342;
data_a(1, 4) = -1.45115708;
data_a(2, 4) = 1.15711328;
feats_a = new CDenseFeatures<float64_t>(data_a);
SG_REF(feats_a);

SGMatrix<float64_t> data_b(dims, num_b);
data_b(0, 0) = 0.14210129;
data_b(1, 0) = -0.36770534;
data_b(2, 0) = 0.80232687;
data_b(0, 1) = -0.10386986;
data_b(1, 1) = 0.3970658;
data_b(2, 1) = 1.15765292;
data_b(0, 2) = 1.22478326;
data_b(1, 2) = 0.61167198;
data_b(2, 2) = 0.49287339;
data_b(0, 3) = 0.04932024;
data_b(1, 3) = -1.0330936;
data_b(2, 3) = -0.87217125;
feats_b = new CDenseFeatures<float64_t>(data_b);
SG_REF(feats_b);

ref_cov_a = SGMatrix<float64_t>(dims, dims);
ref_cov_a(0, 0) = 0.353214;
ref_cov_a(1, 0) = 0.29906652;
ref_cov_a(2, 0) = -0.46552636;
ref_cov_a(0, 1) = 0.29906652;
ref_cov_a(1, 1) = 0.8914735;
ref_cov_a(2, 1) = -0.13294825;
ref_cov_a(0, 2) = -0.46552636;
ref_cov_a(1, 2) = -0.13294825;
ref_cov_a(2, 2) = 0.72797476;

ref_cov_ab = SGMatrix<float64_t>(dims, dims);
ref_cov_ab(0, 0) = 0.32300248;
ref_cov_ab(1, 0) = 0.24380185;
ref_cov_ab(2, 0) = -0.27024556;
ref_cov_ab(0, 1) = 0.24380185;
ref_cov_ab(1, 1) = 0.68716546;
ref_cov_ab(2, 1) = 0.10940845;
ref_cov_ab(0, 2) = -0.27024556;
ref_cov_ab(1, 2) = 0.10940845;
ref_cov_ab(2, 2) = 0.72460503;
}

virtual void TearDown()
{
SG_UNREF(feats_a);
SG_UNREF(feats_b);
}

const index_t num_a = 5;
const index_t num_b = 4;
const index_t dims = 3;
const float64_t eps = 1e-8;

CDenseFeatures<float64_t>* feats_a;
CDenseFeatures<float64_t>* feats_b;
SGMatrix<float64_t> ref_cov_a;
SGMatrix<float64_t> ref_cov_ab;
};

TEST_F(DotFeaturesTest, get_cov)
{
auto cov = feats_a->CDotFeatures::get_cov();

for (index_t i = 0; i < (index_t)cov.size(); ++i)
EXPECT_NEAR(cov[i], ref_cov_a[i], eps);
}

TEST_F(DotFeaturesTest, get_cov_nocopy)
{
auto cov = feats_a->CDotFeatures::get_cov(false);

for (index_t i = 0; i < (index_t)cov.size(); ++i)
EXPECT_NEAR(cov[i], ref_cov_a[i], eps);
}

TEST_F(DotFeaturesTest, compute_cov)
{
auto cov = CDotFeatures::compute_cov(feats_a, feats_b);

for (index_t i = 0; i < (index_t)cov.size(); ++i)
EXPECT_NEAR(cov[i], ref_cov_ab[i], eps);
}

TEST_F(DotFeaturesTest, compute_cov_nocopy)
{
auto cov = CDotFeatures::compute_cov(feats_a, feats_b, false);

for (index_t i = 0; i < (index_t)cov.size(); ++i)
EXPECT_NEAR(cov[i], ref_cov_ab[i], eps);
}

0 comments on commit a26f9f1

Please sign in to comment.