Skip to content

Commit

Permalink
removed temporary permuted matrix creation for mmd
Browse files Browse the repository at this point in the history
  - since SIMD doesn't help in this case, rather slows things down
    due to temporary matrix creation
  • Loading branch information
lambday authored and karlnapf committed Jul 1, 2016
1 parent 023f9fb commit a8febee
Show file tree
Hide file tree
Showing 6 changed files with 376 additions and 37 deletions.
4 changes: 1 addition & 3 deletions src/shogun/statistical_testing/MMD.cpp
Expand Up @@ -93,18 +93,16 @@ void CMMD::Self::create_statistic_job()
{
case EStatisticType::UNBIASED_FULL:
statistic_job = mmd::UnbiasedFull(Bx);
permutation_job = mmd::WithinBlockPermutation<mmd::UnbiasedFull>(Bx);
break;
case EStatisticType::UNBIASED_INCOMPLETE:
statistic_job = mmd::UnbiasedIncomplete(Bx);
permutation_job = mmd::WithinBlockPermutation<mmd::UnbiasedIncomplete>(Bx);
break;
case EStatisticType::BIASED_FULL:
statistic_job = mmd::BiasedFull(Bx);
permutation_job = mmd::WithinBlockPermutation<mmd::BiasedFull>(Bx);
break;
default : break;
};
permutation_job = mmd::WithinBlockPermutation(Bx, statistic_type);
}

void CMMD::Self::create_variance_job()
Expand Down
4 changes: 1 addition & 3 deletions src/shogun/statistical_testing/QuadraticTimeMMD.cpp
Expand Up @@ -93,18 +93,16 @@ void CQuadraticTimeMMD::Self::create_statistic_job()
{
case EStatisticType::UNBIASED_FULL:
statistic_job=mmd::UnbiasedFull(Nx);
permutation_job=mmd::WithinBlockPermutation<mmd::UnbiasedFull>(Nx);
break;
case EStatisticType::UNBIASED_INCOMPLETE:
statistic_job=mmd::UnbiasedIncomplete(Nx);
permutation_job=mmd::WithinBlockPermutation<mmd::UnbiasedIncomplete>(Nx);
break;
case EStatisticType::BIASED_FULL:
statistic_job=mmd::BiasedFull(Nx);
permutation_job=mmd::WithinBlockPermutation<mmd::BiasedFull>(Nx);
break;
default : break;
};
permutation_job = mmd::WithinBlockPermutation(Nx, owner.get_statistic_type());
SG_SDEBUG("Leaving\n");
}

Expand Down
Expand Up @@ -16,11 +16,13 @@
* along with this program. If not, see <http:/www.gnu.org/licenses/>.
*/

#include <shogun/io/SGIO.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/GPUMatrix.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/mathematics/Math.h>
#include <shogun/statistical_testing/MMD.h>
#include <shogun/statistical_testing/internals/mmd/WithinBlockPermutation.h>
#include <shogun/statistical_testing/internals/mmd/BiasedFull.h>
#include <shogun/statistical_testing/internals/mmd/UnbiasedFull.h>
Expand All @@ -30,30 +32,84 @@ using namespace shogun;
using namespace internal;
using namespace mmd;

template <class T>
WithinBlockPermutation<T>::WithinBlockPermutation(index_t n) : n_x(n)
WithinBlockPermutation::WithinBlockPermutation(index_t n, EStatisticType type)
: n_x(n), stype(type)
{
}

template <class T>
typename T::return_type WithinBlockPermutation<T>::operator()(SGMatrix<float64_t> km)
float64_t WithinBlockPermutation::operator()(SGMatrix<float64_t> km)
{
// http:/stackoverflow.com/questions/15858569/randomly-permute-rows-columns-of-a-matrix-with-eigen
SG_SDEBUG("Entering!\n");
inds.resize(km.num_rows);
std::iota(inds.data(), inds.data()+inds.size(), 0);
SGVector<index_t> permuted_inds(inds.data(), inds.size(), false);
CMath::permute(permuted_inds);

Eigen::Map<Eigen::MatrixXd> map(km.matrix, km.num_rows, km.num_cols);
const index_t n_y=km.num_rows-n_x;
SG_SDEBUG("number of samples are %d and %d!\n", n_x, n_y);

Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> perm(km.num_rows);
perm.setIdentity();
SGVector<int> inds(perm.indices().data(), perm.indices().size(), false);
CMath::permute(inds);
auto term_1=0.0;
for (auto i=0; i<n_x; ++i)
{
for (auto j=0; j<n_x; ++j)
{
if (i>j)
term_1+=km(permuted_inds[i], permuted_inds[j]);
}
}
term_1*=2;
SG_SDEBUG("term_1 sum (without diagonal) = %f!\n", term_1);
if (stype==EStatisticType::BIASED_FULL)
{
for (auto i=0; i<n_x; ++i)
term_1+=km(permuted_inds[i], permuted_inds[i]);
SG_SDEBUG("term_1 sum (with diagonal) = %f!\n", term_1);
term_1/=n_x*n_x;
}
else
term_1/=n_x*(n_x-1);
SG_SDEBUG("term_1 (normalized) = %f!\n", term_1);

Eigen::MatrixXd permuted = perm.transpose() * map * perm;
SGMatrix<float64_t> permuted_km(permuted.data(), permuted.rows(), permuted.cols(), false);
auto term_2=0.0;
for (auto i=n_x; i<n_x+n_y; ++i)
{
for (auto j=n_x; j<n_x+n_y; ++j)
{
if (i>j)
term_2+=km(permuted_inds[i], permuted_inds[j]);
}
}
term_2*=2.0;
SG_SDEBUG("term_2 sum (without diagonal) = %f!\n", term_2);
if (stype==EStatisticType::BIASED_FULL)
{
for (auto i=n_x; i<n_x+n_y; ++i)
term_2+=km(permuted_inds[i], permuted_inds[i]);
SG_SDEBUG("term_2 sum (with diagonal) = %f!\n", term_2);
term_2/=n_y*n_y;
}
else
term_2/=n_y*(n_y-1);
SG_SDEBUG("term_2 (normalized) = %f!\n", term_2);

T statistic(n_x);
return statistic(permuted_km);
}
auto term_3=0.0;
for (auto i=n_x; i<n_x+n_y; ++i)
{
for (auto j=0; j<n_x; ++j)
term_3+=km(permuted_inds[i], permuted_inds[j]);
}
SG_SDEBUG("term_3 sum (with diagonal) = %f!\n", term_3);
if (stype==EStatisticType::UNBIASED_INCOMPLETE)
{
for (auto i=0; i<n_x; ++i)
term_3-=km(permuted_inds[i+n_x], permuted_inds[i]);
SG_SDEBUG("term_3 sum (without diagonal) = %f!\n", term_3);
term_3/=n_x*(n_x-1);
}
else
term_3/=n_x*n_y;
SG_SDEBUG("term_3 (normalized) = %f!\n", term_3);

template class WithinBlockPermutation<BiasedFull>;
template class WithinBlockPermutation<UnbiasedFull>;
template class WithinBlockPermutation<UnbiasedIncomplete>;
SG_SDEBUG("Leaving!\n");
return term_1+term_2-2*term_3;
}
Expand Up @@ -26,21 +26,23 @@ namespace shogun

template <typename T> class SGMatrix;
template <typename T> class CGPUMatrix;
enum class EStatisticType;

namespace internal
{

namespace mmd
{

template <class Statistic>
struct WithinBlockPermutation
{
using return_type = typename Statistic::return_type;
WithinBlockPermutation(index_t n);
using return_type = float64_t;
WithinBlockPermutation(index_t, EStatisticType);
return_type operator()(SGMatrix<float64_t> kernel_matrix);
// return_type operator()(CGPUMatrix<float64_t> kernel_matrix);
index_t n_x;
const index_t n_x;
const EStatisticType stype;
std::vector<index_t> inds;
};

}
Expand Down
86 changes: 77 additions & 9 deletions tests/unit/statistical_testing/QuadraticTimeMMD_unittest.cc
Expand Up @@ -238,7 +238,7 @@ TEST(QuadraticTimeMMD, biased_different_num_samples)
EXPECT_NEAR(statistic, 0.54418915736201567, 1E-8);
}

TEST(QuadraticTimeMMD,compute_variance_null)
TEST(QuadraticTimeMMD, compute_variance_null)
{
index_t m=8;
index_t d=3;
Expand Down Expand Up @@ -289,7 +289,7 @@ TEST(QuadraticTimeMMD,compute_variance_null)
EXPECT_NEAR(var, 0.0064888052500342575, 1E-10);
}

TEST(QuadraticTimeMMD, perform_test_permutation)
TEST(QuadraticTimeMMD, perform_test_permutation_biased_full)
{
const index_t m=20;
const index_t n=30;
Expand Down Expand Up @@ -317,24 +317,92 @@ TEST(QuadraticTimeMMD, perform_test_permutation)
auto mmd=some<CQuadraticTimeMMD>(feat_p, feat_q);
mmd->set_kernel(kernel);

index_t num_null_samples=250;
index_t num_null_samples=10;
mmd->set_num_null_samples(num_null_samples);
mmd->set_null_approximation_method(ENullApproximationMethod::PERMUTATION);

// biased case

// compute p-value using permutation for null distribution and
// assert against local machine computed result
mmd->set_statistic_type(EStatisticType::BIASED_FULL);
float64_t p_value=mmd->compute_p_value(mmd->compute_statistic());
EXPECT_NEAR(p_value, 0.0, 1E-10);
}

// unbiased case
TEST(QuadraticTimeMMD, perform_test_permutation_unbiased_full)
{
const index_t m=20;
const index_t n=30;
const index_t dim=3;

// use fixed seed
sg_rand->set_seed(12345);

float64_t difference=0.5;

// streaming data generator for mean shift distributions
auto gen_p=some<CMeanShiftDataGenerator>(0, dim, 0);
auto gen_q=some<CMeanShiftDataGenerator>(difference, dim, 0);

// stream some data from generator
CFeatures* feat_p=gen_p->get_streamed_features(m);
CFeatures* feat_q=gen_q->get_streamed_features(n);

// shoguns kernel width is different
float64_t sigma=2;
float64_t sq_sigma_twice=sigma*sigma*2;
CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice);

// create MMD instance, convienience constructor
auto mmd=some<CQuadraticTimeMMD>(feat_p, feat_q);
mmd->set_kernel(kernel);

index_t num_null_samples=10;
mmd->set_num_null_samples(num_null_samples);
mmd->set_null_approximation_method(ENullApproximationMethod::PERMUTATION);

// compute p-value using permutation for null distribution and
// assert against local machine computed result
mmd->set_statistic_type(EStatisticType::UNBIASED_FULL);
p_value=mmd->compute_p_value(mmd->compute_statistic());
float64_t p_value=mmd->compute_p_value(mmd->compute_statistic());
EXPECT_NEAR(p_value, 0.0, 1E-10);
}

TEST(QuadraticTimeMMD, perform_test_permutation_unbiased_incomplete)
{
const index_t m=20;
const index_t n=20;
const index_t dim=3;

// use fixed seed
sg_rand->set_seed(12345);

float64_t difference=0.5;

// streaming data generator for mean shift distributions
auto gen_p=some<CMeanShiftDataGenerator>(0, dim, 0);
auto gen_q=some<CMeanShiftDataGenerator>(difference, dim, 0);

// stream some data from generator
CFeatures* feat_p=gen_p->get_streamed_features(m);
CFeatures* feat_q=gen_q->get_streamed_features(n);

// shoguns kernel width is different
float64_t sigma=2;
float64_t sq_sigma_twice=sigma*sigma*2;
CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice);

// create MMD instance, convienience constructor
auto mmd=some<CQuadraticTimeMMD>(feat_p, feat_q);
mmd->set_kernel(kernel);

index_t num_null_samples=10;
mmd->set_num_null_samples(num_null_samples);
mmd->set_null_approximation_method(ENullApproximationMethod::PERMUTATION);

// compute p-value using permutation for null distribution and
// assert against local machine computed result
mmd->set_statistic_type(EStatisticType::UNBIASED_INCOMPLETE);
float64_t p_value=mmd->compute_p_value(mmd->compute_statistic());
EXPECT_NEAR(p_value, 0.0, 1E-10);
}

Expand Down Expand Up @@ -366,7 +434,7 @@ TEST(QuadraticTimeMMD, perform_test_spectrum)
auto mmd=some<CQuadraticTimeMMD>(feat_p, feat_q);
mmd->set_kernel(kernel);

index_t num_null_samples=250;
index_t num_null_samples=10;
index_t num_eigenvalues=10;
mmd->set_num_null_samples(num_null_samples);
mmd->set_null_approximation_method(ENullApproximationMethod::MMD2_SPECTRUM);
Expand All @@ -386,5 +454,5 @@ TEST(QuadraticTimeMMD, perform_test_spectrum)
// assert against local machine computed result
mmd->set_statistic_type(EStatisticType::UNBIASED_FULL);
p_value_spectrum=mmd->compute_p_value(mmd->compute_statistic());
EXPECT_NEAR(p_value_spectrum, 0.004, 1E-10);
EXPECT_NEAR(p_value_spectrum, 0.0, 1E-10);
}

0 comments on commit a8febee

Please sign in to comment.