Skip to content

Commit

Permalink
hsic bugfix and interface changes
Browse files Browse the repository at this point in the history
  • Loading branch information
lambday committed Mar 4, 2014
1 parent df06a0e commit f9a815d
Show file tree
Hide file tree
Showing 38 changed files with 1,152 additions and 754 deletions.
13 changes: 2 additions & 11 deletions examples/undocumented/libshogun/statistics_hsic.cpp
Expand Up @@ -89,10 +89,7 @@ void test_hsic_fixed()

index_t m=features_p->get_num_vectors();

/* unref features since convienience constructor is HSIC was used */
CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
SG_UNREF(features_p);
SG_UNREF(features_q);

/* assert matlab result, note that compute statistic computes m*hsic */
float64_t difference=hsic->compute_statistic();
Expand All @@ -111,10 +108,7 @@ void test_hsic_gamma()
CKernel* kernel_q=NULL;
create_fixed_data_kernel_big(features_p, features_q, kernel_p, kernel_q);

/* unref features since convienience constructor is HSIC was used */
CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
SG_UNREF(features_p);
SG_UNREF(features_q);

hsic->set_null_approximation_method(HSIC_GAMMA);
float64_t p=hsic->compute_p_value(0.05);
Expand All @@ -132,26 +126,23 @@ void test_hsic_sample_null()
CKernel* kernel_q=NULL;
create_fixed_data_kernel_big(features_p, features_q, kernel_p, kernel_q);

/* unref features since convienience constructor is HSIC was used */
CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
SG_UNREF(features_p);
SG_UNREF(features_q);

/* do sampling null */
hsic->set_null_approximation_method(PERMUTATION);
float64_t p=hsic->compute_p_value(0.05);
SG_SPRINT("p-value: %f\n", p);

/* ensure that sampling null of hsic leads to same results as using
* CKernelIndependenceTestStatistic */
* CKernelIndependenceTest */
CMath::init_random(1);
float64_t mean1=CStatistics::mean(hsic->sample_null());
float64_t var1=CStatistics::variance(hsic->sample_null());
SG_SPRINT("mean1=%f, var1=%f\n", mean1, var1);

CMath::init_random(1);
float64_t mean2=CStatistics::mean(
hsic->CKernelIndependenceTestStatistic::sample_null());
hsic->CKernelIndependenceTest::sample_null());
float64_t var2=CStatistics::variance(hsic->sample_null());
SG_SPRINT("mean2=%f, var2=%f\n", mean2, var2);

Expand Down
18 changes: 10 additions & 8 deletions src/interfaces/modular/Statistics.i
Expand Up @@ -8,12 +8,13 @@
*/

/* Remove C Prefix */
%rename(TestStatistic) CTestStatistic;
%rename(TwoDistributionsTestStatistic) CTwoDistributionsTestStatistic;
%rename(KernelTwoSampleTestStatistic) CKernelTwoSampleTestStatistic;
%rename(HypothesisTest) CHypothesisTest;
%rename(IndependenceTest) CIndependenceTest;
%rename(TwoSampleTest) CTwoSampleTest;
%rename(KernelTwoSampleTest) CKernelTwoSampleTest;
%rename(LinearTimeMMD) CLinearTimeMMD;
%rename(QuadraticTimeMMD) CQuadraticTimeMMD;
%rename(KernelIndependenceTestStatistic) CKernelIndependenceTestStatistic;
%rename(KernelIndependenceTest) CKernelIndependenceTest;
%rename(HSIC) CHSIC;
%rename(KernelMeanMatching) CKernelMeanMatching;
%rename(MMDKernelSelection) CMMDKernelSelection;
Expand All @@ -26,12 +27,13 @@


/* Include Class Headers to make them visible from within the target language */
%include <shogun/statistics/TestStatistic.h>
%include <shogun/statistics/TwoDistributionsTestStatistic.h>
%include <shogun/statistics/KernelTwoSampleTestStatistic.h>
%include <shogun/statistics/HypothesisTest.h>
%include <shogun/statistics/IndependenceTest.h>
%include <shogun/statistics/TwoSampleTest.h>
%include <shogun/statistics/KernelTwoSampleTest.h>
%include <shogun/statistics/LinearTimeMMD.h>
%include <shogun/statistics/QuadraticTimeMMD.h>
%include <shogun/statistics/KernelIndependenceTestStatistic.h>
%include <shogun/statistics/KernelIndependenceTest.h>
%include <shogun/statistics/HSIC.h>
%include <shogun/statistics/KernelMeanMatching.h>
%include <shogun/statistics/MMDKernelSelection.h>
Expand Down
9 changes: 5 additions & 4 deletions src/interfaces/modular/Statistics_includes.i
@@ -1,10 +1,11 @@
%{
#include <shogun/statistics/TestStatistic.h>
#include <shogun/statistics/TwoDistributionsTestStatistic.h>
#include <shogun/statistics/KernelTwoSampleTestStatistic.h>
#include <shogun/statistics/HypothesisTest.h>
#include <shogun/statistics/IndependenceTest.h>
#include <shogun/statistics/TwoSampleTest.h>
#include <shogun/statistics/KernelTwoSampleTest.h>
#include <shogun/statistics/LinearTimeMMD.h>
#include <shogun/statistics/QuadraticTimeMMD.h>
#include <shogun/statistics/KernelIndependenceTestStatistic.h>
#include <shogun/statistics/KernelIndependenceTest.h>
#include <shogun/statistics/HSIC.h>
#include <shogun/statistics/KernelMeanMatching.h>
#include <shogun/statistics/MMDKernelSelection.h>
Expand Down
142 changes: 71 additions & 71 deletions src/shogun/statistics/HSIC.cpp
@@ -1,10 +1,32 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2012-2013 Heiko Strathmann
* Written (w) 2014 Soumyajit De
* All rights reserved.
*
* Written (W) 2012-2013 Heiko Strathmann
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/

#include <shogun/statistics/HSIC.h>
Expand All @@ -15,53 +37,46 @@

using namespace shogun;

CHSIC::CHSIC() :
CKernelIndependenceTestStatistic()
CHSIC::CHSIC() : CKernelIndependenceTest()
{
init();
}

CHSIC::CHSIC(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p_and_q,
index_t m) :
CKernelIndependenceTestStatistic(kernel_p, kernel_q, m_p_and_q, m)
{
if (p_and_q && p_and_q->get_num_vectors()/2!=m)
{
SG_ERROR("Only features with equal number of vectors are currently "
"possible\n");
}

init();
}

CHSIC::CHSIC(CKernel* kernel_p, CKernel* kernel_q, CFeatures* p,
CFeatures* q) :
CKernelIndependenceTestStatistic(kernel_p, kernel_q, p, q)
CKernelIndependenceTest(kernel_p, kernel_q, p, q)
{
init();

if (p && q && p->get_num_vectors()!=q->get_num_vectors())
{
SG_ERROR("Only features with equal number of vectors are currently "
"possible\n");
}

init();
else
m_num_features=p->get_num_vectors();
}

CHSIC::~CHSIC()
{

}

void CHSIC::init()
{
SG_ADD(&m_num_features, "num_features",
"Number of features from each of the distributions",
MS_NOT_AVAILABLE);

m_num_features=0;
}

float64_t CHSIC::compute_statistic()
{
SG_DEBUG("entering!\n");

REQUIRE(m_kernel_p && m_kernel_q, "No or only one kernel specified!\n");

REQUIRE(m_p_and_q, "features needed!\n")
REQUIRE(m_p && m_q, "features needed!\n")

/* compute kernel matrices */
SGMatrix<float64_t> K=get_kernel_matrix_K();
Expand All @@ -71,7 +86,7 @@ float64_t CHSIC::compute_statistic()
K.center();

/* compute MATLAB: sum(sum(Kc' .* (L))), which is biased HSIC */
index_t m=m_m;
index_t m=m_num_features;
float64_t result=0;
for (index_t i=0; i<m; ++i)
{
Expand All @@ -82,6 +97,8 @@ float64_t CHSIC::compute_statistic()
/* return m times statistic */
result/=m;

SG_DEBUG("leaving!\n");

return result;
}

Expand All @@ -100,7 +117,7 @@ float64_t CHSIC::compute_p_value(float64_t statistic)

default:
/* sampling null is handled there */
result=CTwoDistributionsTestStatistic::compute_p_value(statistic);
result=CIndependenceTest::compute_p_value(statistic);
break;
}

Expand All @@ -122,7 +139,7 @@ float64_t CHSIC::compute_threshold(float64_t alpha)

default:
/* sampling null is handled there */
result=CTwoDistributionsTestStatistic::compute_threshold(alpha);
result=CIndependenceTest::compute_threshold(alpha);
break;
}

Expand All @@ -133,9 +150,9 @@ SGVector<float64_t> CHSIC::fit_null_gamma()
{
REQUIRE(m_kernel_p && m_kernel_q, "No or only one kernel specified!\n");

REQUIRE(m_p_and_q, "features needed!\n")
REQUIRE(m_p && m_q, "features needed!\n")

index_t m=m_m;
index_t m=m_num_features;

/* compute kernel matrices */
SGMatrix<float64_t> K=get_kernel_matrix_K();
Expand Down Expand Up @@ -216,73 +233,52 @@ SGVector<float64_t> CHSIC::fit_null_gamma()

SGMatrix<float64_t> CHSIC::get_kernel_matrix_K()
{
SGMatrix<float64_t> K;
SG_DEBUG("entering!\n");

/* subset for selecting only data from one distribution */
SGVector<index_t> subset(m_m);
subset.range_fill();
SGMatrix<float64_t> K;

/* distinguish between custom and normal kernels */
if (m_kernel_p->get_kernel_type()==K_CUSTOM)
{
/* custom kernels need to to be initialised when a subset is added */
CCustomKernel* custom_kernel_p=(CCustomKernel*)m_kernel_p;
custom_kernel_p->add_row_subset(subset);
custom_kernel_p->add_col_subset(subset);

K=custom_kernel_p->get_kernel_matrix();

custom_kernel_p->remove_row_subset();
custom_kernel_p->remove_col_subset();
}
else
{
m_p_and_q->add_subset(subset);
m_kernel_p->init(m_p_and_q, m_p_and_q);
/* need to init the kernel if kernel is not precomputed - if subsets of
* features are in the stack (for permutation), this will handle it */
m_kernel_p->init(m_p, m_p);
K=m_kernel_p->get_kernel_matrix();
m_p_and_q->remove_subset();

/* re-init kernel since subset was changed */
m_kernel_p->init(m_p_and_q, m_p_and_q);
}

SG_DEBUG("leaving!\n");

return K;
}

SGMatrix<float64_t> CHSIC::get_kernel_matrix_L()
{
SGMatrix<float64_t> L;
SG_DEBUG("entering!\n");

/* subset for selecting only data from one distribution */
SGVector<index_t> subset(m_m);
/* setting the starting index at m_m which is the starting index for
* samples from q */
subset.range_fill(m_m);
SGMatrix<float64_t> L;

/* now second half of data for L */
if (m_kernel_q->get_kernel_type()==K_CUSTOM)
{
/* custom kernels need to to be initialised when a subset is added */
/* custom kernels need to to be initialised - no subsets here */
CCustomKernel* custom_kernel_q=(CCustomKernel*)m_kernel_q;
custom_kernel_q->add_row_subset(subset);
custom_kernel_q->add_col_subset(subset);

L=custom_kernel_q->get_kernel_matrix();

custom_kernel_q->remove_row_subset();
custom_kernel_q->remove_col_subset();
}
else
{
m_p_and_q->add_subset(subset);
m_kernel_q->init(m_p_and_q, m_p_and_q);
/* need to init the kernel if kernel is not precomputed */
m_kernel_q->init(m_q, m_q);
L=m_kernel_q->get_kernel_matrix();
m_p_and_q->remove_subset();

/* re-init kernel since subset was changed */
m_kernel_q->init(m_p_and_q, m_p_and_q);
}

SG_DEBUG("leaving!\n");

return L;
}

Expand All @@ -297,9 +293,11 @@ SGVector<float64_t> CHSIC::sample_null()
CKernel* kernel_p=m_kernel_p;
CKernel* kernel_q=m_kernel_q;

/* init kernels before to be sure that everything is fine */
m_kernel_p->init(m_p_and_q, m_p_and_q);
m_kernel_q->init(m_p_and_q, m_p_and_q);
/* init kernels before to be sure that everything is fine
* kernel function between two samples from different distributions
* is never computed - in fact, they may as well have different features */
m_kernel_p->init(m_p, m_p);
m_kernel_q->init(m_q, m_q);

/* precompute kernel matrices */
CCustomKernel* precomputed_p=new CCustomKernel(m_kernel_p);
Expand All @@ -311,9 +309,11 @@ SGVector<float64_t> CHSIC::sample_null()
m_kernel_p=precomputed_p;
m_kernel_q=precomputed_q;

/* use superclass sample_null which permutes custom kernels */
SGVector<float64_t> null_samples=
CKernelIndependenceTestStatistic::sample_null();
/* use superclass sample_null which shuffles the entries for one
* distribution using index permutation on rows and columns of
* kernel matrix from one distribution, while accessing the other
* in its original order and then compute statistic */
SGVector<float64_t> null_samples=CKernelIndependenceTest::sample_null();

/* restore kernels */
m_kernel_p=kernel_p;
Expand Down

0 comments on commit f9a815d

Please sign in to comment.