From eed22ff5f36c4e1e491de10ec957c2e019009671 Mon Sep 17 00:00:00 2001 From: David <2306427+dhelekal@users.noreply.github.com> Date: Mon, 29 Jan 2018 09:23:38 +0000 Subject: [PATCH] Fix issue #3612 (#4127) Fix issue #3612 adds unit test functions for combined kernel subsetting behaviour. Fixes an issue in which combined kernels would not subset custom kernels if features other than combinedfeatures were passed The latter is achieved by replacing the previous recursive call with a call to a helper function made aware of any subsets contained. --- src/shogun/kernel/CombinedKernel.cpp | 207 +++++++++++++------ src/shogun/kernel/CombinedKernel.h | 15 ++ tests/unit/kernel/CombinedKernel_unittest.cc | 138 ++++++++++++- 3 files changed, 300 insertions(+), 60 deletions(-) diff --git a/src/shogun/kernel/CombinedKernel.cpp b/src/shogun/kernel/CombinedKernel.cpp index 4d65511a9da..7b36c3a891b 100644 --- a/src/shogun/kernel/CombinedKernel.cpp +++ b/src/shogun/kernel/CombinedKernel.cpp @@ -61,54 +61,37 @@ void CCombinedKernel::init_subkernel_weights() eigen_wt = eigen_wt.array().exp(); set_subkernel_weights(wt); } - -bool CCombinedKernel::init(CFeatures* l, CFeatures* r) +bool CCombinedKernel::init_with_extracted_subsets( + CFeatures* l, CFeatures* r, SGVector lhs_subset, + SGVector rhs_subset) { - if(enable_subkernel_weight_opt && !weight_update) - { - init_subkernel_weights(); - } - - /* if the specified features are not combined features, but a single other - * feature type, assume that the caller wants to use all kernels on these */ - if (l && r && l->get_feature_class()==r->get_feature_class() && - l->get_feature_type()==r->get_feature_type() && - l->get_feature_class()!= C_COMBINED) - { - SG_DEBUG("Initialising combined kernel's combined features with the " - "same instance from parameters\n"); - /* construct combined features with each element being the parameter */ - CCombinedFeatures* combined_l=new CCombinedFeatures(); - CCombinedFeatures* combined_r=new CCombinedFeatures(); - for (index_t i=0; iappend_feature_obj(l); - combined_r->append_feature_obj(r); - } - - /* recursive call with constructed combined kernel */ - return init(combined_l, combined_r); - } - CKernel::init(l,r); - REQUIRE(l->get_feature_class()==C_COMBINED, "%s::init(): LHS features are" - " of class %s but need to be combined features!\n", - get_name(), l->get_name()); - REQUIRE(r->get_feature_class()==C_COMBINED, "%s::init(): RHS features are" - " of class %s but need to be combined features!\n", - get_name(), r->get_name()); - ASSERT(l->get_feature_type()==F_UNKNOWN) - ASSERT(r->get_feature_type()==F_UNKNOWN) - - CFeatures* lf=NULL; - CFeatures* rf=NULL; - CKernel* k=NULL; - - bool result=true; + CKernel::init(l, r); + REQUIRE( + l->get_feature_class() == C_COMBINED, + "%s::init(): LHS features are" + " of class %s but need to be combined features!\n", + get_name(), l->get_name()); + REQUIRE( + r->get_feature_class() == C_COMBINED, + "%s::init(): RHS features are" + " of class %s but need to be combined features!\n", + get_name(), r->get_name()); + ASSERT(l->get_feature_type() == F_UNKNOWN) + ASSERT(r->get_feature_type() == F_UNKNOWN) + + CFeatures* lf = NULL; + CFeatures* rf = NULL; + CKernel* k = NULL; + + auto l_combined = dynamic_cast(l); + auto r_combined = dynamic_cast(r); + + bool result = true; index_t f_idx = 0; SG_DEBUG("Starting for loop for kernels\n") - for (index_t k_idx=0; k_idxget_kernel_type() != K_CUSTOM) { - if (((CCombinedFeatures*)l)->get_num_feature_obj() > f_idx && - ((CCombinedFeatures*)r)->get_num_feature_obj() > f_idx) + if (l_combined->get_num_feature_obj() > f_idx && + r_combined->get_num_feature_obj() > f_idx) { - lf = ((CCombinedFeatures*)l)->get_feature_obj(f_idx); - rf = ((CCombinedFeatures*)r)->get_feature_obj(f_idx); + lf = l_combined->get_feature_obj(f_idx); + rf = r_combined->get_feature_obj(f_idx); } f_idx++; @@ -131,11 +114,13 @@ bool CCombinedKernel::init(CFeatures* l, CFeatures* r) SG_UNREF(lf); SG_UNREF(rf); SG_UNREF(k); - SG_ERROR("CombinedKernel: Number of features/kernels does not match - bailing out\n") + SG_ERROR( + "CombinedKernel: Number of features/kernels does not " + "match - bailing out\n") } SG_DEBUG("Initializing 0x%p - \"%s\"\n", this, k->get_name()) - result=k->init(lf,rf); + result = k->init(lf, rf); SG_UNREF(lf); SG_UNREF(rf); @@ -144,13 +129,35 @@ bool CCombinedKernel::init(CFeatures* l, CFeatures* r) } else { - SG_DEBUG("Initializing 0x%p - \"%s\" (skipping init, this is a CUSTOM kernel)\n", this, k->get_name()) + SG_DEBUG( + "Initializing 0x%p - \"%s\" (skipping init, this is a CUSTOM " + "kernel)\n", + this, k->get_name()) if (!k->has_features()) - SG_ERROR("No kernel matrix was assigned to this Custom kernel\n") + SG_ERROR( + "No kernel matrix was assigned to this Custom kernel\n") + + auto k_custom = dynamic_cast(k); + + // clear all previous subsets + k_custom->remove_all_row_subsets(); + // apply new subset + k_custom->add_row_subset(lhs_subset); + + k_custom->remove_all_col_subsets(); + // apply new subset + k_custom->add_col_subset(rhs_subset); + if (k->get_num_vec_lhs() != num_lhs) - SG_ERROR("Number of lhs-feature vectors (%d) not match with number of rows (%d) of custom kernel\n", num_lhs, k->get_num_vec_lhs()) + SG_ERROR( + "Number of lhs-feature vectors (%d) not match with number " + "of rows (%d) of custom kernel\n", + num_lhs, k->get_num_vec_lhs()) if (k->get_num_vec_rhs() != num_rhs) - SG_ERROR("Number of rhs-feature vectors (%d) not match with number of cols (%d) of custom kernel\n", num_rhs, k->get_num_vec_rhs()) + SG_ERROR( + "Number of rhs-feature vectors (%d) not match with number " + "of cols (%d) of custom kernel\n", + num_rhs, k->get_num_vec_rhs()) } SG_UNREF(k); @@ -169,15 +176,99 @@ bool CCombinedKernel::init(CFeatures* l, CFeatures* r) return false; } - if ( ((CCombinedFeatures*) l)->get_num_feature_obj()<=0 || - ((CCombinedFeatures*) l)->get_num_feature_obj() != ((CCombinedFeatures*) r)->get_num_feature_obj() ) - SG_ERROR("CombinedKernel: Number of features/kernels does not match - bailing out\n") + if (l_combined->get_num_feature_obj() <= 0 || + l_combined->get_num_feature_obj() != r_combined->get_num_feature_obj()) + SG_ERROR( + "CombinedKernel: Number of features/kernels does not match - " + "bailing out\n") init_normalizer(); - initialized=true; + initialized = true; return true; } +bool CCombinedKernel::init(CFeatures* l, CFeatures* r) +{ + if (enable_subkernel_weight_opt && !weight_update) + { + init_subkernel_weights(); + } + /* + * The two subsets, we will be passing those to init_with_extracted_subsets + */ + SGVector lhs_subset; + SGVector rhs_subset; + + /* + * We will be passing these features to init_with_extracted_subsets + */ + CCombinedFeatures* combined_l; + CCombinedFeatures* combined_r; + + /* + * Extract the subsets so that we can pass them on + */ + auto l_subset_stack = l->get_subset_stack(); + auto r_subset_stack = r->get_subset_stack(); + + if (l_subset_stack->has_subsets()) + { + lhs_subset = l_subset_stack->get_last_subset()->get_subset_idx(); + } + else + { + lhs_subset = SGVector(l->get_num_vectors()); + lhs_subset.range_fill(); + } + + if (r_subset_stack->has_subsets()) + { + rhs_subset = r_subset_stack->get_last_subset()->get_subset_idx(); + } + else + { + rhs_subset = SGVector(r->get_num_vectors()); + rhs_subset.range_fill(); + } + + SG_UNREF(l_subset_stack); + SG_UNREF(r_subset_stack); + + /* if the specified features are not combined features, but a single other + * feature type, assume that the caller wants to use all kernels on these */ + if (l && r && l->get_feature_class() == r->get_feature_class() && + l->get_feature_type() == r->get_feature_type() && + l->get_feature_class() != C_COMBINED) + { + SG_DEBUG( + "Initialising combined kernel's combined features with the " + "same instance from parameters\n"); + /* construct combined features with each element being the parameter + * The we must make sure that we make any custom kernels aware of any + * subsets present! + */ + combined_l = new CCombinedFeatures(); + combined_r = new CCombinedFeatures(); + + for (index_t i = 0; i < get_num_subkernels(); ++i) + { + combined_l->append_feature_obj(l); + combined_r->append_feature_obj(r); + } + } + else + { + /* + * Otherwise, we just pass l & r straight on + */ + combined_l = (CCombinedFeatures*)l; + combined_r = (CCombinedFeatures*)r; + } + + return init_with_extracted_subsets( + combined_l, combined_r, lhs_subset, rhs_subset); +} + void CCombinedKernel::remove_lhs() { delete_optimization(); @@ -971,4 +1062,4 @@ CList* CCombinedKernel::combine_kernels(CList* kernel_list) } return return_list; -} +} \ No newline at end of file diff --git a/src/shogun/kernel/CombinedKernel.h b/src/shogun/kernel/CombinedKernel.h index 5c217449a2c..8d13d30d3b5 100644 --- a/src/shogun/kernel/CombinedKernel.h +++ b/src/shogun/kernel/CombinedKernel.h @@ -458,6 +458,21 @@ class CCombinedKernel : public CKernel private: void init(); + /** + * The purpose of this function is to make customkernels aware of any + * subsets present, regardless whether the features passed are of type + * CCombinedFeatures or not + * @param lhs combined features + * @param rhs rombined features + * @param lhs_subset subset present on lhs - pass identity subset if + * none + * @param rhs_subset subset present on rhs - pass identity subset if + * none + * @return init succesful + */ + bool init_with_extracted_subsets( + CFeatures* lhs, CFeatures* rhs, SGVector lhs_subset, + SGVector rhs_subset); protected: /** list of kernels */ diff --git a/tests/unit/kernel/CombinedKernel_unittest.cc b/tests/unit/kernel/CombinedKernel_unittest.cc index 8ed166a4a7c..ac013ad4cb2 100644 --- a/tests/unit/kernel/CombinedKernel_unittest.cc +++ b/tests/unit/kernel/CombinedKernel_unittest.cc @@ -1,7 +1,12 @@ +#include +#include +#include +#include +#include #include +#include #include -#include -#include +#include using namespace shogun; @@ -37,6 +42,135 @@ TEST(CombinedKernelTest,test_array_operations) SG_UNREF(combined); } +TEST(CombinedKernelTest, test_subset_mixed) +{ + + CMeanShiftDataGenerator* gen = new CMeanShiftDataGenerator(0, 2); + CFeatures* feats = gen->get_streamed_features(10); + + CCombinedFeatures* cf = new CCombinedFeatures(); + + CCombinedKernel* combined = new CCombinedKernel(); + + CGaussianKernel* gaus_1 = new CGaussianKernel(5); + CGaussianKernel* gaus_2 = new CGaussianKernel(5); + + CGaussianKernel* gaus_ck = new CGaussianKernel(5); + gaus_ck->init(feats, feats); + + CCustomKernel* custom_1 = new CCustomKernel(gaus_ck); + CCustomKernel* custom_2 = new CCustomKernel(gaus_ck); + ; + + combined->append_kernel(custom_1); + combined->append_kernel(gaus_1); + cf->append_feature_obj(feats); + + combined->append_kernel(custom_2); + combined->append_kernel(gaus_2); + cf->append_feature_obj(feats); + + SGVector inds(10); + inds.range_fill(); + + for (index_t i = 0; i < 10; ++i) + { + CMath::permute(inds); + + cf->add_subset(inds); + combined->init(cf, cf); + + CKernel* k_g = combined->get_kernel(1); + CKernel* k_0 = combined->get_kernel(0); + CKernel* k_3 = combined->get_kernel(2); + + SGMatrix gauss_matrix = k_g->get_kernel_matrix(); + SGMatrix custom_matrix_1 = k_0->get_kernel_matrix(); + SGMatrix custom_matrix_2 = k_3->get_kernel_matrix(); + + for (index_t j = 0; j < 10; ++j) + { + for (index_t k = 0; k < 10; ++k) + { + EXPECT_LE( + CMath::abs(gauss_matrix(k, j) - custom_matrix_1(k, j)), + 1e-6); + EXPECT_LE( + CMath::abs(gauss_matrix(k, j) - custom_matrix_2(k, j)), + 1e-6); + } + } + + cf->remove_subset(); + SG_UNREF(k_g); + SG_UNREF(k_0); + SG_UNREF(k_3); + } + + SG_UNREF(gen); + SG_UNREF(gaus_ck); + SG_UNREF(combined); +} + +TEST(CombinedKernelTest, test_subset_combined_only) +{ + + CMeanShiftDataGenerator* gen = new CMeanShiftDataGenerator(0, 2); + CFeatures* feats = gen->get_streamed_features(10); + + CCombinedKernel* combined = new CCombinedKernel(); + + CGaussianKernel* gaus_ck = new CGaussianKernel(5); + gaus_ck->init(feats, feats); + + CCustomKernel* custom_1 = new CCustomKernel(gaus_ck); + CCustomKernel* custom_2 = new CCustomKernel(gaus_ck); + ; + + combined->append_kernel(custom_1); + combined->append_kernel(custom_2); + + SGVector inds(10); + inds.range_fill(); + + for (index_t i = 0; i < 10; ++i) + { + CMath::permute(inds); + + feats->add_subset(inds); + combined->init(feats, feats); + gaus_ck->init(feats, feats); + + CKernel* k_0 = combined->get_kernel(0); + CKernel* k_1 = combined->get_kernel(1); + + SGMatrix gauss_matrix = gaus_ck->get_kernel_matrix(); + SGMatrix custom_matrix_1 = k_0->get_kernel_matrix(); + SGMatrix custom_matrix_2 = k_1->get_kernel_matrix(); + + for (index_t j = 0; j < 10; ++j) + { + for (index_t k = 0; k < 10; ++k) + { + EXPECT_LE( + CMath::abs(gauss_matrix(k, j) - custom_matrix_1(k, j)), + 1e-6); + EXPECT_LE( + CMath::abs(gauss_matrix(k, j) - custom_matrix_2(k, j)), + 1e-6); + } + } + + feats->remove_subset(); + SG_UNREF(k_0); + SG_UNREF(k_1); + } + + SG_UNREF(gen); + SG_UNREF(gaus_ck); + SG_UNREF(combined); +} + TEST(CombinedKernelTest,weights) { CCombinedKernel* combined = new CCombinedKernel();