From 7fad933241c01cd3911af7052a661a818713f231 Mon Sep 17 00:00:00 2001 From: lambday Date: Tue, 19 Apr 2016 05:11:49 +0530 Subject: [PATCH] added feature shallow copy and merged copy methods --- .../internals/FeaturesUtil.cpp | 122 ++++++++++++++++++ .../internals/FeaturesUtil.h | 75 +++++++++++ .../internals/FeaturesUtil_unittest.cc | 115 +++++++++++++++++ 3 files changed, 312 insertions(+) create mode 100644 src/shogun/statistical_testing/internals/FeaturesUtil.cpp create mode 100644 src/shogun/statistical_testing/internals/FeaturesUtil.h create mode 100644 tests/unit/statistical_testing/internals/FeaturesUtil_unittest.cc diff --git a/src/shogun/statistical_testing/internals/FeaturesUtil.cpp b/src/shogun/statistical_testing/internals/FeaturesUtil.cpp new file mode 100644 index 00000000000..285059a3f76 --- /dev/null +++ b/src/shogun/statistical_testing/internals/FeaturesUtil.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) The Shogun Machine Learning Toolbox + * Written (w) 2016 Soumyajit De + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are those
+ * of the authors and should not be interpreted as representing official policies,
+ * either expressed or implied, of the Shogun Development Team.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace shogun;
+using namespace internal;
+
+/**
+ * Creates a shallow copy of a dense float64_t feature object.
+ *
+ * The copy wraps the very same feature matrix buffer as the original (the
+ * wrapping SGMatrix is created with ref-counting disabled) and receives the
+ * subsets of the original, taken from a clone of its subset stack and added
+ * in the original order.
+ *
+ * @param other The feature object to copy; must not be NULL. Only F_DREAL /
+ * C_DENSE features are handled, anything else ends in SG_SNOTIMPLEMENTED.
+ * @return A newly allocated feature object sharing the storage of other.
+ */
+CFeatures* FeaturesUtil::create_shallow_copy(CFeatures* other)
+{
+	SG_SDEBUG("Entering!\n");
+	REQUIRE(other!=nullptr, "The feature object to be copied cannot be NULL!\n");
+
+	CFeatures* shallow_copy=nullptr;
+	if (other->get_feature_type()==F_DREAL && other->get_feature_class()==C_DENSE)
+	{
+		auto casted=static_cast<CDenseFeatures<float64_t>*>(other);
+
+		// use the same underlying feature matrix, no ref-count: the copy only
+		// borrows the buffer that belongs to the original feature object
+		int32_t num_feats=0, num_vecs=0;
+		float64_t* data=casted->get_feature_matrix(num_feats, num_vecs);
+		SG_SDEBUG("Using underlying feature matrix with %d dimensions and %d feature vectors!\n", num_feats, num_vecs);
+		SGMatrix<float64_t> feats_matrix(data, num_feats, num_vecs, false);
+		shallow_copy=new CDenseFeatures<float64_t>(feats_matrix);
+
+		// clone the subsets if there are any
+		CSubsetStack* src_subset_stack=casted->get_subset_stack();
+		if (src_subset_stack->has_subsets())
+		{
+			SG_SDEBUG("Subset present, cloning the subsets!\n");
+			CSubsetStack* subset_stack=static_cast<CSubsetStack*>(src_subset_stack->clone());
+
+			// popping the clone yields the subsets last-to-first; the std::stack
+			// reverses that, so they are re-added below in their original order
+			std::stack<SGVector<index_t>> stack;
+			while (subset_stack->has_subsets())
+			{
+				// NOTE(review): if get_last_subset() hands out a ref'ed CSubset,
+				// that reference is never released here - confirm against
+				// CSubsetStack
+				stack.push(subset_stack->get_last_subset()->get_subset_idx());
+				subset_stack->remove_subset();
+			}
+			SG_UNREF(subset_stack);
+			while (!stack.empty())
+			{
+				shallow_copy->add_subset(stack.top());
+				stack.pop();
+			}
+		}
+		SG_UNREF(src_subset_stack);
+	}
+	else
+		SG_SNOTIMPLEMENTED;
+	SG_SDEBUG("Leaving!\n");
+	return shallow_copy;
+}
+
+/**
+ * Creates a new dense float64_t feature object holding the feature matrix of
+ * feats_a followed by the feature matrix of feats_b.
+ *
+ * Each input contributes whatever its get_feature_matrix() returns; the two
+ * buffers are copied back to back into a freshly allocated matrix, so the
+ * result does not share storage with either input.
+ *
+ * @param feats_a First feature object; must not be NULL.
+ * @param feats_b Second feature object; must not be NULL and must have the
+ * same feature type, feature class and number of features as feats_a.
+ * @return A newly allocated feature object with
+ * data_a.num_cols+data_b.num_cols feature vectors. Only F_DREAL / C_DENSE
+ * features are handled, anything else ends in SG_SNOTIMPLEMENTED.
+ */
+CFeatures* FeaturesUtil::create_merged_copy(CFeatures* feats_a, CFeatures* feats_b)
+{
+	SG_SDEBUG("Entering!\n");
+	REQUIRE(feats_a!=nullptr && feats_b!=nullptr,
+			"The feature objects to be merged cannot be NULL!\n");
+	REQUIRE(feats_a->get_feature_type()==feats_b->get_feature_type(),
+			"The feature types of the underlying feature objects should be same!\n");
+	REQUIRE(feats_a->get_feature_class()==feats_b->get_feature_class(),
+			"The feature classes of the underlying feature objects should be same!\n");
+
+	CFeatures* merged_copy=nullptr;
+
+	if (feats_a->get_feature_type()==F_DREAL && feats_a->get_feature_class()==C_DENSE)
+	{
+		auto casted_a=static_cast<CDenseFeatures<float64_t>*>(feats_a);
+		auto casted_b=static_cast<CDenseFeatures<float64_t>*>(feats_b);
+
+		REQUIRE(casted_a->get_num_features()==casted_b->get_num_features(),
+				"The number of features from a (%d) has to be equal with that of b (%d)!\n",
+				casted_a->get_num_features(), casted_b->get_num_features());
+
+		SGMatrix<float64_t> data_a=casted_a->get_feature_matrix();
+		SGMatrix<float64_t> data_b=casted_b->get_feature_matrix();
+		ASSERT(data_a.num_rows==data_b.num_rows);
+
+		// same number of rows on both sides, so b's buffer can simply be
+		// appended right after a's buffer in the merged matrix
+		SGMatrix<float64_t> merged(data_a.num_rows, data_a.num_cols+data_b.num_cols);
+		std::copy(data_a.data(), data_a.data()+data_a.size(), merged.data());
+		std::copy(data_b.data(), data_b.data()+data_b.size(), merged.data()+data_a.size());
+
+		merged_copy=new CDenseFeatures<float64_t>(merged);
+	}
+	else
+		SG_SNOTIMPLEMENTED;
+
+	SG_SDEBUG("Leaving!\n");
+	return merged_copy;
+}
diff --git a/src/shogun/statistical_testing/internals/FeaturesUtil.h b/src/shogun/statistical_testing/internals/FeaturesUtil.h
new file mode 100644
index 00000000000..639e3a6089b
--- /dev/null
+++ b/src/shogun/statistical_testing/internals/FeaturesUtil.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) The Shogun Machine Learning Toolbox
+ * Written (w) 2016 Soumyajit De
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are those
+ * of the authors and should not be interpreted as representing official policies,
+ * either expressed or implied, of the Shogun Development Team.
+ */
+
+#ifndef FEATURES_UTIL_H__
+#define FEATURES_UTIL_H__
+
+#include
+
+namespace shogun
+{
+
+class CFeatures;
+
+namespace internal
+{
+
+/**
+ * @brief Struct FeaturesUtil groups generic static helper methods for
+ * handling Shogun's feature objects in the big-testing framework. It has no
+ * data members; both helpers are called as FeaturesUtil::method(...).
+ */
+struct FeaturesUtil
+{
+	/**
+	 * This creates a shallow copy of the feature object. It uses the same
+	 * underlying feature storage as the original object, but it clones all
+	 * the subsets.
+	 *
+	 * The shared storage is wrapped without reference counting, so the copy
+	 * does not keep it alive on its own; keep the original object around for
+	 * as long as the copy is in use.
+	 *
+	 * Only dense features of type float64_t (F_DREAL, C_DENSE) are handled;
+	 * for any other feature object SG_SNOTIMPLEMENTED is invoked.
+	 *
+	 * @param other The feature object whose shallow copy has to be created.
+	 * @return A newly allocated shallow copy of the feature object.
+	 */
+	static CFeatures* create_shallow_copy(CFeatures* other);
+
+	/**
+	 * This creates a merged copy of the two feature objects. The feature
+	 * matrices of feats_a and feats_b (as returned by get_feature_matrix())
+	 * are copied, feats_a first, into a newly allocated feature matrix.
+	 *
+	 * Both objects must have the same feature type, the same feature class
+	 * and the same number of features. Only dense features of type float64_t
+	 * (F_DREAL, C_DENSE) are handled; for any other feature objects
+	 * SG_SNOTIMPLEMENTED is invoked.
+	 *
+	 * @param feats_a First feature object.
+	 * @param feats_b Second feature object.
+	 * @return A merged copy of the feature objects with total number of feature
+	 * vectors of feats_a.num_vectors+feats_b.num_vectors.
+	 */
+	static CFeatures* create_merged_copy(CFeatures* feats_a, CFeatures* feats_b);
+};
+
+}
+
+}
+
+#endif // FEATURES_UTIL_H__
diff --git a/tests/unit/statistical_testing/internals/FeaturesUtil_unittest.cc b/tests/unit/statistical_testing/internals/FeaturesUtil_unittest.cc
new file mode 100644
index 00000000000..8e5925ab8cb
--- /dev/null
+++ b/tests/unit/statistical_testing/internals/FeaturesUtil_unittest.cc
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) The Shogun Machine Learning Toolbox
+ * Written (w) 2016 Soumyajit De
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are those
+ * of the authors and should not be interpreted as representing official policies,
+ * either expressed or implied, of the Shogun Development Team.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace shogun;
+using namespace internal;
+
+/**
+ * A shallow copy of features carrying two stacked subsets must expose the
+ * very same matrix buffer, an equal subset stack and an equal feature matrix.
+ */
+TEST(FeaturesUtil, create_shallow_copy)
+{
+	const index_t dim=2;
+	const index_t num_vec=10;
+
+	SGMatrix<float64_t> data(dim, num_vec);
+	std::iota(data.matrix, data.matrix+dim*num_vec, 0);
+
+	// stack two subsets so that cloning has to preserve their order
+	auto feats=new CDenseFeatures<float64_t>(data);
+	SGVector<index_t> inds(5);
+	std::iota(inds.data(), inds.data()+inds.size(), 0);
+	feats->add_subset(inds);
+	SGVector<index_t> inds2(2);
+	std::iota(inds2.data(), inds2.data()+inds2.size(), 0);
+	feats->add_subset(inds2);
+
+	auto shallow_copy=static_cast<CDenseFeatures<float64_t>*>(FeaturesUtil::create_shallow_copy(feats));
+
+	// the copy must point at the original buffer, with the full dimensions
+	int32_t num_feats=0, num_vecs=0;
+	float64_t* copied_data=shallow_copy->get_feature_matrix(num_feats, num_vecs);
+	ASSERT_EQ(data.data(), copied_data);
+	ASSERT_EQ(dim, num_feats);
+	ASSERT_EQ(num_vec, num_vecs);
+
+	auto src_subset_stack=feats->get_subset_stack();
+	auto dst_subset_stack=shallow_copy->get_subset_stack();
+	ASSERT_TRUE(src_subset_stack->equals(dst_subset_stack));
+	SG_UNREF(src_subset_stack);
+	SG_UNREF(dst_subset_stack);
+
+	// gtest assertion instead of Shogun's ASSERT macro, so that a mismatch is
+	// reported as a regular test failure like every other check in this file
+	SGMatrix<float64_t> src=feats->get_feature_matrix();
+	SGMatrix<float64_t> dst=shallow_copy->get_feature_matrix();
+	ASSERT_TRUE(src.equals(dst));
+
+	shallow_copy->remove_all_subsets();
+	SG_UNREF(shallow_copy);
+
+	feats->remove_all_subsets();
+	SG_UNREF(feats);
+}
+
+/**
+ * The merged copy of two feature objects with different subsets must equal
+ * the feature matrix of the first followed by the feature matrix of the second.
+ */
+TEST(FeaturesUtil, create_merged_copy)
+{
+	const index_t dim=2;
+	const index_t num_vec=3;
+
+	SGMatrix<float64_t> data(dim, num_vec);
+	std::iota(data.matrix, data.matrix+dim*num_vec, 0);
+
+	// both feature objects are built from the same matrix, but with
+	// different subsets on top of it
+	auto feats_a=new CDenseFeatures<float64_t>(data);
+	SGVector<index_t> inds_a(2);
+	inds_a[0]=1;
+	inds_a[1]=2;
+	feats_a->add_subset(inds_a);
+	SGMatrix<float64_t> data_a=feats_a->get_feature_matrix();
+
+	auto feats_b=new CDenseFeatures<float64_t>(data);
+	SGVector<index_t> inds_b(2);
+	inds_b[0]=0;
+	inds_b[1]=2;
+	feats_b->add_subset(inds_b);
+	SGMatrix<float64_t> data_b=feats_b->get_feature_matrix();
+
+	// expected result: buffer of a immediately followed by buffer of b
+	SGMatrix<float64_t> merged(dim, data_a.num_cols+data_b.num_cols);
+	std::copy(data_a.data(), data_a.data()+data_a.size(), merged.data());
+	std::copy(data_b.data(), data_b.data()+data_b.size(), merged.data()+data_a.size());
+
+	auto merged_copy=static_cast<CDenseFeatures<float64_t>*>(FeaturesUtil::create_merged_copy(feats_a, feats_b));
+	SGMatrix<float64_t> copied(merged_copy->get_feature_matrix());
+	ASSERT_TRUE(merged.equals(copied));
+
+	SG_UNREF(merged_copy);
+	SG_UNREF(feats_a);
+	SG_UNREF(feats_b);
+}