From f4755dd27d08eb64100c60917183c25fa1d9e803 Mon Sep 17 00:00:00 2001 From: lambday Date: Sun, 20 Mar 2016 23:51:33 +0530 Subject: [PATCH] added first version of bigtesting framework --- .../internals/BlockwiseDetails.cpp | 42 ++++ .../experimental/internals/BlockwiseDetails.h | 51 +++++ .../internals/ComputationManager.cpp | 94 ++++++++ .../internals/ComputationManager.h | 63 ++++++ .../experimental/internals/DataFetcher.cpp | 100 +++++++++ .../experimental/internals/DataFetcher.h | 61 ++++++ .../internals/DataFetcherFactory.cpp | 37 ++++ .../internals/DataFetcherFactory.h | 48 ++++ .../experimental/internals/DataManager.cpp | 207 ++++++++++++++++++ .../experimental/internals/DataManager.h | 72 ++++++ .../experimental/internals/Features.h | 56 +++++ .../experimental/internals/InitPerFeature.cpp | 49 +++++ .../experimental/internals/InitPerFeature.h | 54 +++++ .../experimental/internals/InitPerKernel.cpp | 43 ++++ .../experimental/internals/InitPerKernel.h | 50 +++++ .../experimental/internals/KernelManager.cpp | 78 +++++++ .../experimental/internals/KernelManager.h | 56 +++++ .../experimental/internals/NextSamples.cpp | 59 +++++ .../experimental/internals/NextSamples.h | 101 +++++++++ .../internals/StreamingDataFetcher.cpp | 97 ++++++++ .../internals/StreamingDataFetcher.h | 57 +++++ .../experimental/internals/TestTypes.h | 75 +++++++ .../experimental/internals/mmd/BiasedFull.cpp | 48 ++++ .../experimental/internals/mmd/BiasedFull.h | 53 +++++ .../experimental/internals/mmd/FullDirect.cpp | 47 ++++ .../experimental/internals/mmd/FullDirect.h | 49 +++++ .../internals/mmd/UnbiasedFull.cpp | 53 +++++ .../experimental/internals/mmd/UnbiasedFull.h | 53 +++++ .../internals/mmd/UnbiasedIncomplete.cpp | 52 +++++ .../internals/mmd/UnbiasedIncomplete.h | 51 +++++ .../internals/mmd/WithinBlockDirect.cpp | 47 ++++ .../internals/mmd/WithinBlockDirect.h | 49 +++++ .../internals/mmd/WithinBlockPermutation.cpp | 55 +++++ .../internals/mmd/WithinBlockPermutation.h | 52 +++++ 34 
files changed, 2159 insertions(+) create mode 100644 src/shogun/statistics/experimental/internals/BlockwiseDetails.cpp create mode 100644 src/shogun/statistics/experimental/internals/BlockwiseDetails.h create mode 100644 src/shogun/statistics/experimental/internals/ComputationManager.cpp create mode 100644 src/shogun/statistics/experimental/internals/ComputationManager.h create mode 100644 src/shogun/statistics/experimental/internals/DataFetcher.cpp create mode 100644 src/shogun/statistics/experimental/internals/DataFetcher.h create mode 100644 src/shogun/statistics/experimental/internals/DataFetcherFactory.cpp create mode 100644 src/shogun/statistics/experimental/internals/DataFetcherFactory.h create mode 100644 src/shogun/statistics/experimental/internals/DataManager.cpp create mode 100644 src/shogun/statistics/experimental/internals/DataManager.h create mode 100644 src/shogun/statistics/experimental/internals/Features.h create mode 100644 src/shogun/statistics/experimental/internals/InitPerFeature.cpp create mode 100644 src/shogun/statistics/experimental/internals/InitPerFeature.h create mode 100644 src/shogun/statistics/experimental/internals/InitPerKernel.cpp create mode 100644 src/shogun/statistics/experimental/internals/InitPerKernel.h create mode 100644 src/shogun/statistics/experimental/internals/KernelManager.cpp create mode 100644 src/shogun/statistics/experimental/internals/KernelManager.h create mode 100644 src/shogun/statistics/experimental/internals/NextSamples.cpp create mode 100644 src/shogun/statistics/experimental/internals/NextSamples.h create mode 100644 src/shogun/statistics/experimental/internals/StreamingDataFetcher.cpp create mode 100644 src/shogun/statistics/experimental/internals/StreamingDataFetcher.h create mode 100644 src/shogun/statistics/experimental/internals/TestTypes.h create mode 100644 src/shogun/statistics/experimental/internals/mmd/BiasedFull.cpp create mode 100644 src/shogun/statistics/experimental/internals/mmd/BiasedFull.h 
create mode 100644 src/shogun/statistics/experimental/internals/mmd/FullDirect.cpp create mode 100644 src/shogun/statistics/experimental/internals/mmd/FullDirect.h create mode 100644 src/shogun/statistics/experimental/internals/mmd/UnbiasedFull.cpp create mode 100644 src/shogun/statistics/experimental/internals/mmd/UnbiasedFull.h create mode 100644 src/shogun/statistics/experimental/internals/mmd/UnbiasedIncomplete.cpp create mode 100644 src/shogun/statistics/experimental/internals/mmd/UnbiasedIncomplete.h create mode 100644 src/shogun/statistics/experimental/internals/mmd/WithinBlockDirect.cpp create mode 100644 src/shogun/statistics/experimental/internals/mmd/WithinBlockDirect.h create mode 100644 src/shogun/statistics/experimental/internals/mmd/WithinBlockPermutation.cpp create mode 100644 src/shogun/statistics/experimental/internals/mmd/WithinBlockPermutation.h diff --git a/src/shogun/statistics/experimental/internals/BlockwiseDetails.cpp b/src/shogun/statistics/experimental/internals/BlockwiseDetails.cpp new file mode 100644 index 00000000000..64f061a201d --- /dev/null +++ b/src/shogun/statistics/experimental/internals/BlockwiseDetails.cpp @@ -0,0 +1,42 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +using namespace shogun; +using namespace internal; + +BlockwiseDetails::BlockwiseDetails() +: m_blocksize(0), m_num_blocks_per_burst(1), m_max_num_samples_per_burst(0), + m_next_block_index(0), m_total_num_blocks(0) +{ +} + +BlockwiseDetails& BlockwiseDetails::with_blocksize(index_t blocksize) +{ + m_blocksize = blocksize; + m_max_num_samples_per_burst = m_blocksize * m_num_blocks_per_burst; + return *this; +} + +BlockwiseDetails& BlockwiseDetails::with_num_blocks_per_burst(index_t num_blocks_per_burst) +{ + m_num_blocks_per_burst = num_blocks_per_burst; + m_max_num_samples_per_burst = m_blocksize * m_num_blocks_per_burst; + return *this; +} diff --git a/src/shogun/statistics/experimental/internals/BlockwiseDetails.h b/src/shogun/statistics/experimental/internals/BlockwiseDetails.h new file mode 100644 index 00000000000..546eaa1f1b2 --- /dev/null +++ b/src/shogun/statistics/experimental/internals/BlockwiseDetails.h @@ -0,0 +1,51 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +#ifndef BLOCK_WISE_DETAILS_H__ +#define BLOCK_WISE_DETAILS_H__ + +namespace shogun +{ + +namespace internal +{ + +class BlockwiseDetails +{ + friend class DataFetcher; + friend class StreamingDataFetcher; + friend class DataManager; +public: + BlockwiseDetails(); + BlockwiseDetails& with_blocksize(index_t blocksize); + BlockwiseDetails& with_num_blocks_per_burst(index_t num_blocks_per_burst); +private: + index_t m_blocksize; + index_t m_num_blocks_per_burst; + index_t m_max_num_samples_per_burst; + // the following will be set by data fetchers + index_t m_next_block_index; + index_t m_total_num_blocks; +}; + +} + +} +#endif // BLOCK_WISE_DETAILS_H__ diff --git a/src/shogun/statistics/experimental/internals/ComputationManager.cpp b/src/shogun/statistics/experimental/internals/ComputationManager.cpp new file mode 100644 index 00000000000..099cb2905f9 --- /dev/null +++ b/src/shogun/statistics/experimental/internals/ComputationManager.cpp @@ -0,0 +1,94 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +using namespace shogun; +using namespace internal; + +ComputationManager::ComputationManager() +{ +} + +ComputationManager::~ComputationManager() +{ +} + +void ComputationManager::num_data(index_t n) +{ + kernel_matrices.resize(n); +} + +SGMatrix& ComputationManager::data(index_t i) +{ + return kernel_matrices[i]; +} + +void ComputationManager::enqueue_job(std::function)> job) +{ + jobq.push(job); +} + +void ComputationManager::compute() +{ + while (!jobq.empty()) + { + std::vector results; + if (gpu) + { + // TODO results = operation.compute_using_gpu(kernel_matrices); + } + else + { + results.resize(kernel_matrices.size()); +#pragma omp parallel for + for (auto i = 0; i < kernel_matrices.size(); ++i) + { + const auto& operation = jobq.front(); + results[i] = operation(kernel_matrices[i]); + } + } + resultq.push(results); + jobq.pop(); + } +} + +std::vector ComputationManager::next_result() +{ + std::vector result; + if (!resultq.empty()) + { + result = resultq.front(); + resultq.pop(); + } + return result; +} + +ComputationManager& ComputationManager::use_gpu() +{ + gpu = true; + return *this; +} + +ComputationManager& ComputationManager::use_cpu() +{ + gpu = false; + return *this; +} diff --git a/src/shogun/statistics/experimental/internals/ComputationManager.h b/src/shogun/statistics/experimental/internals/ComputationManager.h new file mode 100644 index 00000000000..263641b01a2 --- /dev/null +++ b/src/shogun/statistics/experimental/internals/ComputationManager.h @@ -0,0 +1,63 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef COMPUTATION_MANAGER_H__ +#define COMPUTATION_MANAGER_H__ + +#include +#include +#include +#include + +namespace shogun +{ + +template class SGMatrix; + +namespace internal +{ + +class ComputationManager +{ +public: + ComputationManager(); + ~ComputationManager(); + + void num_data(index_t n); + SGMatrix& data(index_t i); + + void enqueue_job(std::function)> job); + + void compute(); + + std::vector next_result(); + + ComputationManager& use_cpu(); + ComputationManager& use_gpu(); +private: + bool gpu; + std::vector> kernel_matrices; + std::queue)>> jobq; + std::queue> resultq; +}; + +} + +} + +#endif // COMPUTATION_MANAGER_H__ diff --git a/src/shogun/statistics/experimental/internals/DataFetcher.cpp b/src/shogun/statistics/experimental/internals/DataFetcher.cpp new file mode 100644 index 00000000000..ef7d0bb8930 --- /dev/null +++ b/src/shogun/statistics/experimental/internals/DataFetcher.cpp @@ -0,0 +1,100 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + + +using namespace shogun; +using namespace internal; + +DataFetcher::DataFetcher() : m_num_samples(0) +{ +} + +DataFetcher::DataFetcher(CFeatures* samples) +{ + SG_REF(samples); + m_samples = std::shared_ptr(samples, [](CFeatures* ptr) { SG_UNREF(ptr); }); + m_num_samples = m_samples->get_num_vectors(); +} + +DataFetcher::~DataFetcher() +{ +} + +const char* DataFetcher::get_name() const +{ + return "DataFetcher"; +} + +void DataFetcher::start() +{ + if (m_block_details.m_blocksize == 0) + { + m_block_details.with_blocksize(m_num_samples); + } + m_block_details.m_total_num_blocks = m_num_samples / m_block_details.m_blocksize; + reset(); +} + +std::shared_ptr DataFetcher::next() +{ + auto num_more_samples = m_num_samples - m_block_details.m_next_block_index * m_block_details.m_blocksize; + if (num_more_samples > 0) + { + auto num_samples_this_burst = m_block_details.m_max_num_samples_per_burst; + if (num_samples_this_burst > num_more_samples) + { + num_samples_this_burst = num_more_samples; + } + if (num_samples_this_burst < m_num_samples) + { + m_samples->remove_subset(); + SGVector inds(num_samples_this_burst); + std::iota(inds.vector, inds.vector + inds.vlen, m_block_details.m_next_block_index * m_block_details.m_blocksize); + m_samples->add_subset(inds); + } + + m_block_details.m_next_block_index += m_block_details.m_num_blocks_per_burst; + return m_samples; + } + return nullptr; +} + +void DataFetcher::reset() +{ + m_block_details.m_next_block_index = 0; + m_samples->remove_all_subsets(); +} + +void DataFetcher::end() +{ + m_samples->remove_all_subsets(); +} + +const index_t DataFetcher::get_num_samples() const +{ + return m_num_samples; +} + +BlockwiseDetails& DataFetcher::fetch_blockwise() +{ + return m_block_details; +} diff --git a/src/shogun/statistics/experimental/internals/DataFetcher.h 
b/src/shogun/statistics/experimental/internals/DataFetcher.h new file mode 100644 index 00000000000..7019c795400 --- /dev/null +++ b/src/shogun/statistics/experimental/internals/DataFetcher.h @@ -0,0 +1,61 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#ifndef DATA_FETCHER_H__ +#define DATA_FETCHER_H__ + +namespace shogun +{ + +class CFeatures; + +namespace internal +{ + +class DataManager; + +class DataFetcher +{ + friend class DataManager; + friend class InitPerFeature; +public: + DataFetcher(CFeatures* samples); + virtual ~DataFetcher(); + virtual void start(); + virtual std::shared_ptr next(); + virtual void reset(); + virtual void end(); + const index_t get_num_samples() const; + BlockwiseDetails& fetch_blockwise(); + virtual const char* get_name() const; +protected: + DataFetcher(); + BlockwiseDetails m_block_details; + index_t m_num_samples; +private: + std::shared_ptr m_samples; +}; + +} + +} +#endif // DATA_FETCHER_H__ diff --git a/src/shogun/statistics/experimental/internals/DataFetcherFactory.cpp b/src/shogun/statistics/experimental/internals/DataFetcherFactory.cpp new file mode 100644 index 00000000000..9b9532fa1be --- /dev/null +++ b/src/shogun/statistics/experimental/internals/DataFetcherFactory.cpp @@ -0,0 +1,37 @@ +/* + * 
Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +using namespace shogun; +using namespace internal; + +DataFetcher* DataFetcherFactory::get_instance(CFeatures* feats) +{ + EFeatureClass fclass = feats->get_feature_class(); + if (fclass == C_STREAMING_DENSE || fclass == C_STREAMING_SPARSE || fclass == C_STREAMING_STRING) + { + return new StreamingDataFetcher(static_cast(feats)); + } + return new DataFetcher(feats); +} + diff --git a/src/shogun/statistics/experimental/internals/DataFetcherFactory.h b/src/shogun/statistics/experimental/internals/DataFetcherFactory.h new file mode 100644 index 00000000000..091e6468cee --- /dev/null +++ b/src/shogun/statistics/experimental/internals/DataFetcherFactory.h @@ -0,0 +1,48 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#ifndef DATA_FETCHER_FACTORY_H__ +#define DATA_FETCHER_FACTORY_H__ + +namespace shogun +{ + +class CFeatures; + +namespace internal +{ + +class DataFetcher; + +struct DataFetcherFactory +{ + DataFetcherFactory() = delete; + DataFetcherFactory(const DataFetcherFactory& other) = delete; + DataFetcherFactory& operator=(const DataFetcherFactory& other) = delete; + ~DataFetcherFactory() = delete; + + static DataFetcher* get_instance(CFeatures* feats); +}; + +} + +} +#endif // DATA_FETCHER_FACTORY_H__ diff --git a/src/shogun/statistics/experimental/internals/DataManager.cpp b/src/shogun/statistics/experimental/internals/DataManager.cpp new file mode 100644 index 00000000000..70cfb587247 --- /dev/null +++ b/src/shogun/statistics/experimental/internals/DataManager.cpp @@ -0,0 +1,207 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include // TODO remove +#include +#include +#include +#include +#include + +using namespace shogun; +using namespace internal; + +DataManager::DataManager(index_t num_distributions) +{ + fetchers.resize(num_distributions); + std::fill(fetchers.begin(), fetchers.end(), nullptr); +} + +DataManager::~DataManager() +{ +} + +index_t DataManager::get_num_samples() const +{ + index_t n = 0; + using fetcher_type = const std::unique_ptr; + if (std::any_of(fetchers.begin(), fetchers.end(), [](fetcher_type& f) { return f->m_num_samples == 0; })) + { + std::cout << "number of samples from all the distributions are not set" << std::endl; + } + else + { + std::for_each(fetchers.begin(), fetchers.end(), [&n](fetcher_type& f) { n+= f->m_num_samples; }); + } + return n; +} + +index_t DataManager::get_min_blocksize() const +{ + index_t min_blocksize = 0; + using fetcher_type = const std::unique_ptr; + if (std::any_of(fetchers.begin(), fetchers.end(), [](fetcher_type& f) { return f->m_num_samples == 0; })) + { + std::cout << "number of samples from all the distributions are not set" << std::endl; + } + else + { + index_t divisor = 0; + std::function gcd = [&gcd](index_t m, index_t n) + { + return n == 0 ? 
m : gcd(n, m % n); + }; + for (auto i = 0; i < fetchers.size(); ++i) + { + divisor = gcd(divisor, fetchers[i]->m_num_samples); + } + min_blocksize = get_num_samples() / divisor; + } + std::cout << "min blocksize is " << min_blocksize << std::endl; + return min_blocksize; +} + +void DataManager::set_blocksize(index_t blocksize) +{ + auto n = get_num_samples(); + + ASSERT(n > 0); + ASSERT(blocksize > 0 && blocksize <= n); + ASSERT(n % blocksize == 0); + + for (auto i = 0; i < fetchers.size(); ++i) + { + index_t m = fetchers[i]->m_num_samples; + ASSERT((blocksize * m) % n == 0); + fetchers[i]->fetch_blockwise().with_blocksize(blocksize * m / n); + std::cout << "block[" << i << "].size = " << blocksize * m / n << std::endl; + } +} + +void DataManager::set_num_blocks_per_burst(index_t num_blocks_per_burst) +{ + ASSERT(num_blocks_per_burst > 0); + + index_t blocksize = 0; + using fetcher_type = std::unique_ptr; + std::for_each(fetchers.begin(), fetchers.end(), [&blocksize](fetcher_type& f) + { + blocksize += f->m_block_details.m_blocksize; + }); + ASSERT(blocksize > 0); + + index_t max_num_blocks_per_burst = get_num_samples() / blocksize; + ASSERT(num_blocks_per_burst <= max_num_blocks_per_burst); + + for (auto i = 0; i < fetchers.size(); ++i) + { + fetchers[i]->fetch_blockwise().with_num_blocks_per_burst(num_blocks_per_burst); + } +} + +InitPerFeature DataManager::samples_at(index_t i) +{ + std::cout << "DataManager::samples_at()" << std::endl; + ASSERT(i < fetchers.size()); + return InitPerFeature(fetchers[i]); +} + +CFeatures* DataManager::samples_at(index_t i) const +{ + std::cout << "DataManager::samples_at() const" << std::endl; + ASSERT(i < fetchers.size()); + return fetchers[i]->m_samples.get(); +} + +index_t& DataManager::num_samples_at(index_t i) +{ + std::cout << "DataManager::num_samples_at()" << std::endl; + ASSERT(i < fetchers.size()); + return fetchers[i]->m_num_samples; +} + +const index_t DataManager::num_samples_at(index_t i) const +{ + std::cout << 
"DataManager::num_samples_at() const" << std::endl; + ASSERT(i < fetchers.size()); + return fetchers[i]->m_num_samples; +} + +const index_t DataManager::blocksize_at(index_t i) const +{ + std::cout << "DataManager::blocksize_at() const" << std::endl; + ASSERT(i < fetchers.size()); + return fetchers[i]->m_block_details.m_blocksize; +} + +void DataManager::start() +{ + using fetcher_type = std::unique_ptr; + std::for_each(fetchers.begin(), fetchers.end(), [](fetcher_type& f) { f->start(); }); +} + +NextSamples DataManager::next() +{ + std::cout << "DataManager::next()" << std::endl; + NextSamples next_samples(fetchers.size()); + // fetch a number of blocks (per burst) from each distribution + for (auto i = 0; i < fetchers.size(); ++i) + { + auto feats = fetchers[i]->next(); + if (feats != nullptr) + { + auto blocksize = fetchers[i]->m_block_details.m_blocksize; + auto num_blocks_curr_burst = feats->get_num_vectors() / blocksize; + if (next_samples.m_num_blocks == 0) + { + next_samples.m_num_blocks = num_blocks_curr_burst; + } + else + { + ASSERT(next_samples.m_num_blocks == num_blocks_curr_burst); + } + + // next samples are gonna hold one feats obj per block for this burst + next_samples[i].resize(num_blocks_curr_burst); + SGVector inds(blocksize); + std::iota(inds.vector, inds.vector + inds.vlen, 0); + for (auto j = 0; j < num_blocks_curr_burst; ++j) + { + // subset each block and clone it separately + feats->add_subset(inds); + auto block = static_cast(feats->clone()); + next_samples[i][j] = std::shared_ptr(block, [](CFeatures* ptr) { SG_UNREF(ptr); }); + feats->remove_subset(); + std::for_each(inds.vector, inds.vector + inds.vlen, [&blocksize](index_t& val) { val += blocksize; }); + } + } + } + return next_samples; +} + +void DataManager::end() +{ + using fetcher_type = std::unique_ptr; + std::for_each(fetchers.begin(), fetchers.end(), [](fetcher_type& f) { f->end(); }); +} + +void DataManager::reset() +{ + using fetcher_type = std::unique_ptr; + 
std::for_each(fetchers.begin(), fetchers.end(), [](fetcher_type& f) { f->reset(); }); +} diff --git a/src/shogun/statistics/experimental/internals/DataManager.h b/src/shogun/statistics/experimental/internals/DataManager.h new file mode 100644 index 00000000000..fe22887230b --- /dev/null +++ b/src/shogun/statistics/experimental/internals/DataManager.h @@ -0,0 +1,72 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef DATA_MANAGER_H__ +#define DATA_MANAGER_H__ + +#include +#include +#include +#include + +namespace shogun +{ + +class CFeatures; + +namespace internal +{ + +class DataFetcher; +class NextSamples; + +class DataManager +{ +public: + DataManager(index_t num_distributions); + DataManager(const DataManager& other) = delete; + DataManager& operator=(const DataManager& other) = delete; + ~DataManager(); + + void set_blocksize(index_t blocksize); + void set_num_blocks_per_burst(index_t num_blocks_per_burst); + + InitPerFeature samples_at(index_t i); + CFeatures* samples_at(index_t i) const; + + index_t& num_samples_at(index_t i); + const index_t num_samples_at(index_t i) const; + + const index_t blocksize_at(index_t i) const; + + index_t get_num_samples() const; + index_t get_min_blocksize() const; + + void start(); + NextSamples next(); + void end(); + void reset(); +private: + std::vector> fetchers; +}; + +} + +} + +#endif // DATA_MANAGER_H__ diff --git a/src/shogun/statistics/experimental/internals/Features.h b/src/shogun/statistics/experimental/internals/Features.h new file mode 100644 index 00000000000..c85c1713d35 --- /dev/null +++ b/src/shogun/statistics/experimental/internals/Features.h @@ -0,0 +1,56 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2014 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef __FEATURES_H_ +#define __FEATURES_H_ + +#include +#include +#include +#include + +namespace shogun +{ + +namespace internal +{ + +// feat traits - required for proper typecasting +template +struct feats_traits +{ + using type = CFeatures; +}; + +template <> +struct feats_traits +{ + using type = CDenseFeatures; +}; + +template <> +struct feats_traits +{ + using type = CStreamingDenseFeatures; +}; + +} + +} + +#endif // __FEATURES_H_ diff --git a/src/shogun/statistics/experimental/internals/InitPerFeature.cpp b/src/shogun/statistics/experimental/internals/InitPerFeature.cpp new file mode 100644 index 00000000000..eec65684dab --- /dev/null +++ b/src/shogun/statistics/experimental/internals/InitPerFeature.cpp @@ -0,0 +1,49 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2014 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include // TODO remove +#include +#include +#include +#include + +using namespace shogun; +using namespace internal; + +InitPerFeature::InitPerFeature(std::unique_ptr& fetcher) : m_fetcher(fetcher) +{ + std::cout << "InitPerFeature::Constructor()" << std::endl; +} + +InitPerFeature::~InitPerFeature() +{ + std::cout << "InitPerFeature::Destructor()" << std::endl; +} + +InitPerFeature& InitPerFeature::operator=(CFeatures* feats) +{ + std::cout << "InitPerFeature::Assignment() : setting the fetcher" << std::endl; + m_fetcher = std::unique_ptr(DataFetcherFactory::get_instance(feats)); + return *this; +} + +InitPerFeature::operator const CFeatures*() const +{ + std::cout << "InitPerFeature::cast() : casting to feature type" << std::endl; + return m_fetcher->m_samples.get(); +} diff --git a/src/shogun/statistics/experimental/internals/InitPerFeature.h b/src/shogun/statistics/experimental/internals/InitPerFeature.h new file mode 100644 index 00000000000..ee3e45d7471 --- /dev/null +++ b/src/shogun/statistics/experimental/internals/InitPerFeature.h @@ -0,0 +1,54 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2016 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+
+#ifndef INIT_PER_FEATURE_H__
+#define INIT_PER_FEATURE_H__
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+
+namespace shogun
+{
+
+class CFeatures;
+
+namespace internal
+{
+
+class DataFetcher;
+class DataManager;
+
+/**
+ * Proxy around one fetcher slot. Assigning a CFeatures* to the proxy installs
+ * a fetcher for those features in the slot; converting the proxy to
+ * const CFeatures* reads the samples of the installed fetcher back.
+ * The constructor is private, so only the friend class DataManager can
+ * create instances.
+ */
+class InitPerFeature
+{
+    friend class DataManager;
+private:
+    // Stores a reference to the given slot, not a copy.
+    explicit InitPerFeature(std::unique_ptr& fetcher);
+public:
+    ~InitPerFeature();
+    // Replaces the fetcher in the slot with one created for feats.
+    InitPerFeature& operator=(CFeatures* feats);
+    // Gives read-only access to the samples of the fetcher in the slot.
+    operator const CFeatures*() const;
+private:
+    // Reference member: the referenced slot must outlive this proxy.
+    std::unique_ptr& m_fetcher;
+};
+
+}
+
+}
+
+#endif // INIT_PER_FEATURE_H__
+
diff --git a/src/shogun/statistics/experimental/internals/InitPerKernel.cpp b/src/shogun/statistics/experimental/internals/InitPerKernel.cpp
new file mode 100644
index 00000000000..3c3ffca2e09
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/InitPerKernel.cpp
@@ -0,0 +1,43 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2016 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+
+using namespace shogun;
+using namespace internal;
+
+/**
+ * Binds the proxy to a kernel slot. Only a reference to the slot is kept.
+ *
+ * @param kernel the slot this proxy reads from and writes to
+ */
+InitPerKernel::InitPerKernel(std::shared_ptr& kernel) : m_kernel(kernel)
+{
+}
+
+InitPerKernel::~InitPerKernel()
+{
+}
+
+/**
+ * Stores the given kernel in the bound slot.
+ *
+ * @param kernel the kernel to store
+ * @return this proxy
+ */
+InitPerKernel& InitPerKernel::operator=(CKernel* kernel)
+{
+    // Take a reference for the slot; the custom deleter gives it back with
+    // SG_UNREF when the shared_ptr releases the kernel.
+    SG_REF(kernel);
+    m_kernel = std::shared_ptr(kernel, [](CKernel* ptr) { SG_UNREF(ptr); });
+    return *this;
+}
+
+/**
+ * @return the raw pointer currently held in the bound slot
+ */
+InitPerKernel::operator CKernel*() const
+{
+    return m_kernel.get();
+}
diff --git a/src/shogun/statistics/experimental/internals/InitPerKernel.h b/src/shogun/statistics/experimental/internals/InitPerKernel.h
new file mode 100644
index 00000000000..cbedd68ac95
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/InitPerKernel.h
@@ -0,0 +1,50 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2016 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef INIT_PER_KERNEL_H__
+#define INIT_PER_KERNEL_H__
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+
+namespace shogun
+{
+
+class CKernel;
+
+namespace internal
+{
+
+/**
+ * Proxy around one kernel slot. Assigning a CKernel* to the proxy stores the
+ * kernel in the slot; converting the proxy to CKernel* reads it back.
+ * The constructor is private, so only the friend class KernelManager can
+ * create instances.
+ */
+class InitPerKernel
+{
+    friend class KernelManager;
+private:
+    // Stores a reference to the given slot, not a copy.
+    explicit InitPerKernel(std::shared_ptr& kernel);
+public:
+    ~InitPerKernel();
+    // Stores kernel in the slot.
+    InitPerKernel& operator=(CKernel* kernel);
+    // Returns the raw pointer held in the slot.
+    operator CKernel*() const;
+private:
+    // Reference member: the referenced slot must outlive this proxy.
+    std::shared_ptr& m_kernel;
+};
+
+}
+
+}
+
+#endif // INIT_PER_KERNEL_H__
diff --git a/src/shogun/statistics/experimental/internals/KernelManager.cpp b/src/shogun/statistics/experimental/internals/KernelManager.cpp
new file mode 100644
index 00000000000..d843c8fc254
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/KernelManager.cpp
@@ -0,0 +1,78 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2016 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace shogun;
+using namespace internal;
+
+/**
+ * Creates a manager with num_kernels empty slots, both for the kernels
+ * themselves and for their precomputed counterparts.
+ *
+ * @param num_kernels number of kernel slots to manage
+ */
+KernelManager::KernelManager(index_t num_kernels)
+{
+    // resize() value-initializes every new element, i.e. each slot starts out
+    // as an empty (null) shared_ptr.
+    m_kernels.resize(num_kernels);
+    m_precomputed_kernels.resize(num_kernels);
+}
+
+KernelManager::~KernelManager()
+{
+}
+
+/**
+ * Mutable access to slot i.
+ *
+ * @param i index of the slot, must satisfy 0 <= i < number of kernels
+ * @return a proxy through which the kernel in slot i can be assigned
+ */
+InitPerKernel KernelManager::kernel_at(index_t i)
+{
+    std::cout << "KernelManager::kernel_at() : setting the kernel " << i << std::endl;
+    ASSERT(i >= 0 && i < static_cast<index_t>(m_kernels.size()));
+    return InitPerKernel(m_kernels[i]);
+}
+
+/**
+ * Read access to slot i.
+ *
+ * @param i index of the slot, must satisfy 0 <= i < number of kernels
+ * @return the precomputed kernel for slot i if there is one, otherwise the
+ * kernel that was assigned to the slot (nullptr if none was assigned)
+ */
+CKernel* KernelManager::kernel_at(index_t i) const
+{
+    std::cout << "KernelManager::kernel_at() : getting the kernel " << i << std::endl;
+    ASSERT(i >= 0 && i < static_cast<index_t>(m_kernels.size()));
+    if (m_precomputed_kernels[i] == nullptr)
+    {
+        return m_kernels[i].get();
+    }
+    return m_precomputed_kernels[i].get();
+}
+
+/**
+ * Builds a CCustomKernel from the kernel in slot i, unless that kernel
+ * already is of type K_CUSTOM. Afterwards kernel_at(i) const returns the
+ * precomputed kernel.
+ *
+ * @param i index of the slot, must satisfy 0 <= i < number of kernels; a
+ * kernel must have been assigned to the slot
+ */
+void KernelManager::precompute_kernel_at(index_t i)
+{
+    std::cout << "KernelManager::precompute_kernel_at() : precomputing the kernel " << i << std::endl;
+    ASSERT(i >= 0 && i < static_cast<index_t>(m_kernels.size()));
+    auto kernel = m_kernels[i].get();
+    // Slots start out empty; calling get_kernel_type() on a null pointer
+    // would be undefined behaviour.
+    ASSERT(kernel != nullptr);
+    if (kernel->get_kernel_type() != K_CUSTOM)
+    {
+        // TODO give option to use different policies to precompute the kernel matrix
+        // this one here is default setting : use shogun's pthread parallelism to compute
+        // the kernel matrix.
+        auto precomputed = new CCustomKernel(kernel);
+        // Hold a counted reference and release it with SG_UNREF, as
+        // InitPerKernel does. With a plain `delete` deleter the object sits
+        // at reference count 0, so any SG_REF/SG_UNREF pair applied to the
+        // pointer returned by kernel_at() would destroy it a first time and
+        // the shared_ptr would destroy it again.
+        SG_REF(precomputed);
+        m_precomputed_kernels[i].reset(precomputed, [](CCustomKernel* ptr) { SG_UNREF(ptr); });
+    }
+}
+
+/**
+ * Drops the precomputed kernel of slot i, so that kernel_at(i) const returns
+ * the originally assigned kernel again.
+ *
+ * @param i index of the slot, must satisfy 0 <= i < number of kernels
+ */
+void KernelManager::restore_kernel_at(index_t i)
+{
+    std::cout << "KernelManager::restore_kernel_at() : restoring the kernel " << i << std::endl;
+    ASSERT(i >= 0 && i < static_cast<index_t>(m_kernels.size()));
+    m_precomputed_kernels[i] = nullptr;
+}
diff --git a/src/shogun/statistics/experimental/internals/KernelManager.h b/src/shogun/statistics/experimental/internals/KernelManager.h
new file mode 100644
index 00000000000..f445cc1f0da
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/KernelManager.h
@@ -0,0 +1,56 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2016 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef KERNEL_MANAGER_H__
+#define KERNEL_MANAGER_H__
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+#include
+#include
+
+namespace shogun
+{
+
+class CKernel;
+class CCustomKernel;
+
+namespace internal
+{
+
+/**
+ * Keeps a fixed number of kernel slots and, per slot, an optional
+ * precomputed version of the kernel stored there.
+ */
+class KernelManager
+{
+public:
+    // Creates num_kernels empty slots.
+    KernelManager(index_t num_kernels);
+    ~KernelManager();
+
+    // Returns a proxy through which the kernel in slot i can be assigned.
+    InitPerKernel kernel_at(index_t i);
+    // Returns the precomputed kernel of slot i if one exists, otherwise the
+    // kernel assigned to the slot.
+    CKernel* kernel_at(index_t i) const;
+
+    // Builds a CCustomKernel from the kernel in slot i unless that kernel is
+    // already of type K_CUSTOM.
+    void precompute_kernel_at(index_t i);
+    // Drops the precomputed kernel of slot i.
+    void restore_kernel_at(index_t i);
+private:
+    std::vector> m_kernels;
+    std::vector> m_precomputed_kernels;
+};
+
+}
+
+}
+
+#endif // KERNEL_MANAGER_H__
diff --git a/src/shogun/statistics/experimental/internals/NextSamples.cpp b/src/shogun/statistics/experimental/internals/NextSamples.cpp
new file mode 100644
index 00000000000..15caec3db8c
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/NextSamples.cpp
@@ -0,0 +1,59 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2016 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+#include
+#include
+
+using namespace shogun;
+using namespace internal;
+
+/**
+ * Creates an instance with one (initially empty) vector of blocks per
+ * distribution and a block count of 0.
+ *
+ * @param num_distributions number of distributions samples are fetched from
+ */
+NextSamples::NextSamples(index_t num_distributions) : m_num_blocks(0)
+{
+    next_samples.resize(num_distributions);
+}
+
+NextSamples::~NextSamples()
+{
+}
+
+/**
+ * @param i index of the distribution, must satisfy
+ * 0 <= i < number of distributions (checked with REQUIRE)
+ * @return the blocks fetched from distribution i
+ */
+std::vector>& NextSamples::operator[](index_t i)
+{
+//  std::cout << "NextSamples::accessing fetched sample at " << i << " using non-const access operator" << std::endl;
+    // Convert once to index_t: the message uses %d, so a size_t argument
+    // would not match the format, and size() - 1 would wrap around for an
+    // empty vector.
+    const index_t num_distributions = static_cast<index_t>(next_samples.size());
+    REQUIRE(i >= 0 && i < num_distributions, "index (%d) must be between [0,%d]!\n", i, num_distributions - 1);
+    return next_samples[i];
+}
+
+/**
+ * Const version of the access operator above.
+ */
+const std::vector>& NextSamples::operator[](index_t i) const
+{
+//  std::cout << "NextSamples::accessing fetched sample at " << i << " using const access operator" << std::endl;
+    const index_t num_distributions = static_cast<index_t>(next_samples.size());
+    REQUIRE(i >= 0 && i < num_distributions, "index (%d) must be between [0,%d]!\n", i, num_distributions - 1);
+    return next_samples[i];
+}
+
+/**
+ * @return the stored number of blocks
+ */
+const index_t NextSamples::num_blocks() const
+{
+    return m_num_blocks;
+}
+
+/**
+ * @return true if the vector of blocks of at least one distribution is empty
+ */
+const bool NextSamples::empty() const
+{
+    using type = const std::vector>;
+    return std::any_of(next_samples.cbegin(), next_samples.cend(), [](type& f) { return f.size() == 0; });
+}
diff --git a/src/shogun/statistics/experimental/internals/NextSamples.h b/src/shogun/statistics/experimental/internals/NextSamples.h
new file mode 100644
index 00000000000..f710e2872bc
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/NextSamples.h
@@ -0,0 +1,101 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2016 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef NEXT_SAMPLES_H__
+#define NEXT_SAMPLES_H__
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+#include
+
+namespace shogun
+{
+
+class CFeatures;
+
+namespace internal
+{
+
+/**
+ * @brief class NextSamples is the return type of the next() call in
+ * DataManager. If there are no more samples (from any one of the
+ * distributions), an empty instance of NextSamples is supposed to be
+ * returned. The caller can verify this by calling the empty() method.
+ * Otherwise, operator[] with the appropriate index gives the samples from
+ * that distribution. If an inappropriate index is provided, e.g. index 2 for
+ * a two-sample test, the REQUIRE check in operator[] fails at runtime.
+ *
+ * Example usage:
+ * @code
+ *     NextSamples next_samples(2);
+ *     next_samples[0] = fetchers[0].next();
+ *     next_samples[1] = fetchers[1].next();
+ *     if (!next_samples.empty())
+ *     {
+ *         auto first = next_samples[0];
+ *         auto second = next_samples[1];
+ *         auto third = next_samples[2]; // Runtime Error
+ *     }
+ * @endcode
+ */
+class NextSamples
+{
+    friend class DataManager;
+private:
+    // Private: instances are created by the friend class DataManager.
+    NextSamples(index_t num_distributions);
+public:
+    ~NextSamples();
+    /**
+     * Gives access to the blocks (of samples) fetched in the current burst
+     * from a specified distribution.
+     *
+     * @param i determines from which distribution the samples are taken
+     * @return a vector of fetched blocks of features from the specified distribution
+     */
+    std::vector>& operator[](index_t i);
+
+    /**
+     * Const version of the above. This is called when a const instance of NextSamples
+     * is returned.
+     */
+    const std::vector>& operator[](index_t i) const;
+
+    /**
+     * @return number of blocks fetched from each of the distributions. It is assumed
+     * that this number is the same for all the distributions.
+     */
+    const index_t num_blocks() const;
+
+    /**
+     * This returns true if any of the distributions fetched 0 blocks (checked from the
+     * size of the vector for that distribution).
+     *
+     * @return whether at least one of the distributions contributed no blocks
+     * of samples to this instance
+     */
+    const bool empty() const;
+private:
+    index_t m_num_blocks;
+    // One vector of fetched blocks per distribution.
+    std::vector>> next_samples;
+};
+
+}
+
+}
+
+#endif // NEXT_SAMPLES_H__
diff --git a/src/shogun/statistics/experimental/internals/StreamingDataFetcher.cpp b/src/shogun/statistics/experimental/internals/StreamingDataFetcher.cpp
new file mode 100644
index 00000000000..5f2cb693ac4
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/StreamingDataFetcher.cpp
@@ -0,0 +1,97 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2016 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+#include
+#include
+
+
+using namespace shogun;
+using namespace internal;
+
+/**
+ * Takes a reference on the streaming features and keeps them in a shared_ptr
+ * whose deleter releases that reference again. The number of samples starts
+ * at 0 and has to be set with set_num_samples() before start() is called.
+ *
+ * @param samples the streaming features to fetch from
+ */
+StreamingDataFetcher::StreamingDataFetcher(CStreamingFeatures* samples) : DataFetcher(), parser_running(false)
+{
+    SG_REF(samples);
+    m_samples = std::shared_ptr(samples, [](CFeatures* ptr) { SG_UNREF(ptr); });
+    m_num_samples = 0;
+}
+
+// Ends the parser if it is still running.
+StreamingDataFetcher::~StreamingDataFetcher()
+{
+    end();
+}
+
+const char* StreamingDataFetcher::get_name() const
+{
+    return "StreamingDataFetcher";
+}
+
+// Sets the total number of samples to be streamed.
+void StreamingDataFetcher::set_num_samples(index_t num_samples)
+{
+    m_num_samples = num_samples;
+}
+
+/**
+ * Prepares a fetching pass: asserts that the number of samples is non-zero,
+ * falls back to a single block covering all samples when no blocksize is
+ * set, recomputes the total number of blocks, rewinds the block index and
+ * starts the parser unless it is already running.
+ */
+void StreamingDataFetcher::start()
+{
+    ASSERT(m_num_samples);
+    if (m_block_details.m_blocksize == 0)
+    {
+        m_block_details.with_blocksize(m_num_samples);
+    }
+    m_block_details.m_total_num_blocks = m_num_samples / m_block_details.m_blocksize;
+    m_block_details.m_next_block_index = 0;
+    if (!parser_running)
+    {
+        m_samples->start_parser();
+        parser_running = true;
+    }
+}
+
+/**
+ * Streams the next burst of samples.
+ *
+ * @return the streamed features, wrapped in a shared_ptr that releases them
+ * with SG_UNREF, or nullptr when all m_num_samples samples were consumed
+ */
+std::shared_ptr StreamingDataFetcher::next()
+{
+    // samples that are still to be streamed in this pass
+    auto num_more_samples = m_num_samples - m_block_details.m_next_block_index * m_block_details.m_blocksize;
+    if (num_more_samples > 0)
+    {
+        // the last burst may be smaller than the configured burst size
+        auto num_samples_this_burst = m_block_details.m_max_num_samples_per_burst;
+        if (num_samples_this_burst > num_more_samples)
+        {
+            num_samples_this_burst = num_more_samples;
+        }
+
+        CFeatures* streamed = m_samples->get_streamed_features(num_samples_this_burst);
+        m_block_details.m_next_block_index += m_block_details.m_num_blocks_per_burst;
+        return std::shared_ptr(streamed, [](CFeatures* ptr) { SG_UNREF(ptr); });
+    }
+    return nullptr;
+}
+
+// Rewinds the block index and resets the underlying stream.
+void StreamingDataFetcher::reset()
+{
+    m_block_details.m_next_block_index = 0;
+    m_samples->reset_stream();
+}
+
+// Ends the parser if it is running; calling it again is a no-op.
+void StreamingDataFetcher::end()
+{
+    if (parser_running)
+    {
+        m_samples->end_parser();
+        parser_running = false;
+    }
+}
diff --git a/src/shogun/statistics/experimental/internals/StreamingDataFetcher.h
b/src/shogun/statistics/experimental/internals/StreamingDataFetcher.h
new file mode 100644
index 00000000000..4e02cfc6396
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/StreamingDataFetcher.h
@@ -0,0 +1,64 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2016 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef STREAMING_DATA_FETCHER_H__
+#define STREAMING_DATA_FETCHER_H__
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+#include
+#include
+
+namespace shogun
+{
+
+class CStreamingFeatures;
+
+namespace internal
+{
+
+class DataManager;
+
+/**
+ * DataFetcher implementation that pulls its samples from a
+ * CStreamingFeatures source, one burst per next() call. The number of
+ * samples to stream must be set with set_num_samples() before start().
+ */
+class StreamingDataFetcher : public DataFetcher
+{
+    friend class DataManager;
+public:
+    StreamingDataFetcher(CStreamingFeatures* samples);
+    virtual ~StreamingDataFetcher() override; // ends the parser if running
+    virtual void start() override; // sets up the block details, starts the parser
+    virtual std::shared_ptr next() override; // next burst, nullptr when done
+    virtual void reset() override; // rewinds the block index and the stream
+    virtual void end() override; // ends the parser if running
+    void set_num_samples(index_t num_samples);
+    virtual const char* get_name() const override;
+private:
+    std::shared_ptr m_samples;
+    bool parser_running; // true between start_parser() and end_parser()
+};
+
+}
+
+}
+#endif // STREAMING_DATA_FETCHER_H__
diff --git a/src/shogun/statistics/experimental/internals/TestTypes.h b/src/shogun/statistics/experimental/internals/TestTypes.h
new file mode 100644
index 00000000000..a3bb282c181
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/TestTypes.h
@@ -0,0 +1,75 @@
+/*
+
* Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2016 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TEST_TYPES_H__
+#define TEST_TYPES_H__
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+
+namespace shogun
+{
+
+class CFeatures;
+
+namespace internal
+{
+
+struct TwoSampleTestPermutationPolicy;
+struct IndependenceTestPermutationPolicy;
+
+// Tag types carrying the number of feature objects (distributions) a test
+// works on as the compile-time constant num_feats.
+struct OneDistributionTest
+{
+    enum { num_feats = 1 };
+};
+
+struct TwoDistributionTest
+{
+    enum { num_feats = 2 };
+};
+
+struct ThreeDistributionTest
+{
+    enum { num_feats = 3 };
+};
+
+// Concrete test types: each adds the number of kernels it uses
+// (num_kernels), a return_type alias and, where declared, the permutation
+// policy it is associated with.
+struct GoodnessOfFitTest : OneDistributionTest
+{
+    enum { num_kernels = 1 };
+    using return_type = std::shared_ptr;
+};
+
+struct TwoSampleTest : TwoDistributionTest
+{
+    enum { num_kernels = 1 };
+    using permutation_policy = TwoSampleTestPermutationPolicy;
+    using return_type = std::shared_ptr;
+};
+
+struct IndependenceTest : TwoDistributionTest
+{
+    enum { num_kernels = 2 };
+    using permutation_policy = IndependenceTestPermutationPolicy;
+    using return_type = std::vector>;
+};
+
+}
+
+}
+
+#endif // TEST_TYPES_H__
diff --git a/src/shogun/statistics/experimental/internals/mmd/BiasedFull.cpp b/src/shogun/statistics/experimental/internals/mmd/BiasedFull.cpp
new file mode 100644
index 00000000000..265dad2510a
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/BiasedFull.cpp
@@ -0,0 +1,48 @@
+/*
+ * Restructuring
Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+#include
+#include
+
+using namespace shogun;
+using namespace internal;
+using namespace mmd;
+
+/**
+ * @param n number of samples from the first distribution, i.e. the size of
+ * the leading diagonal block of the kernel matrices passed to operator()
+ */
+BiasedFull::BiasedFull(index_t n) : n_x(n)
+{
+}
+
+/**
+ * Computes the biased quadratic-time MMD^2 estimate from a joint kernel
+ * matrix. The first n_x rows/columns belong to the first sample, the
+ * remaining n_y = num_rows - n_x rows/columns to the second one.
+ *
+ * @param km the joint kernel matrix
+ * @return sum(K_xx)/n_x^2 + sum(K_yy)/n_y^2 - 2*sum(K_yx)/(n_x*n_y)
+ */
+float64_t BiasedFull::operator()(SGMatrix km)
+{
+    using MatrixXt = const Eigen::MatrixXd;
+
+    Eigen::Map map(km.matrix, km.num_rows, km.num_cols);
+    index_t n_y = km.num_rows - n_x;
+
+    auto term_1 = map.block(0, 0, n_x, n_x).sum();
+    auto term_2 = map.block(n_x, n_x, n_y, n_y).sum();
+    auto term_3 = map.block(n_x, 0, n_y, n_x).sum();
+
+    // The biased estimate keeps the diagonal entries in the within-sample
+    // sums, so these are averages over all n_x^2 resp. n_y^2 entries. The
+    // n*(n-1) normalisation belongs to the unbiased estimate, which drops
+    // the diagonal.
+    auto statistic = term_1/n_x/n_x + term_2/n_y/n_y - 2*term_3/n_x/n_y;
+
+    return statistic;
+
+}
diff --git a/src/shogun/statistics/experimental/internals/mmd/BiasedFull.h b/src/shogun/statistics/experimental/internals/mmd/BiasedFull.h
new file mode 100644
index 00000000000..558f6aa61f9
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/BiasedFull.h
@@ -0,0 +1,53 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BIASED_FULL_H_
+#define BIASED_FULL_H_
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+
+namespace shogun
+{
+
+template class SGMatrix;
+template class CGPUMatrix;
+
+namespace internal
+{
+
+namespace mmd
+{
+
+/**
+ * Functor for the biased full MMD statistic. It works on a joint kernel
+ * matrix in which the first n_x rows/columns belong to the first sample and
+ * the remaining ones to the second sample.
+ */
+struct BiasedFull
+{
+    using return_type = float64_t;
+    // n is the number of samples from the first distribution.
+    BiasedFull(index_t n);
+
+    // Computes the statistic from the given joint kernel matrix.
+    return_type operator()(SGMatrix kernel_matrix);
+//  return_type operator()(CGPUMatrix kernel_matrix);
+
+    index_t n_x; // number of samples from the first distribution
+};
+
+}
+
+}
+
+}
+
+#endif // BIASED_FULL_H_
diff --git a/src/shogun/statistics/experimental/internals/mmd/FullDirect.cpp b/src/shogun/statistics/experimental/internals/mmd/FullDirect.cpp
new file mode 100644
index 00000000000..9c82de65f5a
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/FullDirect.cpp
@@ -0,0 +1,47 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+#include
+#include
+#include
+
+using namespace shogun;
+using namespace internal;
+using namespace mmd;
+
+/**
+ * Computes a variance estimate from a B x B matrix km. With H denoting km
+ * with its diagonal set to zero, the result is
+ *
+ *   2 * ( (sum(H)/(B(B-1)))^2 + sum(H.^2)/(B(B-1))
+ *         - 2 * (1/B) * sum_j (colsum_j(H)/(B-1))^2 )
+ *
+ * The diagonal of the caller's matrix is zeroed in place while the terms are
+ * computed and restored before returning.
+ *
+ * @param km the matrix to compute the estimate from
+ * @return the variance estimate
+ */
+float64_t FullDirect::operator()(SGMatrix km)
+{
+    Eigen::Map map(km.matrix, km.num_rows, km.num_cols);
+    index_t B = km.num_rows;
+
+    // keep a copy of the diagonal so that it can be put back afterwards
+    Eigen::VectorXd diag = map.diagonal();
+    map.diagonal().setZero();
+
+    auto term_1 = CMath::sq(map.array().sum()/B/(B-1));
+    auto term_2 = map.array().square().sum()/B/(B-1);
+    // Mean of the squared column means. Without the square() this term would
+    // be linear in the kernel values whereas term_1 and term_2 are quadratic,
+    // so the estimate would not scale like a variance.
+    auto term_3 = (map.colwise().sum()/(B-1)).array().square().sum()/B;
+
+    map.diagonal() = diag;
+
+    auto variance_estimate = 2*(term_1 + term_2 - 2 * term_3);
+
+    return variance_estimate;
+
+}
diff --git a/src/shogun/statistics/experimental/internals/mmd/FullDirect.h b/src/shogun/statistics/experimental/internals/mmd/FullDirect.h
new file mode 100644
index 00000000000..b21129c91d7
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/FullDirect.h
@@ -0,0 +1,49 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef FULL_DIRECT_H_
+#define FULL_DIRECT_H_
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+
+namespace shogun
+{
+
+template class SGMatrix;
+template class CGPUMatrix;
+
+namespace internal
+{
+
+namespace mmd
+{
+
+/**
+ * Stateless functor that computes a variance estimate directly from a
+ * square matrix, ignoring the diagonal entries.
+ */
+struct FullDirect
+{
+    using return_type = float64_t;
+    // Computes the estimate; the matrix is left unchanged on return.
+    return_type operator()(SGMatrix kernel_matrix);
+//  return_type operator()(CGPUMatrix kernel_matrix);
+};
+
+}
+
+}
+
+}
+
+#endif // FULL_DIRECT_H_
diff --git a/src/shogun/statistics/experimental/internals/mmd/UnbiasedFull.cpp b/src/shogun/statistics/experimental/internals/mmd/UnbiasedFull.cpp
new file mode 100644
index 00000000000..4241452cb9f
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/UnbiasedFull.cpp
@@ -0,0 +1,53 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+#include
+#include
+
+using namespace shogun;
+using namespace internal;
+using namespace mmd;
+
+/**
+ * @param n number of samples from the first distribution, i.e. the size of
+ * the leading diagonal block of the kernel matrices passed to operator()
+ */
+UnbiasedFull::UnbiasedFull(index_t n) : n_x(n)
+{
+}
+
+/**
+ * Computes the unbiased quadratic-time MMD^2 estimate from a joint kernel
+ * matrix. The first n_x rows/columns belong to the first sample, the
+ * remaining n_y = num_rows - n_x rows/columns to the second one.
+ *
+ * @param km the joint kernel matrix
+ * @return term_1/(n_x(n_x-1)) + term_2/(n_y(n_y-1)) - 2*term_3/(n_x*n_y),
+ * with the terms as computed below
+ */
+float64_t UnbiasedFull::operator()(SGMatrix km)
+{
+    using MatrixXt = const Eigen::MatrixXd;
+    using Block = const Eigen::Block>;
+
+    Eigen::Map map(km.matrix, km.num_rows, km.num_cols);
+    index_t n_y = km.num_rows - n_x;
+
+    // within-sample sum for the first sample, diagonal excluded
+    Block& b_x = map.block(0, 0, n_x, n_x);
+    auto term_1 = b_x.sum() - b_x.diagonal().sum();
+
+    // within-sample sum for the second sample, diagonal excluded
+    Block& b_y = map.block(n_x, n_x, n_y, n_y);
+    auto term_2 = b_y.sum() - b_y.diagonal().sum();
+
+    // between-sample sum over the whole cross block
+    Block& b_xy = map.block(n_x, 0, n_y, n_x);
+    auto term_3 = b_xy.sum();
+
+    auto statistic = term_1/n_x/(n_x-1) + term_2/n_y/(n_y-1) - 2*term_3/n_x/n_y;
+
+    return statistic;
+
+}
diff --git a/src/shogun/statistics/experimental/internals/mmd/UnbiasedFull.h b/src/shogun/statistics/experimental/internals/mmd/UnbiasedFull.h
new file mode 100644
index 00000000000..ead5151906a
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/UnbiasedFull.h
@@ -0,0 +1,53 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef UNBIASED_FULL_H_
+#define UNBIASED_FULL_H_
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+
+namespace shogun
+{
+
+template class SGMatrix;
+template class CGPUMatrix;
+
+namespace internal
+{
+
+namespace mmd
+{
+
+/**
+ * Functor for the unbiased full MMD statistic. It works on a joint kernel
+ * matrix in which the first n_x rows/columns belong to the first sample and
+ * the remaining ones to the second sample.
+ */
+struct UnbiasedFull
+{
+    using return_type = float64_t;
+    // n is the number of samples from the first distribution.
+    UnbiasedFull(index_t n);
+
+    // Computes the statistic from the given joint kernel matrix.
+    return_type operator()(SGMatrix kernel_matrix);
+//  return_type operator()(CGPUMatrix kernel_matrix);
+
+    index_t n_x; // number of samples from the first distribution
+};
+
+}
+
+}
+
+}
+
+#endif // UNBIASED_FULL_H_
diff --git a/src/shogun/statistics/experimental/internals/mmd/UnbiasedIncomplete.cpp b/src/shogun/statistics/experimental/internals/mmd/UnbiasedIncomplete.cpp
new file mode 100644
index 00000000000..94767ed566b
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/UnbiasedIncomplete.cpp
@@ -0,0 +1,52 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// NOTE(review): #include targets and template argument lists are missing in
+// this copy of the patch; recover them from the original commit.
+#include
+#include
+#include
+#include
+
+using namespace shogun;
+using namespace internal;
+using namespace mmd;
+
+/**
+ * @param _n number of samples from each of the two distributions
+ */
+UnbiasedIncomplete::UnbiasedIncomplete(index_t _n) : n(_n)
+{
+}
+
+/**
+ * Computes the unbiased incomplete MMD^2 estimate from a joint kernel matrix
+ * of two samples with n points each: the first n rows/columns belong to the
+ * first sample, the next n to the second one. The diagonal of every n x n
+ * block is excluded, i.e. only index pairs (i, j) with i != j contribute.
+ *
+ * @param km the joint kernel matrix
+ * @return (term_1 + term_2 - 2*term_3)/(n(n-1)), with the terms as computed
+ * below
+ */
+float64_t UnbiasedIncomplete::operator()(SGMatrix km)
+{
+    using MatrixXt = const Eigen::MatrixXd;
+    using Block = const Eigen::Block>;
+
+    Eigen::Map map(km.matrix, km.num_rows, km.num_cols);
+
+    Block& b_x = map.block(0, 0, n, n);
+    auto term_1 = b_x.sum() - b_x.diagonal().sum();
+
+    Block& b_y = map.block(n, n, n, n);
+    auto term_2 = b_y.sum() - b_y.diagonal().sum();
+
+    Block& b_xy = map.block(n, 0, n, n);
+    auto term_3 = b_xy.sum() - b_xy.diagonal().sum();
+
+    // All three terms are sums over the n*(n-1) off-diagonal entries of
+    // their block, so all three are averaged with the same n*(n-1) factor.
+    auto statistic = term_1/n/(n-1) + term_2/n/(n-1) - 2*term_3/n/(n-1);
+
+    return statistic;
+
+}
diff --git a/src/shogun/statistics/experimental/internals/mmd/UnbiasedIncomplete.h b/src/shogun/statistics/experimental/internals/mmd/UnbiasedIncomplete.h
new file mode 100644
index 00000000000..96d356f30b4
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/UnbiasedIncomplete.h
@@ -0,0 +1,51 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef UNBIASED_INCOMPLETE_H_
+#define UNBIASED_INCOMPLETE_H_
+
+#include
+
+namespace shogun
+{
+
+template class SGMatrix;
+template class CGPUMatrix;
+
+namespace internal
+{
+
+namespace mmd
+{
+
+/**
+ * Functor computing an MMD statistic from a precomputed kernel matrix, using
+ * three n x n blocks of it (definitions in UnbiasedIncomplete.cpp).
+ */
+struct UnbiasedIncomplete
+{
+	/** Type of the value returned by operator(). */
+	using return_type = float64_t;
+	/** @param _n block size; stored in n */
+	UnbiasedIncomplete(index_t _n);
+	/**
+	 * @param kernel_matrix kernel matrix; the implementation reads the n x n
+	 * blocks starting at (0, 0), (n, n) and (n, 0), so it has to be at least
+	 * 2n x 2n
+	 * @return statistic built from the off-diagonal sums of those blocks
+	 */
+	return_type operator()(SGMatrix kernel_matrix);
+//	return_type operator()(CGPUMatrix kernel_matrix);
+	/** Side length of each block read from the kernel matrix. */
+	index_t n;
+};
+
+}
+
+}
+
+}
+
+#endif // UNBIASED_INCOMPLETE_H_
diff --git a/src/shogun/statistics/experimental/internals/mmd/WithinBlockDirect.cpp b/src/shogun/statistics/experimental/internals/mmd/WithinBlockDirect.cpp
new file mode 100644
index 00000000000..ba2f27180e2
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/WithinBlockDirect.cpp
@@ -0,0 +1,47 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */ + +#include +#include +#include +#include +#include + +using namespace shogun; +using namespace internal; +using namespace mmd; + +float64_t WithinBlockDirect::operator()(SGMatrix km) +{ + Eigen::Map map(km.matrix, km.num_rows, km.num_cols); + index_t B = km.num_rows; + + Eigen::VectorXd diag = map.diagonal(); + map.diagonal().setZero(); + + auto term_1 = map.array().square().sum(); + auto term_2 = CMath::sq(map.array().sum()); + auto term_3 = (map * map).array().sum(); + + map.diagonal() = diag; + + auto variance_estimate = 2*(term_1 + term_2/(B-1)/(B-2) - 2*term_3/(B-2))/B/(B-3); + + return variance_estimate; + +} diff --git a/src/shogun/statistics/experimental/internals/mmd/WithinBlockDirect.h b/src/shogun/statistics/experimental/internals/mmd/WithinBlockDirect.h new file mode 100644 index 00000000000..8064c8da3e6 --- /dev/null +++ b/src/shogun/statistics/experimental/internals/mmd/WithinBlockDirect.h @@ -0,0 +1,49 @@ +/* + * Restructuring Shogun's statistical hypothesis testing framework. + * Copyright (C) 2014 Soumyajit De + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+
+#ifndef WITHIN_BLOCK_DIRECT_H_
+#define WITHIN_BLOCK_DIRECT_H_
+
+#include
+
+namespace shogun
+{
+
+template class SGMatrix;
+template class CGPUMatrix;
+
+namespace internal
+{
+
+namespace mmd
+{
+
+/**
+ * Stateless functor returning a variance estimate computed from a kernel
+ * matrix (definition in WithinBlockDirect.cpp).
+ */
+struct WithinBlockDirect
+{
+	/** Type of the value returned by operator(). */
+	using return_type = float64_t;
+	/**
+	 * @param kernel_matrix kernel matrix; its row count B is used as the
+	 * block size and B-1, B-2 and B-3 appear as divisors, so B has to be
+	 * greater than 3. The diagonal entries do not enter the result.
+	 * @return the variance estimate
+	 */
+	return_type operator()(SGMatrix kernel_matrix);
+//	return_type operator()(CGPUMatrix kernel_matrix);
+};
+
+}
+
+}
+
+}
+
+#endif // WITHIN_BLOCK_DIRECT_H_
diff --git a/src/shogun/statistics/experimental/internals/mmd/WithinBlockPermutation.cpp b/src/shogun/statistics/experimental/internals/mmd/WithinBlockPermutation.cpp
new file mode 100644
index 00000000000..25881a46f97
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/WithinBlockPermutation.cpp
@@ -0,0 +1,55 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace shogun;
+using namespace internal;
+using namespace mmd;
+
+/**
+ * Stores the value that is later handed to the wrapped statistic's
+ * constructor.
+ *
+ * @param n value kept in n_x
+ */
+template
+WithinBlockPermutation::WithinBlockPermutation(index_t n) : n_x(n)
+{
+}
+
+/**
+ * Randomly permutes the kernel matrix and evaluates the wrapped statistic on
+ * the permuted matrix.
+ *
+ * The same shuffled permutation is applied to the rows and to the columns.
+ * The permuted values are written back through the map, i.e. into the buffer
+ * km.matrix points to, and are not restored afterwards.
+ * NOTE(review): whether the caller's matrix shares that buffer depends on
+ * SGMatrix copy semantics, which are not visible here - confirm.
+ *
+ * @param km square kernel matrix (the permutation has km.num_rows entries and
+ * is applied on both sides)
+ * @return result of T(n_x) evaluated on the permuted matrix
+ */
+template
+typename T::return_type WithinBlockPermutation::operator()(SGMatrix km)
+{
+	// http://stackoverflow.com/questions/15858569/randomly-permute-rows-columns-of-a-matrix-with-eigen
+
+	// view on the kernel matrix buffer; assignments below modify it in place
+	Eigen::Map map(km.matrix, km.num_rows, km.num_cols);
+
+	// start from the identity permutation and shuffle its index vector
+	Eigen::PermutationMatrix perm(km.num_rows);
+	perm.setIdentity();
+	// NOTE(review): std::random_shuffle is deprecated since C++14 and removed
+	// in C++17; std::shuffle with an explicit random engine is its replacement.
+	std::random_shuffle(perm.indices().data(), perm.indices().data() + perm.indices().size());
+
+	// permute rows and columns consistently
+	map = perm.transpose() * map * perm;
+
+	// km now refers to the permuted values
+	T statistic(n_x);
+	return statistic(km);
+}
+
+// Explicit instantiations of the wrapper for the supported statistic types.
+template class WithinBlockPermutation;
+template class WithinBlockPermutation;
+template class WithinBlockPermutation;
diff --git a/src/shogun/statistics/experimental/internals/mmd/WithinBlockPermutation.h b/src/shogun/statistics/experimental/internals/mmd/WithinBlockPermutation.h
new file mode 100644
index 00000000000..c27d7f0ca62
--- /dev/null
+++ b/src/shogun/statistics/experimental/internals/mmd/WithinBlockPermutation.h
@@ -0,0 +1,52 @@
+/*
+ * Restructuring Shogun's statistical hypothesis testing framework.
+ * Copyright (C) 2014 Soumyajit De
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#ifndef WITHIN_BLOCK_PERMUTATION_H_
+#define WITHIN_BLOCK_PERMUTATION_H_
+
+#include
+
+namespace shogun
+{
+
+template class SGMatrix;
+template class CGPUMatrix;
+
+namespace internal
+{
+
+namespace mmd
+{
+
+/**
+ * Functor that evaluates a statistic on a randomly permuted kernel matrix
+ * (definitions and explicit instantiations in WithinBlockPermutation.cpp).
+ *
+ * The Statistic type has to provide a nested return_type, be constructible
+ * from an index_t and be callable with the kernel matrix.
+ */
+template
+struct WithinBlockPermutation
+{
+	/** Same return type as the wrapped statistic. */
+	using return_type = typename Statistic::return_type;
+	/** @param n value stored in n_x and passed on to the statistic */
+	WithinBlockPermutation(index_t n);
+	/**
+	 * @param kernel_matrix kernel matrix; the implementation permutes its
+	 * rows and columns in place before evaluating the statistic
+	 * @return the statistic evaluated on the permuted matrix
+	 */
+	return_type operator()(SGMatrix kernel_matrix);
+//	return_type operator()(CGPUMatrix kernel_matrix);
+	/** Argument handed to the Statistic constructor on every call. */
+	index_t n_x;
+};
+
+}
+
+}
+
+}
+
+#endif // WITHIN_BLOCK_PERMUTATION_H_