diff --git a/NEWS b/NEWS index c8dff0ba6c3..337b1dd6e4e 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,7 @@ * Features: - Add new Shogun cookbook for documentation and testing across all target languages [Heiko Strathmann, Sergey Lisitsyn]. + - Add multi thread support for unlocked cross-validation [Saurabh Mahindre] * Bugfixes: - Fix gTest segfaults with GCC >= 6.0.0 [Björn Esser]. - Make Java and CSharp install-dir configurable [Björn Esser]. diff --git a/src/shogun/evaluation/CrossValidation.cpp b/src/shogun/evaluation/CrossValidation.cpp index cb145188cc8..25a416ea79a 100644 --- a/src/shogun/evaluation/CrossValidation.cpp +++ b/src/shogun/evaluation/CrossValidation.cpp @@ -264,11 +264,23 @@ float64_t CCrossValidation::evaluate_one_run() m_machine->set_store_model_features(true); /* do actual cross-validation */ + #pragma omp parallel for for (index_t i=0; i<num_subsets; ++i) { + CMachine* machine; + CFeatures* features; + CLabels* labels; + + if (get_global_parallel()->get_num_threads()==1) + machine=m_machine; + else + machine=(CMachine*)m_machine->clone(); + /* evtl. update xvalidation output class */ CCrossValidationOutput* current=(CCrossValidationOutput*) m_xval_outputs->get_first_element(); + #pragma omp critical + { while (current) { current->update_fold_index(i); @@ -276,20 +288,25 @@ float64_t CCrossValidation::evaluate_one_run() current=(CCrossValidationOutput*) m_xval_outputs->get_next_element(); } + } /* set feature subset for training */ SGVector<index_t> inverse_subset_indices= m_splitting_strategy->generate_subset_inverse(i); - m_features->add_subset(inverse_subset_indices); - for (index_t p=0; p<m_features->get_num_preprocessors(); p++) - { - CPreprocessor* preprocessor = m_features->get_preprocessor(p); - preprocessor->init(m_features); - SG_UNREF(preprocessor); - } + + if (get_global_parallel()->get_num_threads()==1) + features=m_features; + else + features=(CFeatures*)m_features->clone(); + + features->add_subset(inverse_subset_indices); /* set label subset for training */ - m_labels->add_subset(inverse_subset_indices); + if (get_global_parallel()->get_num_threads()==1) + 
labels=m_labels; + else + labels=machine->get_labels(); + labels->add_subset(inverse_subset_indices); SG_DEBUG("training set %d:\n", i) if (io->get_loglevel()==MSG_DEBUG) @@ -300,30 +317,33 @@ float64_t CCrossValidation::evaluate_one_run() /* train machine on training features and remove subset */ SG_DEBUG("starting training\n") - m_machine->train(m_features); + machine->train(features); SG_DEBUG("finished training\n") /* evtl. update xvalidation output class */ + #pragma omp critical + { current=(CCrossValidationOutput*)m_xval_outputs->get_first_element(); while (current) { current->update_train_indices(inverse_subset_indices, "\t"); - current->update_trained_machine(m_machine, "\t"); + current->update_trained_machine(machine, "\t"); SG_UNREF(current); current=(CCrossValidationOutput*) m_xval_outputs->get_next_element(); } + } - m_features->remove_subset(); - m_labels->remove_subset(); + features->remove_subset(); + labels->remove_subset(); /* set feature subset for testing (subset method that stores pointer) */ SGVector<index_t> subset_indices = m_splitting_strategy->generate_subset_indices(i); - m_features->add_subset(subset_indices); + features->add_subset(subset_indices); /* set label subset for testing */ - m_labels->add_subset(subset_indices); + labels->add_subset(subset_indices); SG_DEBUG("test set %d:\n", i) if (io->get_loglevel()==MSG_DEBUG) @@ -334,33 +354,42 @@ float64_t CCrossValidation::evaluate_one_run() /* apply machine to test features and remove subset */ SG_DEBUG("starting evaluation\n") - SG_DEBUG("%p\n", m_features) - CLabels* result_labels=m_machine->apply(m_features); + SG_DEBUG("%p\n", features) + CLabels* result_labels=machine->apply(features); SG_DEBUG("finished evaluation\n") - m_features->remove_subset(); + features->remove_subset(); SG_REF(result_labels); /* evaluate */ - results[i]=m_evaluation_criterion->evaluate(result_labels, m_labels); + results[i]=m_evaluation_criterion->evaluate(result_labels, labels); SG_DEBUG("result on fold %d is 
%f\n", i, results[i]) /* evtl. update xvalidation output class */ + #pragma omp critical + { current=(CCrossValidationOutput*)m_xval_outputs->get_first_element(); while (current) { current->update_test_indices(subset_indices, "\t"); current->update_test_result(result_labels, "\t"); - current->update_test_true_result(m_labels, "\t"); + current->update_test_true_result(labels, "\t"); current->post_update_results(); current->update_evaluation_result(results[i], "\t"); SG_UNREF(current); current=(CCrossValidationOutput*) m_xval_outputs->get_next_element(); } + } /* clean up, remove subsets */ + labels->remove_subset(); + if (get_global_parallel()->get_num_threads()!=1) + { + SG_UNREF(machine); + SG_UNREF(features); + SG_UNREF(labels); + } SG_UNREF(result_labels); - m_labels->remove_subset(); } SG_DEBUG("done unlocked evaluation\n", get_name()) diff --git a/src/shogun/evaluation/CrossValidation.h b/src/shogun/evaluation/CrossValidation.h index 3454a703479..c2704151877 100644 --- a/src/shogun/evaluation/CrossValidation.h +++ b/src/shogun/evaluation/CrossValidation.h @@ -108,6 +108,11 @@ class CCrossValidationResult : public CEvaluationResult * speed up computations. Can be turned off by the set_autolock() method. * Locking in general may speed up things (eg for kernel machines the kernel * matrix is precomputed), however, it is not always supported. + * + * Crossvalidation runs with current number of threads + * (Parallel::set_num_threads) for unlocked case, and currently duplicates all + * objects (might be changed later). 
+ * */ class CCrossValidation: public CMachineEvaluation { diff --git a/tests/unit/evaluation/CrossValidation_multithread_unittest.cc b/tests/unit/evaluation/CrossValidation_multithread_unittest.cc new file mode 100644 index 00000000000..0c9d6020aa7 --- /dev/null +++ b/tests/unit/evaluation/CrossValidation_multithread_unittest.cc @@ -0,0 +1,168 @@ +/* + * Copyright (c) The Shogun Machine Learning Toolbox + * Written (w) 2016 Saurabh Mahindre + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * The views and conclusions contained in the software and documentation are those + * of the authors and should not be interpreted as representing official policies, + * either expressed or implied, of the Shogun Development Team. + * + */ + +#include <gtest/gtest.h> +#include <shogun/base/Parallel.h> +#include <shogun/features/DenseFeatures.h> +#include <shogun/labels/BinaryLabels.h> +#include <shogun/labels/MulticlassLabels.h> +#include <shogun/kernel/GaussianKernel.h> +#include <shogun/classifier/svm/LibSVM.h> +#include <shogun/multiclass/KNN.h> +#include <shogun/distance/EuclideanDistance.h> +#include <shogun/evaluation/CrossValidation.h> +#include <shogun/evaluation/StratifiedCrossValidationSplitting.h> +#include <shogun/evaluation/ContingencyTableEvaluation.h> +#include <shogun/evaluation/MulticlassAccuracy.h> + +using namespace shogun; + +void generate_data(SGMatrix<float64_t>& mat, SGVector<float64_t> &lab) +{ + int32_t num=lab.size(); + + for (index_t i=0; i<num; i++) + { + mat(0,i)=i<num/2 ? 0 : 100; + mat(1,i)=i; + } +} + +TEST(CrossValidation_multithread, LibSVM) +{ + int32_t num=100; + SGMatrix<float64_t> mat(2, num); + + /* training labels +/- 1 for each cluster */ + SGVector<float64_t> lab(num); + + /*create simple linearly separable data*/ + generate_data(mat, lab); + + for (index_t i=0; i<num; i++) + lab[i]=i<num/2 ? -1.0 : 1.0; + + CBinaryLabels* labels=new CBinaryLabels(lab); + + CDenseFeatures<float64_t>* features= + new CDenseFeatures<float64_t>(mat); + SG_REF(features); + + int32_t width=100; + CGaussianKernel* kernel=new CGaussianKernel(width); + kernel->init(features, features); + + /* create svm via libsvm */ + float64_t svm_C=1; + CLibSVM* svm=new CLibSVM(svm_C, kernel, labels); + + CContingencyTableEvaluation* eval_crit= + new CContingencyTableEvaluation(ACCURACY); + + index_t n_folds=4; + CStratifiedCrossValidationSplitting* splitting= + new CStratifiedCrossValidationSplitting(labels, n_folds); + + CCrossValidation* cross=new CCrossValidation(svm, features, labels, + splitting, eval_crit); + + cross->set_autolock(false); + cross->set_num_runs(4); + cross->parallel->set_num_threads(1); + + CCrossValidationResult* result1=(CCrossValidationResult*)cross->evaluate(); + float64_t mean1 = result1->mean; + + cross->parallel->set_num_threads(3); + + CCrossValidationResult* result2=(CCrossValidationResult*)cross->evaluate(); + float64_t mean2 = result2->mean; + + EXPECT_EQ(mean1, mean2); + + /* clean up */ + SG_UNREF(result1); + SG_UNREF(result2); + SG_UNREF(cross); + SG_UNREF(features); +} + +TEST(CrossValidation_multithread, KNN) +{ + int32_t num=100; + SGMatrix<float64_t> mat(2, num); + + SGVector<float64_t> lab(num); + + /*create simple linearly separable data*/ + generate_data(mat, lab); + CMulticlassLabels* 
labels=new CMulticlassLabels(lab); + + CDenseFeatures<float64_t>* features= + new CDenseFeatures<float64_t>(mat); + SG_REF(features); + + /* create knn */ + CEuclideanDistance* distance = new CEuclideanDistance(features, features); + CKNN* knn=new CKNN (4, distance, labels); + /* evaluation criterion */ + CMulticlassAccuracy* eval_crit = new CMulticlassAccuracy (); + + /* splitting strategy */ + index_t n_folds=4; + CStratifiedCrossValidationSplitting* splitting= + new CStratifiedCrossValidationSplitting(labels, n_folds); + + CCrossValidation* cross=new CCrossValidation(knn, features, labels, + splitting, eval_crit); + + cross->set_autolock(false); + cross->set_num_runs(4); + cross->parallel->set_num_threads(1); + + CCrossValidationResult* result1=(CCrossValidationResult*)cross->evaluate(); + float64_t mean1 = result1->mean; + + cross->parallel->set_num_threads(3); + + CCrossValidationResult* result2=(CCrossValidationResult*)cross->evaluate(); + float64_t mean2 = result2->mean; + + EXPECT_EQ(mean1, mean2); + + SG_UNREF(result1); + SG_UNREF(result2); + SG_UNREF(cross); + SG_UNREF(features); +}