Skip to content

Commit

Permalink
Merge pull request #3224 from Saurabh7/xvalclone
Browse files Browse the repository at this point in the history
parallel xval with clone
  • Loading branch information
karlnapf committed Jun 6, 2016
2 parents 4fb6397 + ecd50bd commit 1605b67
Show file tree
Hide file tree
Showing 4 changed files with 223 additions and 20 deletions.
1 change: 1 addition & 0 deletions NEWS
Expand Up @@ -7,6 +7,7 @@
* Features:
- Add new Shogun cookbook for documentation and testing across all
target languages [Heiko Strathmann, Sergey Lisitsyn].
- Add multi-threading support for unlocked cross-validation [Saurabh Mahindre]
* Bugfixes:
- Fix gTest segfaults with GCC >= 6.0.0 [Björn Esser].
- Make Java and CSharp install-dir configurable [Björn Esser].
Expand Down
69 changes: 49 additions & 20 deletions src/shogun/evaluation/CrossValidation.cpp
Expand Up @@ -264,32 +264,49 @@ float64_t CCrossValidation::evaluate_one_run()
m_machine->set_store_model_features(true);

/* do actual cross-validation */
#pragma omp parallel for
for (index_t i=0; i <num_subsets; ++i)
{
CMachine* machine;
CFeatures* features;
CLabels* labels;

if (get_global_parallel()->get_num_threads()==1)
machine=m_machine;
else
machine=(CMachine*)m_machine->clone();

/* evtl. update xvalidation output class */
CCrossValidationOutput* current=(CCrossValidationOutput*)
m_xval_outputs->get_first_element();
#pragma omp critical
{
while (current)
{
current->update_fold_index(i);
SG_UNREF(current);
current=(CCrossValidationOutput*)
m_xval_outputs->get_next_element();
}
}

/* set feature subset for training */
SGVector<index_t> inverse_subset_indices=
m_splitting_strategy->generate_subset_inverse(i);
m_features->add_subset(inverse_subset_indices);
for (index_t p=0; p<m_features->get_num_preprocessors(); p++)
{
CPreprocessor* preprocessor = m_features->get_preprocessor(p);
preprocessor->init(m_features);
SG_UNREF(preprocessor);
}

if (get_global_parallel()->get_num_threads()==1)
features=m_features;
else
features=(CFeatures*)m_features->clone();

features->add_subset(inverse_subset_indices);

/* set label subset for training */
m_labels->add_subset(inverse_subset_indices);
if (get_global_parallel()->get_num_threads()==1)
labels=m_labels;
else
labels=machine->get_labels();
labels->add_subset(inverse_subset_indices);

SG_DEBUG("training set %d:\n", i)
if (io->get_loglevel()==MSG_DEBUG)
Expand All @@ -300,30 +317,33 @@ float64_t CCrossValidation::evaluate_one_run()

/* train machine on training features and remove subset */
SG_DEBUG("starting training\n")
m_machine->train(m_features);
machine->train(features);
SG_DEBUG("finished training\n")

/* evtl. update xvalidation output class */
#pragma omp critical
{
current=(CCrossValidationOutput*)m_xval_outputs->get_first_element();
while (current)
{
current->update_train_indices(inverse_subset_indices, "\t");
current->update_trained_machine(m_machine, "\t");
current->update_trained_machine(machine, "\t");
SG_UNREF(current);
current=(CCrossValidationOutput*)
m_xval_outputs->get_next_element();
}
}

m_features->remove_subset();
m_labels->remove_subset();
features->remove_subset();
labels->remove_subset();

/* set feature subset for testing (subset method that stores pointer) */
SGVector<index_t> subset_indices =
m_splitting_strategy->generate_subset_indices(i);
m_features->add_subset(subset_indices);
features->add_subset(subset_indices);

/* set label subset for testing */
m_labels->add_subset(subset_indices);
labels->add_subset(subset_indices);

SG_DEBUG("test set %d:\n", i)
if (io->get_loglevel()==MSG_DEBUG)
Expand All @@ -334,33 +354,42 @@ float64_t CCrossValidation::evaluate_one_run()

/* apply machine to test features and remove subset */
SG_DEBUG("starting evaluation\n")
SG_DEBUG("%p\n", m_features)
CLabels* result_labels=m_machine->apply(m_features);
SG_DEBUG("%p\n", features)
CLabels* result_labels=machine->apply(features);
SG_DEBUG("finished evaluation\n")
m_features->remove_subset();
features->remove_subset();
SG_REF(result_labels);

/* evaluate */
results[i]=m_evaluation_criterion->evaluate(result_labels, m_labels);
results[i]=m_evaluation_criterion->evaluate(result_labels, labels);
SG_DEBUG("result on fold %d is %f\n", i, results[i])

/* evtl. update xvalidation output class */
#pragma omp critical
{
current=(CCrossValidationOutput*)m_xval_outputs->get_first_element();
while (current)
{
current->update_test_indices(subset_indices, "\t");
current->update_test_result(result_labels, "\t");
current->update_test_true_result(m_labels, "\t");
current->update_test_true_result(labels, "\t");
current->post_update_results();
current->update_evaluation_result(results[i], "\t");
SG_UNREF(current);
current=(CCrossValidationOutput*)
m_xval_outputs->get_next_element();
}
}

/* clean up, remove subsets */
labels->remove_subset();
if (get_global_parallel()->get_num_threads()!=1)
{
SG_UNREF(machine);
SG_UNREF(features);
SG_UNREF(labels);
}
SG_UNREF(result_labels);
m_labels->remove_subset();
}

SG_DEBUG("done unlocked evaluation\n", get_name())
Expand Down
5 changes: 5 additions & 0 deletions src/shogun/evaluation/CrossValidation.h
Expand Up @@ -108,6 +108,11 @@ class CCrossValidationResult : public CEvaluationResult
* speed up computations. Can be turned off by the set_autolock() method.
* Locking in general may speed up things (eg for kernel machines the kernel
* matrix is precomputed), however, it is not always supported.
*
* Cross-validation runs with the currently configured number of threads
* (see Parallel::set_num_threads) in the unlocked case; each thread currently
* works on duplicated (cloned) objects (this might be changed later).
*
*/
class CCrossValidation: public CMachineEvaluation
{
Expand Down
168 changes: 168 additions & 0 deletions tests/unit/evaluation/CrossValidation_multithread_unittest.cc
@@ -0,0 +1,168 @@
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2016 Saurabh Mahindre
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*
*/

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <gtest/gtest.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/multiclass/KNN.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
#include <shogun/distance/EuclideanDistance.h>

using namespace shogun;

/** Fills @p mat with two well-separated Gaussian clusters (centres 0 and 100
 * along the first dimension, the second dimension being the sample index) and
 * writes the corresponding class label (0 for the first half, 1 for the
 * second) into @p lab.
 *
 * @param mat 2 x n feature matrix to fill
 * @param lab label vector of length n to fill
 */
void generate_data(SGMatrix<float64_t>& mat, SGVector<float64_t> &lab)
{
	const int32_t num=lab.size();

	/* single pass: draw the features and assign the label of sample i.
	 * Calls to randn_double() happen in the same order as before, so the
	 * generated data is unchanged. */
	for (index_t i=0; i<num; ++i)
	{
		const float64_t centre=(i<num/2) ? 0.0 : 100.0;
		mat(0,i)=centre+CMath::randn_double()*4;
		mat(1,i)=i;
		lab.vector[i]=(i<num/2) ? 0 : 1;
	}
}

/* Unlocked cross-validation with a LibSVM must give the identical mean
 * accuracy whether it runs on one thread or on several. */
TEST(CrossValidation_multithread, LibSVM_unlocked)
{
	const int32_t num=100;

	/* fixture: two trivially separable clusters */
	SGMatrix<float64_t> data(2, num);
	SGVector<float64_t> lab(num);
	generate_data(data, lab);

	/* shift the first half from 0 to -1 so labels are +/-1 as binary
	 * labels require */
	for (index_t i=0; i<num/2; ++i)
		lab.vector[i]-=1;

	CBinaryLabels* labels=new CBinaryLabels(lab);

	CDenseFeatures<float64_t>* features=
		new CDenseFeatures<float64_t>(data);
	SG_REF(features);

	/* Gaussian kernel on the data itself */
	int32_t width=100;
	CGaussianKernel* kernel=new CGaussianKernel(width);
	kernel->init(features, features);

	/* machine under test: SVM via libsvm */
	float64_t svm_C=1;
	CLibSVM* svm=new CLibSVM(svm_C, kernel, labels);

	/* accuracy criterion and 4-fold stratified splitting */
	CContingencyTableEvaluation* eval_crit=
		new CContingencyTableEvaluation(ACCURACY);
	index_t n_folds=4;
	CStratifiedCrossValidationSplitting* splitting=
		new CStratifiedCrossValidationSplitting(labels, n_folds);

	CCrossValidation* cross=new CCrossValidation(svm, features, labels,
		splitting, eval_crit);
	cross->set_autolock(false);
	cross->set_num_runs(4);

	/* single-threaded reference run */
	cross->parallel->set_num_threads(1);
	CCrossValidationResult* result1=(CCrossValidationResult*)cross->evaluate();
	float64_t mean1 = result1->mean;

	/* multi-threaded run must reproduce the same mean exactly */
	cross->parallel->set_num_threads(3);
	CCrossValidationResult* result2=(CCrossValidationResult*)cross->evaluate();
	float64_t mean2 = result2->mean;

	EXPECT_EQ(mean1, mean2);

	/* clean up */
	SG_UNREF(result1);
	SG_UNREF(result2);
	SG_UNREF(cross);
	SG_UNREF(features);
}

/* Same single- vs multi-threaded equality check as above, but with a KNN
 * multiclass machine instead of an SVM. */
TEST(CrossValidation_multithread, KNN)
{
	const int32_t num=100;

	/* fixture: two trivially separable clusters with labels 0/1 */
	SGMatrix<float64_t> data(2, num);
	SGVector<float64_t> lab(num);
	generate_data(data, lab);

	CMulticlassLabels* labels=new CMulticlassLabels(lab);

	CDenseFeatures<float64_t>* features=
		new CDenseFeatures<float64_t>(data);
	SG_REF(features);

	/* machine under test: 4-nearest-neighbours on euclidean distance */
	CEuclideanDistance* distance = new CEuclideanDistance(features, features);
	CKNN* knn=new CKNN (4, distance, labels);

	/* evaluation criterion: multiclass accuracy */
	CMulticlassAccuracy* eval_crit = new CMulticlassAccuracy ();

	/* 4-fold stratified splitting */
	index_t n_folds=4;
	CStratifiedCrossValidationSplitting* splitting=
		new CStratifiedCrossValidationSplitting(labels, n_folds);

	CCrossValidation* cross=new CCrossValidation(knn, features, labels,
		splitting, eval_crit);
	cross->set_autolock(false);
	cross->set_num_runs(4);

	/* single-threaded reference run */
	cross->parallel->set_num_threads(1);
	CCrossValidationResult* result1=(CCrossValidationResult*)cross->evaluate();
	float64_t mean1 = result1->mean;

	/* multi-threaded run must reproduce the same mean exactly */
	cross->parallel->set_num_threads(3);
	CCrossValidationResult* result2=(CCrossValidationResult*)cross->evaluate();
	float64_t mean2 = result2->mean;

	EXPECT_EQ(mean1, mean2);

	/* clean up */
	SG_UNREF(result1);
	SG_UNREF(result2);
	SG_UNREF(cross);
	SG_UNREF(features);
}

0 comments on commit 1605b67

Please sign in to comment.