From 39d027110ba6c8f59f8109cabedd5b3ac687dae7 Mon Sep 17 00:00:00 2001 From: Viktor Gal Date: Wed, 25 Jan 2017 20:09:44 +0800 Subject: [PATCH] Only run threading if pthread is available make sure that if pthread is being used for parallel computing, only try to run it when pthread is actually available. Mark all these implementations to be fixed, i.e. ported to use OpenMP --- src/shogun/classifier/svm/SVM.cpp | 4 -- src/shogun/classifier/svm/SVMLight.cpp | 69 ++++++++++++------- src/shogun/clustering/Hierarchical.cpp | 4 -- .../KernelLocallyLinearEmbedding.cpp | 4 -- src/shogun/distance/Distance.cpp | 1 + src/shogun/features/DotFeatures.cpp | 18 +++-- .../hashed/HashedWDFeaturesTransposed.cpp | 18 +++-- src/shogun/kernel/CombinedKernel.cpp | 10 +++ .../kernel/string/SpectrumRBFKernel.cpp | 5 -- .../WeightedDegreePositionStringKernel.cpp | 9 +-- .../string/WeightedDegreeStringKernel.cpp | 11 +-- src/shogun/lib/external/shogun_libsvm.cpp | 11 ++- src/shogun/machine/DistanceMachine.cpp | 12 +++- src/shogun/machine/KernelMachine.cpp | 10 +++ src/shogun/regression/svr/SVRLight.cpp | 20 ++++-- 15 files changed, 134 insertions(+), 72 deletions(-) diff --git a/src/shogun/classifier/svm/SVM.cpp b/src/shogun/classifier/svm/SVM.cpp index cda852497ae..eb9d0536b3d 100644 --- a/src/shogun/classifier/svm/SVM.cpp +++ b/src/shogun/classifier/svm/SVM.cpp @@ -19,10 +19,6 @@ #include -#ifdef HAVE_PTHREAD -#include -#endif - using namespace shogun; CSVM::CSVM(int32_t num_sv) diff --git a/src/shogun/classifier/svm/SVMLight.cpp b/src/shogun/classifier/svm/SVMLight.cpp index 282a8f4b858..0facabb00b9 100644 --- a/src/shogun/classifier/svm/SVMLight.cpp +++ b/src/shogun/classifier/svm/SVMLight.cpp @@ -1049,7 +1049,13 @@ void CSVMLight::compute_matrices_for_optimization_parallel( float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc, float64_t *aicache, QP *qp) { - if (parallel->get_num_threads()<=1) + // TODO: port to use OpenMP backend instead of pthread +#ifdef 
HAVE_PTHREAD + int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif + if (num_threads < 2) { compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target, chosen, active2dnum, key, a, lin, c, @@ -1079,7 +1085,7 @@ void CSVMLight::compute_matrices_for_optimization_parallel( qp->opt_g0[i]=lin[key[i]]; } - ASSERT(parallel->get_num_threads()>1) + ASSERT(num_threads>1) int32_t *KI=SG_MALLOC(int32_t, varnum*varnum); int32_t *KJ=SG_MALLOC(int32_t, varnum*varnum); int32_t Knum=0 ; @@ -1099,11 +1105,11 @@ void CSVMLight::compute_matrices_for_optimization_parallel( } ASSERT(Knum<=varnum*(varnum+1)/2) - pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1); - S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, parallel->get_num_threads()-1); - int32_t step= Knum/parallel->get_num_threads(); + pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); + S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, num_threads-1); + int32_t step= Knum/num_threads; //SG_DEBUG("\nkernel-step size: %i\n", step) - for (int32_t t=0; tget_num_threads()-1; t++) + for (int32_t t=0; tget_num_threads()-2].end; iget_num_threads()-1; t++) + for (int32_t t=0; t0) { - if (parallel->get_num_threads() < 2) + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD + int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif + if (num_threads < 2) { for (jj=0;(j=active2dnum[jj])>=0;jj++) { lin[j]+=kernel->compute_optimized(docs[j]); @@ -1470,13 +1482,13 @@ void CSVMLight::update_linear_component( int32_t num_elem = 0 ; for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ; - pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1); - S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, parallel->get_num_threads()-1); + pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); + S_THREAD_PARAM_SVMLIGHT* params = 
SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, num_threads-1); int32_t start = 0 ; - int32_t step = num_elem/parallel->get_num_threads(); + int32_t step = num_elem/num_threads; int32_t end = step ; - for (int32_t t=0; tget_num_threads()-1; t++) + for (int32_t t=0; tget_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) { + for (jj=params[num_threads-2].end;(j=active2dnum[jj])>=0;jj++) { lin[j]+=kernel->compute_optimized(docs[j]); } void* ret; - for (int32_t t=0; tget_num_threads()-1; t++) + for (int32_t t=0; tadd_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]); } } + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD + int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif - if (parallel->get_num_threads() < 2) + if (num_threads < 2) { // determine contributions of different kernels for (int32_t i=0; iget_num_threads()-1); - S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, parallel->get_num_threads()-1); - int32_t step= num/parallel->get_num_threads(); + pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); + S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, num_threads-1); + int32_t step= num/num_threads; - for (int32_t t=0; tget_num_threads()-1; t++) + for (int32_t t=0; tget_num_threads()-2].end; icompute_by_subkernel(i,&W[i*num_kernels]); - for (int32_t t=0; tget_num_threads()-1; t++) + for (int32_t t=0; t0) { - int32_t num_threads=parallel->get_num_threads(); + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD + int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) if (num_threads < 2) { diff --git a/src/shogun/clustering/Hierarchical.cpp b/src/shogun/clustering/Hierarchical.cpp index b66d25a4103..02e9335c681 100644 --- a/src/shogun/clustering/Hierarchical.cpp +++ b/src/shogun/clustering/Hierarchical.cpp @@ -15,10 +15,6 @@ #include #include -#ifdef HAVE_PTHREAD -#include -#endif - 
using namespace shogun; #ifndef DOXYGEN_SHOULD_SKIP_THIS diff --git a/src/shogun/converter/KernelLocallyLinearEmbedding.cpp b/src/shogun/converter/KernelLocallyLinearEmbedding.cpp index fbc9f1553be..352bc72d2fe 100644 --- a/src/shogun/converter/KernelLocallyLinearEmbedding.cpp +++ b/src/shogun/converter/KernelLocallyLinearEmbedding.cpp @@ -12,10 +12,6 @@ #include #include -#ifdef HAVE_PTHREAD -#include -#endif - using namespace shogun; CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding() : diff --git a/src/shogun/distance/Distance.cpp b/src/shogun/distance/Distance.cpp index 1cce8c0624a..24cf54d1818 100644 --- a/src/shogun/distance/Distance.cpp +++ b/src/shogun/distance/Distance.cpp @@ -349,6 +349,7 @@ SGMatrix CDistance::get_distance_matrix() result=SG_MALLOC(T, total_num); + // TODO: port this to use OpenMP #ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); #else diff --git a/src/shogun/features/DotFeatures.cpp b/src/shogun/features/DotFeatures.cpp index 6313f88947e..16fe0c8f6f8 100644 --- a/src/shogun/features/DotFeatures.cpp +++ b/src/shogun/features/DotFeatures.cpp @@ -77,15 +77,18 @@ void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t sto int32_t num_vectors=stop-start; ASSERT(num_vectors>0) + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) CSignal::clear_cancel(); -#ifdef HAVE_PTHREAD if (num_threads < 2) { -#endif DF_THREAD_PARAM params; params.df=this; params.sub_index=NULL; @@ -98,8 +101,8 @@ void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t sto params.bias=b; params.progress=false; //true; dense_dot_range_helper((void*) &params); -#ifdef HAVE_PTHREAD } +#ifdef HAVE_PTHREAD else { pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); @@ -155,15 +158,18 @@ void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float 
ASSERT(sub_index) ASSERT(output) + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) CSignal::clear_cancel(); -#ifdef HAVE_PTHREAD if (num_threads < 2) { -#endif DF_THREAD_PARAM params; params.df=this; params.sub_index=sub_index; @@ -176,8 +182,8 @@ void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float params.bias=b; params.progress=false; //true; dense_dot_range_helper((void*) &params); -#ifdef HAVE_PTHREAD } +#ifdef HAVE_PTHREAD else { pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); diff --git a/src/shogun/features/hashed/HashedWDFeaturesTransposed.cpp b/src/shogun/features/hashed/HashedWDFeaturesTransposed.cpp index 3d038fcdce5..cda8ab0b491 100644 --- a/src/shogun/features/hashed/HashedWDFeaturesTransposed.cpp +++ b/src/shogun/features/hashed/HashedWDFeaturesTransposed.cpp @@ -211,7 +211,12 @@ void CHashedWDFeaturesTransposed::dense_dot_range(float64_t* output, int32_t sta int32_t num_vectors=stop-start; ASSERT(num_vectors>0) + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) CSignal::clear_cancel(); @@ -219,10 +224,8 @@ void CHashedWDFeaturesTransposed::dense_dot_range(float64_t* output, int32_t sta if (dim != w_dim) SG_ERROR("Dimensions don't match, vec_len=%d, w_dim=%d\n", dim, w_dim) -#ifdef HAVE_PTHREAD if (num_threads < 2) { -#endif HASHEDWD_THREAD_PARAM params; params.hf=this; params.sub_index=NULL; @@ -235,8 +238,8 @@ void CHashedWDFeaturesTransposed::dense_dot_range(float64_t* output, int32_t sta params.progress=false; //true; params.index=index; dense_dot_range_helper((void*) &params); -#ifdef HAVE_PTHREAD } +#ifdef HAVE_PTHREAD else { pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); @@ -295,7 +298,12 @@ void 
CHashedWDFeaturesTransposed::dense_dot_range_subset(int32_t* sub_index, int uint32_t* index=SG_MALLOC(uint32_t, num); + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) CSignal::clear_cancel(); @@ -303,10 +311,8 @@ void CHashedWDFeaturesTransposed::dense_dot_range_subset(int32_t* sub_index, int if (dim != w_dim) SG_ERROR("Dimensions don't match, vec_len=%d, w_dim=%d\n", dim, w_dim) -#ifdef HAVE_PTHREAD if (num_threads < 2) { -#endif HASHEDWD_THREAD_PARAM params; params.hf=this; params.sub_index=sub_index; @@ -319,8 +325,8 @@ void CHashedWDFeaturesTransposed::dense_dot_range_subset(int32_t* sub_index, int params.progress=false; //true; params.index=index; dense_dot_range_helper((void*) &params); -#ifdef HAVE_PTHREAD } +#ifdef HAVE_PTHREAD else { pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); diff --git a/src/shogun/kernel/CombinedKernel.cpp b/src/shogun/kernel/CombinedKernel.cpp index c7784d7b985..9e398c4add7 100644 --- a/src/shogun/kernel/CombinedKernel.cpp +++ b/src/shogun/kernel/CombinedKernel.cpp @@ -446,7 +446,12 @@ void CCombinedKernel::emulate_compute_batch( { k->init_optimization(num_suppvec, IDX, weights); + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) if (num_threads < 2) @@ -503,7 +508,12 @@ void CCombinedKernel::emulate_compute_batch( if (k->get_combined_kernel_weight()!=0) { // compute the usual way for any non-optimized kernel + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) if (num_threads < 2) diff --git a/src/shogun/kernel/string/SpectrumRBFKernel.cpp b/src/shogun/kernel/string/SpectrumRBFKernel.cpp index 3abf627b96f..636ffd87532 
100644 --- a/src/shogun/kernel/string/SpectrumRBFKernel.cpp +++ b/src/shogun/kernel/string/SpectrumRBFKernel.cpp @@ -30,11 +30,6 @@ #include -#ifdef HAVE_PTHREAD -#include -#endif - - using namespace shogun; CSpectrumRBFKernel::CSpectrumRBFKernel() diff --git a/src/shogun/kernel/string/WeightedDegreePositionStringKernel.cpp b/src/shogun/kernel/string/WeightedDegreePositionStringKernel.cpp index 46c6c635fdc..809840b2ecd 100644 --- a/src/shogun/kernel/string/WeightedDegreePositionStringKernel.cpp +++ b/src/shogun/kernel/string/WeightedDegreePositionStringKernel.cpp @@ -1244,18 +1244,19 @@ void CWeightedDegreePositionStringKernel::compute_batch( int32_t num_feat=((CStringFeatures*) rhs)->get_max_vector_length(); ASSERT(num_feat>0) + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) int32_t* vec=SG_MALLOC(int32_t, num_threads*num_feat); if (num_threads < 2) { -#ifdef WIN32 - for (int32_t j=0; j params; diff --git a/src/shogun/kernel/string/WeightedDegreeStringKernel.cpp b/src/shogun/kernel/string/WeightedDegreeStringKernel.cpp index 0150f19f882..50632dac655 100644 --- a/src/shogun/kernel/string/WeightedDegreeStringKernel.cpp +++ b/src/shogun/kernel/string/WeightedDegreeStringKernel.cpp @@ -21,7 +21,7 @@ #include #include -#ifndef WIN32 +#ifdef HAVE_PTHREAD #include #endif @@ -875,18 +875,19 @@ void CWeightedDegreeStringKernel::compute_batch( int32_t num_feat=((CStringFeatures*) rhs)->get_max_vector_length(); ASSERT(num_feat>0) + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) int32_t* vec=SG_MALLOC(int32_t, num_threads*num_feat); if (num_threads < 2) { -#ifdef CYGWIN - for (int32_t j=0; jget_num_threads(); + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD + int32_t 
num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif if (num_threads < 2) { Q_THREAD_PARAM params; @@ -281,9 +286,9 @@ class LibSVMKernel: public QMatrix { params.q=this; compute_Q_parallel_helper((void*) &params); } +#ifdef HAVE_PTHREAD else { -#ifdef HAVE_PTHREAD int32_t total_num=(len-start); pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); Q_THREAD_PARAM* params = SG_MALLOC(Q_THREAD_PARAM, num_threads); @@ -329,8 +334,8 @@ class LibSVMKernel: public QMatrix { SG_FREE(params); SG_FREE(threads); -#endif /* HAVE_PTHREAD */ } +#endif /* HAVE_PTHREAD */ } inline float64_t kernel_function(int32_t i, int32_t j) const diff --git a/src/shogun/machine/DistanceMachine.cpp b/src/shogun/machine/DistanceMachine.cpp index 6a8130bc5dc..b342d45f5a5 100644 --- a/src/shogun/machine/DistanceMachine.cpp +++ b/src/shogun/machine/DistanceMachine.cpp @@ -51,7 +51,12 @@ void CDistanceMachine::init() void CDistanceMachine::distances_lhs(float64_t* result,int32_t idx_a1,int32_t idx_a2,int32_t idx_b) { + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) ASSERT(result) @@ -113,7 +118,12 @@ void CDistanceMachine::distances_lhs(float64_t* result,int32_t idx_a1,int32_t id void CDistanceMachine::distances_rhs(float64_t* result,int32_t idx_b1,int32_t idx_b2,int32_t idx_a) { + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) ASSERT(result) @@ -130,7 +140,7 @@ void CDistanceMachine::distances_rhs(float64_t* result,int32_t idx_b1,int32_t id run_distance_thread_rhs((void*) &param); } -#ifndef WIN32 +#ifdef HAVE_PTHREAD else { pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); diff --git a/src/shogun/machine/KernelMachine.cpp b/src/shogun/machine/KernelMachine.cpp index fafd87f7128..06335587a5e 
100644 --- a/src/shogun/machine/KernelMachine.cpp +++ b/src/shogun/machine/KernelMachine.cpp @@ -333,7 +333,12 @@ SGVector CKernelMachine::apply_get_outputs(CFeatures* data) } else { + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) if (num_threads < 2) @@ -548,7 +553,12 @@ SGVector CKernelMachine::apply_locked_get_output( io->disable_progress(); /* custom kernel never has batch evaluation property so dont do this here */ + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif ASSERT(num_threads>0) if (num_threads<2) diff --git a/src/shogun/regression/svr/SVRLight.cpp b/src/shogun/regression/svr/SVRLight.cpp index 963b72a134f..b4c545faa1b 100644 --- a/src/shogun/regression/svr/SVRLight.cpp +++ b/src/shogun/regression/svr/SVRLight.cpp @@ -428,7 +428,13 @@ void CSVRLight::update_linear_component( if (num_working>0) { - if (parallel->get_num_threads() < 2) + // TODO: port to use OpenMP backend instead of pthread +#ifdef HAVE_PTHREAD + int32_t num_threads=parallel->get_num_threads(); +#else + int32_t num_threads=1; +#endif + if (num_threads < 2) { for(jj=0;(j=active2dnum[jj])>=0;jj++) { lin[j]+=kernel->compute_optimized(regression_fix_index(docs[j])); @@ -440,13 +446,13 @@ void CSVRLight::update_linear_component( int32_t num_elem = 0 ; for(jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ; - pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1); - S_THREAD_PARAM_SVRLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVRLIGHT, parallel->get_num_threads()-1); + pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); + S_THREAD_PARAM_SVRLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVRLIGHT, num_threads-1); int32_t start = 0 ; - int32_t step = num_elem/parallel->get_num_threads() ; + int32_t step = num_elem/num_threads ; 
int32_t end = step ; - for (int32_t t=0; tget_num_threads()-1; t++) + for (int32_t t=0; tget_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) { + for(jj=params[num_threads-2].end;(j=active2dnum[jj])>=0;jj++) { lin[j]+=kernel->compute_optimized(regression_fix_index(docs[j])); } void* ret; - for (int32_t t=0; tget_num_threads()-1; t++) + for (int32_t t=0; t