Skip to content

Commit

Permalink
more parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
jpn-- committed Feb 3, 2017
1 parent ec7053d commit 9813d68
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 7 deletions.
19 changes: 12 additions & 7 deletions src/etk/etk_ndarray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1047,17 +1047,22 @@ void ndarray::logsumexp_2 (ndarray* out, const size_t& siz1, const size_t& siz2)
out->pool = (PyArrayObject*)PyArray_New((PyTypeObject*)get_array_type("Array"), 1, &dims[0], NPY_DOUBLE, nullptr, nullptr, 0, 0, nullptr);
Py_INCREF(out->pool);
}
unsigned x1, x2; double temp;
// unsigned x1, x2; double temp;
if (PyArray_NDIM(pool)!=2) {
OOPS("can only calculate logsums on a 2d array");
} else {
for ( x1=0; x1<siz1; x1++ ) {
{
temp = 0;
for ( x2=0; x2<siz2; x2++ ) { temp += ::exp(this->operator()(x1,x2)); }
// for ( x1=0; x1<siz1; x1++ ) {
// {
// temp = 0;
// for ( x2=0; x2<siz2; x2++ ) { temp += ::exp(this->operator()(x1,x2)); }
// out->operator()(x1) = ::log(temp);
// }
// }
ThreadPool::ParallelFor0((unsigned long)0, siz1, [&](size_t& x1){
double temp =0;
for ( unsigned x2=0; x2<siz2; x2++ ) { temp += ::exp(this->operator()(x1,x2)); }
out->operator()(x1) = ::log(temp);
}
}
} );
}
}

Expand Down
48 changes: 48 additions & 0 deletions src/etk/etk_thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,54 @@ class ThreadPool {
}
}




template<typename Index, typename Callable>
static void ParallelFor0(Index start, Index end, Callable func ) {
// Estimate number of threads in the pool
const static unsigned nb_threads_hint = std::thread::hardware_concurrency();
const static unsigned nb_threads = (nb_threads_hint == 0u ? 8u : nb_threads_hint);

// Size of a slice for the range functions
Index n = end - start + 1;
Index slice = (Index) std::round(n / static_cast<double> (nb_threads));
slice = std::max(slice, Index(1));

// [Helper] Inner loop
auto launchRange = [&func] (int k1, int k2) {
for (Index k = k1; k < k2; k++) {
func(k);
}
};

// Create pool and launch jobs
std::vector<std::thread> pool;
pool.reserve(nb_threads);
Index i1 = start;
Index i2 = std::min(start + slice, end);
for (unsigned i = 0; i + 1 < nb_threads && i1 < end; ++i) {
pool.emplace_back(launchRange, i1, i2);
i1 = i2;
i2 = std::min(i2 + slice, end);
}
if (i1 < end) {
pool.emplace_back(launchRange, i1, end);
}

// Wait for jobs to finish
for (std::thread &t : pool) {
if (t.joinable()) {
t.join();
}
}
}






// Serial version for easy comparison
template<typename Index, typename Callable>
static void SequentialFor(Index start, Index end, Callable func) {
Expand Down

0 comments on commit 9813d68

Please sign in to comment.