Skip to content

Commit

Permalink
merge bitcoin#18710: Add local thread pool to CCheckQueue
Browse files Browse the repository at this point in the history
  • Loading branch information
kwvg committed Jun 25, 2021
1 parent 03a3f6c commit c9d0d92
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 88 deletions.
13 changes: 6 additions & 7 deletions src/bench/checkqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include <checkqueue.h>
#include <prevector.h>
#include <vector>
#include <boost/thread/thread.hpp>
#include <random.h>


Expand Down Expand Up @@ -37,10 +36,11 @@ static void CCheckQueueSpeedPrevectorJob(benchmark::State& state)
void swap(PrevectorJob& x){p.swap(x.p);};
};
CCheckQueue<PrevectorJob> queue {QUEUE_BATCH_SIZE};
boost::thread_group tg;
for (auto x = 0; x < std::max(MIN_CORES, GetNumCores()); ++x) {
tg.create_thread([&]{queue.Thread();});
}

// The main thread should be counted to prevent thread oversubscription, and
// to decrease the variance of benchmark results.
queue.StartWorkerThreads(GetNumCores() - 1);

while (state.KeepRunning()) {
// Make insecure_rand here so that each iteration is identical.
FastRandomContext insecure_rand(true);
Expand All @@ -56,7 +56,6 @@ static void CCheckQueueSpeedPrevectorJob(benchmark::State& state)
// it is done explicitly here for clarity
control.Wait();
}
tg.interrupt_all();
tg.join_all();
queue.StopWorkerThreads();
}
BENCHMARK(CCheckQueueSpeedPrevectorJob, 1400);
92 changes: 63 additions & 29 deletions src/checkqueue.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@
#define BITCOIN_CHECKQUEUE_H

#include <sync.h>
#include <tinyformat.h>
#include <utilthreadnames.h>

#include <algorithm>
#include <vector>

#include <boost/thread/condition_variable.hpp>
#include <boost/thread/mutex.hpp>

template <typename T>
class CCheckQueueControl;

Expand All @@ -31,61 +30,64 @@ class CCheckQueue
{
private:
//! Mutex to protect the inner state
boost::mutex mutex;
Mutex m_mutex;

//! Worker threads block on this when out of work
boost::condition_variable condWorker;
std::condition_variable m_worker_cv;

//! Master thread blocks on this when out of work
boost::condition_variable condMaster;
std::condition_variable m_master_cv;

//! The queue of elements to be processed.
//! As the order of booleans doesn't matter, it is used as a LIFO (stack)
std::vector<T> queue;
std::vector<T> queue GUARDED_BY(m_mutex);

//! The number of workers (including the master) that are idle.
int nIdle;
int nIdle GUARDED_BY(m_mutex){0};

//! The total number of workers (including the master).
int nTotal;
int nTotal GUARDED_BY(m_mutex){0};

//! The temporary evaluation result.
bool fAllOk;
bool fAllOk GUARDED_BY(m_mutex){true};

/**
* Number of verifications that haven't completed yet.
* This includes elements that are no longer queued, but still in the
* worker's own batches.
*/
unsigned int nTodo;
unsigned int nTodo GUARDED_BY(m_mutex){0};

//! The maximum number of elements to be processed in one batch
unsigned int nBatchSize;
const unsigned int nBatchSize;

std::vector<std::thread> m_worker_threads;
bool m_request_stop GUARDED_BY(m_mutex){false};

/** Internal function that does bulk of the verification work. */
bool Loop(bool fMaster = false)
bool Loop(bool fMaster)
{
boost::condition_variable& cond = fMaster ? condMaster : condWorker;
std::condition_variable& cond = fMaster ? m_master_cv : m_worker_cv;
std::vector<T> vChecks;
vChecks.reserve(nBatchSize);
unsigned int nNow = 0;
bool fOk = true;
do {
{
boost::unique_lock<boost::mutex> lock(mutex);
WAIT_LOCK(m_mutex, lock);
// first do the clean-up of the previous loop run (allowing us to do it in the same critsect)
if (nNow) {
fAllOk &= fOk;
nTodo -= nNow;
if (nTodo == 0 && !fMaster)
// We processed the last element; inform the master it can exit and return the result
condMaster.notify_one();
m_master_cv.notify_one();
} else {
// first iteration
nTotal++;
}
// logically, the do loop starts here
while (queue.empty()) {
while (queue.empty() && !m_request_stop) {
if (fMaster && nTodo == 0) {
nTotal--;
bool fRet = fAllOk;
Expand All @@ -99,6 +101,10 @@ class CCheckQueue
cond.wait(lock); // wait
nIdle--;
}
if (m_request_stop) {
return false;
}

// Decide how many work units to process now.
// * Do not try to do everything at once, but aim for increasingly smaller batches so
// all workers finish approximately simultaneously.
Expand All @@ -107,7 +113,7 @@ class CCheckQueue
nNow = std::max(1U, std::min(nBatchSize, (unsigned int)queue.size() / (nTotal + nIdle + 1)));
vChecks.resize(nNow);
for (unsigned int i = 0; i < nNow; i++) {
// We want the lock on the mutex to be as short as possible, so swap jobs from the global
// We want to hold the lock on m_mutex for as short a time as possible, so swap jobs from the global
// queue to the local batch vector instead of copying.
vChecks[i].swap(queue.back());
queue.pop_back();
Expand All @@ -125,40 +131,68 @@ class CCheckQueue

public:
//! Mutex to ensure only one concurrent CCheckQueueControl
boost::mutex ControlMutex;
Mutex m_control_mutex;

//! Create a new check queue
explicit CCheckQueue(unsigned int nBatchSizeIn) : nIdle(0), nTotal(0), fAllOk(true), nTodo(0), nBatchSize(nBatchSizeIn) {}
explicit CCheckQueue(unsigned int nBatchSizeIn)
: nBatchSize(nBatchSizeIn)
{
}

//! Worker thread
void Thread()
//! Create a pool of new worker threads.
void StartWorkerThreads(const int threads_num)
{
Loop();
{
LOCK(m_mutex);
nIdle = 0;
nTotal = 0;
fAllOk = true;
}
assert(m_worker_threads.empty());
for (int n = 0; n < threads_num; ++n) {
m_worker_threads.emplace_back([this, n]() {
util::ThreadRename(strprintf("scriptch.%i", n));
Loop(false /* worker thread */);
});
}
}

//! Wait until execution finishes, and return whether all evaluations were successful.
bool Wait()
{
return Loop(true);
return Loop(true /* master thread */);
}

//! Add a batch of checks to the queue
void Add(std::vector<T>& vChecks)
{
boost::unique_lock<boost::mutex> lock(mutex);
LOCK(m_mutex);
for (T& check : vChecks) {
queue.push_back(T());
check.swap(queue.back());
}
nTodo += vChecks.size();
if (vChecks.size() == 1)
condWorker.notify_one();
m_worker_cv.notify_one();
else if (vChecks.size() > 1)
condWorker.notify_all();
m_worker_cv.notify_all();
}

//! Stop all of the worker threads.
void StopWorkerThreads()
{
WITH_LOCK(m_mutex, m_request_stop = true);
m_worker_cv.notify_all();
for (std::thread& t : m_worker_threads) {
t.join();
}
m_worker_threads.clear();
WITH_LOCK(m_mutex, m_request_stop = false);
}

~CCheckQueue()
{
assert(m_worker_threads.empty());
}

};
Expand All @@ -182,7 +216,7 @@ class CCheckQueueControl
{
// passed queue is supposed to be unused, or nullptr
if (pqueue != nullptr) {
ENTER_CRITICAL_SECTION(pqueue->ControlMutex);
ENTER_CRITICAL_SECTION(pqueue->m_control_mutex);
}
}

Expand All @@ -206,7 +240,7 @@ class CCheckQueueControl
if (!fDone)
Wait();
if (pqueue != nullptr) {
LEAVE_CRITICAL_SECTION(pqueue->ControlMutex);
LEAVE_CRITICAL_SECTION(pqueue->m_control_mutex);
}
}
};
Expand Down
5 changes: 2 additions & 3 deletions src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ void PrepareShutdown()
// CScheduler/checkqueue threadGroup
threadGroup.interrupt_all();
threadGroup.join_all();
StopScriptCheckWorkerThreads();

// After there are no more peers/RPC left to give us new data which may generate
// CValidationInterface callbacks, flush them...
Expand Down Expand Up @@ -1736,9 +1737,7 @@ bool AppInitMain()
LogPrintf("Script verification uses %d additional threads\n", script_threads);
if (script_threads >= 1) {
g_parallel_script_checks = true;
for (int i = 0; i < script_threads; ++i) {
threadGroup.create_thread([i]() { return ThreadScriptCheck(i); });
}
StartScriptCheckWorkerThreads(script_threads);
}

std::vector<std::string> vSporkAddresses;
Expand Down
Loading

0 comments on commit c9d0d92

Please sign in to comment.