Skip to content

Commit

Permalink
Use spinlock instead of mutex for Threads and SplitPoint
Browse files Browse the repository at this point in the history
It is reported to be definitely faster with an increasing
number of threads: we go from +3.5% with 4 threads
to +15% with 16 threads.

The only drawback is that now, when testing with more
threads than physically available cores, the speed slows
down to a crawl. This is expected and is similar to what
we had when setting the old sleepingThreads to false.

No functional change.
  • Loading branch information
mcostalba committed Feb 23, 2015
1 parent 775f823 commit 3811206
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 41 deletions.
36 changes: 18 additions & 18 deletions src/search.cpp
Expand Up @@ -765,7 +765,7 @@ namespace {
continue;

moveCount = ++splitPoint->moveCount;
splitPoint->mutex.unlock();
splitPoint->spinlock.release();
}
else
++moveCount;
Expand Down Expand Up @@ -834,7 +834,7 @@ namespace {
&& moveCount >= FutilityMoveCounts[improving][depth])
{
if (SpNode)
splitPoint->mutex.lock();
splitPoint->spinlock.acquire();

continue;
}
Expand All @@ -853,7 +853,7 @@ namespace {

if (SpNode)
{
splitPoint->mutex.lock();
splitPoint->spinlock.acquire();
if (bestValue > splitPoint->bestValue)
splitPoint->bestValue = bestValue;
}
Expand All @@ -865,7 +865,7 @@ namespace {
if (predictedDepth < 4 * ONE_PLY && pos.see_sign(move) < VALUE_ZERO)
{
if (SpNode)
splitPoint->mutex.lock();
splitPoint->spinlock.acquire();

continue;
}
Expand Down Expand Up @@ -965,7 +965,7 @@ namespace {
// Step 18. Check for new best move
if (SpNode)
{
splitPoint->mutex.lock();
splitPoint->spinlock.acquire();
bestValue = splitPoint->bestValue;
alpha = splitPoint->alpha;
}
Expand Down Expand Up @@ -1526,21 +1526,21 @@ void Thread::idle_loop() {
// If this thread has been assigned work, launch a search
while (searching)
{
Threads.mutex.lock();
Threads.spinlock.acquire();

assert(activeSplitPoint);

SplitPoint* sp = activeSplitPoint;

Threads.mutex.unlock();
Threads.spinlock.release();

Stack stack[MAX_PLY+4], *ss = stack+2; // To allow referencing (ss-2) and (ss+2)
Position pos(*sp->pos, this);

std::memcpy(ss-2, sp->ss-2, 5 * sizeof(Stack));
ss->splitPoint = sp;

sp->mutex.lock();
sp->spinlock.acquire();

assert(activePosition == nullptr);

Expand Down Expand Up @@ -1578,7 +1578,7 @@ void Thread::idle_loop() {
// After releasing the lock we can't access any SplitPoint related data
// in a safe way because it could have been released under our feet by
// the sp master.
sp->mutex.unlock();
sp->spinlock.release();

// Try to late join to another split point if none of its slaves has
// already finished.
Expand All @@ -1593,7 +1593,7 @@ void Thread::idle_loop() {
if ( sp
&& sp->allSlavesSearching
&& sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
&& available_to(th))
&& available_to(sp->master))
{
assert(this != th);
assert(!(this_sp && this_sp->slavesMask.none()));
Expand All @@ -1618,8 +1618,8 @@ void Thread::idle_loop() {
sp = bestSp;

// Recheck the conditions under lock protection
Threads.mutex.lock();
sp->mutex.lock();
Threads.spinlock.acquire();
sp->spinlock.acquire();

if ( sp->allSlavesSearching
&& sp->slavesMask.count() < MAX_SLAVES_PER_SPLITPOINT
Expand All @@ -1630,8 +1630,8 @@ void Thread::idle_loop() {
searching = true;
}

sp->mutex.unlock();
Threads.mutex.unlock();
sp->spinlock.release();
Threads.spinlock.release();
}
}

Expand Down Expand Up @@ -1687,7 +1687,7 @@ void check_time() {

else if (Limits.nodes)
{
Threads.mutex.lock();
Threads.spinlock.acquire();

int64_t nodes = RootPos.nodes_searched();

Expand All @@ -1698,18 +1698,18 @@ void check_time() {
{
SplitPoint& sp = th->splitPoints[i];

sp.mutex.lock();
sp.spinlock.acquire();

nodes += sp.nodes;

for (size_t idx = 0; idx < Threads.size(); ++idx)
if (sp.slavesMask.test(idx) && Threads[idx]->activePosition)
nodes += Threads[idx]->activePosition->nodes_searched();

sp.mutex.unlock();
sp.spinlock.release();
}

Threads.mutex.unlock();
Threads.spinlock.release();

if (nodes >= Limits.nodes)
Signals.stop = true;
Expand Down
16 changes: 8 additions & 8 deletions src/thread.cpp
Expand Up @@ -165,8 +165,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
// Try to allocate available threads and ask them to start searching setting
// 'searching' flag. This must be done under lock protection to avoid concurrent
// allocation of the same slave by another master.
Threads.mutex.lock();
sp.mutex.lock();
Threads.spinlock.acquire();
sp.spinlock.acquire();

sp.allSlavesSearching = true; // Must be set under lock protection
++splitPointsSize;
Expand All @@ -188,8 +188,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
// it will instantly launch a search, because its 'searching' flag is set.
// The thread will return from the idle loop when all slaves have finished
// their work at this split point.
sp.mutex.unlock();
Threads.mutex.unlock();
sp.spinlock.release();
Threads.spinlock.release();

Thread::idle_loop(); // Force a call to base class idle_loop()

Expand All @@ -202,8 +202,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
// We have returned from the idle loop, which means that all threads are
// finished. Note that setting 'searching' and decreasing splitPointsSize must
// be done under lock protection to avoid a race with Thread::available_to().
Threads.mutex.lock();
sp.mutex.lock();
Threads.spinlock.acquire();
sp.spinlock.acquire();

searching = true;
--splitPointsSize;
Expand All @@ -213,8 +213,8 @@ void Thread::split(Position& pos, Stack* ss, Value alpha, Value beta, Value* bes
*bestMove = sp.bestMove;
*bestValue = sp.bestValue;

sp.mutex.unlock();
Threads.mutex.unlock();
sp.spinlock.release();
Threads.spinlock.release();
}


Expand Down
30 changes: 15 additions & 15 deletions src/thread.h
Expand Up @@ -39,6 +39,19 @@ const size_t MAX_THREADS = 128;
const size_t MAX_SPLITPOINTS_PER_THREAD = 8;
const size_t MAX_SLAVES_PER_SPLITPOINT = 4;

/// Spinlock class wraps low level atomic operations to provide spin lock functionality

class Spinlock {

  // Cleared flag means unlocked; set means some thread holds the lock.
  // ATOMIC_FLAG_INIT guarantees a cleared (unlocked) starting state,
  // which std::atomic_flag's default constructor does not before C++20.
  std::atomic_flag flag = ATOMIC_FLAG_INIT;

public:
  // Busy-wait until we are the thread that flips the flag from clear to set
  void acquire() {
      while (flag.test_and_set(std::memory_order_acquire))
          continue;
  }

  // Clear the flag so a spinning acquire() can succeed
  void release() { flag.clear(std::memory_order_release); }
};


/// SplitPoint struct stores information shared by the threads searching in
/// parallel below the same split point. It is populated at splitting time.

Expand All @@ -58,7 +71,7 @@ struct SplitPoint {
SplitPoint* parentSplitPoint;

// Shared variable data
std::mutex mutex;
Spinlock spinlock;
std::bitset<MAX_THREADS> slavesMask;
volatile bool allSlavesSearching;
volatile uint64_t nodes;
Expand All @@ -70,19 +83,6 @@ struct SplitPoint {
};


/// Spinlock class wraps low level atomic operations to provide spin lock functionality

class Spinlock {

  std::atomic_flag lock; // Cleared == unlocked, set == held

public:
  // Start in the unlocked state. std::atomic_flag's default constructor
  // does not guarantee a cleared flag (before C++20), so clear explicitly.
  Spinlock() { std::atomic_flag_clear(&lock); }
  // Busy-wait until this thread's test_and_set() observes the cleared
  // flag and sets it; memory_order_acquire pairs with release() below.
  void acquire() { while (lock.test_and_set(std::memory_order_acquire)) {} }
  // Unlock by clearing the flag, publishing writes made inside the
  // critical section to the next thread whose test_and_set() succeeds.
  void release() { lock.clear(std::memory_order_release); }
};


/// ThreadBase struct is the base of the hierarchy from where we derive all the
/// specialized thread classes.

Expand Down Expand Up @@ -162,7 +162,7 @@ struct ThreadPool : public std::vector<Thread*> {
void start_thinking(const Position&, const Search::LimitsType&, Search::StateStackPtr&);

Depth minimumSplitDepth;
std::mutex mutex;
Spinlock spinlock;
std::condition_variable sleepCondition;
TimerThread* timer;
};
Expand Down

0 comments on commit 3811206

Please sign in to comment.