Skip to content

Commit

Permalink
Fix remap private view memory leak and formalize w->X ugprade process.
Browse files Browse the repository at this point in the history
Makes the upgrade R_to_W() block until it succeeds.

Replaces "runExclusively" with a new "X" state, which is equivalent to "W", but
can only be reached from "w".  Client code in "w" calls "w_to_X()", which blocks
until all other threads in "w" call w_to_X() or call unlock_w().  At that point,
exactly one thread returns from w_to_X() with return value "true".  This thread
is the "exclusive writer", and may behave like it's in "W" state.  When that
thread calls "X_to_w()", it reverts to "w" state, and releases all the other
threads that called w_to_X() back into "w" state.

Because X_to_w() is effectively a barrier, we use generation counters to make
sure that fast racers don't cause deadlocks or race through the barrier.  The
generation counters are generationX and generationXExit.

Use of w_to_X() is wrapped in the Lock::DBWrite::UpgradeToExclusive() guard
object, which has a "gotUpgrade()" method to check if a particular thread was
the exclusive worker for the upgrade period.

Decision making about which condition variables to notify is more fully
delegated to the notifyWeUnlocked() method of QLock, to eliminate some
inadvertent deadlocks due that surfaced when R_to_W() was made to wait until
success.

May help SERVER-5533, because it fixes a greediness logic bug exercised by
fsync-and-lock.

This patch also introduces a directed test of the w->X functionality,
though it could use the addition of of extra "noise" work.
  • Loading branch information
Andy Schwerin committed May 9, 2012
1 parent a0d04bc commit 15770de
Show file tree
Hide file tree
Showing 5 changed files with 489 additions and 174 deletions.
63 changes: 24 additions & 39 deletions src/mongo/db/d_concurrency.cpp
Expand Up @@ -222,9 +222,10 @@ namespace mongo {
}

// todo timing stats? :
void runExclusively(void (*f)(void)) { q.runExclusively(f); }
void W_to_R() { q.W_to_R(); }
bool R_to_W() { return q.R_to_W(); }
void R_to_W() { q.R_to_W(); }
bool w_to_X() { return q.w_to_X(); }
void X_to_w() { q.X_to_w(); }
};

static WrapperForQLock& q = *new WrapperForQLock();
Expand All @@ -243,37 +244,6 @@ namespace mongo {
result.append("locks", b.obj());
}

void runExclusively(void (*f)(void)) {
q.runExclusively(f);
}

/** commitIfNeeded(), we have to do work when no one else is writing, and do it at a
point where there is data consistency. yet we have multiple writers so what to do.
this is the solution chosen. we wait until all writers either finish (quick ones)
or also call commitIfNeeded (long ones) -- a little like a synchronization barrier.
a more elegant solution likely is best long term.
*/
void QLock::runExclusively(void (*f)(void)) {
dlog(1) << "QLock::runExclusively" << endl;
boost::mutex::scoped_lock lk( m );
verify( w.n > 0 );
greed++; // stop new acquisitions
X.n++;
while( X.n ) {
if( X.n == w.n ) {
// we're all here
f();
X.n = 0; // sentinel, tell everyone we're done
X.c.notify_all();
}
else {
X.c.wait(lk);
}
}
greed--;
dlog(1) << "run exclusively end" << endl;
}

int Lock::isLocked() {
return threadState();
}
Expand Down Expand Up @@ -457,15 +427,13 @@ namespace mongo {
q.W_to_R();
lockState().changeLockState( 'R' );
}

// you will deadlock if 2 threads doing this
bool Lock::GlobalWrite::upgrade() {
void Lock::GlobalWrite::upgrade() {
verify( !noop );
verify( threadState() == 'R' );
if( q.R_to_W() ) {
lockState().changeLockState( 'W' );
return true;
}
return false;
q.R_to_W();
lockState().changeLockState( 'W' );
}

Lock::GlobalRead::GlobalRead( int timeoutms ) {
Expand Down Expand Up @@ -724,6 +692,23 @@ namespace mongo {
_weLocked = ls.otherLock();
}

Lock::DBWrite::UpgradeToExclusive::UpgradeToExclusive() {
fassert( 0, lockState().threadState() == 'w' );
_gotUpgrade = q.w_to_X();
if ( _gotUpgrade )
lockState().locked('W');
}

Lock::DBWrite::UpgradeToExclusive::~UpgradeToExclusive() {
if ( _gotUpgrade ) {
fassert( 0, lockState().threadState() == 'W' );
q.X_to_w();
lockState().locked('w');
}
else {
fassert( 0, lockState().threadState() == 'w' );
}
}

writelocktry::writelocktry( int tryms ) :
_got( false ),
Expand Down
15 changes: 12 additions & 3 deletions src/mongo/db/d_concurrency.h
Expand Up @@ -87,7 +87,7 @@ namespace mongo {
GlobalWrite(bool stopGreed = false, int timeoutms = -1 );
virtual ~GlobalWrite();
void downgrade(); // W -> R
bool upgrade(); // caution see notes
void upgrade(); // caution see notes
};
class GlobalRead : public ScopedLock { // recursive is ok
public:
Expand Down Expand Up @@ -124,14 +124,23 @@ namespace mongo {
public:
DBWrite(const StringData& dbOrNs);
virtual ~DBWrite();


class UpgradeToExclusive : private boost::noncopyable {
public:
UpgradeToExclusive();
~UpgradeToExclusive();

bool gotUpgrade() const { return _gotUpgrade; }
private:
bool _gotUpgrade;
};

private:
bool _locked_w;
bool _locked_W;
WrapperForRWLock *_weLocked;
const string _what;
bool _nested;

};

// lock this database for reading. do not shared_lock globally first, that is handledin herein.
Expand Down
40 changes: 11 additions & 29 deletions src/mongo/db/dur.cpp
Expand Up @@ -74,7 +74,6 @@ using namespace mongoutils;
namespace mongo {

bool lockedForWriting();
void runExclusively(void (*f)(void));

namespace dur {

Expand Down Expand Up @@ -251,29 +250,12 @@ namespace mongo {
return commitJob.bytes() > UncommittedBytesLimit;
}

static int in_f;
void f_commitEarlyInRunExclusively() {
dassert( in_f == 0 );
in_f++;
try {
assertNothingSpooled();
getDur().commitNow();
assertNothingSpooled(); // a light sanity check that we truly were exclusive
}
catch(...) {
in_f--;
throw;
}
in_f--;
}

void assertLockedForCommitting() {
char t = Lock::isLocked();
if( t == 'R' || t == 'W' )
return;
// 'w' case we use runExclusively
fassert( 16110, t == 'w' );
fassert( 16111, in_f == 1 );
// 'w' case we upgrade to exclusive (X).
fassertFailed( 16110 );
}

bool NOINLINE_DECL DurableImpl::_aCommitIsNeeded() {
Expand All @@ -300,8 +282,12 @@ namespace mongo {
return false;
}
else {
log(1) << "commitIfNeeded calling runExclusively" << endl;
runExclusively(f_commitEarlyInRunExclusively);
log(1) << "commitIfNeeded upgrading from shared write to exclusive write state"
<< endl;
Lock::DBWrite::UpgradeToExclusive ex;
if (ex.gotUpgrade()) {
commitNow();
}
}
}
else {
Expand Down Expand Up @@ -627,7 +613,7 @@ namespace mongo {
static void _groupCommit(Lock::GlobalWrite *lgw) {
LOG(4) << "_groupCommit " << endl;

// runExclusively is in 'w', else we are 'R' or 'W'
// We are 'R' or 'W'
assertLockedForCommitting();

unspoolWriteIntents(); // in case we were doing some writing ourself
Expand Down Expand Up @@ -682,12 +668,8 @@ namespace mongo {
// to be in W the entire time we were committing about (in particular for WRITETOJOURNAL() which takes time).
if( lgw ) {
LOG(4) << "_groupCommit upgrade" << endl;
if( lgw->upgrade() ) {
REMAPPRIVATEVIEW();
}
else {
log() << "info timeout on lock upgrade for REMAPPRIVATEVIEW" << endl;
}
lgw->upgrade();
REMAPPRIVATEVIEW();
}
}
else {
Expand Down

0 comments on commit 15770de

Please sign in to comment.