From b07eeee2f1d2e1d74928f2867837ed40bd61f0f5 Mon Sep 17 00:00:00 2001 From: dwight Date: Fri, 30 Jul 2010 15:00:19 -0400 Subject: [PATCH 1/2] RS rollback fix --- db/repl/consensus.cpp | 2 +- db/repl/heartbeat.cpp | 2 +- db/repl/rs.cpp | 8 +++----- db/repl/rs_sync.cpp | 34 +++++++++++++++++++++++++++++++--- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/db/repl/consensus.cpp b/db/repl/consensus.cpp index f67d9509a21b6..8d2753ba5fc2c 100644 --- a/db/repl/consensus.cpp +++ b/db/repl/consensus.cpp @@ -307,7 +307,7 @@ namespace mongo { } else { /* succeeded. */ - log() << "replSet election succeeded, assuming primary role" << rsLog; + log(1) << "replSet election succeeded, assuming primary role" << rsLog; success = true; rs.assumePrimary(); } diff --git a/db/repl/heartbeat.cpp b/db/repl/heartbeat.cpp index 67ae44354520b..3811804cee252 100644 --- a/db/repl/heartbeat.cpp +++ b/db/repl/heartbeat.cpp @@ -130,7 +130,7 @@ namespace mongo { string name() { return "ReplSetHealthPollTask"; } void doWork() { if ( !theReplSet ) { - log() << "theReplSet not initialized yet, skipping health poll this round" << rsLog; + log(2) << "theReplSet not initialized yet, skipping health poll this round" << rsLog; return; } diff --git a/db/repl/rs.cpp b/db/repl/rs.cpp index 9cd8556da5b20..ac6ad20fd29fd 100644 --- a/db/repl/rs.cpp +++ b/db/repl/rs.cpp @@ -34,7 +34,7 @@ namespace mongo { assert( iAmPotentiallyHot() ); writelock lk("admin."); // so we are synchronized with _logOp() box.setSelfPrimary(_self); - log(2) << "replSet self (" << _self->id() << ") is now primary" << rsLog; + log() << "replSet PRIMARY" << rsLog; // self (" << _self->id() << ") is now primary" << rsLog; } void ReplSetImpl::changeState(MemberState s) { box.change(s, _self); } @@ -164,7 +164,7 @@ namespace mongo { seedSet.insert(m); //uassert(13101, "can't use localhost in replset host list", !m.isLocalHost()); if( m.isSelf() ) { - log() << "replSet ignoring seed " << m.toString() << " (=self)" << rsLog; + log(1) << "replSet ignoring seed " << m.toString() << " (=self)" << rsLog; } else seeds.push_back(m); if( *comma == 0 ) @@ -372,13 +372,11 @@ namespace mongo { log() << "replSet have you ran replSetInitiate yet?" << rsLog; if( _seeds->size() == 0 ) log() << "replSet no seed hosts were specified on the --replSet command line - that might be the issue" << rsLog; - log() << "replSet sleeping 20sec and will try again." << rsLog; } else { startupStatus = EMPTYUNREACHABLE; startupStatusMsg = "can't currently get " + rsConfigNs + " config from self or any seed (EMPTYUNREACHABLE)"; - log() << "replSet can't get " << rsConfigNs << " config from self or any seed." << rsLog; - log() << "replSet sleeping 20sec and will try again." << rsLog; + log() << "replSet can't get " << rsConfigNs << " config from self or any seed (yet)" << rsLog; } sleepsecs(10); diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp index 29fc0207f82ba..3608e992a927e 100644 --- a/db/repl/rs_sync.cpp +++ b/db/repl/rs_sync.cpp @@ -22,6 +22,8 @@ namespace mongo { + using namespace bson; + void startSyncThread() { Client::initThread("rs_sync"); theReplSet->syncThread(); @@ -82,14 +84,39 @@ namespace mongo { { if( !r.more() ) { + /* maybe we are ahead and need to roll back? */ + try { + bo theirLastOp = r.getLastOp(rsoplog); + if( theirLastOp.isEmpty() ) { + log() << "replSet error empty query result from " << hn << " oplog" << rsLog; + sleepsecs(2); + return; + } + OpTime theirTS = theirLastOp["ts"]._opTime(); + if( theirTS < lastOpTimeWritten ) { + log() << "replSet we are ahead of the primary, will try to roll back" << rsLog; + syncRollback(r); + return; + } + /* we're not ahead? maybe our new query got fresher data. best to come back and try again */ + log() << "replSet syncTail condition 1" << rsLog; + sleepsecs(1); + } + catch(DBException& e) { + log() << "replSet error querying " << hn << ' ' << e.toString() << rsLog; + sleepsecs(2); + } + return; + /* log() << "replSet syncTail error querying oplog >= " << lastOpTimeWritten.toString() << " from " << hn << rsLog; try { log() << "replSet " << hn << " last op: " << r.getLastOp(rsoplog).toString() << rsLog; } catch(...) { } sleepsecs(1); - return; + return;*/ } + BSONObj o = r.nextSafe(); OpTime ts = o["ts"]._opTime(); long long h = o["h"].numberLong(); @@ -127,10 +154,11 @@ namespace mongo { } if( golive ) { sethbmsg(""); + log() << "replSet SECONDARY" << rsLog; changeState(MemberState::RS_SECONDARY); } else { - sethbmsg("recovering; not yet to minValid optime"); + sethbmsg("still syncing, not yet to minValid optime"); } /* todo: too stale capability */ @@ -201,7 +229,7 @@ namespace mongo { _syncThread(); } catch(DBException& e) { - log() << "replSet syncThread: " << e.toString() << rsLog; + sethbmsg("syncThread: " + e.toString()); sleepsecs(10); } catch(...) { From 6a5c95a8d350f6a6b93cace865b18eca4112cf7f Mon Sep 17 00:00:00 2001 From: dwight Date: Fri, 30 Jul 2010 15:01:03 -0400 Subject: [PATCH 2/2] unref local var warning --- client/dbclient.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/dbclient.cpp b/client/dbclient.cpp index 407b50252f3e4..187a5d8a3b8dd 100644 --- a/client/dbclient.cpp +++ b/client/dbclient.cpp @@ -1045,7 +1045,7 @@ namespace mongo { string e; _conns[i]->auth( dbname , username , pwd , e , digestPassword ); } - catch ( AssertionException& e ){ + catch ( AssertionException& ){ } }