Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

rs

  • Loading branch information...
commit 8d69487c7857fd73840fe39448190ac6d0b61ac0 1 parent 8d285b5
@dwight dwight authored
View
6 db/db.vcxproj
@@ -192,9 +192,6 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
- <ResourceCompile Include="db.rc" />
- </ItemGroup>
- <ItemGroup>
<ClCompile Include="..\client\dbclientcursor.cpp" />
<ClCompile Include="..\pcre-7.4\pcrecpp.cc">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@@ -715,6 +712,9 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
</Library>
</ItemGroup>
+ <ItemGroup>
+ <ResourceCompile Include="db.rc" />
+ </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
View
11 db/db.vcxproj.filters
@@ -764,6 +764,9 @@
<Filter Include="util\core">
<UniqueIdentifier>{9775f24c-3a29-4e0d-b5de-991c592cf376}</UniqueIdentifier>
</Filter>
+ <Filter Include="Resource Files">
+ <UniqueIdentifier>{9aea1b83-cdcb-48a8-97e6-47805cacdc29}</UniqueIdentifier>
+ </Filter>
</ItemGroup>
<ItemGroup>
<None Include="repl\notes.txt">
@@ -781,9 +784,6 @@
<None Include="readme.txt">
<Filter>util\concurrency</Filter>
</None>
- <None Include="mongo.ico">
- <Filter>libs</Filter>
- </None>
<None Include="..\SConstruct">
<Filter>db</Filter>
</None>
@@ -793,6 +793,9 @@
<None Include="..\jstests\rs\test_framework.js">
<Filter>rs\test stuff</Filter>
</None>
+ <None Include="mongo.ico">
+ <Filter>Resource Files</Filter>
+ </None>
</ItemGroup>
<ItemGroup>
<Library Include="..\..\js\js64r.lib">
@@ -810,7 +813,7 @@
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="db.rc">
- <Filter>db\core</Filter>
+ <Filter>Resource Files</Filter>
</ResourceCompile>
</ItemGroup>
</Project>
View
BIN  db/mongo.ico
Binary file not shown
View
79 db/repl/consensus.cpp
@@ -21,6 +21,8 @@
namespace mongo {
+
+
class CmdReplSetFresh : public ReplSetCommand {
public:
CmdReplSetFresh() : ReplSetCommand("replSetFresh") { }
@@ -138,13 +140,28 @@ namespace mongo {
b.append("round", round);
}
- void ReplSetImpl::getTargets(list<Target>& L) {
+ void ReplSetImpl::_getTargets(list<Target>& L, int& configVersion) {
+ configVersion = config().version;
for( Member *m = head(); m; m=m->next() )
if( m->hbinfo().up() )
L.push_back( Target(m->fullName()) );
}
- /* allUp only meaningful when true returned! */
+ /* config version is returned as it is ok to use this unlocked. BUT, if unlocked, you would need
+ to check later that the config didn't change. */
+ void ReplSetImpl::getTargets(list<Target>& L, int& configVersion) {
+ if( lockedByMe() ) {
+ _getTargets(L, configVersion);
+ return;
+ }
+ lock lk(this);
+ _getTargets(L, configVersion);
+ }
+
+ /* Do we have the newest data of them all?
+ @param allUp - set to true if all members are up. Only set if true returned.
+ @return true if we are freshest. Note we may tie.
+ */
bool Consensus::weAreFreshest(bool& allUp) {
BSONObj cmd = BSON(
"replSetFresh" << 1 <<
@@ -152,7 +169,8 @@ namespace mongo {
"who" << rs._self->fullName() <<
"cfgver" << rs._cfg->version );
list<Target> L;
- rs.getTargets(L);
+ int ver;
+ rs.getTargets(L, ver);
multiCommand(cmd, L);
int nok = 0;
allUp = true;
@@ -163,16 +181,21 @@ namespace mongo {
return false;
}
else {
- log() << "replSet TEMP freshest returns " << i->result.toString() << rsLog;
+ DEV log() << "replSet freshest returns " << i->result.toString() << rsLog;
allUp = false;
}
}
- log() << "replSet TEMP we are freshest of up nodes, nok:" << nok << rsLog;
+ DEV log() << "replSet we are freshest of up nodes, nok:" << nok << rsLog;
return true;
}
extern time_t started;
+ void Consensus::multiCommand(BSONObj cmd, list<Target>& L) {
+ assert( !rs.lockedByMe() );
+ mongo::multiCommand(cmd, L);
+ }
+
void Consensus::_electSelf() {
bool allUp;
if( !weAreFreshest(allUp) ) {
@@ -180,11 +203,14 @@ namespace mongo {
return;
}
if( !allUp && time(0) - started < 60 * 5 ) {
+ /* the idea here is that if a bunch of nodes bounce all at once, we don't want to drop data
+ if we don't have to -- we'd rather be offline and wait a little longer instead */
log() << "replSet info not electing self, not all members up and we have been up less than 5 minutes" << rsLog;
+ return;
}
time_t start = time(0);
- Member& me = *rs._self;
+ Member& me = *rs._self;
int tally = yea( me.id() );
log() << "replSet info electSelf" << rsLog;
@@ -197,33 +223,40 @@ namespace mongo {
"round" << OID::gen() /* this is just for diagnostics */
);
+ int configVersion;
list<Target> L;
- rs.getTargets(L);
+ rs.getTargets(L, configVersion);
multiCommand(electCmd, L);
- for( list<Target>::iterator i = L.begin(); i != L.end(); i++ ) {
- log() << "replSet TEMP elect res: " << i->result.toString() << rsLog;
- if( i->ok ) {
- int v = i->result["vote"].Int();
- tally += v;
+ {
+ RSBase::lock lk(&rs);
+ for( list<Target>::iterator i = L.begin(); i != L.end(); i++ ) {
+ DEV log() << "replSet elect res: " << i->result.toString() << rsLog;
+ if( i->ok ) {
+ int v = i->result["vote"].Int();
+ tally += v;
+ }
}
- }
- if( tally*2 > totalVotes() ) {
- if( time(0) - start > 30 ) {
+ if( tally*2 <= totalVotes() ) {
+ log() << "replSet couldn't elect self, only received " << tally << " votes" << rsLog;
+ }
+ else if( time(0) - start > 30 ) {
// defensive; should never happen as we have timeouts on connection and operation for our conn
- log() << "replSet too much time passed during election, ignoring result" << rsLog;
+ log() << "replSet too much time passed during our election, ignoring result" << rsLog;
}
- /* succeeded. */
- log() << "replSet election succeeded assuming primary role" << rsLog;
- rs.assumePrimary();
- return;
- }
- else {
- log() << "replSet couldn't elect self, only received " << tally << " votes" << rsLog;
+ else if( configVersion != rs.config().version ) {
+ log() << "replSet config version changed during our election, ignoring result" << rsLog;
+ }
+ else {
+ /* succeeded. */
+ log() << "replSet election succeeded, assuming primary role" << rsLog;
+ rs.assumePrimary();
+ }
}
}
void Consensus::electSelf() {
+ assert( !rs.lockedByMe() );
try {
_electSelf();
}
View
81 db/repl/manager.cpp
@@ -44,7 +44,7 @@ namespace mongo {
}
Manager::Manager(ReplSetImpl *_rs) :
- task::Server("Manager"), rs(_rs), _primary(NOPRIMARY)
+ task::Server("Manager"), rs(_rs), busy(false), _primary(NOPRIMARY)
{
}
@@ -56,47 +56,56 @@ namespace mongo {
/** called as the health threads get new results */
void Manager::msgCheckNewState() {
- const Member *p = rs->currentPrimary();
- const Member *p2;
- try { p2 = findOtherPrimary(); }
- catch(string s) {
- /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */
- log() << "replSet warning DIAG TODO 2primary" << s << rsLog;
- return;
- }
+ {
+ RSBase::lock lk(rs);
- if( p == p2 && p ) return;
+ if( busy ) return;
- if( p2 ) {
- /* someone else thinks they are primary. */
- if( p == p2 ) // already match
+ const Member *p = rs->currentPrimary();
+ const Member *p2;
+ try { p2 = findOtherPrimary(); }
+ catch(string s) {
+ /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */
+ log() << "replSet warning DIAG TODO 2primary" << s << rsLog;
return;
- if( p == 0 )
- noteARemoteIsPrimary(p2); return;
- if( p != rs->_self )
- noteARemoteIsPrimary(p2); return;
- /* we thought we were primary, yet now someone else thinks they are. */
- if( !rs->elect.aMajoritySeemsToBeUp() )
- noteARemoteIsPrimary(p2); return;
- /* ignore for now, keep thinking we are master */
- return;
- }
+ }
- if( p ) {
- /* we are already primary, and nothing significant out there has changed. */
- /* todo: if !aMajoritySeemsToBeUp, relinquish */
- assert( p == rs->_self );
- return;
- }
+ if( p == p2 && p ) return;
- /* no one seems to be primary. shall we try to elect ourself? */
- if( !rs->elect.aMajoritySeemsToBeUp() ) {
- rs->_self->lhb() = "can't see a majority, won't consider electing self";
- return;
- }
+ if( p2 ) {
+ /* someone else thinks they are primary. */
+ if( p == p2 ) // already match
+ return;
+ if( p == 0 )
+ noteARemoteIsPrimary(p2); return;
+ if( p != rs->_self )
+ noteARemoteIsPrimary(p2); return;
+ /* we thought we were primary, yet now someone else thinks they are. */
+ if( !rs->elect.aMajoritySeemsToBeUp() )
+ noteARemoteIsPrimary(p2); return;
+ /* ignore for now, keep thinking we are master */
+ return;
+ }
- rs->_self->lhb() = "";
- rs->elect.electSelf();
+ if( p ) {
+ /* we are already primary, and nothing significant out there has changed. */
+ /* todo: if !aMajoritySeemsToBeUp, relinquish */
+ assert( p == rs->_self );
+ return;
+ }
+
+ /* no one seems to be primary. shall we try to elect ourself? */
+ if( !rs->elect.aMajoritySeemsToBeUp() ) {
+ rs->_self->lhb() = "can't see a majority, won't consider electing self";
+ return;
+ }
+
+ rs->_self->lhb() = "";
+ busy = true; // don't try to do further elections & such while we are already working on one.
+ }
+ try { rs->elect.electSelf(); }
+ catch(...) { log() << "replSet error unexpected assertion in rs manager" << rsLog; }
+ busy = false;
}
}
View
3  db/repl/multicmd.h
@@ -31,9 +31,6 @@ namespace mongo {
BSONObj result;
};
- /** send a command in parallel to many servers, and collect the results. */
- void multiCommand(BSONObj cmd, list<Target>& L);
-
/* -- implementation ------------- */
class _MultiCommandJob : public BackgroundJob {
View
22 db/repl/rs.h
@@ -59,6 +59,7 @@ namespace mongo {
class Manager : public task::Server {
bool got(const any&);
ReplSetImpl *rs;
+ bool busy;
int _primary;
const Member* findOtherPrimary();
void noteARemoteIsPrimary(const Member *);
@@ -68,6 +69,7 @@ namespace mongo {
void msgCheckNewState();
};
+ struct Target;
class Consensus {
ReplSetImpl &rs;
struct LastYea {
@@ -85,6 +87,8 @@ namespace mongo {
bool aMajoritySeemsToBeUp() const;
void electSelf();
void electCmdReceived(BSONObj, BSONObjBuilder*);
+
+ void multiCommand(BSONObj cmd, list<Target>& L);
};
/** most operations on a ReplSet object should be done while locked. */
@@ -92,21 +96,32 @@ namespace mongo {
private:
mutex m;
int _locked;
+ ThreadLocalValue<bool> _lockedByMe;
protected:
RSBase() : m("RSBase"), _locked(0) { }
class lock : scoped_lock {
RSBase& _b;
public:
lock(RSBase* b) : scoped_lock(b->m), _b(*b) {
- DEV assert(b->_locked == 0);
- b->_locked++;
+ DEV assert(_b._locked == 0);
+ _b._locked++;
+ _b._lockedByMe.set(true);
}
~lock() {
+ assert( _b._lockedByMe.get() );
DEV assert(_b._locked == 1);
+ _b._lockedByMe.set(false);
_b._locked--;
}
};
+ public:
+ /* for asserts */
bool locked() const { return _locked != 0; }
+
+ /* if true, is locked, and was locked by this thread. note if false, it may or may not be locked by another thread.
+ just for asserts & such, so we can make the contracts clear on who locks what when.
+ */
+ bool lockedByMe() { return _lockedByMe.get(); }
};
/* information about the entire repl set, such as the various servers in the set, and their state */
@@ -185,7 +200,8 @@ namespace mongo {
private:
Member* head() const { return _members.head(); }
Member* findById(unsigned id) const;
- void getTargets(list<Target>&);
+ void _getTargets(list<Target>&, int &configVersion);
+ void getTargets(list<Target>&, int &configVersion);
void startThreads();
friend class FeedbackThread;
friend class CmdReplSetElect;
View
1  db/repl/rs_config.h
@@ -25,6 +25,7 @@
namespace mongo {
+ /* singleton config object is stored here */
const string rsConfigNs = "local.system.replset";
class ReplSetConfig {
View
2  db/resource.h
@@ -8,7 +8,7 @@
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
-#define _APS_NEXT_RESOURCE_VALUE 103
+#define _APS_NEXT_RESOURCE_VALUE 104
#define _APS_NEXT_COMMAND_VALUE 40001
#define _APS_NEXT_CONTROL_VALUE 1001
#define _APS_NEXT_SYMED_VALUE 101
View
3  dbtests/test.vcxproj
@@ -253,9 +253,6 @@
<ClInclude Include="..\util\unittest.h" />
</ItemGroup>
<ItemGroup>
- <ResourceCompile Include="..\db\db.rc" />
- </ItemGroup>
- <ItemGroup>
<ClCompile Include="..\client\dbclientcursor.cpp" />
<ClCompile Include="..\client\gridfs.cpp" />
<ClCompile Include="..\db\repl\consensus.cpp" />
View
5 dbtests/test.vcxproj.filters
@@ -240,11 +240,6 @@
</ClInclude>
</ItemGroup>
<ItemGroup>
- <ResourceCompile Include="..\db\db.rc">
- <Filter>misc and third party</Filter>
- </ResourceCompile>
- </ItemGroup>
- <ItemGroup>
<Library Include="..\..\js\js64r.lib">
<Filter>misc and third party</Filter>
</Library>
View
4 s/dbgrid.vcxproj.filters
@@ -12,10 +12,6 @@
<Filter Include="Header Files\Header Shared">
<UniqueIdentifier>{4048b883-7255-40b3-b0e9-4c1044cff049}</UniqueIdentifier>
</Filter>
- <Filter Include="Resource Files">
- <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
- <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
- </Filter>
<Filter Include="libs_etc">
<UniqueIdentifier>{17d48ddf-5c49-4dfd-bafa-16d5fed290cd}</UniqueIdentifier>
</Filter>
View
6 shell/msvc/mongo.vcxproj
@@ -230,7 +230,6 @@
<None Include="..\query.js" />
<None Include="..\servers.js" />
<None Include="..\utils.js" />
- <None Include="mongo.ico" />
</ItemGroup>
<ItemGroup>
<Library Include="..\..\..\js\js32d.lib">
@@ -243,8 +242,9 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\db\lasterror.h" />
- <ClInclude Include="resource.h" />
- <ClInclude Include="resource1.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ResourceCompile Include="..\..\db\db.rc" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
View
10 shell/msvc/mongo.vcxproj.filters
@@ -240,9 +240,6 @@
<None Include="..\utils.js">
<Filter>_js files</Filter>
</None>
- <None Include="mongo.ico">
- <Filter>Resource Files</Filter>
- </None>
</ItemGroup>
<ItemGroup>
<Library Include="..\..\..\js\js32d.lib" />
@@ -253,7 +250,10 @@
<ClInclude Include="..\..\db\lasterror.h">
<Filter>db</Filter>
</ClInclude>
- <ClInclude Include="resource.h" />
- <ClInclude Include="resource1.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ResourceCompile Include="..\..\db\db.rc">
+ <Filter>Resource Files</Filter>
+ </ResourceCompile>
</ItemGroup>
</Project>
Please sign in to comment.
Something went wrong with that request. Please try again.