Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

rs

  • Loading branch information...
commit 8d69487c7857fd73840fe39448190ac6d0b61ac0 1 parent 8d285b5
Dwight Merriman authored June 17, 2010
6  db/db.vcxproj
@@ -192,9 +192,6 @@
192 192
     </Link>
193 193
   </ItemDefinitionGroup>
194 194
   <ItemGroup>
195  
-    <ResourceCompile Include="db.rc" />
196  
-  </ItemGroup>
197  
-  <ItemGroup>
198 195
     <ClCompile Include="..\client\dbclientcursor.cpp" />
199 196
     <ClCompile Include="..\pcre-7.4\pcrecpp.cc">
200 197
       <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@@ -715,6 +712,9 @@
715 712
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
716 713
     </Library>
717 714
   </ItemGroup>
  715
+  <ItemGroup>
  716
+    <ResourceCompile Include="db.rc" />
  717
+  </ItemGroup>
718 718
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
719 719
   <ImportGroup Label="ExtensionTargets">
720 720
   </ImportGroup>
11  db/db.vcxproj.filters
@@ -764,6 +764,9 @@
764 764
     <Filter Include="util\core">
765 765
       <UniqueIdentifier>{9775f24c-3a29-4e0d-b5de-991c592cf376}</UniqueIdentifier>
766 766
     </Filter>
  767
+    <Filter Include="Resource Files">
  768
+      <UniqueIdentifier>{9aea1b83-cdcb-48a8-97e6-47805cacdc29}</UniqueIdentifier>
  769
+    </Filter>
767 770
   </ItemGroup>
768 771
   <ItemGroup>
769 772
     <None Include="repl\notes.txt">
@@ -781,9 +784,6 @@
781 784
     <None Include="readme.txt">
782 785
       <Filter>util\concurrency</Filter>
783 786
     </None>
784  
-    <None Include="mongo.ico">
785  
-      <Filter>libs</Filter>
786  
-    </None>
787 787
     <None Include="..\SConstruct">
788 788
       <Filter>db</Filter>
789 789
     </None>
@@ -793,6 +793,9 @@
793 793
     <None Include="..\jstests\rs\test_framework.js">
794 794
       <Filter>rs\test stuff</Filter>
795 795
     </None>
  796
+    <None Include="mongo.ico">
  797
+      <Filter>Resource Files</Filter>
  798
+    </None>
796 799
   </ItemGroup>
797 800
   <ItemGroup>
798 801
     <Library Include="..\..\js\js64r.lib">
@@ -810,7 +813,7 @@
810 813
   </ItemGroup>
811 814
   <ItemGroup>
812 815
     <ResourceCompile Include="db.rc">
813  
-      <Filter>db\core</Filter>
  816
+      <Filter>Resource Files</Filter>
814 817
     </ResourceCompile>
815 818
   </ItemGroup>
816 819
 </Project>
BIN  db/mongo.ico
Binary file not shown
79  db/repl/consensus.cpp
@@ -21,6 +21,8 @@
21 21
 
22 22
 namespace mongo { 
23 23
 
  24
+
  25
+
24 26
     class CmdReplSetFresh : public ReplSetCommand { 
25 27
     public:
26 28
         CmdReplSetFresh() : ReplSetCommand("replSetFresh") { }
@@ -138,13 +140,28 @@ namespace mongo {
138 140
         b.append("round", round);
139 141
     }
140 142
 
141  
-    void ReplSetImpl::getTargets(list<Target>& L) { 
  143
+    void ReplSetImpl::_getTargets(list<Target>& L, int& configVersion) {
  144
+        configVersion = config().version;
142 145
         for( Member *m = head(); m; m=m->next() )
143 146
             if( m->hbinfo().up() )
144 147
                 L.push_back( Target(m->fullName()) );
145 148
     }
146 149
 
147  
-    /* allUp only meaningful when true returned! */
  150
+    /* config version is returned as it is ok to use this unlocked.  BUT, if unlocked, you would need 
  151
+       to check later that the config didn't change. */
  152
+    void ReplSetImpl::getTargets(list<Target>& L, int& configVersion) {
  153
+        if( lockedByMe() ) { 
  154
+            _getTargets(L, configVersion);
  155
+            return;
  156
+        }
  157
+        lock lk(this);
  158
+        _getTargets(L, configVersion);
  159
+    }
  160
+
  161
+    /* Do we have the newest data of them all?
  162
+       @param allUp - set to true if all members are up.  Only set if true returned.
  163
+       @return true if we are freshest.  Note we may tie.
  164
+    */
148 165
     bool Consensus::weAreFreshest(bool& allUp) {
149 166
         BSONObj cmd = BSON(
150 167
                "replSetFresh" << 1 <<
@@ -152,7 +169,8 @@ namespace mongo {
152 169
                "who" << rs._self->fullName() << 
153 170
                "cfgver" << rs._cfg->version );
154 171
         list<Target> L;
155  
-        rs.getTargets(L);
  172
+        int ver;
  173
+        rs.getTargets(L, ver);
156 174
         multiCommand(cmd, L);
157 175
         int nok = 0;
158 176
         allUp = true;
@@ -163,16 +181,21 @@ namespace mongo {
163 181
                     return false;
164 182
             }
165 183
             else {
166  
-                log() << "replSet TEMP freshest returns " << i->result.toString() << rsLog;
  184
+                DEV log() << "replSet freshest returns " << i->result.toString() << rsLog;
167 185
                 allUp = false;
168 186
             }
169 187
         }
170  
-        log() << "replSet TEMP we are freshest of up nodes, nok:" << nok << rsLog; 
  188
+        DEV log() << "replSet we are freshest of up nodes, nok:" << nok << rsLog; 
171 189
         return true;
172 190
     }
173 191
 
174 192
     extern time_t started;
175 193
 
  194
+    void Consensus::multiCommand(BSONObj cmd, list<Target>& L) { 
  195
+        assert( !rs.lockedByMe() );
  196
+        mongo::multiCommand(cmd, L);
  197
+    }
  198
+
176 199
     void Consensus::_electSelf() {
177 200
         bool allUp;
178 201
         if( !weAreFreshest(allUp) ) { 
@@ -180,11 +203,14 @@ namespace mongo {
180 203
             return;
181 204
         }
182 205
         if( !allUp && time(0) - started < 60 * 5 ) { 
  206
+            /* the idea here is that if a bunch of nodes bounce all at once, we don't want to drop data 
  207
+               if we don't have to -- we'd rather be offline and wait a little longer instead */
183 208
             log() << "replSet info not electing self, not all members up and we have been up less than 5 minutes" << rsLog;
  209
+            return;
184 210
         }
185 211
 
186 212
         time_t start = time(0);
187  
-        Member& me = *rs._self;        
  213
+        Member& me = *rs._self;
188 214
         int tally = yea( me.id() );
189 215
         log() << "replSet info electSelf" << rsLog;
190 216
 
@@ -197,33 +223,40 @@ namespace mongo {
197 223
                "round" << OID::gen() /* this is just for diagnostics */
198 224
             );
199 225
 
  226
+        int configVersion;
200 227
         list<Target> L;
201  
-        rs.getTargets(L);
  228
+        rs.getTargets(L, configVersion);
202 229
         multiCommand(electCmd, L);
203 230
 
204  
-        for( list<Target>::iterator i = L.begin(); i != L.end(); i++ ) {
205  
-            log() << "replSet TEMP elect res: " << i->result.toString() << rsLog;
206  
-            if( i->ok ) {
207  
-                int v = i->result["vote"].Int();
208  
-                tally += v;
  231
+        {
  232
+            RSBase::lock lk(&rs);
  233
+            for( list<Target>::iterator i = L.begin(); i != L.end(); i++ ) {
  234
+                DEV log() << "replSet elect res: " << i->result.toString() << rsLog;
  235
+                if( i->ok ) {
  236
+                    int v = i->result["vote"].Int();
  237
+                    tally += v;
  238
+                }
209 239
             }
210  
-        }
211  
-        if( tally*2 > totalVotes() ) {
212  
-            if( time(0) - start > 30 ) {
  240
+            if( tally*2 <= totalVotes() ) {
  241
+                log() << "replSet couldn't elect self, only received " << tally << " votes" << rsLog;
  242
+            }
  243
+            else if( time(0) - start > 30 ) {
213 244
                 // defensive; should never happen as we have timeouts on connection and operation for our conn
214  
-                log() << "replSet too much time passed during election, ignoring result" << rsLog;
  245
+                log() << "replSet too much time passed during our election, ignoring result" << rsLog;
215 246
             }
216  
-            /* succeeded. */
217  
-            log() << "replSet election succeeded assuming primary role" << rsLog;
218  
-            rs.assumePrimary();
219  
-            return;
220  
-        } 
221  
-        else { 
222  
-            log() << "replSet couldn't elect self, only received " << tally << " votes" << rsLog;
  247
+            else if( configVersion != rs.config().version ) { 
  248
+                log() << "replSet config version changed during our election, ignoring result" << rsLog;
  249
+            }
  250
+            else {
  251
+                /* succeeded. */
  252
+                log() << "replSet election succeeded, assuming primary role" << rsLog;
  253
+                rs.assumePrimary();
  254
+            } 
223 255
         }
224 256
     }
225 257
 
226 258
     void Consensus::electSelf() {
  259
+        assert( !rs.lockedByMe() );
227 260
         try { 
228 261
             _electSelf(); 
229 262
         } 
81  db/repl/manager.cpp
@@ -44,7 +44,7 @@ namespace mongo {
44 44
     }
45 45
 
46 46
     Manager::Manager(ReplSetImpl *_rs) : 
47  
-      task::Server("Manager"), rs(_rs), _primary(NOPRIMARY)
  47
+      task::Server("Manager"), rs(_rs), busy(false), _primary(NOPRIMARY)
48 48
     { 
49 49
     }
50 50
  
@@ -56,47 +56,56 @@ namespace mongo {
56 56
 
57 57
     /** called as the health threads get new results */
58 58
     void Manager::msgCheckNewState() {
59  
-        const Member *p = rs->currentPrimary();
60  
-        const Member *p2;
61  
-        try { p2 = findOtherPrimary(); }
62  
-        catch(string s) { 
63  
-            /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */
64  
-            log() << "replSet warning DIAG TODO 2primary" << s << rsLog;
65  
-            return;
66  
-        }
  59
+        {
  60
+            RSBase::lock lk(rs);
67 61
 
68  
-        if( p == p2 && p ) return;
  62
+            if( busy ) return;
69 63
 
70  
-        if( p2 ) { 
71  
-            /* someone else thinks they are primary. */
72  
-            if( p == p2 ) // already match
  64
+            const Member *p = rs->currentPrimary();
  65
+            const Member *p2;
  66
+            try { p2 = findOtherPrimary(); }
  67
+            catch(string s) { 
  68
+                /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */
  69
+                log() << "replSet warning DIAG TODO 2primary" << s << rsLog;
73 70
                 return;
74  
-            if( p == 0 )
75  
-                noteARemoteIsPrimary(p2); return;
76  
-            if( p != rs->_self )
77  
-                noteARemoteIsPrimary(p2); return;
78  
-            /* we thought we were primary, yet now someone else thinks they are. */
79  
-            if( !rs->elect.aMajoritySeemsToBeUp() )
80  
-                noteARemoteIsPrimary(p2); return;
81  
-            /* ignore for now, keep thinking we are master */
82  
-            return;
83  
-        }
  71
+            }
84 72
 
85  
-        if( p ) { 
86  
-            /* we are already primary, and nothing significant out there has changed. */
87  
-            /* todo: if !aMajoritySeemsToBeUp, relinquish */
88  
-            assert( p == rs->_self );
89  
-            return;
90  
-        }
  73
+            if( p == p2 && p ) return;
91 74
 
92  
-        /* no one seems to be primary.  shall we try to elect ourself? */
93  
-        if( !rs->elect.aMajoritySeemsToBeUp() ) { 
94  
-            rs->_self->lhb() = "can't see a majority, won't consider electing self";
95  
-            return;
96  
-        }
  75
+            if( p2 ) { 
  76
+                /* someone else thinks they are primary. */
  77
+                if( p == p2 ) // already match
  78
+                    return;
  79
+                if( p == 0 )
  80
+                    noteARemoteIsPrimary(p2); return;
  81
+                if( p != rs->_self )
  82
+                    noteARemoteIsPrimary(p2); return;
  83
+                /* we thought we were primary, yet now someone else thinks they are. */
  84
+                if( !rs->elect.aMajoritySeemsToBeUp() )
  85
+                    noteARemoteIsPrimary(p2); return;
  86
+                /* ignore for now, keep thinking we are master */
  87
+                return;
  88
+            }
97 89
 
98  
-        rs->_self->lhb() = "";
99  
-        rs->elect.electSelf();
  90
+            if( p ) { 
  91
+                /* we are already primary, and nothing significant out there has changed. */
  92
+                /* todo: if !aMajoritySeemsToBeUp, relinquish */
  93
+                assert( p == rs->_self );
  94
+                return;
  95
+            }
  96
+
  97
+            /* no one seems to be primary.  shall we try to elect ourself? */
  98
+            if( !rs->elect.aMajoritySeemsToBeUp() ) { 
  99
+                rs->_self->lhb() = "can't see a majority, won't consider electing self";
  100
+                return;
  101
+            }
  102
+
  103
+            rs->_self->lhb() = "";
  104
+            busy = true; // don't try to do further elections & such while we are already working on one.
  105
+        }
  106
+        try { rs->elect.electSelf(); }
  107
+        catch(...) { log() << "replSet error unexpected assertion in rs manager" << rsLog; }
  108
+        busy = false;
100 109
     }
101 110
 
102 111
 }
3  db/repl/multicmd.h
@@ -31,9 +31,6 @@ namespace mongo {
31 31
         BSONObj result;
32 32
     };
33 33
 
34  
-    /** send a command in parallel to many servers, and collect the results. */
35  
-    void multiCommand(BSONObj cmd, list<Target>& L);
36  
-
37 34
     /* -- implementation ------------- */
38 35
 
39 36
     class _MultiCommandJob : public BackgroundJob { 
22  db/repl/rs.h
@@ -59,6 +59,7 @@ namespace mongo {
59 59
     class Manager : public task::Server {
60 60
         bool got(const any&);
61 61
         ReplSetImpl *rs;
  62
+        bool busy;
62 63
         int _primary;
63 64
         const Member* findOtherPrimary();
64 65
         void noteARemoteIsPrimary(const Member *);
@@ -68,6 +69,7 @@ namespace mongo {
68 69
         void msgCheckNewState();
69 70
     };
70 71
 
  72
+    struct Target;
71 73
     class Consensus {
72 74
         ReplSetImpl &rs;
73 75
         struct LastYea { 
@@ -85,6 +87,8 @@ namespace mongo {
85 87
         bool aMajoritySeemsToBeUp() const;
86 88
         void electSelf();
87 89
         void electCmdReceived(BSONObj, BSONObjBuilder*);
  90
+
  91
+        void multiCommand(BSONObj cmd, list<Target>& L);
88 92
     };
89 93
 
90 94
     /** most operations on a ReplSet object should be done while locked. */
@@ -92,21 +96,32 @@ namespace mongo {
92 96
     private:
93 97
         mutex m;
94 98
         int _locked;
  99
+        ThreadLocalValue<bool> _lockedByMe;
95 100
     protected:
96 101
         RSBase() : m("RSBase"), _locked(0) { }
97 102
         class lock : scoped_lock { 
98 103
             RSBase& _b;
99 104
         public:
100 105
             lock(RSBase* b) : scoped_lock(b->m), _b(*b) { 
101  
-                DEV assert(b->_locked == 0);
102  
-                b->_locked++; 
  106
+                DEV assert(_b._locked == 0);
  107
+                _b._locked++; 
  108
+                _b._lockedByMe.set(true);
103 109
             }
104 110
             ~lock() { 
  111
+                assert( _b._lockedByMe.get() );
105 112
                 DEV assert(_b._locked == 1);
  113
+                _b._lockedByMe.set(false);
106 114
                 _b._locked--; 
107 115
             }
108 116
         };
  117
+    public:
  118
+        /* for asserts */
109 119
         bool locked() const { return _locked != 0; }
  120
+
  121
+        /* if true, is locked, and was locked by this thread. note if false, it could be locked by another thread, or not locked at all.
  122
+           just for asserts & such so we can make the contracts clear on who locks what when.
  123
+        */
  124
+        bool lockedByMe() { return _lockedByMe.get(); } 
110 125
     };
111 126
 
112 127
     /* information about the entire repl set, such as the various servers in the set, and their state */
@@ -185,7 +200,8 @@ namespace mongo {
185 200
     private:
186 201
         Member* head() const { return _members.head(); }
187 202
         Member* findById(unsigned id) const;
188  
-        void getTargets(list<Target>&);
  203
+        void _getTargets(list<Target>&, int &configVersion);
  204
+        void getTargets(list<Target>&, int &configVersion);
189 205
         void startThreads();
190 206
         friend class FeedbackThread;
191 207
         friend class CmdReplSetElect;
1  db/repl/rs_config.h
@@ -25,6 +25,7 @@
25 25
 
26 26
 namespace mongo { 
27 27
 
  28
+    /* singleton config object is stored here */
28 29
     const string rsConfigNs = "local.system.replset";
29 30
 
30 31
     class ReplSetConfig {
2  db/resource.h
@@ -8,7 +8,7 @@
8 8
 // 
9 9
 #ifdef APSTUDIO_INVOKED
10 10
 #ifndef APSTUDIO_READONLY_SYMBOLS
11  
-#define _APS_NEXT_RESOURCE_VALUE        103
  11
+#define _APS_NEXT_RESOURCE_VALUE        104
12 12
 #define _APS_NEXT_COMMAND_VALUE         40001
13 13
 #define _APS_NEXT_CONTROL_VALUE         1001
14 14
 #define _APS_NEXT_SYMED_VALUE           101
3  dbtests/test.vcxproj
@@ -253,9 +253,6 @@
253 253
     <ClInclude Include="..\util\unittest.h" />
254 254
   </ItemGroup>
255 255
   <ItemGroup>
256  
-    <ResourceCompile Include="..\db\db.rc" />
257  
-  </ItemGroup>
258  
-  <ItemGroup>
259 256
     <ClCompile Include="..\client\dbclientcursor.cpp" />
260 257
     <ClCompile Include="..\client\gridfs.cpp" />
261 258
     <ClCompile Include="..\db\repl\consensus.cpp" />
5  dbtests/test.vcxproj.filters
@@ -240,11 +240,6 @@
240 240
     </ClInclude>
241 241
   </ItemGroup>
242 242
   <ItemGroup>
243  
-    <ResourceCompile Include="..\db\db.rc">
244  
-      <Filter>misc and third party</Filter>
245  
-    </ResourceCompile>
246  
-  </ItemGroup>
247  
-  <ItemGroup>
248 243
     <Library Include="..\..\js\js64r.lib">
249 244
       <Filter>misc and third party</Filter>
250 245
     </Library>
4  s/dbgrid.vcxproj.filters
@@ -12,10 +12,6 @@
12 12
     <Filter Include="Header Files\Header Shared">
13 13
       <UniqueIdentifier>{4048b883-7255-40b3-b0e9-4c1044cff049}</UniqueIdentifier>
14 14
     </Filter>
15  
-    <Filter Include="Resource Files">
16  
-      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
17  
-      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav</Extensions>
18  
-    </Filter>
19 15
     <Filter Include="libs_etc">
20 16
       <UniqueIdentifier>{17d48ddf-5c49-4dfd-bafa-16d5fed290cd}</UniqueIdentifier>
21 17
     </Filter>
6  shell/msvc/mongo.vcxproj
@@ -230,7 +230,6 @@
230 230
     <None Include="..\query.js" />
231 231
     <None Include="..\servers.js" />
232 232
     <None Include="..\utils.js" />
233  
-    <None Include="mongo.ico" />
234 233
   </ItemGroup>
235 234
   <ItemGroup>
236 235
     <Library Include="..\..\..\js\js32d.lib">
@@ -243,8 +242,9 @@
243 242
   </ItemGroup>
244 243
   <ItemGroup>
245 244
     <ClInclude Include="..\..\db\lasterror.h" />
246  
-    <ClInclude Include="resource.h" />
247  
-    <ClInclude Include="resource1.h" />
  245
+  </ItemGroup>
  246
+  <ItemGroup>
  247
+    <ResourceCompile Include="..\..\db\db.rc" />
248 248
   </ItemGroup>
249 249
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
250 250
   <ImportGroup Label="ExtensionTargets">
10  shell/msvc/mongo.vcxproj.filters
@@ -240,9 +240,6 @@
240 240
     <None Include="..\utils.js">
241 241
       <Filter>_js files</Filter>
242 242
     </None>
243  
-    <None Include="mongo.ico">
244  
-      <Filter>Resource Files</Filter>
245  
-    </None>
246 243
   </ItemGroup>
247 244
   <ItemGroup>
248 245
     <Library Include="..\..\..\js\js32d.lib" />
@@ -253,7 +250,10 @@
253 250
     <ClInclude Include="..\..\db\lasterror.h">
254 251
       <Filter>db</Filter>
255 252
     </ClInclude>
256  
-    <ClInclude Include="resource.h" />
257  
-    <ClInclude Include="resource1.h" />
  253
+  </ItemGroup>
  254
+  <ItemGroup>
  255
+    <ResourceCompile Include="..\..\db\db.rc">
  256
+      <Filter>Resource Files</Filter>
  257
+    </ResourceCompile>
258 258
   </ItemGroup>
259 259
 </Project>

0 notes on commit 8d69487

Please sign in to comment.
Something went wrong with that request. Please try again.