Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

SERVER-2771 Retry index builds on startup

  • Loading branch information...
commit ab1bceba74845666aaad957cab256cadf2a0c8de 1 parent 3abcc53
Kristina kchodorow authored
88 jstests/slowNightly/index_retry.js
... ... @@ -0,0 +1,88 @@
  1 +// Check index rebuild when MongoDB is killed
  2 +
  3 +var ports = allocatePorts(1);
  4 +mongod = new MongodRunner(ports[0], "/data/db/index_retry", null, null, ["--journal"]);
  5 +var conn = mongod.start();
  6 +
  7 +var test = conn.getDB("test");
  8 +
  9 +var name = 'jstests_slownightly_index_retry';
  10 +t = test.getCollection(name);
  11 +t.drop();
  12 +
  13 +// Insert a large number of documents, enough to ensure that an index build on these documents can
  14 +// be interrupted before it completes.
  15 +for (i = 0; i < 1e6; ++i) {
  16 + t.save( { a:i } );
  17 + if (i % 10000 == 0) {
  18 + print("i: " + i);
  19 + }
  20 +}
  21 +test.getLastError();
  22 +
  23 +function debug(x) {
  24 + printjson(x);
  25 +}
  26 +
  27 +/**
  28 + * @return true if an index build is currently in progress, false otherwise
  29 + */
  30 +function indexBuildInProgress() {
  31 + inprog = test.currentOp().inprog;
  32 + debug(inprog);
  33 + indexBuildOpId = -1;
  34 + inprog.forEach(
  35 + function( op ) {
  36 + // Identify the index build as an insert into the 'test.system.indexes'
  37 + // namespace. It is assumed that no other clients are concurrently
  38 + // accessing the 'test' database.
  39 + if ( op.op == 'insert' && op.ns == 'test.system.indexes' ) {
  40 + debug(op.opid);
  41 + indexBuildOpId = op.opid;
  42 + }
  43 + }
  44 + );
  45 + return indexBuildOpId != -1;
  46 +}
  47 +
  48 +function abortDuringIndexBuild(options) {
  49 +
  50 + // Create an index asynchronously by using a new connection.
  51 + new Mongo(test.getMongo().host ).getCollection( t.toString() ).createIndex( { a:1 }, options);
  52 +
  53 + // Wait for the index build to start.
  54 + var times = 0;
  55 + assert.soon(
  56 + function() {
  57 + return indexBuildInProgress() && times++ >= 2;
  58 + }
  59 + );
  60 +
  61 + print("killing the mongod");
  62 + stopMongod(ports[0], /* signal */ 9);
  63 +}
  64 +
  65 +abortDuringIndexBuild({background:true});
  66 +
  67 +print("sleeping");
  68 +sleep(2000);
  69 +
  70 +conn = mongod.start(/* reuseData */ true);
  71 +
  72 +assert.soon(
  73 + function() {
  74 + try {
  75 + printjson(conn.getDB("test").getCollection(name).find({a:42}).hint({a:1}).next());
  76 + } catch (e) {
  77 + print(e);
  78 + return false;
  79 + }
  80 + return true;
  81 + },
  82 + 'index builds successfully'
  83 +);
  84 +
  85 +print("Index built");
  86 +
  87 +stopMongod(ports[0]);
  88 +print("SUCCESS!");
1  src/mongo/SConscript
@@ -344,6 +344,7 @@ serverOnlyFiles = [ "db/curop.cpp",
344 344 "db/extsort.cpp",
345 345 "db/index.cpp",
346 346 "db/index_update.cpp",
  347 + "db/index_rebuilder.cpp",
347 348 "db/scanandorder.cpp",
348 349 "db/explain.cpp",
349 350 "db/geo/2d.cpp",
5 src/mongo/db/cmdline.h
@@ -72,6 +72,7 @@ namespace mongo {
72 72 bool usingReplSets() const { return !_replSet.empty(); }
73 73
74 74 std::string rsIndexPrefetch;// --indexPrefetch
  75 + bool indexBuildRetry; // --noIndexBuildRetry
75 76
76 77 // for master/slave replication
77 78 std::string source; // --source
@@ -141,7 +142,7 @@ namespace mongo {
141 142
142 143 SSLManager* sslServerManager; // currently leaks on close
143 144 #endif
144   -
  145 +
145 146 static void launchOk();
146 147
147 148 static void addGlobalOptions( boost::program_options::options_description& general ,
@@ -176,7 +177,7 @@ namespace mongo {
176 177
177 178 // todo move to cmdline.cpp?
178 179 inline CmdLine::CmdLine() :
179   - port(DefaultDBPort), rest(false), jsonp(false), quiet(false),
  180 + port(DefaultDBPort), rest(false), jsonp(false), indexBuildRetry(true), quiet(false),
180 181 noTableScan(false), prealloc(true), preallocj(true), smallfiles(sizeof(int*) == 4),
181 182 configsvr(false), quota(false), quotaFiles(8), cpu(false),
182 183 durOptions(0), objcheck(false), oplogSize(0), defaultProfile(0),
26 src/mongo/db/db.cpp
@@ -34,6 +34,7 @@
34 34 #include "mongo/db/dbmessage.h"
35 35 #include "mongo/db/dbwebserver.h"
36 36 #include "mongo/db/dur.h"
  37 +#include "mongo/db/index_rebuilder.h"
37 38 #include "mongo/db/initialize_server_global_state.h"
38 39 #include "mongo/db/instance.h"
39 40 #include "mongo/db/introspect.h"
@@ -78,7 +79,6 @@ namespace mongo {
78 79 extern int diagLogging;
79 80 extern unsigned lenForNewNsFiles;
80 81 extern int lockFile;
81   - extern bool checkNsFilesOnLoad;
82 82 extern string repairpath;
83 83
84 84 void setupSignals( bool inFork );
@@ -294,9 +294,6 @@ namespace mongo {
294 294 Client::GodScope gs;
295 295 LOG(1) << "enter repairDatabases (to check pdfile version #)" << endl;
296 296
297   - //verify(checkNsFilesOnLoad);
298   - checkNsFilesOnLoad = false; // we are mainly just checking the header - don't scan the whole .ns file for every db here.
299   -
300 297 Lock::GlobalWrite lk;
301 298 vector< string > dbNames;
302 299 getDatabaseNames( dbNames );
@@ -345,8 +342,6 @@ namespace mongo {
345 342 cc().shutdown();
346 343 dbexit( EXIT_CLEAN );
347 344 }
348   -
349   - checkNsFilesOnLoad = true;
350 345 }
351 346
352 347 void clearTmpFiles() {
@@ -403,7 +398,7 @@ namespace mongo {
403 398 */
404 399 class DataFileSync : public BackgroundJob , public ServerStatusSection {
405 400 public:
406   - DataFileSync()
  401 + DataFileSync()
407 402 : ServerStatusSection( "backgroundFlushing" ),
408 403 _total_time( 0 ),
409 404 _flushes( 0 ),
@@ -412,7 +407,7 @@ namespace mongo {
412 407
413 408 virtual bool includeByDefault() const { return true; }
414 409 virtual bool adminOnly() const { return false; }
415   -
  410 +
416 411 string name() const { return "DataFileSync"; }
417 412
418 413 void run() {
@@ -469,7 +464,7 @@ namespace mongo {
469 464 _last_time = ms;
470 465 _last = jsTime();
471 466 }
472   -
  467 +
473 468 long long _total_time;
474 469 long long _flushes;
475 470 int _last_time;
@@ -485,16 +480,16 @@ namespace mongo {
485 480 virtual void appendAtLeaf( BSONObjBuilder& b ) const {
486 481 int m = static_cast<int>(MemoryMappedFile::totalMappedLength() / ( 1024 * 1024 ));
487 482 b.appendNumber( "mapped" , m );
488   -
  483 +
489 484 if ( cmdLine.dur ) {
490 485 m *= 2;
491 486 b.appendNumber( "mappedWithJournal" , m );
492 487 }
493   -
  488 +
494 489 }
495 490 } memJournalServerStatusMetric;
496 491 }
497   -
  492 +
498 493
499 494 const char * jsInterruptCallback() {
500 495 // should be safe to interrupt in js code, even if we have a write lock
@@ -640,6 +635,8 @@ namespace mongo {
640 635 /* this is for security on certain platforms (nonce generation) */
641 636 srand((unsigned) (curTimeMicros() ^ startupSrandTimer.micros()));
642 637
  638 + indexRebuilder.go();
  639 +
643 640 snapshotThread.go();
644 641 d.clientCursorMonitor.go();
645 642 PeriodicTask::theRunner->go();
@@ -754,6 +751,8 @@ static void buildOptionsDescriptions(po::options_description *pVisible,
754 751 ("jsonp","allow JSONP access via http (has security implications)")
755 752 ("noauth", "run without security")
756 753 ("nohttpinterface", "disable http interface")
  754 + ("noIndexBuildRetry", po::value<int>(),
  755 + "don't retry any index builds that were interrupted by shutdown")
757 756 ("nojournal", "disable journaling (journaling is on by default for 64 bit)")
758 757 ("noprealloc", "disable data file preallocation - will often hurt performance")
759 758 ("noscripting", "disable scripting engine")
@@ -1037,6 +1036,9 @@ static void processCommandLineOptions(const std::vector<std::string>& argv) {
1037 1036 if (params.count("replIndexPrefetch")) {
1038 1037 cmdLine.rsIndexPrefetch = params["replIndexPrefetch"].as<std::string>();
1039 1038 }
  1039 + if (params.count("noIndexBuildRetry")) {
  1040 + cmdLine.indexBuildRetry = false;
  1041 + }
1040 1042 if (params.count("only")) {
1041 1043 cmdLine.only = params["only"].as<string>().c_str();
1042 1044 }
112 src/mongo/db/index_rebuilder.cpp
... ... @@ -0,0 +1,112 @@
  1 +/**
  2 + * Copyright (C) 2012 10gen Inc.
  3 + *
  4 + * This program is free software: you can redistribute it and/or modify
  5 + * it under the terms of the GNU Affero General Public License, version 3,
  6 + * as published by the Free Software Foundation.
  7 + *
  8 + * This program is distributed in the hope that it will be useful,
  9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11 + * GNU Affero General Public License for more details.
  12 + *
  13 + * You should have received a copy of the GNU Affero General Public License
  14 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  15 + */
  16 +
  17 +#include "mongo/db/index_rebuilder.h"
  18 +
  19 +#include "mongo/db/instance.h"
  20 +#include "mongo/db/pdfile.h"
  21 +
  22 +namespace mongo {
  23 +
  24 + IndexRebuilder indexRebuilder;
  25 +
  26 + IndexRebuilder::IndexRebuilder() {}
  27 +
  28 + std::string IndexRebuilder::name() const {
  29 + return "IndexRebuilder";
  30 + }
  31 +
  32 + void IndexRebuilder::run() {
  33 + Client::initThread(name().c_str());
  34 + Lock::GlobalWrite lk;
  35 + Client::GodScope gs;
  36 + std::vector<std::string> dbNames;
  37 + getDatabaseNames(dbNames);
  38 +
  39 + for (std::vector<std::string>::const_iterator it = dbNames.begin();
  40 + it < dbNames.end();
  41 + it++) {
  42 + checkDB(*it);
  43 + }
  44 +
  45 + cc().shutdown();
  46 + }
  47 +
  48 + void IndexRebuilder::checkDB(const std::string& dbName) {
  49 + const std::string systemNS = dbName + ".system.namespaces";
  50 + DBDirectClient cli;
  51 + scoped_ptr<DBClientCursor> cursor(cli.query(systemNS, Query()));
  52 +
  53 + while (cursor->more()) {
  54 + BSONObj nsDoc = cursor->next();
  55 + const char* ns = nsDoc["name"].valuestrsafe();
  56 +
  57 + Client::Context ctx(ns, dbpath, false, false);
  58 + NamespaceDetails* nsd = nsdetails(ns);
  59 +
  60 + if (!nsd || !nsd->indexBuildInProgress) {
  61 + continue;
  62 + }
  63 +
  64 + log() << "Found interrupted index build on " << ns << endl;
  65 +
  66 + // If the indexBuildRetry flag isn't set, just clear the inProg flag and clean up the index
  67 + if (!cmdLine.indexBuildRetry) {
  68 + // If we crash between unsetting the inProg flag and cleaning up the index, the
  69 + // index space will be lost.
  70 + getDur().writingInt(nsd->indexBuildInProgress) = 0;
  71 + nsd->idx(nsd->nIndexes).kill_idx();
  72 + continue;
  73 + }
  74 +
  75 + retryIndexBuild(dbName, nsd);
  76 + }
  77 + }
  78 +
  79 + void IndexRebuilder::retryIndexBuild(const std::string& dbName, NamespaceDetails* nsd) {
  80 + // details.info is always a valid system.indexes entry because DataFileMgr::insert journals
  81 + // creating the index doc and then insert_makeIndex durably assigns its DiskLoc to info.
  82 + // indexBuildInProgress is set after that, so if it is set, info must be set.
  83 + IndexDetails& details = nsd->idx(nsd->nIndexes);
  84 +
  85 + // First, clean up the in progress index build. Save the system.indexes entry so that we
  86 + // can add it again afterwards.
  87 + BSONObj indexObj = details.info.obj().getOwned();
  88 +
  89 + // Clean up the in-progress index build
  90 + getDur().writingInt(nsd->indexBuildInProgress) = 0;
  91 + details.kill_idx();
  92 + // The index has now been removed from system.indexes, so the only record of it is in
  93 + // memory. If there is a journal commit between now and when insert() rewrites the entry and
  94 + // the db crashes before the new system.indexes entry is journalled, the index will be lost
  95 + // forever. Thus, we're assuming no journaling will happen between now and the entry being
  96 + // re-written.
  97 +
  98 + // We need to force a foreground index build to prevent replication from replaying an
  99 + // incompatible op (like a drop) during a yield.
  100 + // TODO: once commands can interrupt/wait for index builds, this can be removed.
  101 + indexObj = indexObj.removeField("background");
  102 +
  103 + try {
  104 + const std::string ns = dbName + ".system.indexes";
  105 + theDataFileMgr.insert(ns.c_str(), indexObj.objdata(), indexObj.objsize(), false, true);
  106 + }
  107 + catch (const DBException& e) {
  108 + log() << "Rebuilding index failed: " << e.what() << " (" << e.getCode() << ")"
  109 + << endl;
  110 + }
  111 + }
  112 +}
45 src/mongo/db/index_rebuilder.h
... ... @@ -0,0 +1,45 @@
  1 +/**
  2 + * Copyright (C) 2012 10gen Inc.
  3 + *
  4 + * This program is free software: you can redistribute it and/or modify
  5 + * it under the terms of the GNU Affero General Public License, version 3,
  6 + * as published by the Free Software Foundation.
  7 + *
  8 + * This program is distributed in the hope that it will be useful,
  9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11 + * GNU Affero General Public License for more details.
  12 + *
  13 + * You should have received a copy of the GNU Affero General Public License
  14 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  15 + */
  16 +
  17 +#pragma once
  18 +
  19 +#include "mongo/db/namespace_details.h"
  20 +#include "mongo/util/background.h"
  21 +
  22 +namespace mongo {
  23 +
  24 + class IndexRebuilder : public BackgroundJob {
  25 + public:
  26 + IndexRebuilder();
  27 +
  28 + std::string name() const;
  29 + void run();
  30 +
  31 + private:
  32 + /**
  33 + * Check each collection in a database to see if it has any in-progress index builds that
  34 + * need to be retried. If so, calls retryIndexBuild.
  35 + */
  36 + void checkDB(const std::string& dbname);
  37 +
  38 + /**
  39 + * Actually retry the index build on a given namespace.
  40 + */
  41 + void retryIndexBuild(const std::string& dbName, NamespaceDetails* nsd);
  42 + };
  43 +
  44 + extern IndexRebuilder indexRebuilder;
  45 +}
28 src/mongo/db/index_update.cpp
@@ -515,29 +515,6 @@ namespace mongo {
515 515 }
516 516 };
517 517
518   - /**
519   - * For the lifetime of this object, an index build is indicated on the specified
520   - * namespace and the newest index is marked as absent. This simplifies
521   - * the cleanup required on recovery.
522   - */
523   - class RecoverableIndexState {
524   - public:
525   - RecoverableIndexState( NamespaceDetails *d ) : _d( d ) {
526   - indexBuildInProgress() = 1;
527   - nIndexes()--;
528   - }
529   - ~RecoverableIndexState() {
530   - DESTRUCTOR_GUARD (
531   - nIndexes()++;
532   - indexBuildInProgress() = 0;
533   - )
534   - }
535   - private:
536   - int &nIndexes() { return getDur().writingInt( _d->nIndexes ); }
537   - int &indexBuildInProgress() { return getDur().writingInt( _d->indexBuildInProgress ); }
538   - NamespaceDetails *_d;
539   - };
540   -
541 518 // throws DBException
542 519 void buildAnIndex(const std::string& ns,
543 520 NamespaceDetails* d,
@@ -550,9 +527,7 @@ namespace mongo {
550 527 unsigned long long n;
551 528
552 529 verify( !BackgroundOperation::inProgForNs(ns.c_str()) ); // should have been checked earlier, better not be...
553   - verify( d->indexBuildInProgress == 0 );
554 530 verify( Lock::isWriteLocked(ns) );
555   - RecoverableIndexState recoverable( d );
556 531
557 532 // Build index spec here in case the collection is empty and the index details are invalid
558 533 idx.getSpec();
@@ -676,7 +651,8 @@ namespace mongo {
676 651 d->nIndexes = 0;
677 652 }
678 653 if ( idIndex ) {
679   - d->addIndex(ns) = *idIndex;
  654 + d->getNextIndexDetails(ns) = *idIndex;
  655 + d->addIndex(ns);
680 656 wassert( d->nIndexes == 1 );
681 657 }
682 658 /* assuming here that id index is not multikey: */
35 src/mongo/db/namespace_details.cpp
@@ -120,28 +120,6 @@ namespace mongo {
120 120 }
121 121 #endif
122 122
123   - void NamespaceDetails::onLoad(const Namespace& k) {
124   -
125   - if( k.isExtra() ) {
126   - /* overflow storage for indexes - so don't treat as a NamespaceDetails object. */
127   - return;
128   - }
129   -
130   - if( indexBuildInProgress ) {
131   - verify( Lock::isW() ); // TODO(erh) should this be per db?
132   - if( indexBuildInProgress ) {
133   - log() << "indexBuildInProgress was " << indexBuildInProgress << " for " << k << ", indicating an abnormal db shutdown" << endl;
134   - getDur().writingInt( indexBuildInProgress ) = 0;
135   - }
136   - }
137   - }
138   -
139   - static void namespaceOnLoadCallback(const Namespace& k, NamespaceDetails& v) {
140   - v.onLoad(k);
141   - }
142   -
143   - bool checkNsFilesOnLoad = true;
144   -
145 123 NOINLINE_DECL void NamespaceIndex::_init() {
146 124 verify( !ht );
147 125
@@ -194,8 +172,6 @@ namespace mongo {
194 172
195 173 verify( len <= 0x7fffffff );
196 174 ht = new HashTable<Namespace,NamespaceDetails>(p, (int) len, "namespace index");
197   - if( checkNsFilesOnLoad )
198   - ht->iterAll(namespaceOnLoadCallback);
199 175 }
200 176
201 177 static void namespaceGetNamespacesCallback( const Namespace& k , NamespaceDetails& v , void * extra ) {
@@ -523,8 +499,7 @@ namespace mongo {
523 499 NamespaceDetailsTransient::get(thisns).clearQueryCache();
524 500 }
525 501
526   - /* you MUST call when adding an index. see pdfile.cpp */
527   - IndexDetails& NamespaceDetails::addIndex(const char *thisns, bool resetTransient) {
  502 + IndexDetails& NamespaceDetails::getNextIndexDetails(const char* thisns) {
528 503 IndexDetails *id;
529 504 try {
530 505 id = &idx(nIndexes,true);
@@ -533,11 +508,13 @@ namespace mongo {
533 508 allocExtra(thisns, nIndexes);
534 509 id = &idx(nIndexes,false);
535 510 }
  511 + return *id;
  512 + }
536 513
  514 + /* you MUST call when adding an index. see pdfile.cpp */
  515 + void NamespaceDetails::addIndex(const char* thisns) {
537 516 (*getDur().writing(&nIndexes))++;
538   - if ( resetTransient )
539   - NamespaceDetailsTransient::get(thisns).addedIndex();
540   - return *id;
  517 + NamespaceDetailsTransient::get(thisns).addedIndex();
541 518 }
542 519
543 520 // must be called when renaming a NS to fix up extra
16 src/mongo/db/namespace_details.h
@@ -138,9 +138,6 @@ namespace mongo {
138 138 Extra* allocExtra(const char *ns, int nindexessofar);
139 139 void copyingFrom(const char *thisns, NamespaceDetails *src); // must be called when renaming a NS to fix up extra
140 140
141   - /* called when loaded from disk */
142   - void onLoad(const Namespace& k);
143   -
144 141 /* dump info on this namespace. for debugging. */
145 142 void dump(const Namespace& k);
146 143
@@ -226,10 +223,17 @@ namespace mongo {
226 223 bool isMultikey(int i) const { return (multiKeyIndexBits & (((unsigned long long) 1) << i)) != 0; }
227 224 void setIndexIsMultikey(const char *thisns, int i);
228 225
229   - /* add a new index. does not add to system.indexes etc. - just to NamespaceDetails.
230   - caller must populate returned object.
  226 + /**
  227 + * This fetches the IndexDetails for the next empty index slot. The caller must populate
  228 + * returned object. This handles allocating extra index space, if necessary.
  229 + */
  230 + IndexDetails& getNextIndexDetails(const char* thisns);
  231 +
  232 + /**
  233 + * Add a new index. This does not add it to system.indexes etc. - just to NamespaceDetails.
  234 + * This resets the transient namespace details.
231 235 */
232   - IndexDetails& addIndex(const char *thisns, bool resetTransient=true);
  236 + void addIndex(const char* thisns);
233 237
234 238 void aboutToDeleteAnIndex() {
235 239 clearSystemFlag( Flag_HaveIdIndex );
70 src/mongo/db/pdfile.cpp
@@ -1404,36 +1404,58 @@ namespace mongo {
1404 1404 }
1405 1405
1406 1406 int idxNo = tableToIndex->nIndexes;
1407   - IndexDetails& idx = tableToIndex->addIndex(tabletoidxns.c_str(), !background); // clear transient info caches so they refresh; increments nIndexes
1408   - getDur().writingDiskLoc(idx.info) = loc;
  1407 +
1409 1408 try {
1410   - buildAnIndex(tabletoidxns, tableToIndex, idx, idxNo, background, mayInterrupt);
1411   - }
1412   - catch( DBException& e ) {
1413   - // save our error msg string as an exception or dropIndexes will overwrite our message
1414   - LastError *le = lastError.get();
1415   - int savecode = 0;
1416   - string saveerrmsg;
1417   - if ( le ) {
1418   - savecode = le->code;
1419   - saveerrmsg = le->msg;
  1409 + IndexDetails& idx = tableToIndex->getNextIndexDetails(tabletoidxns.c_str());
  1410 + // It's important that this is outside the inner try/catch so that we never try to call
  1411 + // kill_idx on a half-formed disk loc (if this asserts).
  1412 + getDur().writingDiskLoc(idx.info) = loc;
  1413 +
  1414 + try {
  1415 + getDur().writingInt(tableToIndex->indexBuildInProgress) = 1;
  1416 + buildAnIndex(tabletoidxns, tableToIndex, idx, idxNo, background, mayInterrupt);
1420 1417 }
1421   - else {
1422   - savecode = e.getCode();
1423   - saveerrmsg = e.what();
  1418 + catch (DBException& e) {
  1419 + // save our error msg string as an exception or dropIndexes will overwrite our message
  1420 + LastError *le = lastError.get();
  1421 + int savecode = 0;
  1422 + string saveerrmsg;
  1423 + if ( le ) {
  1424 + savecode = le->code;
  1425 + saveerrmsg = le->msg;
  1426 + }
  1427 + else {
  1428 + savecode = e.getCode();
  1429 + saveerrmsg = e.what();
  1430 + }
  1431 +
  1432 + // roll back this index
  1433 + idx.kill_idx();
  1434 +
  1435 + verify(le && !saveerrmsg.empty());
  1436 + setLastError(savecode,saveerrmsg.c_str());
  1437 + throw;
1424 1438 }
1425 1439
1426   - // roll back this index
1427   - string name = idx.indexName();
1428   - BSONObjBuilder b;
1429   - string errmsg;
1430   - bool ok = dropIndexes(tableToIndex, tabletoidxns.c_str(), name.c_str(), errmsg, b, true);
1431   - if( !ok ) {
1432   - log() << "failed to drop index after a unique key error building it: " << errmsg << ' ' << tabletoidxns << ' ' << name << endl;
  1440 + // clear transient info caches so they refresh; increments nIndexes
  1441 + tableToIndex->addIndex(tabletoidxns.c_str());
  1442 + getDur().writingInt(tableToIndex->indexBuildInProgress) = 0;
  1443 + }
  1444 + catch (...) {
  1445 + // Generally, this will be called as an exception from building the index bubbles up.
  1446 + // Thus, the index will have already been cleaned up. This catch just ensures that the
  1447 + // metadata is consistent on any exception. It may leak like a sieve if the index
  1448 + // successfully finished building and addIndex or kill_idx threw.
  1449 +
  1450 + // Check if nIndexes was incremented
  1451 + if (idxNo < tableToIndex->nIndexes) {
  1452 + // TODO: this will have to change when we can have multiple simultaneous index
  1453 + // builds
  1454 + getDur().writingInt(tableToIndex->nIndexes) -= 1;
1433 1455 }
1434 1456
1435   - verify( le && !saveerrmsg.empty() );
1436   - setLastError(savecode,saveerrmsg.c_str());
  1457 + getDur().writingInt(tableToIndex->indexBuildInProgress) = 0;
  1458 +
1437 1459 throw;
1438 1460 }
1439 1461 }
20 src/mongo/db/repl/rs_rollback.cpp
@@ -16,12 +16,14 @@
16 16 */
17 17
18 18 #include "pch.h"
19   -#include "../client.h"
20   -#include "rs.h"
21   -#include "../repl.h"
22   -#include "../cloner.h"
23   -#include "../ops/update.h"
24   -#include "../ops/delete.h"
  19 +
  20 +#include "mongo/db/client.h"
  21 +#include "mongo/db/cloner.h"
  22 +#include "mongo/db/index_rebuilder.h"
  23 +#include "mongo/db/ops/update.h"
  24 +#include "mongo/db/ops/delete.h"
  25 +#include "mongo/db/repl/rs.h"
  26 +#include "mongo/db/repl.h"
25 27
26 28 /* Scenarios
27 29
@@ -573,6 +575,12 @@ namespace mongo {
573 575 }
574 576
575 577 void ReplSetImpl::syncRollback(OplogReader&r) {
  578 + // If this is startup, wait for any index build retries to finish first
  579 + while (indexRebuilder.getState() != BackgroundJob::Done) {
  580 + OCCASIONALLY LOG(0) << "replSet rollback waiting for index rebuild to finish" << endl;
  581 + indexRebuilder.wait(1000);
  582 + }
  583 +
576 584 // check that we are at minvalid, otherwise we cannot rollback as we may be in an
577 585 // inconsistent state
578 586 {
43 src/mongo/dbtests/clienttests.cpp
@@ -17,9 +17,11 @@
17 17 // client.cpp
18 18
19 19 #include "pch.h"
  20 +
20 21 #include "dbtests.h"
21   -#include "../db/d_concurrency.h"
22 22 #include "mongo/client/dbclientcursor.h"
  23 +#include "mongo/db/d_concurrency.h"
  24 +#include "mongo/db/pdfile.h"
23 25
24 26 namespace ClientTests {
25 27
@@ -27,6 +29,7 @@ namespace ClientTests {
27 29 public:
28 30
29 31 Base( string coll ) {
  32 + db.dropDatabase("test");
30 33 _ns = (string)"test." + coll;
31 34 }
32 35
@@ -98,6 +101,43 @@ namespace ClientTests {
98 101
99 102 };
100 103
  104 + /**
  105 + * Check that nIndexes is incremented correctly when an index builds (and that it is not
  106 + * incremented when an index fails to build), system.indexes has an entry added (or not), and
  107 + * system.namespaces has a doc added (or not).
  108 + */
  109 + class BuildIndex : public Base {
  110 + public:
  111 + BuildIndex() : Base("buildIndex") {}
  112 + void run() {
  113 + Lock::DBWrite lock(ns());
  114 + Client::WriteContext ctx(ns());
  115 +
  116 + db.insert(ns(), BSON("x" << 1 << "y" << 2));
  117 + db.insert(ns(), BSON("x" << 2 << "y" << 2));
  118 +
  119 + ASSERT_EQUALS(1, nsdetails(ns())->nIndexes);
  120 + // _id index
  121 + ASSERT_EQUALS(1U, db.count("test.system.indexes"));
  122 + // test.buildindex
  123 + // test.buildindex_$id
  124 + // test.system.indexes
  125 + ASSERT_EQUALS(3U, db.count("test.system.namespaces"));
  126 +
  127 + db.ensureIndex(ns(), BSON("y" << 1), true);
  128 +
  129 + ASSERT_EQUALS(1, nsdetails(ns())->nIndexes);
  130 + ASSERT_EQUALS(1U, db.count("test.system.indexes"));
  131 + ASSERT_EQUALS(3U, db.count("test.system.namespaces"));
  132 +
  133 + db.ensureIndex(ns(), BSON("x" << 1), true);
  134 +
  135 + ASSERT_EQUALS(2, nsdetails(ns())->nIndexes);
  136 + ASSERT_EQUALS(2U, db.count("test.system.indexes"));
  137 + ASSERT_EQUALS(4U, db.count("test.system.namespaces"));
  138 + }
  139 + };
  140 +
101 141 class CS_10 : public Base {
102 142 public:
103 143 CS_10() : Base( "CS_10" ) {}
@@ -187,6 +227,7 @@ namespace ClientTests {
187 227 add<DropIndex>();
188 228 add<ReIndex>();
189 229 add<ReIndex2>();
  230 + add<BuildIndex>();
190 231 add<CS_10>();
191 232 add<PushBack>();
192 233 add<Create>();

0 comments on commit ab1bceb

Please sign in to comment.
Something went wrong with that request. Please try again.