Skip to content

Commit

Permalink
SERVER-6540 fix writeback hangs when messages are too big
Browse files Browse the repository at this point in the history
  • Loading branch information
Greg Studer committed Sep 19, 2012
1 parent 242597a commit 0037700
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 2 deletions.
115 changes: 115 additions & 0 deletions jstests/sharding/writeback_bulk_insert.js
@@ -0,0 +1,115 @@
//
// Verifies that getLastError (GLE) returns, rather than hanging, when a bulk insert
// runs into a writeback error.
//

jsTest.log("Starting sharded cluster...");

// A single shard reached through three separate mongos routers (A, B and C below).
var st = new ShardingTest({
    shards : 1,
    mongos : 3,
    verbose : 2,
    separateConfig : 1
});

st.stopBalancer();

// Handles to the three routers.
var mongosA = st.s0;
var mongosB = st.s1;
var mongosC = st.s2;

jsTest.log("Adding new collection...");

// Insert one document through each of the three mongos routers and check that every
// insert reports no error. These inserts happen before sharding is enabled below.
var collA = mongosA.getCollection(jsTestName() + ".coll");
collA.insert({hello : "world"});
assert.eq(null, collA.getDB().getLastError());

// "" + collA yields the collection's full name, so collB addresses the same collection
// through a different router.
var collB = mongosB.getCollection("" + collA);
collB.insert({hello : "world"});
assert.eq(null, collB.getDB().getLastError());

// collC has to go through mongosC (not mongosB a second time); otherwise the third
// router is never exercised and the later "mongosC" phase would re-test mongosB.
var collC = mongosC.getCollection("" + collA);
collC.insert({hello : "world"});
assert.eq(null, collC.getDB().getLastError());

jsTest.log("Enabling sharding...");

// Both commands are issued against the admin database through mongosA only.
var admin = mongosA.getDB("admin");
printjson(admin.runCommand({enableSharding : collA.getDB() + ""}));
printjson(admin.runCommand({shardCollection : collA + "", key : {_id : 1}}));

// mongod only learns the shard version from the config once a mongos tells it;
// this query through mongosA does that.
collA.findOne();

// Build the payload strings used for the bulk inserts below.

jsTest.log("Preparing bulk insert...");

// Doubling a one-character string until it reaches 1024 * 1024 characters.
var data1MB = "x";
while (data1MB.length < 1024 * 1024) {
    data1MB += data1MB;
}

// Seven copies of the 1MB string.
var data7MB = "";
for (var i = 0; i < 7; i++) {
    data7MB += data1MB;
}

print("7MB object size is : " + Object.bsonsize({_id : 0, d : data7MB}));

// WARNING - MAGIC NUMBERS HERE
// The padding below is tuned so that a two-document batch goes slightly past the 16MB
// limit: small enough that the shell still sends the message, but large enough that
// adding the extra writeback information fails.
var dataCloseTo8MB = data7MB;
for (var i = 0; i < 1031 * 1024 + 862; i++) {
    dataCloseTo8MB += "x";
}

print("Object size is: " + Object.bsonsize([
    {_id : 0, d : dataCloseTo8MB},
    {_id : 1, d : dataCloseTo8MB}
]));

jsTest.log("Trigger wbl for mongosB...");

// Two-document bulk insert through mongosB using the padded payload.
var oversizedBatch = [
    {_id : 0, d : dataCloseTo8MB},
    {_id : 1, d : dataCloseTo8MB}
];
collB.insert(oversizedBatch);

// If the overflow is not detected correctly, the GLE call below never returns.
jsTest.log("Waiting for GLE...");

var gleError = collB.getDB().getLastError();
assert.neq(null, gleError);

print("GLE correctly returned error...");

// The failed batch must not have added anything: both routers still see exactly
// three documents.
assert.eq(3, collA.find().itcount());
assert.eq(3, collB.find().itcount());

// Build an 8MB string: start from the 7MB string and append one more MB of "x".
// (Initialising data8MB from itself would start from undefined and produce the text
// "undefined" followed by only 1MB of padding, not the intended 8MB payload.)
var data8MB = data7MB;
for (var i = 0; i < 1024 * 1024; i++) {
    data8MB += "x";
}

// Report the BSON size of the two-document batch that will be inserted next.
print("Object size is: " + Object.bsonsize([
    {_id : 0, d : data8MB},
    {_id : 1, d : data8MB}
]));

jsTest.log("Trigger wbl for mongosC...");

// Two-document bulk insert through collC using the data8MB payload.
var acceptedBatch = [
    {_id : 0, d : data8MB},
    {_id : 1, d : data8MB}
];
collC.insert(acceptedBatch);

// This batch is expected to be accepted, so GLE must come back with no error.
jsTest.log("Waiting for GLE...");

var gleResult = collC.getDB().getLastError();
assert.eq(null, gleResult);

print("GLE Successful...");

// Both new documents are now visible next to the three earlier ones.
assert.eq(5, collA.find().itcount());
assert.eq(5, collB.find().itcount());

st.stop();
14 changes: 12 additions & 2 deletions src/mongo/s/d_logic.cpp
Expand Up @@ -100,7 +100,7 @@ namespace mongo {
dbresponse->responseTo = m.header()->id;
return true;
}

uassert( 9517 , "writeback" , ( d.reservedField() & Reserved_FromWriteback ) == 0 );

OID writebackID;
Expand All @@ -109,6 +109,13 @@ namespace mongo {
const OID& clientID = ShardedConnectionInfo::get(false)->getID();
massert( 10422 , "write with bad shard config and no server id!" , clientID.isSet() );

// We need to check this here, since otherwise we'll get errors wrapping the writeback -
// not just here, but also when returning as a command result.
// We choose 1/2 the overhead of the internal maximum so that we can still handle ops of
// 16MB exactly.
massert( 16437, "data size of operation is too large to queue for writeback",
m.dataSize() < BSONObjMaxInternalSize - (8 * 1024));

LOG(1) << "writeback queued for " << m.toString() << endl;

BSONObjBuilder b;
Expand All @@ -123,11 +130,14 @@ namespace mongo {
b.appendBinData( "msg" , m.header()->len , bdtCustom , (char*)(m.singleData()) );
LOG(2) << "writing back msg with len: " << m.header()->len << " op: " << m.operation() << endl;

// Convert to new BSONObj here just to be safe
BSONObj wbObj = b.obj();

// Don't register the writeback until immediately before we queue it -
// after this line, mongos will wait for an hour if we don't queue correctly
lastError.getSafe()->writeback( writebackID );

writeBackManager.queueWriteBack( clientID.str() , b.obj() );
writeBackManager.queueWriteBack( clientID.str() , wbObj );

return true;
}
Expand Down

0 comments on commit 0037700

Please sign in to comment.