Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Calculating which shard(s) to send $in queries to was taking a long time.

This fix changes mongos to stop limiting the shards to send to after the first $in clause - possibly sending the query to more shards than necessary, but saving time. SERVER-4745.
  • Loading branch information...
commit dd046106ba1c8de1c1ae8f5830981e18ce3dd597 1 parent d77cc8d
@stbrody stbrody authored
Showing with 62 additions and 5 deletions.
  1. +4 −5 db/queryutil.cpp
  2. +58 −0 jstests/sharding/inTiming.js
View
9 db/queryutil.cpp
@@ -1021,13 +1021,13 @@ namespace mongo {
BoundBuilders builders;
builders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) );
BSONObjIterator i( keyPattern );
- bool ineq = false; // until ineq is true, we are just dealing with equality and $in bounds
+ bool equalityOnly = true; // until equalityOnly is false, we are just dealing with equality (no range or $in queries).
while( i.more() ) {
BSONElement e = i.next();
const FieldRange &fr = range( e.fieldName() );
int number = (int) e.number(); // returns 0.0 if not numeric
bool forward = ( ( number >= 0 ? 1 : -1 ) * ( direction >= 0 ? 1 : -1 ) > 0 );
- if ( !ineq ) {
+ if ( equalityOnly ) {
if ( fr.equality() ) {
for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) {
j->first->appendAs( fr.min(), "" );
@@ -1035,9 +1035,8 @@ namespace mongo {
}
}
else {
- if ( !fr.inQuery() ) {
- ineq = true;
- }
+ equalityOnly = false;
+
BoundBuilders newBuilders;
const vector<FieldInterval> &intervals = fr.intervals();
for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) {
View
58 jstests/sharding/inTiming.js
@@ -0,0 +1,58 @@
+// Check that shard selection does not take a really long time on $in queries: SERVER-4745
+
+s = new ShardingTest( 'sharding_inqueries', 3, 0, 1, {chunksize:1});
+
+db = s.getDB( 'test' );
+
+s.adminCommand( { enablesharding: 'test' } );
+s.adminCommand( { shardcollection: 'test.foo', key: { a:1, b:1 } } );
+
+var lst = [];
+for (var i = 0; i < 500; i++) { lst.push(i); }
+
+/*
+* Time how long it takes to do $in queries on a sharded and an unsharded collection.
+* There is no data in either collection, so the query time is coming almost
+* entirely from the code that selects which shard(s) to send the query to.
+*/
+unshardedQuery = function() {db.bar.find({a:{$in:lst}, b:{$in:lst}}).itcount()};
+shardedQuery = function() {db.foo.find({a:{$in:lst}, b:{$in:lst}}).itcount()};
+// Run queries a few times to warm memory
+for (var i = 0; i < 3; i++) {
+ unshardedQuery();
+ shardedQuery();
+}
+
+unshardedTime = Date.timeFunc(unshardedQuery , 5);
+shardedTime = Date.timeFunc(shardedQuery, 5);
+
+print("Unsharded $in query ran in " + unshardedTime);
+print("Sharded $in query ran in " + shardedTime);
+assert(unshardedTime * 10 > shardedTime, "Sharded query is more than 10 times as slow as unsharded query");
+
+s.getDB('config').settings.update( { _id: "balancer" }, { $set : { stopped: true } } , true );
+
+db.adminCommand({split : "test.foo", middle : { a:1, b:10}});
+db.adminCommand({split : "test.foo", middle : { a:3, b:0}});
+
+db.adminCommand({moveChunk : "test.foo", find : {a:1, b:0}, to : "shard0000"});
+db.adminCommand({moveChunk : "test.foo", find : {a:1, b:15}, to : "shard0001"});
+db.adminCommand({moveChunk : "test.foo", find : {a:3, b:15}, to : "shard0002"});
+
+// Now make sure $in queries on the sharded collection return the expected results once its chunks are spread across the shards.
+
+for (var i = 0; i < 20; i++) {
+ db.foo.save({a:1, b:i});
+ db.foo.save({a:2, b:i});
+ db.foo.save({a:3, b:i});
+ db.foo.save({a:4, b:i});
+}
+
+db.printShardingStatus();
+
+assert.eq(6, db.foo.find({a : {$in : [1, 2]}, b : {$in : [0, 3, 5]}}).itcount());
+assert.eq(14, db.foo.find({a : {$in : [1, 2]}, b : {$in : [0, 3, 5, 10, 11, 15, 19]}}).itcount());
+assert.eq(28, db.foo.find({a : {$in : [1, 2, 3, 4]}, b : {$in : [0, 3, 5, 10, 11, 15, 19]}}).itcount());
+assert.eq(14, db.foo.find({a : {$in : [3, 4]}, b : {$in : [0, 3, 5, 10, 11, 15, 19]}}).itcount());
+
+s.stop();
Please sign in to comment.
Something went wrong with that request. Please try again.