SERVER-2245 use IndexSpec::getKeys for $or deduping

commit f3376694022769a6c4d55bc410c45caf39d86e8d (parent 6714943)
Authored by @astaple
db/queryutil.cpp (41 lines changed)
@@ -23,6 +23,7 @@
#include "queryoptimizer.h"
#include "../util/unittest.h"
#include "dbmessage.h"
+#include "indexkey.h"

namespace mongo {
    extern BSONObj staticNull;
@@ -884,28 +885,34 @@ namespace mongo {
    }

    bool FieldRangeVector::matches( const BSONObj &obj ) const {
-        BSONObjIterator k( _keyPattern );
-        for( int i = 0; i < (int)_ranges.size(); ++i ) {
-            if ( _ranges[ i ].empty() ) {
-                return false;
-            }
-            BSONElement kk = k.next();
-            int number = (int) kk.number();
-            bool forward = ( number >= 0 ? 1 : -1 ) * ( _direction >= 0 ? 1 : -1 ) > 0;
-            BSONElementSet keys;
-            obj.getFieldsDotted( kk.fieldName(), keys );
-            bool match = false;
-            for( BSONElementSet::const_iterator j = keys.begin(); j != keys.end(); ++j ) {
-                if ( matchesElement( *j, i, forward ) ) {
-                    match = true;
+        if ( !_indexSpec.get() ) {
+            _indexSpec.reset( new IndexSpec( _keyPattern ) );
+        }
+        // TODO The representation of matching keys could potentially be optimized
+        // more for the case at hand. (For example, we can potentially consider
+        // fields individually instead of constructing several bson objects using
+        // multikey arrays.) But getKeys() canonically defines the key set for a
+        // given object and for now we are using it as is.
+        BSONObjSetDefaultOrder keys;
+        _indexSpec->getKeys( obj, keys );
+        for( BSONObjSetDefaultOrder::const_iterator i = keys.begin(); i != keys.end(); ++i ) {
+            BSONObjIterator j( *i );
+            BSONObjIterator k( _keyPattern );
+            bool match = true;
+            for( int l = 0; l < (int)_ranges.size(); ++l ) {
+                int number = (int) k.next().number();
+                bool forward = ( number >= 0 ? 1 : -1 ) * ( _direction >= 0 ? 1 : -1 ) > 0;
+                if ( !matchesElement( j.next(), l, forward ) ) {
+                    match = false;
                    break;
                }
            }
-            if ( !match ) {
-                return false;
+            if ( match ) {
+                // The *i key matched a valid range for every element.
+                return true;
            }
        }
-        return true;
+        return false;
    }

    // TODO optimize more
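In outline, the rewritten matches() asks IndexSpec::getKeys() for every index key the document would generate (a document with a multikey array contributes one key per array value) and accepts the document as soon as any one of those keys falls inside the valid range for every field of the key pattern. The sketch below illustrates only that control-flow shape; it uses plain STL containers in place of BSONObjSetDefaultOrder and the FieldRange machinery, and the names IndexKey, FieldInterval, keyMatches, and docMatches are illustrative stand-ins, not MongoDB identifiers.

// Minimal sketch (assumed, simplified types; not MongoDB's): a document matches
// if any one of the index keys generated for it falls inside every field range.
#include <vector>

struct FieldInterval { double low, high; };     // stand-in for one field's valid range
typedef std::vector<double> IndexKey;           // stand-in for one generated index key

// A key matches only if every element lies inside the corresponding interval.
// A descending field would flip the comparison; omitted here for brevity.
bool keyMatches( const IndexKey &key, const std::vector<FieldInterval> &ranges ) {
    for ( size_t f = 0; f < ranges.size() && f < key.size(); ++f ) {
        if ( key[ f ] < ranges[ f ].low || key[ f ] > ranges[ f ].high )
            return false;                       // one out-of-range element rejects this key
    }
    return true;
}

// Mirrors the new control flow: any single matching key accepts the document.
bool docMatches( const std::vector<IndexKey> &keysForDoc,
                 const std::vector<FieldInterval> &ranges ) {
    for ( size_t k = 0; k < keysForDoc.size(); ++k ) {
        if ( keyMatches( keysForDoc[ k ], ranges ) )
            return true;
    }
    return false;
}

Because every generated key is checked, a document reached through different array values still matches consistently, which is how the $or machinery can decide whether a document was already covered by an earlier clause's index scan (see jstests/ord.js and ore.js below).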
db/queryutil.h (4 lines changed)
@@ -341,6 +341,8 @@ namespace mongo {
        vector< BSONObj > _queries;
    };

+    class IndexSpec;
+
    class FieldRangeVector {
    public:
        FieldRangeVector( const FieldRangeSet &frs, const BSONObj &keyPattern, int direction )
@@ -482,6 +484,8 @@ namespace mongo {
        BSONObj _keyPattern;
        int _direction;
        vector< BSONObj > _queries; // make sure mem owned
+        // This IndexSpec is lazily constructed directly from _keyPattern if needed.
+        mutable shared_ptr< IndexSpec > _indexSpec;
    };

    // generates FieldRangeSet objects, accounting for or clauses
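The forward declaration of IndexSpec and the new mutable member support lazy construction: matches() is a const method, yet it may need to build the IndexSpec from _keyPattern the first time it runs, so the cached pointer is declared mutable. A minimal sketch of that pattern, assuming std::shared_ptr and invented Spec/Matcher names rather than the actual MongoDB classes:

#include <memory>

// Illustrative stand-in for IndexSpec: something built from a key pattern.
struct Spec {
    explicit Spec( int pattern ) : pattern( pattern ) {}
    int pattern;
};

class Matcher {
public:
    explicit Matcher( int keyPattern ) : _keyPattern( keyPattern ) {}

    // Logically const: callers see no change, but the cached helper may be
    // created on first use, so _spec must be declared mutable.
    bool matches( int value ) const {
        if ( !_spec.get() ) {
            _spec.reset( new Spec( _keyPattern ) );   // built lazily, only when needed
        }
        return value == _spec->pattern;
    }

private:
    int _keyPattern;
    mutable std::shared_ptr< Spec > _spec;            // lazily constructed cache
};

This simple form is not safe against two threads making the first call concurrently; if that mattered, the initialization would need a mutex or std::call_once.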
jstests/orc.js (new file, 29 lines)
@@ -0,0 +1,29 @@
+// test that or duplicates are dropped in certain special cases
+t = db.jstests_orc;
+t.drop();
+
+// The goal here is to ensure that the full range of valid values is scanned for each or clause, so that duplicates are
+// eliminated properly in the cases below when field range elimination is not employed. The deduplication of interest
+// occurs on field a. The range specifications for fields b and c are such that (in the current implementation) field range
+// elimination will not occur between the or clauses, meaning that the full range of valid values will be scanned for each
+// clause and deduplication will be forced.
+
+// NOTE This test uses some tricks to avoid or range elimination, but in future implementations these tricks may not apply.
+// Perhaps it would be worthwhile to create a mode where range elimination is disabled so it will be possible to write a more
+// robust test.
+
+t.ensureIndex( {a:-1,b:1,c:1} );
+
+// sanity test
+t.save( {a:null,b:4,c:4} );
+assert.eq( 1, t.count( {$or:[{a:null,b:{$gte:0,$lte:5},c:{$gte:0,$lte:5}},{a:null,b:{$gte:3,$lte:8},c:{$gte:3,$lte:8}}]} ) );
+
+// from here on is SERVER-2245
+t.remove();
+t.save( {b:4,c:4} );
+assert.eq( 1, t.count( {$or:[{a:null,b:{$gte:0,$lte:5},c:{$gte:0,$lte:5}},{a:null,b:{$gte:3,$lte:8},c:{$gte:3,$lte:8}}]} ) );
+
+//t.remove();
+//t.save( {a:[],b:4,c:4} );
+//printjson( t.find( {$or:[{a:[],b:{$gte:0,$lte:5},c:{$gte:0,$lte:5}},{a:[],b:{$gte:3,$lte:8},c:{$gte:3,$lte:8}}]} ).explain() );
+//assert.eq( 1, t.count( {$or:[{a:[],b:{$gte:0,$lte:5},c:{$gte:0,$lte:5}},{a:[],b:{$gte:3,$lte:8},c:{$gte:3,$lte:8}}]} ) );
jstests/ord.js (new file, 34 lines)
@@ -0,0 +1,34 @@
+// check that we don't crash if an index used by an earlier or clause is dropped
+
+// Dropping an index kills all cursors on the indexed namespace, not just those
+// cursors using the dropped index. This test is to serve as a reminder that
+// the $or implementation may need minor adjustments (memory ownership) if this
+// behavior is changed.
+
+t = db.jstests_ord;
+t.drop();
+
+t.ensureIndex( {a:1} );
+t.ensureIndex( {b:1} );
+
+for( i = 0; i < 80; ++i ) {
+    t.save( {a:1} );
+}
+
+for( i = 0; i < 100; ++i ) {
+    t.save( {b:1} );
+}
+
+c = t.find( { $or: [ {a:1}, {b:1} ] } ).batchSize( 100 );
+for( i = 0; i < 90; ++i ) {
+    c.next();
+}
+// At this point, our initial query has ended and there is a client cursor waiting
+// to read additional documents from index {b:1}. Deduping is performed against
+// the index key {a:1}
+
+t.dropIndex( {a:1} );
+
+// Dropping an index kills all cursors on the indexed namespace, not just those
+// cursors using the dropped index.
+assert.throws( function() { c.next(); } );
jstests/ore.js (new file, 13 lines)
@@ -0,0 +1,13 @@
+// verify that index direction is considered when deduping based on an earlier
+// index
+
+t = db.jstests_ore;
+t.drop();
+
+t.ensureIndex( {a:-1} );
+t.ensureIndex( {b:1} );
+
+t.save( {a:1,b:1} );
+t.save( {a:2,b:1} );
+
+assert.eq( 2, t.count( {$or:[{a:{$in:[1,2]}},{b:1}]} ) );
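ore.js exercises the direction handling in the queryutil.cpp change above: a field of the key pattern is scanned forward exactly when the sign of its key-pattern value and the sign of the overall traversal direction agree. A self-contained sketch of that sign rule (the helper name forwardScan is invented for illustration):

#include <cassert>

// Mirrors the expression used in FieldRangeVector::matches(): the field is
// scanned forward exactly when the key-pattern sign and the traversal
// direction sign agree.
bool forwardScan( int keyPatternValue, int traversalDirection ) {
    return ( keyPatternValue >= 0 ? 1 : -1 ) * ( traversalDirection >= 0 ? 1 : -1 ) > 0;
}

int main() {
    assert( forwardScan( 1, 1 ) );    // {a:1} with a forward traversal
    assert( !forwardScan( -1, 1 ) );  // {a:-1}, as in ore.js: that field is scanned in reverse
    assert( forwardScan( -1, -1 ) );  // a descending field traversed backwards reads forward
    return 0;
}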