Browse files

SERVER-480 SERVER-2884 Perform in memory sorts by generating keys suitable for an index
  • Loading branch information...
1 parent d70079c commit 70fbe10acfeb620022dd949a9ee9e90a3f683d32 @astaple astaple committed Jun 2, 2011
Showing with 117 additions and 48 deletions.
  1. +6 −7 bson/bsonmisc.h
  2. +2 −2 db/geo/2d.cpp
  3. +2 −2 db/geo/haystack.cpp
  4. +2 −2 db/indexkey.cpp
  5. +3 −3 db/indexkey.h
  6. +1 −1 db/query.cpp
  7. +1 −0 db/queryoptimizer.cpp
  8. +3 −0 db/queryoptimizer.h
  9. +28 −14 db/queryutil.cpp
  10. +7 −0 db/queryutil.h
  11. +20 −12 db/scanandorder.h
  12. +1 −1 jstests/find8.js
  13. +0 −2 jstests/sort7.js
  14. +15 −2 jstests/sort8.js
  15. +26 −0 jstests/sort9.js
View
13 bson/bsonmisc.h
@@ -29,21 +29,20 @@ namespace mongo {
class BSONObjCmp {
public:
- BSONObjCmp( const BSONObj &_order = BSONObj() ) : order( _order ) {}
+ BSONObjCmp( const BSONObj &order = BSONObj() ) : _order( order ) {}
bool operator()( const BSONObj &l, const BSONObj &r ) const {
- return l.woCompare( r, order ) < 0;
+ return l.woCompare( r, _order ) < 0;
}
private:
- BSONObj order;
+ BSONObj _order;
};
- class BSONObjCmpDefaultOrder : public BSONObjCmp {
+ typedef set<BSONObj,BSONObjCmp> BSONObjSet;
+ class BSONObjSetDefaultOrder : public BSONObjSet {
public:
- BSONObjCmpDefaultOrder() : BSONObjCmp( BSONObj() ) {}
+ BSONObjSetDefaultOrder() : BSONObjSet( BSONObjCmp() ) {}
};
- typedef set< BSONObj, BSONObjCmpDefaultOrder > BSONObjSetDefaultOrder;
-
enum FieldCompareResult {
LEFT_SUBFIELD = -2,
LEFT_BEFORE = -1,
View
4 db/geo/2d.cpp
@@ -164,7 +164,7 @@ namespace mongo {
}
/** Finds the key objects to put in an index */
- virtual void getKeys( const BSONObj& obj, BSONObjSetDefaultOrder& keys ) const {
+ virtual void getKeys( const BSONObj& obj, BSONObjSet& keys ) const {
getKeys( obj, &keys, NULL );
}
@@ -175,7 +175,7 @@ namespace mongo {
}
/** Finds the key objects and/or locations for a geo-indexed object */
- void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder* keys, vector< BSONObj >* locs ) const {
+ void getKeys( const BSONObj &obj, BSONObjSet* keys, vector< BSONObj >* locs ) const {
BSONElementMSet bSet;
View
4 db/geo/haystack.cpp
@@ -119,7 +119,7 @@ namespace mongo {
return ss.str();
}
- void _add( const BSONObj& obj, const string& root , const BSONElement& e , BSONObjSetDefaultOrder& keys ) const {
+ void _add( const BSONObj& obj, const string& root , const BSONElement& e , BSONObjSet& keys ) const {
BSONObjBuilder buf;
buf.append( "" , root );
if ( e.eoo() )
@@ -132,7 +132,7 @@ namespace mongo {
keys.insert( key );
}
- void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const {
+ void getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
BSONElement loc = obj.getFieldDotted( _geo );
if ( loc.eoo() )
View
4 db/indexkey.cpp
@@ -117,7 +117,7 @@ namespace mongo {
}
- void IndexSpec::getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const {
+ void IndexSpec::getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
if ( _indexType.get() ) { //plugin (eg geo)
_indexType->getKeys( obj , keys );
return;
@@ -129,7 +129,7 @@ namespace mongo {
keys.insert( _nullKey );
}
- void IndexSpec::_getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const {
+ void IndexSpec::_getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const {
BSONElement arrElt;
unsigned arrIdx = ~0;
int numNotFound = 0;
View
6 db/indexkey.h
@@ -44,7 +44,7 @@ namespace mongo {
IndexType( const IndexPlugin * plugin , const IndexSpec * spec );
virtual ~IndexType();
- virtual void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const = 0;
+ virtual void getKeys( const BSONObj &obj, BSONObjSet &keys ) const = 0;
virtual shared_ptr<Cursor> newCursor( const BSONObj& query , const BSONObj& order , int numWanted ) const = 0;
/** optional op : changes query to match what's in the index */
@@ -139,7 +139,7 @@ namespace mongo {
void reset( const DiskLoc& infoLoc ) { reset(infoLoc.obj()); }
void reset( const IndexDetails * details );
- void getKeys( const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const;
+ void getKeys( const BSONObj &obj, BSONObjSet &keys ) const;
BSONElement missingField() const { return _nullElt; }
@@ -163,7 +163,7 @@ namespace mongo {
IndexSuitability _suitability( const BSONObj& query , const BSONObj& order ) const ;
- void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSetDefaultOrder &keys ) const;
+ void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const;
BSONSizeTracker _sizeTracker;
View
2 db/query.cpp
@@ -687,7 +687,7 @@ namespace mongo {
if ( qp().scanAndOrderRequired() ) {
_inMemSort = true;
- _so.reset( new ScanAndOrder( _pq.getSkip() , _pq.getNumToReturn() , _pq.getOrder() ) );
+ _so.reset( new ScanAndOrder( _pq.getSkip() , _pq.getNumToReturn() , _pq.getOrder(), qp().multikeyFrs() ) );
}
if ( _pq.isExplain() ) {
View
1 db/queryoptimizer.cpp
@@ -55,6 +55,7 @@ namespace mongo {
const FieldRangeSetPair &frsp, const FieldRangeSetPair &originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) :
_d(d), _idxNo(idxNo),
_frs( frsp.frsForIndex( _d, _idxNo ) ),
+ _frsMulti( frsp.frsForIndex( _d, -1 ) ),
_originalQuery( originalQuery ),
_order( order ),
_index( 0 ),
View
3 db/queryoptimizer.h
@@ -76,6 +76,8 @@ namespace mongo {
const FieldRange &range( const char *fieldName ) const { return _frs.range( fieldName ); }
shared_ptr<FieldRangeVector> originalFrv() const { return _originalFrv; }
+ const FieldRangeSet &multikeyFrs() const { return _frsMulti; }
+
/** just for testing */
shared_ptr<FieldRangeVector> frv() const { return _frv; }
@@ -85,6 +87,7 @@ namespace mongo {
NamespaceDetails * _d;
int _idxNo;
const FieldRangeSet &_frs;
+ const FieldRangeSet &_frsMulti;
const BSONObj &_originalQuery;
const BSONObj &_order;
const IndexDetails * _index;
View
42 db/queryutil.cpp
@@ -1150,6 +1150,19 @@ namespace mongo {
return l;
}
+ bool FieldRangeVector::matchesKey( const BSONObj &key ) const {
+ BSONObjIterator j( key );
+ BSONObjIterator k( _indexSpec.keyPattern );
+ for( int l = 0; l < (int)_ranges.size(); ++l ) {
+ int number = (int) k.next().number();
+ bool forward = ( number >= 0 ? 1 : -1 ) * ( _direction >= 0 ? 1 : -1 ) > 0;
+ if ( !matchesElement( j.next(), l, forward ) ) {
+ return false;
+ }
+ }
+ return true;
+ }
+
bool FieldRangeVector::matches( const BSONObj &obj ) const {
// TODO The representation of matching keys could potentially be optimized
// more for the case at hand. (For example, we can potentially consider
@@ -1159,25 +1172,26 @@ namespace mongo {
BSONObjSetDefaultOrder keys;
_indexSpec.getKeys( obj, keys );
for( BSONObjSetDefaultOrder::const_iterator i = keys.begin(); i != keys.end(); ++i ) {
- BSONObjIterator j( *i );
- BSONObjIterator k( _indexSpec.keyPattern );
- bool match = true;
- for( int l = 0; l < (int)_ranges.size(); ++l ) {
- int number = (int) k.next().number();
- bool forward = ( number >= 0 ? 1 : -1 ) * ( _direction >= 0 ? 1 : -1 ) > 0;
- if ( !matchesElement( j.next(), l, forward ) ) {
- match = false;
- break;
- }
- }
- if ( match ) {
- // The *i key matched a valid range for every element.
- return true;
+ if ( matchesKey( *i ) ) {
+ return true;
}
}
return false;
}
+ BSONObj FieldRangeVector::firstMatch( const BSONObj &obj ) const {
+ // NOTE Only works in forward direction.
+ assert( _direction >= 0 );
+ BSONObjSet keys( BSONObjCmp( _indexSpec.keyPattern ) );
+ _indexSpec.getKeys( obj, keys );
+ for( BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i ) {
+ if ( matchesKey( *i ) ) {
+ return *i;
+ }
+ }
+ return BSONObj();
+ }
+
// TODO optimize more
int FieldRangeVectorIterator::advance( const BSONObj &curr ) {
BSONObjIterator j( curr );
View
7 db/queryutil.h
@@ -313,9 +313,16 @@ namespace mongo {
*/
bool matches( const BSONObj &obj ) const;
+ /**
+ * @return first key of 'obj' that would be encountered by a forward
+ * index scan using this FieldRangeVector, BSONObj() if no such key.
+ */
+ BSONObj firstMatch( const BSONObj &obj ) const;
+
private:
int matchingLowElement( const BSONElement &e, int i, bool direction, bool &lowEquality ) const;
bool matchesElement( const BSONElement &e, int i, bool direction ) const;
+ bool matchesKey( const BSONObj &key ) const;
vector<FieldRange> _ranges;
const IndexSpec &_indexSpec;
int _direction;
View
32 db/scanandorder.h
@@ -20,27 +20,32 @@
#pragma once
+#include "indexkey.h"
+#include "queryutil.h"
+
namespace mongo {
/* todo:
- _ handle compound keys with differing directions. we don't handle this yet: neither here nor in indexes i think!!!
_ limit amount of data
*/
- /* see also IndexDetails::getKeysFromObject, which needs some merging with this. */
-
class KeyType : boost::noncopyable {
public:
- BSONObj _pattern; // e.g., { ts : -1 }
+ IndexSpec _spec;
+ FieldRangeVector _keyCutter;
public:
- KeyType(BSONObj keyPattern) {
- _pattern = keyPattern;
- assert( !_pattern.isEmpty() );
+ KeyType(BSONObj pattern, const FieldRangeSet &frs):
+ _spec((assert(!pattern.isEmpty()),pattern)),
+ _keyCutter(frs, _spec, 1) {
}
- // returns the key value for o
+ /**
+ * @return first key of the object that would be encountered while
+ * scanning index with keySpec 'pattern' using constraints 'frs', or
+ * BSONObj() if no such key.
+ */
BSONObj getKeyFromObject(BSONObj o) {
- return o.extractFields(_pattern,true);
+ return _keyCutter.firstMatch(o);
}
};
@@ -86,7 +91,7 @@ namespace mongo {
void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc) {
/* todo : we don't correct _approxSize here. */
const BSONObj& worstBestKey = i->first;
- int c = worstBestKey.woCompare(k, _order._pattern);
+ int c = worstBestKey.woCompare(k, _order._spec.keyPattern);
if ( c > 0 ) {
// k is better, 'upgrade'
_best.erase(i);
@@ -95,9 +100,9 @@ namespace mongo {
}
public:
- ScanAndOrder(int startFrom, int limit, BSONObj order) :
+ ScanAndOrder(int startFrom, int limit, BSONObj order, const FieldRangeSet &frs) :
_best( BSONObjCmp( order ) ),
- _startFrom(startFrom), _order(order) {
+ _startFrom(startFrom), _order(order, frs) {
_limit = limit > 0 ? limit + _startFrom : 0x7fffffff;
_approxSize = 0;
}
@@ -109,6 +114,9 @@ namespace mongo {
void add(BSONObj o, DiskLoc* loc) {
assert( o.isValid() );
BSONObj k = _order.getKeyFromObject(o);
+ if ( k.isEmpty() ) {
+ return;
+ }
if ( (int) _best.size() < _limit ) {
_approxSize += k.objsize();
_approxSize += o.objsize();
View
2 jstests/find8.js
@@ -12,7 +12,7 @@ assert.eq( 1, t.count( { a: { $gt:5,$lt:2} } ) );
t.save( {a:[-1,12]} );
// Check that we can do a query with 'invalid' range and sort.
-assert.eq( -1, t.find( { a: { $gt:5,$lt:2} } ).sort( {a:1} ).toArray()[ 0 ].a[ 0 ] );
+assert.eq( 1, t.find( { a: { $gt:5,$lt:2} } ).sort( {a:1} ).toArray()[ 0 ].a[ 0 ] );
assert.eq( 2, t.find( { a: { $gt:5,$lt:2} } ).sort( {$natural:-1} ).itcount() );
// SERVER-2864
View
2 jstests/sort7.js
@@ -1,6 +1,5 @@
// Check sorting of array sub field SERVER-480.
-if ( 0 ) { // SERVER-480
t = db.jstests_sort7;
t.drop();
@@ -24,4 +23,3 @@ unindexed = t.find().sort( {"a.x":1} ).toArray();
t.ensureIndex( { "a.x" : 1 } );
indexed = t.find().sort( {"a.x":1} ).hint( {"a.x":1} ).toArray();
assert.eq( unindexed, indexed );
-}
View
17 jstests/sort8.js
@@ -1,6 +1,5 @@
// Check sorting of arrays indexed by key SERVER-2884
-if ( 0 ) { // SERVER-2884
t = db.jstests_sort8;
t.drop();
@@ -14,4 +13,18 @@ indexedReverse = t.find().sort( {a:1} ).hint( {a:1} ).toArray();
assert.eq( unindexedForward, indexedForward );
assert.eq( unindexedReverse, indexedReverse );
-}
+
+// Sorting is based on array members, not the array itself.
+assert.eq( [1,10], unindexedForward[ 0 ].a );
+assert.eq( [1,10], unindexedReverse[ 0 ].a );
+
+// Now try with a bounds constraint.
+t.dropIndexes();
+unindexedForward = t.find({a:{$gte:5}}).sort( {a:1} ).toArray();
+unindexedReverse = t.find({a:{$lte:5}}).sort( {a:-1} ).toArray();
+t.ensureIndex( {a:1} );
+indexedForward = t.find({a:{$gte:5}}).sort( {a:1} ).hint( {a:1} ).toArray();
+indexedReverse = t.find({a:{$lte:5}}).sort( {a:-1} ).hint( {a:1} ).toArray();
+
+assert.eq( unindexedForward, indexedForward );
+assert.eq( unindexedReverse, indexedReverse );
View
26 jstests/sort9.js
@@ -0,0 +1,26 @@
+// Unindexed array sorting SERVER-2884
+
+t = db.jstests_sort9;
+t.drop();
+
+t.save( {a:[]} );
+t.save( {a:[[]]} );
+assert.eq( 2, t.find( {a:{$ne:4}} ).sort( {a:1} ).itcount() );
+assert.eq( 2, t.find( {'a.b':{$ne:4}} ).sort( {'a.b':1} ).itcount() );
+assert.eq( 2, t.find( {a:{$ne:4}} ).sort( {'a.b':1} ).itcount() );
+
+t.drop();
+t.save( {} );
+assert.eq( 1, t.find( {a:{$ne:4}} ).sort( {a:1} ).itcount() );
+assert.eq( 1, t.find( {'a.b':{$ne:4}} ).sort( {'a.b':1} ).itcount() );
+assert.eq( 1, t.find( {a:{$ne:4}} ).sort( {'a.b':1} ).itcount() );
+assert.eq( 1, t.find( {a:{$exists:0}} ).sort( {a:1} ).itcount() );
+assert.eq( 1, t.find( {a:{$exists:0}} ).sort( {'a.b':1} ).itcount() );
+
+t.drop();
+t.save( {a:{}} );
+assert.eq( 1, t.find( {a:{$ne:4}} ).sort( {a:1} ).itcount() );
+assert.eq( 1, t.find( {'a.b':{$ne:4}} ).sort( {'a.b':1} ).itcount() );
+assert.eq( 1, t.find( {a:{$ne:4}} ).sort( {'a.b':1} ).itcount() );
+assert.eq( 1, t.find( {'a.b':{$exists:0}} ).sort( {a:1} ).itcount() );
+assert.eq( 1, t.find( {'a.b':{$exists:0}} ).sort( {'a.b':1} ).itcount() );

0 comments on commit 70fbe10

Please sign in to comment.