Permalink
Browse files

SERVER-4180 Fixes and cleanups for $elemMatch index range calculation…

…, in particular to clean up incorrect bound generation for SERVER-1264 style $elemMatch queries. Includes fixes for SERVER-5740 and SERVER-5741.
  • Loading branch information...
1 parent 6877579 commit 2b572b9c07d8d3feda949153e20b9eee8706ff7d @astaple astaple committed Jun 9, 2012
Showing with 357 additions and 89 deletions.
  1. +26 −0 jstests/arrayfind6.js
  2. +55 −0 jstests/arrayfind7.js
  3. +1 −1 jstests/not2.js
  4. +130 −84 src/mongo/db/queryutil.cpp
  5. +19 −3 src/mongo/db/queryutil.h
  6. +126 −1 src/mongo/dbtests/queryutiltests.cpp
View
26 jstests/arrayfind6.js
@@ -0,0 +1,26 @@
+// Check index bound determination for $not:$elemMatch queries. SERVER-5740
+
+t = db.jstests_arrayfind6;
+t.drop();
+
+t.save( { a:[ { b:1, c:2 } ] } );
+
+function checkElemMatchMatches() {
+ assert.eq( 1, t.count( { a:{ $elemMatch:{ b:1, c:2 } } } ) );
+ assert.eq( 0, t.count( { a:{ $not:{ $elemMatch:{ b:1, c:2 } } } } ) );
+ assert.eq( 1, t.count( { a:{ $not:{ $elemMatch:{ b:1, c:3 } } } } ) );
+ assert.eq( 1, t.count( { a:{ $not:{ $elemMatch:{ b:{ $ne:1 }, c:3 } } } } ) );
+ // Index bounds must be determined for $not:$elemMatch, not $not:$ne. In this case if index
+ // bounds are determined for $not:$ne, the a.b index will be constrained to the interval [2,2]
+ // and the saved document will not be matched as it should.
+ assert.eq( 1, t.count( { a:{ $not:{ $elemMatch:{ b:{ $ne:2 }, c:3 } } } } ) );
+}
+
+checkElemMatchMatches();
+t.ensureIndex( { 'a.b':1 } );
+checkElemMatchMatches();
+
+// Verify index bounds - see comment above for the $ne:2 case.
+assert( !friendlyEqual( [ [ 2, 2 ] ],
+ t.find( { a:{ $not:{ $elemMatch:{ b:{ $ne:2 }, c:3 } } } } ).explain()
+ .indexBounds[ 'a.b' ] ) );
View
55 jstests/arrayfind7.js
@@ -0,0 +1,55 @@
+// Nested $elemMatch clauses. SERVER-5741
+
+t = db.jstests_arrayfind7;
+t.drop();
+
+t.save( { a:[ { b:[ { c:1, d:2 } ] } ] } );
+
+function checkElemMatchMatches() {
+ assert.eq( 1, t.count( { a:{ $elemMatch:{ b:{ $elemMatch:{ c:1, d:2 } } } } } ) );
+}
+
+// The document is matched using nested $elemMatch expressions, with and without an index.
+checkElemMatchMatches();
+t.ensureIndex( { 'a.b.c':1 } );
+checkElemMatchMatches();
+
+function checkIndexCharacterBasedBounds( index, document, query, singleKeyBounds, multiKeyBounds ) {
+ // The document is matched without an index, and with single and multi key indexes.
+ t.drop();
+ t.save( document );
+ assert.eq( 1, t.count( query ) );
+ t.ensureIndex( index );
+ assert.eq( 1, t.count( query ) );
+ t.save( { a:{ b:{ c:[ 10, 11 ] } } } ); // Make the index multikey.
+ assert.eq( 1, t.count( query ) );
+
+ // The single and multi key index bounds are as expected.
+ t.drop();
+ t.ensureIndex( index );
+ assert.eq( singleKeyBounds, t.find( query ).explain().indexBounds[ 'a.b.c' ] );
+ t.save( { a:{ b:{ c:[ 10, 11 ] } } } );
+ assert.eq( multiKeyBounds, t.find( query ).explain().indexBounds[ 'a.b.c' ] );
+}
+
+// Two constraints within a nested $elemMatch expression.
+checkIndexCharacterBasedBounds( { 'a.b.c':1 },
+ { a:[ { b:[ { c:1 } ] } ] },
+ { a:{ $elemMatch:{ b:{ $elemMatch:{ c:{ $gte:1, $lte:1 } } } } } },
+ [ [ 1, 1 ] ],
+ [ [ 1, 1.7976931348623157e+308 ] ] );
+
+// Two nested $elemMatch expressions.
+checkIndexCharacterBasedBounds( { 'a.d.e':1, 'a.b.c':1 },
+ { a:[ { b:[ { c:1 } ], d:[ { e:1 } ] } ] },
+ { a:{ $elemMatch:{ d:{ $elemMatch:{ e:{ $lte:1 } } },
+ b:{ $elemMatch:{ c:{ $gte:1 } } } } } },
+ [ [ 1, 1.7976931348623157e+308 ] ],
+ [ [ { $minElement:1 }, { $maxElement:1 } ] ] );
+
+// A non $elemMatch expression and a nested $elemMatch expression.
+checkIndexCharacterBasedBounds( { 'a.x':1, 'a.b.c':1 },
+ { a:[ { b:[ { c:1 } ], x:1 } ] },
+ { 'a.x':1, a:{ $elemMatch:{ b:{ $elemMatch:{ c:{ $gte:1 } } } } } },
+ [ [ 1, 1.7976931348623157e+308 ] ],
+ [ [ { $minElement:1 }, { $maxElement:1 } ] ] );
View
2 jstests/not2.js
@@ -140,4 +140,4 @@ t.ensureIndex( {"i.j":1} );
indexed( {i:{$elemMatch:{j:1}}}, 1, 1 );
//indexed( {i:{$not:{$elemMatch:{j:1}}}}, {$minElement:1}, {$maxElement:1} );
not( {i:{$not:{$elemMatch:{j:1}}}} );
-indexed( {i:{$not:{$elemMatch:{j:{$ne:1}}}}}, 1, 1 );
+not( {i:{$not:{$elemMatch:{j:{$ne:1}}}}}, 1, 1 );
View
214 src/mongo/db/queryutil.cpp
@@ -412,7 +412,8 @@ namespace mongo {
}
case BSONObj::opREGEX:
case BSONObj::opOPTIONS:
- // do nothing
+ // These operators are handled via their encapsulating object, so no bounds are
+ // generated for them individually.
break;
case BSONObj::opELEM_MATCH: {
log() << "warning: shouldn't get here?" << endl;
@@ -834,111 +835,146 @@ namespace mongo {
}
}
- void FieldRangeSet::processOpElement( const char *fieldName, const BSONElement &f, bool isNot, bool optimize ) {
- BSONElement g = f;
- int op2 = g.getGtLtOp();
- if ( op2 == BSONObj::opALL ) {
- BSONElement h = g;
- uassert( 13050 , "$all requires array", h.type() == Array );
- BSONObjIterator i( h.embeddedObject() );
- if( i.more() ) {
- BSONElement x = i.next();
- if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ) {
- g = x.embeddedObject().firstElement();
- op2 = g.getGtLtOp();
+ void FieldRangeSet::handleOp( const char* matchFieldName, const BSONElement& op, bool isNot,
+ bool optimize ) {
+ int opType = op.getGtLtOp();
+
+ // If the first $all element's first op is an $elemMatch, generate bounds for it
+ // and ignore the remaining $all elements. SERVER-664
+ if ( opType == BSONObj::opALL ) {
+ uassert( 13050, "$all requires array", op.type() == Array );
+ BSONElement firstAllClause = op.embeddedObject().firstElement();
+ if ( firstAllClause.type() == Object ) {
+ BSONElement firstAllClauseOp = firstAllClause.embeddedObject().firstElement();
+ if ( firstAllClauseOp.getGtLtOp() == BSONObj::opELEM_MATCH ) {
+ handleElemMatch( matchFieldName, firstAllClauseOp, isNot, optimize );
+ return;
}
}
}
- if ( op2 == BSONObj::opELEM_MATCH ) {
- adjustMatchField();
- BSONObjIterator k( g.embeddedObjectUserCheck() );
- while ( k.more() ) {
- BSONElement h = k.next();
- StringBuilder buf;
- buf << fieldName << "." << h.fieldName();
- string fullname = buf.str();
-
- int op3 = getGtLtOp( h );
- if ( op3 == BSONObj::Equality ) {
- intersectMatchField( fullname.c_str(), h, isNot, optimize );
- }
- else {
- BSONObjIterator l( h.embeddedObject() );
- while ( l.more() ) {
- intersectMatchField( fullname.c_str(), l.next(), isNot, optimize );
- }
- }
- }
+
+ if ( opType == BSONObj::opELEM_MATCH ) {
+ handleElemMatch( matchFieldName, op, isNot, optimize );
}
else {
- intersectMatchField( fieldName, f, isNot, optimize );
+ intersectMatchField( matchFieldName, op, isNot, optimize );
}
}
- void FieldRangeSet::processQueryField( const BSONElement &e, bool optimize ) {
- if ( e.fieldName()[ 0 ] == '$' ) {
- if ( str::equals( e.fieldName(), "$and" ) ) {
- uassert( 14816 , "$and expression must be a nonempty array" , e.type() == Array && e.embeddedObject().nFields() > 0 );
- BSONObjIterator i( e.embeddedObject() );
- while( i.more() ) {
- BSONElement e = i.next();
- uassert( 14817 , "$and elements must be objects" , e.type() == Object );
- BSONObjIterator j( e.embeddedObject() );
- while( j.more() ) {
- processQueryField( j.next(), optimize );
+ void FieldRangeSet::handleNotOp( const char* matchFieldName, const BSONElement& notOp,
+ bool optimize ) {
+ switch( notOp.type() ) {
+ case Object: {
+ BSONObjIterator notOpIterator( notOp.embeddedObject() );
+ while( notOpIterator.more() ) {
+ BSONElement opToNegate = notOpIterator.next();
+ uassert( 13034, "invalid use of $not",
+ opToNegate.getGtLtOp() != BSONObj::Equality );
+ handleOp( matchFieldName, opToNegate, true, optimize );
+ }
+ break;
+ }
+ case RegEx:
+ handleOp( matchFieldName, notOp, true, optimize );
+ break;
+ default:
+ uassert( 13041, "invalid use of $not", false );
+ }
+ }
+
+ void FieldRangeSet::handleElemMatch( const char* matchFieldName, const BSONElement& elemMatch,
+ bool isNot, bool optimize ) {
+ adjustMatchField();
+ if ( isNot ) {
+ // SERVER-5740 $elemMatch queries may depend on multiple fields, so $not:$elemMatch
+ // cannot in general generate range constraints for a single field.
+ return;
+ }
+ BSONObj elemMatchObj = elemMatch.embeddedObjectUserCheck();
+ BSONElement firstElemMatchElement = elemMatchObj.firstElement();
+ if ( firstElemMatchElement.getGtLtOp() != BSONObj::Equality ||
+ str::equals( firstElemMatchElement.fieldName(), "$not" ) ) {
+ // TODO SERVER-1264 Handle $elemMatch applied to top level elements (where $elemMatch
+ // fields are operators).
+ }
+ else {
+ // Handle $elemMatch applied to nested elements ($elemMatch fields are not operators).
+ FieldRangeSet elemMatchRanges( _ns.c_str(), elemMatchObj, _singleKey, optimize );
+ scoped_ptr<FieldRangeSet> prefixedRanges
+ ( elemMatchRanges.prefixed( matchFieldName ) );
+ *this &= *prefixedRanges;
+ }
+ }
+
+ void FieldRangeSet::handleMatchField( const BSONElement& matchElement, bool optimize ) {
+ const char* matchFieldName = matchElement.fieldName();
+ if ( matchFieldName[ 0 ] == '$' ) {
+ if ( str::equals( matchFieldName, "$and" ) ) {
+ uassert( 14816, "$and expression must be a nonempty array",
+ matchElement.type() == Array &&
+ matchElement.embeddedObject().nFields() > 0 );
+ BSONObjIterator andExpressionIterator( matchElement.embeddedObject() );
+ while( andExpressionIterator.more() ) {
+ BSONElement andClause = andExpressionIterator.next();
+ uassert( 14817, "$and elements must be objects", andClause.type() == Object );
+ BSONObjIterator andClauseIterator( andClause.embeddedObject() );
+ while( andClauseIterator.more() ) {
+ BSONElement andMatchElement = andClauseIterator.next();
+ handleMatchField( andMatchElement, optimize );
}
}
return;
}
adjustMatchField();
- if ( str::equals( e.fieldName(), "$where" ) ) {
+ if ( str::equals( matchFieldName, "$where" ) ) {
return;
}
- if ( str::equals( e.fieldName(), "$or" ) ) {
+ if ( str::equals( matchFieldName, "$or" ) ) {
return;
}
- if ( str::equals( e.fieldName(), "$nor" ) ) {
+ if ( str::equals( matchFieldName, "$nor" ) ) {
return;
}
}
- bool equality = ( getGtLtOp( e ) == BSONObj::Equality );
- if ( equality && e.type() == Object ) {
- equality = !str::equals( e.embeddedObject().firstElementFieldName(), "$not" );
- }
-
- if ( equality || ( e.type() == Object && e.embeddedObject().hasField( "$regex" ) ) ) {
- intersectMatchField( e.fieldName(), e, false, optimize );
- }
- if ( !equality ) {
- BSONObjIterator j( e.embeddedObject() );
- while( j.more() ) {
- BSONElement f = j.next();
- if ( str::equals( f.fieldName(), "$not" ) ) {
- switch( f.type() ) {
- case Object: {
- BSONObjIterator k( f.embeddedObject() );
- while( k.more() ) {
- BSONElement g = k.next();
- uassert( 13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality );
- processOpElement( e.fieldName(), g, true, optimize );
- }
- break;
- }
- case RegEx:
- processOpElement( e.fieldName(), f, true, optimize );
- break;
- default:
- uassert( 13041, "invalid use of $not", false );
- }
- }
- else {
- processOpElement( e.fieldName(), f, false, optimize );
- }
+ bool equality =
+ // Check for a parsable '$' operator within a match element, indicating the object
+ // should not be matched as is but parsed.
+ // NOTE This only checks for a '$' prefix in the first embedded field whereas Matcher
+ // checks all embedded fields.
+ ( getGtLtOp( matchElement ) == BSONObj::Equality ) &&
+ // Similarly check for the $not meta operator.
+ !( matchElement.type() == Object &&
+ str::equals( matchElement.embeddedObject().firstElementFieldName(), "$not" ) );
+
+ if ( equality ) {
+ intersectMatchField( matchFieldName, matchElement, false, optimize );
+ return;
+ }
+
+ bool untypedRegex =
+ ( matchElement.type() == Object ) &&
+ matchElement.embeddedObject().hasField( "$regex" );
+
+ if ( untypedRegex ) {
+ // $regex/$options pairs must be handled together and so are passed via the
+ // element encapsulating them.
+ intersectMatchField( matchFieldName, matchElement, false, optimize );
+ // Other elements may remain to be handled, below.
+ }
+
+ BSONObjIterator matchExpressionIterator( matchElement.embeddedObject() );
+ while( matchExpressionIterator.more() ) {
+ BSONElement opElement = matchExpressionIterator.next();
+ if ( str::equals( opElement.fieldName(), "$not" ) ) {
+ handleNotOp( matchFieldName, opElement, optimize );
+ }
+ else {
+ handleOp( matchFieldName, opElement, false, optimize );
}
}
}
@@ -950,9 +986,8 @@ namespace mongo {
_singleKey( singleKey ),
_simpleFiniteSet( true ) {
BSONObjIterator i( _queries[ 0 ] );
-
while( i.more() ) {
- processQueryField( i.next(), optimize );
+ handleMatchField( i.next(), optimize );
}
}
@@ -1192,7 +1227,18 @@ namespace mongo {
ret->_queries = _queries;
return ret;
}
-
+
+ FieldRangeSet* FieldRangeSet::prefixed( const string& prefix ) const {
+ auto_ptr<FieldRangeSet> ret( new FieldRangeSet( ns(), BSONObj(), _singleKey, true ) );
+ for( map<string,FieldRange>::const_iterator i = ranges().begin(); i != ranges().end();
+ ++i ) {
+ string prefixedFieldName = prefix + "." + i->first;
+ ret->range( prefixedFieldName.c_str() ) = i->second;
+ }
+ ret->_queries = _queries;
+ return ret.release();
+ }
+
string FieldRangeSet::toString() const {
BSONObjBuilder bob;
for( map<string,FieldRange>::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) {
View
22 src/mongo/db/queryutil.h
@@ -365,7 +365,7 @@ namespace mongo {
/** @return range for the given field. */
const FieldRange &range( const char *fieldName ) const;
- /** @return range for the given field. */
+ /** @return range for the given field. Public for testing. */
FieldRange &range( const char *fieldName );
/** @return the number of non universal ranges. */
int numNonUniversalRanges() const;
@@ -449,6 +449,12 @@ namespace mongo {
*/
FieldRangeSet *subset( const BSONObj &fields ) const;
+ /**
+ * @return A new FieldRangeSet based on this FieldRangeSet, but with all field names
+ * prefixed by the specified @param prefix field name.
+ */
+ FieldRangeSet* prefixed( const string& prefix ) const;
+
bool singleKey() const { return _singleKey; }
BSONObj originalQuery() const { return _queries[ 0 ]; }
@@ -457,8 +463,18 @@ namespace mongo {
private:
void appendQueries( const FieldRangeSet &other );
void makeEmpty();
- void processQueryField( const BSONElement &e, bool optimize );
- void processOpElement( const char *fieldName, const BSONElement &f, bool isNot, bool optimize );
+
+ /**
+ * Query parsing routines.
+ * TODO integrate these with an external query parser shared by the matcher. SERVER-1009
+ */
+ void handleMatchField( const BSONElement& matchElement, bool optimize );
+ void handleOp( const char* matchFieldName, const BSONElement& op, bool isNot,
+ bool optimize );
+ void handleNotOp( const char* matchFieldName, const BSONElement& notOp, bool optimize );
+ void handleElemMatch( const char* matchFieldName, const BSONElement& elemMatch, bool isNot,
+ bool optimize );
+
/** Must be called when a match element is skipped or modified to generate a FieldRange. */
void adjustMatchField();
void intersectMatchField( const char *fieldName, const BSONElement &matchElement,
View
127 src/mongo/dbtests/queryutiltests.cpp
@@ -1141,7 +1141,125 @@ namespace QueryUtilTests {
auto_ptr<FieldRangeSet> _frs1;
auto_ptr<FieldRangeSet> _frs2;
};
-
+
+ /** Duplicate a FieldRangeSet, but with prefixed field names. */
+ class Prefixed {
+ public:
+ void run() {
+ FieldRangeSet original( "", BSON( "a" << 1 ), true );
+ scoped_ptr<FieldRangeSet> prefixed( original.prefixed( "prefix" ) );
+ ASSERT( prefixed->singleKey() );
+ ASSERT( prefixed->range( "a" ).universal() );
+ ASSERT( prefixed->range( "prefix.a" ).equality() );
+ }
+ };
+
+ namespace ElemMatch {
+
+ /** Field ranges generated for the $elemMatch operator. */
+ class Ranges {
+ public:
+ void run() {
+ FieldRangeSet set( "", fromjson( "{ a:{ $elemMatch:{ b:1, c:2 } } }" ), true );
+ ASSERT( set.range( "a.b" ).equality() );
+ ASSERT( set.range( "a.c" ).equality() );
+ ASSERT( !set.range( "a.d" ).equality() );
+ ASSERT( !set.range( "a" ).equality() );
+ }
+ };
+
+ /**
+ * Field ranges generated when the $elemMatch operator is applied to top level
+ * elements.
+ */
+ class TopLevelElements {
+ public:
+ void run() {
+ FieldRangeSet set( "", fromjson( "{ a:{ $elemMatch:{ $gte:5, $lte:5 } } }" ),
+ true );
+ // No constraints exist for operator based field names like "a.$gte".
+ ASSERT( set.range( "a.$gte" ).universal() );
+ ASSERT( set.range( "a.$lte" ).universal() );
+ if ( 0 ) { // SERVER-1264
+ ASSERT( set.range( "a" ).equality() );
+ }
+ }
+ };
+
+ /**
+ * Field ranges generated when the $elemMatch operator is applied to a top level
+ * $not meta operator.
+ */
+ class TopLevelNotElement {
+ public:
+ void run() {
+ FieldRangeSet set( "", fromjson( "{ a:{ $elemMatch:{ $not:{ $ne:5 } } } }" ),
+ true );
+ // No constraints exist for operator based field names like "a.$not".
+ ASSERT( set.range( "a.$not" ).universal() );
+ ASSERT( set.range( "a.$not.$ne" ).universal() );
+ if ( 0 ) { // SERVER-1264
+ ASSERT( set.range( "a" ).equality() );
+ }
+ }
+ };
+
+ /** Field ranges generated for $not:$elemMatch queries. */
+ class Not {
+ public:
+ void run() {
+ FieldRangeSet set( "",
+ fromjson( "{ a:{ $not:{ $elemMatch:{ b:{ $ne:1 } } } } }" ),
+ true );
+ ASSERT( !set.range( "a.b" ).equality() );
+ ASSERT( set.range( "a.b" ).universal() );
+ }
+ };
+
+ /** Field ranges generated for nested $elemMatch expressions. */
+ class Nested {
+ public:
+ void run() {
+ FieldRangeSet set( "",
+ fromjson( "{ a:{ $elemMatch:{ b:{ $elemMatch:{ c:1"
+ " } } } } }" ),
+ true );
+ // No constraints are generated for the following fields.
+ BSONArray universalFields = BSON_ARRAY( "a" << "a.b" << "b" << "b.c" << "c" );
+ BSONObjIterator i( universalFields );
+ while( i.more() ) {
+ ASSERT( set.range( i.next().String().c_str() ).universal() );
+ }
+ // A correct constraint is generated for a nested $elemMatch field.
+ ASSERT( set.range( "a.b.c" ).equality() );
+ ASSERT_EQUALS( 1, set.range( "a.b.c" ).min().number() );
+ }
+ };
+
+ /** Field ranges generated for an $elemMatch expression nested within $all. */
+ class AllNested {
+ public:
+ void run() {
+ FieldRangeSet set( "",
+ fromjson( "{ a:{ $elemMatch:{ b:{ $all:"
+ "[ { $elemMatch:{ c:1 } }, { $elemMatch:{ d:2 } } ]"
+ " } } } }" ),
+ true );
+ // No constraints are generated for the following fields.
+ BSONArray universalFields = BSON_ARRAY( "a" << "a.b" << "b" << "b.c" << "c"
+ << "b.d" << "d" << "a.b.d" );
+ BSONObjIterator i( universalFields );
+ while( i.more() ) {
+ ASSERT( set.range( i.next().String().c_str() ).universal() );
+ }
+ // A correct constraint is generated for the first nested $elemMatch field.
+ ASSERT( set.range( "a.b.c" ).equality() );
+ ASSERT_EQUALS( 1, set.range( "a.b.c" ).min().number() );
+ }
+ };
+
+ } // namespace ElemMatch
+
namespace SimpleFiniteSet {
class SimpleFiniteSet {
@@ -2197,6 +2315,13 @@ namespace QueryUtilTests {
add<FieldRangeSetTests::MatchPossible>();
add<FieldRangeSetTests::MatchPossibleForIndex>();
add<FieldRangeSetTests::Subset>();
+ add<FieldRangeSetTests::Prefixed>();
+ add<FieldRangeSetTests::ElemMatch::Ranges>();
+ add<FieldRangeSetTests::ElemMatch::TopLevelElements>();
+ add<FieldRangeSetTests::ElemMatch::TopLevelNotElement>();
+ add<FieldRangeSetTests::ElemMatch::Not>();
+ add<FieldRangeSetTests::ElemMatch::Nested>();
+ add<FieldRangeSetTests::ElemMatch::AllNested>();
add<FieldRangeSetTests::SimpleFiniteSet::EmptyQuery>();
add<FieldRangeSetTests::SimpleFiniteSet::Equal>();
add<FieldRangeSetTests::SimpleFiniteSet::In>();

0 comments on commit 2b572b9

Please sign in to comment.