Skip to content

Commit

Permalink
SERVER-9639 For 2dsphere V2 indices don't index any docs missing any …
Browse files Browse the repository at this point in the history
…geo fields

This reverts commit 3b5b35c.
  • Loading branch information
Hari Khalsa committed Feb 20, 2014
1 parent a71a95c commit 269c683
Show file tree
Hide file tree
Showing 7 changed files with 410 additions and 51 deletions.
124 changes: 96 additions & 28 deletions jstests/geo_s2sparse.js
Original file line number Diff line number Diff line change
@@ -1,45 +1,113 @@
// Test that 2dsphere ignores the sparse option.
// Test behavior of 2dsphere and sparse. See SERVER-9639.
// All V2 2dsphere indices are sparse in the geo fields.

var coll = db.geo_s2sparse;
coll.drop();

// 2d geo object used in inserted documents
var point = { type: "Point", coordinates: [5, 5] }

// the index we'll use
var index = { geo: "2dsphere", nonGeo: 1 };
var indexName = "geo_2dsphere_nonGeo_1";
var indexSpec = { geo: "2dsphere", nonGeo: 1 };

var indexName = 'test.geo_s2sparse.$geo_2dsphere_nonGeo_1';

// ensure a sparse index
coll.ensureIndex(index, { sparse: 1 });
//
// V2 indices are "geo sparse" always.
//

// note the initial size of the index
var initialIndexSize = coll.stats().indexSizes[indexName];
assert.gte(initialIndexSize, 0);
// Clean up.
coll.drop();
coll.ensureIndex(indexSpec);

// insert matching docs
for (var i = 0; i < 1000; i++) {
// Insert N documents with the geo field.
var N = 1000;
for (var i = 0; i < N; i++) {
coll.insert({ geo: point, nonGeo: "point_"+i });
}

// the new size of the index
var indexSizeAfterFirstInsert = coll.stats().indexSizes[indexName];
assert.gte(indexSizeAfterFirstInsert, initialIndexSize);
// Expect N keys.
assert.eq(N, coll.validate().keysPerIndex[indexName]);

// Insert N documents without the geo field.
for (var i = 0; i < N; i++) {
coll.insert({ wrongGeo: point, nonGeo: i});
}

// Still expect N keys as we didn't insert any geo stuff.
assert.eq(N, coll.validate().keysPerIndex[indexName]);

// Insert N documents with just the geo field.
for (var i = 0; i < N; i++) {
coll.insert({ geo: point});
}

// Expect 2N keys.
assert.eq(N + N, coll.validate().keysPerIndex[indexName]);

// Add some "not geo" stuff.
for (var i = 0; i < N; i++) {
coll.insert({ geo: null});
coll.insert({ geo: []});
coll.insert({ geo: undefined});
coll.insert({ geo: {}});
}

// Still expect 2N keys.
assert.eq(N + N, coll.validate().keysPerIndex[indexName]);

//
// V1 indices are never sparse
//

coll.drop();
coll.ensureIndex(indexSpec, {"2dsphereIndexVersion": 1});

// Insert N documents with the geo field.
for (var i = 0; i < N; i++) {
coll.insert({ geo: point, nonGeo: "point_"+i });
}

// Expect N keys.
assert.eq(N, coll.validate().keysPerIndex[indexName]);

// Insert N documents without the geo field.
for (var i = 0; i < N; i++) {
coll.insert({ wrongGeo: point, nonGeo: i});
}

// Expect N keys as it's a V1 index.
assert.eq(N + N, coll.validate().keysPerIndex[indexName]);

//
// V2 indices with several 2dsphere-indexed fields are only sparse if all are missing.
//

// Clean up.
coll.drop();
coll.ensureIndex({geo: "2dsphere", otherGeo: "2dsphere"});

indexName = 'test.geo_s2sparse.$geo_2dsphere_otherGeo_2dsphere';

// Insert N documents with the first geo field.
var N = 1000;
for (var i = 0; i < N; i++) {
coll.insert({ geo: point});
}

// Expect N keys.
assert.eq(N, coll.validate().keysPerIndex[indexName]);

// insert docs missing the nonGeo field
for (var i = 0; i < 1000; i++) {
coll.insert({ geo: point, wrongNonGeo: "point_"+i });
// Insert N documents with the second geo field.
var N = 1000;
for (var i = 0; i < N; i++) {
coll.insert({ otherGeo: point});
}

// the new size, should be bigger
var indexSizeAfterSecondInsert = coll.stats().indexSizes[indexName];
assert.gte(indexSizeAfterSecondInsert, indexSizeAfterFirstInsert);
// They get inserted too.
assert.eq(N + N, coll.validate().keysPerIndex[indexName]);

// insert docs missing the geo field, to make sure they're filtered out
for (var i = 0; i < 1000; i++) {
coll.insert({ wrongGeo: point, nonGeo: "point_"+i });
// Insert N documents with neither geo field.
for (var i = 0; i < N; i++) {
coll.insert({ nonGeo: i});
}

// the new size, should be unchanged
var indexSizeAfterThirdInsert = coll.stats().indexSizes[indexName];
assert.gte(indexSizeAfterThirdInsert, indexSizeAfterSecondInsert);
// Still expect 2N keys as the neither geo docs were omitted from the index.
assert.eq(N + N, coll.validate().keysPerIndex[indexName]);
40 changes: 40 additions & 0 deletions src/mongo/db/index/expression_key_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,10 @@ namespace mongo {
void getS2Keys(const BSONObj& obj, const BSONObj& keyPattern,
const S2IndexingParams& params, BSONObjSet* keys) {
BSONObjSet keysToAdd;

// Does one of our documents have a geo field?
bool haveGeoField = false;

// We output keys in the same order as the fields we index.
BSONObjIterator i(keyPattern);
while (i.more()) {
Expand All @@ -393,6 +397,35 @@ namespace mongo {

BSONObjSet keysForThisField;
if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
if (S2_INDEX_VERSION_2 == params.indexVersion) {
// For V2,
// geo: null,
// geo: undefined
// geo: []
// should all behave like there is no geo field. So we look for these cases and
// throw out the field elements if we find them.
if (1 == fieldElements.size()) {
BSONElement elt = *fieldElements.begin();
// Get the :null and :undefined cases.
if (elt.isNull() || Undefined == elt.type()) {
fieldElements.clear();
}
else if (elt.isABSONObj()) {
// And this is the :[] case.
BSONObj obj = elt.Obj();
if (0 == obj.nFields()) {
fieldElements.clear();
}
}
}

// V2 2dsphere indices require that at least one geo field to be present in a
// document in order to index it.
if (fieldElements.size() > 0) {
haveGeoField = true;
}
}

getGeoKeys(obj, fieldElements, params, &keysForThisField);
} else {
getLiteralKeys(fieldElements, &keysForThisField);
Expand Down Expand Up @@ -424,6 +457,13 @@ namespace mongo {
keysToAdd = updatedKeysToAdd;
}

// Make sure that if we're V2 there's at least one geo field present in the doc.
if (S2_INDEX_VERSION_2 == params.indexVersion) {
if (!haveGeoField) {
return;
}
}

if (keysToAdd.size() > params.maxKeysPerInsert) {
warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
<< ") consider creating larger buckets. obj="
Expand Down
5 changes: 5 additions & 0 deletions src/mongo/db/index/s2_access_method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ namespace mongo {
}
uassert(16750, "Expect at least one geo field, spec=" + descriptor->keyPattern().toString(),
geoFields >= 1);

if (descriptor->isSparse()) {
warning() << "Sparse option ignored for index spec "
<< descriptor->keyPattern().toString() << "\n";
}
}

// static
Expand Down
145 changes: 145 additions & 0 deletions src/mongo/db/query/planner_ixselect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,23 @@ namespace mongo {
}
}

// static
void QueryPlannerIXSelect::stripInvalidAssignments(MatchExpression* node,
const vector<IndexEntry>& indices) {

stripInvalidAssignmentsToTextIndexes(node, indices);

if (MatchExpression::GEO != node->matchType() &&
MatchExpression::GEO_NEAR != node->matchType()) {

stripInvalidAssignmentsTo2dsphereIndices(node, indices);
}
}

//
// Helpers used by stripInvalidAssignments
//

/**
* Remove 'idx' from the RelevantTag lists for 'node'. 'node' must be a leaf.
*/
Expand All @@ -386,6 +403,10 @@ namespace mongo {
}
}

//
// Text index quirks
//

/**
* Traverse the subtree rooted at 'node' to remove invalid RelevantTag assignments to text index
* 'idx', which has prefix paths 'prefixPaths'.
Expand Down Expand Up @@ -512,4 +533,128 @@ namespace mongo {
}
}

//
// 2dsphere V2 sparse quirks
//

static void stripInvalidAssignmentsTo2dsphereIndex(
MatchExpression* node,
size_t idx,
const unordered_set<StringData, StringData::Hasher>& geoFields) {

if (Indexability::nodeCanUseIndexOnOwnField(node)) {
removeIndexRelevantTag(node, idx);
return;
}

const MatchExpression::MatchType nodeType = node->matchType();

// Don't bother peeking inside of negations.
if (MatchExpression::NOT == nodeType || MatchExpression::NOR == nodeType) {
return;
}

if (MatchExpression::AND != nodeType) {
// It's an OR or some kind of array operator.
for (size_t i = 0; i < node->numChildren(); ++i) {
stripInvalidAssignmentsTo2dsphereIndex(node->getChild(i), idx, geoFields);
}
return;
}

bool hasGeoField = false;

for (size_t i = 0; i < node->numChildren(); ++i) {
MatchExpression* child = node->getChild(i);
RelevantTag* tag = static_cast<RelevantTag*>(child->getTag());

if (NULL == tag) {
// 'child' could be a logical operator. Maybe there are some assignments hiding
// inside.
stripInvalidAssignmentsTo2dsphereIndex(child, idx, geoFields);
continue;
}

bool inFirst = tag->first.end() != std::find(tag->first.begin(),
tag->first.end(),
idx);

bool inNotFirst = tag->notFirst.end() != std::find(tag->notFirst.begin(),
tag->notFirst.end(),
idx);

// If there is an index assignment...
if (inFirst || inNotFirst) {
// And it's a geo predicate...
if (MatchExpression::GEO == child->matchType() ||
MatchExpression::GEO_NEAR == child->matchType()) {

hasGeoField = true;
}
}
else {
// Recurse on the children to ensure that they're not hiding any assignments
// to idx.
stripInvalidAssignmentsTo2dsphereIndex(child, idx, geoFields);
}
}

// If there isn't a geo predicate our results aren't a subset of what's in the geo index, so
// if we use the index we'll miss results.
if (!hasGeoField) {
for (size_t i = 0; i < node->numChildren(); ++i) {
stripInvalidAssignmentsTo2dsphereIndex(node->getChild(i), idx, geoFields);
}
}
}

// static
void QueryPlannerIXSelect::stripInvalidAssignmentsTo2dsphereIndices(
MatchExpression* node,
const vector<IndexEntry>& indices) {

for (size_t i = 0; i < indices.size(); ++i) {
const IndexEntry& index = indices[i];

// We only worry about 2dsphere indices.
if (INDEX_2DSPHERE != index.type) {
continue;
}

// They also have to be V2. Both ignore the sparse flag but V1 is
// never-sparse, V2 geo-sparse.
BSONElement elt = index.infoObj["2dsphereIndexVersion"];
if (elt.eoo()) {
continue;
}
if (!elt.isNumber()) {
continue;
}
if (2 != elt.numberInt()) {
continue;
}

// Gather the set of geo fields in this index.
unordered_set<StringData, StringData::Hasher> geoFields;
BSONObjIterator it(index.keyPattern);
while (it.more()) {
BSONElement elt = it.next();
if (String == elt.type()) {
geoFields.insert(elt.fieldName());
}
}

// If every field is geo don't bother doing anything.
if (geoFields.size() == static_cast<size_t>(index.keyPattern.nFields())) {
continue;
}

// You can't have a 2dsphere index without a 2dsphere field.
invariant(!geoFields.empty());

// Remove bad assignments from this index.
stripInvalidAssignmentsTo2dsphereIndex(node, i, geoFields);
}
}

} // namespace mongo
Loading

0 comments on commit 269c683

Please sign in to comment.