SERVER-37446 Change PlanStages which don't require a collection to avoid keeping a Collection*.

In order to prevent the COUNT stage from requiring a Collection*, this
splits fast count into a new RECORD_STORE_FAST_COUNT stage.
dstorch committed Nov 7, 2018
1 parent 56248d5 commit f0b39d9
Showing 31 changed files with 282 additions and 219 deletions.
2 changes: 1 addition & 1 deletion jstests/core/explain_shell_helpers.js
@@ -228,7 +228,7 @@ assert("queryPlanner" in explain.stages[0].$cursor);
// Basic count.
explain = t.explain().count();
assert.commandWorked(explain);
-assert(planHasStage(db, explain.queryPlanner.winningPlan, "COUNT"));
+assert(planHasStage(db, explain.queryPlanner.winningPlan, "RECORD_STORE_FAST_COUNT"));

// Tests for applySkipLimit argument to .count. When we don't apply the skip, we
// count one result. When we do apply the skip we count zero.
2 changes: 1 addition & 1 deletion jstests/core/profile_count.js
@@ -32,7 +32,7 @@
assert.eq(profileObj.protocol, getProfilerProtocolStringForCommand(conn), tojson(profileObj));
assert.eq(profileObj.command.count, coll.getName(), tojson(profileObj));
assert.eq(profileObj.command.collation, {locale: "fr"}, tojson(profileObj));
-assert.eq(profileObj.planSummary, "COUNT", tojson(profileObj));
+assert.eq(profileObj.planSummary, "RECORD_STORE_FAST_COUNT", tojson(profileObj));
assert(profileObj.execStats.hasOwnProperty("stage"), tojson(profileObj));
assert(profileObj.hasOwnProperty("responseLength"), tojson(profileObj));
assert(profileObj.hasOwnProperty("millis"), tojson(profileObj));
9 changes: 9 additions & 0 deletions jstests/core/record_store_count.js
@@ -20,11 +20,20 @@ load("jstests/libs/fixture_helpers.js"); // For isMongos and isSharded.
//
// Logically empty predicates should use the record store's count.
//
+// If the collection is sharded, however, then we can't use fast count, since we need to perform
+// shard filtering to avoid counting data that is not logically owned by the shard.
+//
var explain = coll.explain().count({});
assert(!planHasStage(db, explain.queryPlanner.winningPlan, "COLLSCAN"));
+if (!isMongos(db) || !FixtureHelpers.isSharded(coll)) {
+    assert(planHasStage(db, explain.queryPlanner.winningPlan, "RECORD_STORE_FAST_COUNT"));
+}

explain = coll.explain().count({$comment: "hi"});
assert(!planHasStage(db, explain.queryPlanner.winningPlan, "COLLSCAN"));
+if (!isMongos(db) || !FixtureHelpers.isSharded(coll)) {
+    assert(planHasStage(db, explain.queryPlanner.winningPlan, "RECORD_STORE_FAST_COUNT"));
+}

//
// A non-empty query predicate should prevent the use of the record store's count.
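The comments added above capture the eligibility rule for the fast path. A minimal standalone
sketch of that rule (hypothetical helper; the actual planner-side decision, presumably in
get_executor.cpp, is not shown in this excerpt):

    // Hypothetical sketch: a count can use the record store's fast path only
    // when the predicate is logically empty and no shard filtering is needed.
    // A shard may hold documents it does not logically own, which a raw
    // record-store count would overcount.
    #include <iostream>

    bool canUseRecordStoreFastCount(bool emptyPredicate, bool needsShardFiltering) {
        return emptyPredicate && !needsShardFiltering;
    }

    int main() {
        std::cout << canUseRecordStoreFastCount(true, false) << "\n";   // 1: unsharded, empty predicate
        std::cout << canUseRecordStoreFastCount(true, true) << "\n";    // 0: sharded, must shard-filter
        std::cout << canUseRecordStoreFastCount(false, false) << "\n";  // 0: non-empty predicate
        return 0;
    }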
10 changes: 6 additions & 4 deletions jstests/libs/analyze_plan.js
@@ -298,8 +298,8 @@ function getChunkSkips(root) {
}

/**
-* Given explain output at executionStats level verbosity, confirms that the root stage is COUNT and
-* that the result of the count is equal to 'expectedCount'.
+* Given explain output at executionStats level verbosity, confirms that the root stage is COUNT or
+* RECORD_STORE_FAST_COUNT and that the result of the count is equal to 'expectedCount'.
*/
function assertExplainCount({explainResults, expectedCount}) {
const execStages = explainResults.executionStats.executionStages;
@@ -311,12 +311,14 @@ function assertExplainCount({explainResults, expectedCount}) {
let totalCounted = 0;
for (let shardExplain of execStages.shards) {
const countStage = shardExplain.executionStages;
-assert.eq(countStage.stage, "COUNT", "root stage on shard is not COUNT");
+assert(countStage.stage === "COUNT" || countStage.stage === "RECORD_STORE_FAST_COUNT",
+       "root stage on shard is not COUNT or RECORD_STORE_FAST_COUNT");
totalCounted += countStage.nCounted;
}
assert.eq(totalCounted, expectedCount, "wrong count result");
} else {
-assert.eq(execStages.stage, "COUNT", "root stage is not COUNT");
+assert(execStages.stage === "COUNT" || execStages.stage === "RECORD_STORE_FAST_COUNT",
+       "root stage is not COUNT or RECORD_STORE_FAST_COUNT");
assert.eq(execStages.nCounted, expectedCount, "wrong count result");
}
}
1 change: 1 addition & 0 deletions src/mongo/db/SConscript
@@ -1040,6 +1040,7 @@ env.Library(
'exec/projection.cpp',
'exec/projection_exec.cpp',
'exec/queued_data_stage.cpp',
+'exec/record_store_fast_count.cpp',
'exec/requires_collection_stage.cpp',
'exec/shard_filter.cpp',
'exec/skip.cpp',
7 changes: 3 additions & 4 deletions src/mongo/db/commands/count_cmd.cpp
@@ -239,10 +239,9 @@ class CmdCount : public BasicCommand {
}

// Plan is done executing. We just need to pull the count out of the root stage.
-invariant(STAGE_COUNT == exec->getRootStage()->stageType());
-CountStage* countStage = static_cast<CountStage*>(exec->getRootStage());
-const CountStats* countStats =
-    static_cast<const CountStats*>(countStage->getSpecificStats());
+invariant(STAGE_COUNT == exec->getRootStage()->stageType() ||
+          STAGE_RECORD_STORE_FAST_COUNT == exec->getRootStage()->stageType());
+auto* countStats = static_cast<const CountStats*>(exec->getRootStage()->getSpecificStats());

result.appendNumber("n", countStats->nCounted);
return true;
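For readers following the change: the command no longer downcasts the root stage to CountStage;
it only checks that the root is one of the two count stage types and reads the shared CountStats.
A self-contained miniature of that downcast pattern (simplified stand-in types, not the actual
MongoDB classes):

    #include <cassert>
    #include <iostream>

    struct SpecificStats {
        virtual ~SpecificStats() = default;
    };

    struct CountStats : SpecificStats {
        long long nCounted = 0;
    };

    enum StageType { STAGE_COUNT, STAGE_RECORD_STORE_FAST_COUNT };

    struct PlanStage {
        StageType type;
        CountStats stats;
        StageType stageType() const { return type; }
        const SpecificStats* getSpecificStats() const { return &stats; }
    };

    int main() {
        PlanStage root{STAGE_RECORD_STORE_FAST_COUNT, {}};
        root.stats.nCounted = 42;

        // Either stage type may sit at the root of a count plan; both expose CountStats.
        assert(root.stageType() == STAGE_COUNT ||
               root.stageType() == STAGE_RECORD_STORE_FAST_COUNT);
        auto* countStats = static_cast<const CountStats*>(root.getSpecificStats());
        std::cout << "n: " << countStats->nCounted << "\n";  // prints "n: 42"
        return 0;
    }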
9 changes: 2 additions & 7 deletions src/mongo/db/exec/and_hash.cpp
@@ -56,21 +56,16 @@ const size_t AndHashStage::kLookAheadWorks = 10;
// static
const char* AndHashStage::kStageType = "AND_HASH";

-AndHashStage::AndHashStage(OperationContext* opCtx, WorkingSet* ws, const Collection* collection)
+AndHashStage::AndHashStage(OperationContext* opCtx, WorkingSet* ws)
: PlanStage(kStageType, opCtx),
-_collection(collection),
_ws(ws),
_hashingChildren(true),
_currentChild(0),
_memUsage(0),
_maxMemUsage(kDefaultMaxMemUsageBytes) {}

-AndHashStage::AndHashStage(OperationContext* opCtx,
-WorkingSet* ws,
-const Collection* collection,
-size_t maxMemUsage)
+AndHashStage::AndHashStage(OperationContext* opCtx, WorkingSet* ws, size_t maxMemUsage)
: PlanStage(kStageType, opCtx),
-_collection(collection),
_ws(ws),
_hashingChildren(true),
_currentChild(0),
10 changes: 2 additions & 8 deletions src/mongo/db/exec/and_hash.h
@@ -49,15 +49,12 @@ namespace mongo {
*/
class AndHashStage final : public PlanStage {
public:
-AndHashStage(OperationContext* opCtx, WorkingSet* ws, const Collection* collection);
+AndHashStage(OperationContext* opCtx, WorkingSet* ws);

/**
* For testing only. Allows tests to set memory usage threshold.
*/
-AndHashStage(OperationContext* opCtx,
-WorkingSet* ws,
-const Collection* collection,
-size_t maxMemUsage);
+AndHashStage(OperationContext* opCtx, WorkingSet* ws, size_t maxMemUsage);

void addChild(PlanStage* child);

@@ -87,9 +84,6 @@ class AndHashStage final : public PlanStage {
StageState hashOtherChildren(WorkingSetID* out);
StageState workChild(size_t childNo, WorkingSetID* out);

-// Not owned by us.
-const Collection* _collection;
-
// Not owned by us.
WorkingSet* _ws;

5 changes: 1 addition & 4 deletions src/mongo/db/exec/and_sorted.cpp
@@ -46,11 +46,8 @@ using stdx::make_unique;
// static
const char* AndSortedStage::kStageType = "AND_SORTED";

-AndSortedStage::AndSortedStage(OperationContext* opCtx,
-WorkingSet* ws,
-const Collection* collection)
+AndSortedStage::AndSortedStage(OperationContext* opCtx, WorkingSet* ws)
: PlanStage(kStageType, opCtx),
-_collection(collection),
_ws(ws),
_targetNode(numeric_limits<size_t>::max()),
_targetId(WorkingSet::INVALID_ID),
5 changes: 1 addition & 4 deletions src/mongo/db/exec/and_sorted.h
@@ -48,7 +48,7 @@ namespace mongo {
*/
class AndSortedStage final : public PlanStage {
public:
-AndSortedStage(OperationContext* opCtx, WorkingSet* ws, const Collection* collection);
+AndSortedStage(OperationContext* opCtx, WorkingSet* ws);

void addChild(PlanStage* child);

@@ -73,9 +73,6 @@ class AndSortedStage final : public PlanStage {
// Returns the target node in 'out' if all children successfully advance to it.
PlanStage::StageState moveTowardTargetRecordId(WorkingSetID* out);

-// Not owned by us.
-const Collection* _collection;
-
// Not owned by us.
WorkingSet* _ws;

47 changes: 4 additions & 43 deletions src/mongo/db/exec/count.cpp
@@ -51,62 +51,23 @@ CountStage::CountStage(OperationContext* opCtx,
CountStageParams params,
WorkingSet* ws,
PlanStage* child)
-: PlanStage(kStageType, opCtx),
-_collection(collection),
-_params(std::move(params)),
-_leftToSkip(_params.skip),
-_ws(ws) {
-if (child)
-_children.emplace_back(child);
+: PlanStage(kStageType, opCtx), _params(std::move(params)), _leftToSkip(_params.skip), _ws(ws) {
+invariant(child);
+_children.emplace_back(child);
}

bool CountStage::isEOF() {
-if (_specificStats.recordStoreCount) {
-return true;
-}
-
if (_params.limit > 0 && _specificStats.nCounted >= _params.limit) {
return true;
}

-return !_children.empty() && child()->isEOF();
-}
-
-void CountStage::recordStoreCount() {
-invariant(_collection);
-long long nCounted = _collection->numRecords(getOpCtx());
-
-if (0 != _params.skip) {
-nCounted -= _params.skip;
-if (nCounted < 0) {
-nCounted = 0;
-}
-}
-
-long long limit = _params.limit;
-if (limit < 0) {
-limit = -limit;
-}
-
-if (limit < nCounted && 0 != limit) {
-nCounted = limit;
-}
-
-_specificStats.nCounted = nCounted;
-_specificStats.nSkipped = _params.skip;
-_specificStats.recordStoreCount = true;
+return child()->isEOF();
}

PlanStage::StageState CountStage::doWork(WorkingSetID* out) {
// This stage never returns a working set member.
*out = WorkingSet::INVALID_ID;

-if (_params.useRecordStoreCount) {
-invariant(_collection);
-recordStoreCount();
-return PlanStage::IS_EOF;
-}

if (isEOF()) {
_commonStats.isEOF = true;
return PlanStage::IS_EOF;
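The deleted recordStoreCount() above is the logic that, per the commit message, moves into the new
RECORD_STORE_FAST_COUNT stage (src/mongo/db/exec/record_store_fast_count.cpp, whose contents are
not shown in this excerpt). A standalone sketch of its skip/limit arithmetic, reconstructed from
the deleted lines (hypothetical free function, not the actual stage code):

    #include <cassert>

    // Applies the count command's skip and limit to a raw record-store count,
    // mirroring the deleted CountStage::recordStoreCount() arithmetic.
    long long applySkipLimit(long long numRecords, long long skip, long long limit) {
        long long nCounted = numRecords;
        if (skip != 0) {
            nCounted -= skip;
            if (nCounted < 0) {
                nCounted = 0;  // Skipping past the end yields a count of zero.
            }
        }
        if (limit < 0) {
            limit = -limit;  // A negative limit counts the same as its absolute value.
        }
        if (limit != 0 && limit < nCounted) {
            nCounted = limit;  // A non-zero limit caps the count.
        }
        return nCounted;
    }

    int main() {
        assert(applySkipLimit(100, 0, 0) == 100);    // no skip or limit: raw count
        assert(applySkipLimit(100, 30, 0) == 70);    // skip reduces the count
        assert(applySkipLimit(100, 30, -50) == 50);  // negative limit treated as positive
        assert(applySkipLimit(10, 20, 0) == 0);      // skip past the end clamps to zero
        return 0;
    }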
40 changes: 9 additions & 31 deletions src/mongo/db/exec/count.h
@@ -30,18 +30,14 @@

#pragma once


#include "mongo/db/exec/plan_stage.h"
#include "mongo/db/query/count_request.h"

namespace mongo {

struct CountStageParams {
-CountStageParams(const CountRequest& request, bool useRecordStoreCount)
-: nss(request.getNs()),
-limit(request.getLimit()),
-skip(request.getSkip()),
-useRecordStoreCount(useRecordStoreCount) {}
+CountStageParams(const CountRequest& request)
+: nss(request.getNs()), limit(request.getLimit()), skip(request.getSkip()) {}

// Namespace to operate on (e.g. "foo.bar").
NamespaceString nss;
@@ -51,25 +47,18 @@ struct CountStageParams {

// An integer indicating to not include the first n documents in the count. 0 means no skip.
long long skip;

-// True if this count stage should just ask the record store for a count instead of computing
-// one itself.
-//
-// Note: This strategy can lead to inaccurate counts on certain storage engines (including
-// WiredTiger).
-bool useRecordStoreCount;
};

/**
-* Stage used by the count command. This stage sits at the root of a plan tree
-* and counts the number of results returned by its child stage.
+* Stage used by the count command. This stage sits at the root of a plan tree and counts the number
+* of results returned by its child stage.
*
-* This should not be confused with the CountScan stage. CountScan is a special
-* index access stage which can optimize index access for count operations in
-* some cases. On the other hand, *every* count op has a CountStage at its root.
+* This should not be confused with the CountScan stage. CountScan is a special index access stage
+* which can optimize index access for count operations in some cases. On the other hand, *every*
+* count op has a CountStage at its root.
*
-* Only returns NEED_TIME until hitting EOF. The count result can be obtained by examining
-* the specific stats.
+* Only returns NEED_TIME until hitting EOF. The count result can be obtained by examining the
+* specific stats.
*/
class CountStage final : public PlanStage {
public:
@@ -93,17 +82,6 @@ class CountStage final : public PlanStage {
static const char* kStageType;

private:
-/**
-* Asks the record store for the count, applying the skip and limit if necessary. The result is
-* stored in '_specificStats'.
-*
-* This is only valid if the query and hint are both empty.
-*/
-void recordStoreCount();
-
-// The collection over which we are counting.
-Collection* _collection;

CountStageParams _params;

// The number of documents that we still need to skip.
4 changes: 1 addition & 3 deletions src/mongo/db/exec/merge_sort.cpp
@@ -50,10 +50,8 @@ const char* MergeSortStage::kStageType = "SORT_MERGE";

MergeSortStage::MergeSortStage(OperationContext* opCtx,
const MergeSortStageParams& params,
-WorkingSet* ws,
-const Collection* collection)
+WorkingSet* ws)
: PlanStage(kStageType, opCtx),
-_collection(collection),
_ws(ws),
_pattern(params.pattern),
_collator(params.collator),
8 changes: 1 addition & 7 deletions src/mongo/db/exec/merge_sort.h
@@ -58,10 +58,7 @@ class MergeSortStageParams;
*/
class MergeSortStage final : public PlanStage {
public:
-MergeSortStage(OperationContext* opCtx,
-const MergeSortStageParams& params,
-WorkingSet* ws,
-const Collection* collection);
+MergeSortStage(OperationContext* opCtx, const MergeSortStageParams& params, WorkingSet* ws);

void addChild(PlanStage* child);

@@ -106,9 +103,6 @@ class MergeSortStage final : public PlanStage {
const CollatorInterface* _collator;
};

-// Not owned by us.
-const Collection* _collection;
-
// Not owned by us.
WorkingSet* _ws;

1 change: 0 additions & 1 deletion src/mongo/db/exec/pipeline_proxy.h
@@ -33,7 +33,6 @@
#include <boost/intrusive_ptr.hpp>
#include <boost/optional/optional.hpp>

#include "mongo/db/catalog/collection.h"
#include "mongo/db/exec/plan_stage.h"
#include "mongo/db/exec/plan_stats.h"
#include "mongo/db/pipeline/pipeline.h"
17 changes: 1 addition & 16 deletions src/mongo/db/exec/plan_stats.h
@@ -204,7 +204,7 @@ struct CollectionScanStats : public SpecificStats {
};

struct CountStats : public SpecificStats {
-CountStats() : nCounted(0), nSkipped(0), recordStoreCount(false) {}
+CountStats() : nCounted(0), nSkipped(0) {}

SpecificStats* clone() const final {
CountStats* specific = new CountStats(*this);
@@ -216,9 +216,6 @@ struct CountStats : public SpecificStats {

// The number of results we skipped over.
long long nSkipped;

-// True if we computed the count via Collection::numRecords().
-bool recordStoreCount;
};

struct CountScanStats : public SpecificStats {
@@ -345,18 +342,6 @@ struct FetchStats : public SpecificStats {
size_t docsExamined = 0u;
};

-struct GroupStats : public SpecificStats {
-GroupStats() : nGroups(0) {}
-
-SpecificStats* clone() const final {
-GroupStats* specific = new GroupStats(*this);
-return specific;
-}
-
-// The total number of groups.
-size_t nGroups;
-};

struct IDHackStats : public SpecificStats {
IDHackStats() : keysExamined(0), docsExamined(0) {}

