Skip to content

Commit

Permalink
SERVER-22178 Always retry sorted findAndModify upon write conflict.
Browse files Browse the repository at this point in the history
Previously, if there was a WriteConflictException while actually doing
the update or delete, we would retry the findAndModify, but if the
update or delete stage detected that the document was already deleted
or that it no longer matched the predicate, it would not retry. This
patch ensures the findAndModify will be retried in either of those
scenarios.
  • Loading branch information
cswanson310 committed Mar 25, 2016
1 parent 4a38dcf commit 178e241
Show file tree
Hide file tree
Showing 15 changed files with 462 additions and 130 deletions.
10 changes: 7 additions & 3 deletions jstests/concurrency/fsm_all_sharded_replication.js
Expand Up @@ -45,9 +45,13 @@ var blacklist = [
'compact_simultaneous_padding_bytes.js', // compact can only be run against a mongod
'convert_to_capped_collection.js', // convertToCapped can't be run on mongos processes
'convert_to_capped_collection_index.js', // convertToCapped can't be run on mongos processes
'findAndModify_remove_queue.js', // remove cannot be {} for findAndModify
'findAndModify_update_collscan.js', // findAndModify requires a shard key
'findAndModify_update_queue.js', // findAndModify requires a shard key
'findAndModify_mixed_queue.js', // findAndModify requires a shard key
'findAndModify_mixed_queue_unindexed.js', // findAndModify requires a shard key
'findAndModify_remove_queue.js', // remove cannot be {} for findAndModify
'findAndModify_remove_queue_unindexed.js', // findAndModify requires a shard key
'findAndModify_update_collscan.js', // findAndModify requires a shard key
'findAndModify_update_queue.js', // findAndModify requires a shard key
'findAndModify_update_queue_unindexed.js', // findAndModify requires a shard key
'group.js', // the group command cannot be issued against a sharded cluster
'group_cond.js', // the group command cannot be issued against a sharded cluster
'indexed_insert_eval.js', // eval doesn't work with sharded collections
Expand Down
10 changes: 7 additions & 3 deletions jstests/concurrency/fsm_all_sharded_replication_with_balancer.js
Expand Up @@ -50,9 +50,13 @@ var blacklist = [
'compact_simultaneous_padding_bytes.js', // compact can only be run against a mongod
'convert_to_capped_collection.js', // convertToCapped can't be run on mongos processes
'convert_to_capped_collection_index.js', // convertToCapped can't be run on mongos processes
'findAndModify_remove_queue.js', // remove cannot be {} for findAndModify
'findAndModify_update_collscan.js', // findAndModify requires a shard key
'findAndModify_update_queue.js', // findAndModify requires a shard key
'findAndModify_mixed_queue.js', // findAndModify requires a shard key
'findAndModify_mixed_queue_unindexed.js', // findAndModify requires a shard key
'findAndModify_remove_queue.js', // remove cannot be {} for findAndModify
'findAndModify_remove_queue_unindexed.js', // findAndModify requires a shard key
'findAndModify_update_collscan.js', // findAndModify requires a shard key
'findAndModify_update_queue.js', // findAndModify requires a shard key
'findAndModify_update_queue_unindexed.js', // findAndModify requires a shard key
'group.js', // the group command cannot be issued against a sharded cluster
'group_cond.js', // the group command cannot be issued against a sharded cluster
'indexed_insert_eval.js', // eval doesn't work with sharded collections
Expand Down
@@ -0,0 +1,96 @@
'use strict';

/**
* findAndModify_mixed_queue_unindexed.js
*
* This workload is a combination of findAndModify_remove_queue_unindexed.js and
* findAndModify_update_queue_unindexed.js.
*
* Each thread contends on the same document as one another by randomly performing either a
* findAndModify update operation or a findAndModify remove operation. The lack of an index that
* could satisfy the sort forces the findAndModify operations to scan all the matching documents in
* order to find the relevant document. This increases the amount of work each findAndModify
* operation has to do before getting to the matching document, and thus increases the chance of a
* write conflict because each is trying to update or remove the same document.
*
* This workload was designed to reproduce SERVER-21434.
*/
load('jstests/concurrency/fsm_libs/extend_workload.js'); // for extendWorkload
load('jstests/concurrency/fsm_workloads/findAndModify_remove_queue.js'); // for $config
load('jstests/concurrency/fsm_workload_helpers/server_types.js'); // for isMMAPv1

var $config = extendWorkload(
$config,
function($config, $super) {

// Use the workload name as the database name, since the workload name is assumed to be
// unique.
$config.data.uniqueDBName = 'findAndModify_mixed_queue_unindexed';

$config.data.newDocForInsert = function newDocForInsert(i) {
return {
_id: i,
rand: Random.rand(),
counter: 0
};
};

$config.data.getIndexSpecs = function getIndexSpecs() {
return [];
};

$config.data.opName = 'modified';

$config.data.validateResult = function validateResult(db, collName, res) {
assertAlways.commandWorked(res);

var doc = res.value;
if (isMongod(db) && !isMMAPv1(db)) {
// MMAPv1 does not automatically retry if there was a conflict, so it is expected
// that it may return null in the case of a conflict. All other storage engines
// should automatically retry the operation, and thus should never return null.
assertWhenOwnColl.neq(
doc, null, 'findAndModify should have found a matching document');
}
if (doc !== null) {
this.saveDocId(db, collName, doc._id);
}
};

$config.states = (function() {
// Avoid removing documents that were already updated.
function remove(db, collName) {
var res = db.runCommand({
findAndModify: db[collName].getName(),
query: {counter: 0},
sort: {rand: -1},
remove: true
});
this.validateResult(db, collName, res);
}

function update(db, collName) {
// Update the counter field to avoid matching the same document again.
var res = db.runCommand({
findAndModify: db[collName].getName(),
query: {counter: 0},
sort: {rand: -1},
update: {$inc: {counter: 1}}, new: false
});
this.validateResult(db, collName, res);
}

return {
remove: remove,
update: update,
};

})();

$config.transitions = {
remove: {remove: 0.5, update: 0.5},
update: {remove: 0.5, update: 0.5},
};

return $config;
});
14 changes: 7 additions & 7 deletions jstests/concurrency/fsm_workloads/findAndModify_remove_queue.js
Expand Up @@ -26,10 +26,8 @@ var $config = (function() {
};
},

getIndexSpec: function getIndexSpec() {
return {
rand: 1
};
getIndexSpecs: function getIndexSpecs() {
return [{rand: 1}];
},

opName: 'removed',
Expand Down Expand Up @@ -114,7 +112,9 @@ var $config = (function() {
assertAlways.writeOK(res);
assertAlways.eq(this.numDocs, res.nInserted);

assertAlways.commandWorked(db[collName].ensureIndex(this.getIndexSpec()));
this.getIndexSpecs().forEach(function ensureIndex(indexSpec) {
assertAlways.commandWorked(db[collName].ensureIndex(indexSpec));
});
}

function teardown(db, collName, cluster) {
Expand All @@ -133,7 +133,7 @@ var $config = (function() {
}
}

assertWhenOwnColl(function() {
assertWhenOwnColl(() => {
var docs = ownedDB[collName].find().toArray();
var ids = [];

Expand All @@ -142,7 +142,7 @@ var $config = (function() {
}

checkForDuplicateIds(ids, this.opName);
}.bind(this));
});

var res = ownedDB.dropDatabase();
assertAlways.commandWorked(res);
Expand Down
@@ -0,0 +1,31 @@
'use strict';

/**
* findAndModify_remove_queue_unindexed.js
*
* This is the same workload as findAndModify_remove_queue.js, but without the relevant index.
*
* The lack of an index that could satisfy the sort forces the findAndModify operations to scan all
* the matching documents in order to find the relevant document. This increases the amount of work
* each findAndModify operation has to do before getting to the matching document, and thus
* increases the chance of a write conflict because each concurrent findAndModify operation is
* trying to remove the same document from the queue.
*
* This workload was designed to reproduce SERVER-21434.
*/
load('jstests/concurrency/fsm_libs/extend_workload.js'); // for extendWorkload
load('jstests/concurrency/fsm_workloads/findAndModify_remove_queue.js'); // for $config

var $config = extendWorkload($config,
function($config, $super) {

// Use the workload name as the database name, since the workload
// name is assumed to be unique.
$config.data.uniqueDBName = 'findAndModify_remove_queue_unindexed';

$config.data.getIndexSpecs = function getIndexSpecs() {
return [];
};

return $config;
});
Expand Up @@ -31,11 +31,8 @@ var $config = extendWorkload(
};
};

$config.data.getIndexSpec = function getIndexSpec() {
return {
counter: 1,
rand: -1
};
$config.data.getIndexSpecs = function getIndexSpecs() {
return [{counter: 1, rand: -1}];
};

$config.data.opName = 'updated';
Expand Down
@@ -0,0 +1,31 @@
'use strict';

/**
* findAndModify_update_queue_unindexed.js
*
* This is the same workload as findAndModify_update_queue.js, but without the relevant index.
*
* The lack of an index that could satisfy the sort forces the findAndModify operations to scan all
* the matching documents in order to find the relevant document. This increases the amount of work
* each findAndModify operation has to do before getting to the matching document, and thus
* increases the chance of a write conflict because each concurrent findAndModify operation is
* trying to update the same document from the queue.
*
* This workload was designed to reproduce SERVER-21434.
*/
load('jstests/concurrency/fsm_libs/extend_workload.js'); // for extendWorkload
load('jstests/concurrency/fsm_workloads/findAndModify_update_queue.js'); // for $config

var $config = extendWorkload($config,
function($config, $super) {

// Use the workload name as the database name, since the workload
// name is assumed to be unique.
$config.data.uniqueDBName = 'findAndModify_update_queue_unindexed';

$config.data.getIndexSpecs = function getIndexSpecs() {
return [];
};

return $config;
});
1 change: 1 addition & 0 deletions src/mongo/db/exec/SConscript
Expand Up @@ -77,6 +77,7 @@ env.Library(
"text_or.cpp",
"update.cpp",
"working_set_common.cpp",
"write_stage_common.cpp",
],
LIBDEPS = [
"scoped_timer",
Expand Down

0 comments on commit 178e241

Please sign in to comment.