Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SERVER-2001 part 1: hashing BSONElements
- Loading branch information
Showing
7 changed files
with
564 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
//hashtest1.js | ||
//Simple tests to check hashing of various types | ||
//make sure that different numeric types hash to same thing, and other sanity checks | ||
|
||
var hash = function( v , seed ){ | ||
if (seed) | ||
return db.runCommand({"_hashBSONElement" : v , "seed" : seed})["out"]; | ||
else | ||
return db.runCommand({"_hashBSONElement" : v})["out"]; | ||
}; | ||
|
||
// ObjectIds: three freshly generated ids must hash to pairwise different values. | ||
var oidHash = hash( ObjectId() ); | ||
var oidHash2 = hash( ObjectId() ); | ||
var oidHash3 = hash( ObjectId() ); | ||
assert(! friendlyEqual( oidHash, oidHash2) , "ObjectIDs should hash to different things"); | ||
assert(! friendlyEqual( oidHash, oidHash3) , "ObjectIDs should hash to different things"); | ||
assert(! friendlyEqual( oidHash2, oidHash3) , "ObjectIDs should hash to different things"); | ||
|
||
// Numbers: the same value expressed as int, double and long must hash identically. | ||
// NOTE(review): NumberInt(3.5) is presumably truncated to 3 by the shell before it is | ||
// sent, in which case fracHash does not exercise a fractional double - confirm whether | ||
// hash( 3.5 ) was intended. | ||
var intHash = hash( NumberInt(3) ); | ||
var doubHash = hash( 3 ); | ||
var doubHash2 = hash( 3.0 ); | ||
var longHash = hash( NumberLong(3) ); | ||
var fracHash = hash( NumberInt(3.5) ); | ||
assert.eq( intHash , doubHash ); | ||
assert.eq( intHash , doubHash2 ); | ||
assert.eq( intHash , longHash ); | ||
assert.eq( intHash , fracHash ); | ||
|
||
// Booleans: the two values must be distinguishable. | ||
var trueHash = hash( true ); | ||
var falseHash = hash( false ); | ||
assert(! friendlyEqual( trueHash, falseHash) , "true and false should hash to different things"); | ||
|
||
// null must not collide with false. | ||
var nullHash = hash( null ); | ||
assert(! friendlyEqual( falseHash , nullHash ) , "false and null should hash to different things"); | ||
|
||
// Dates: pause between creating the two dates so that they are not the same instant. | ||
var dateHash = hash( new Date() ); | ||
sleep(1); | ||
var isodateHash = hash( ISODate() ); | ||
assert(! friendlyEqual( dateHash, isodateHash) , "different dates should hash to different things"); | ||
|
||
// A string holding a digit must not collide with the number itself. | ||
var stringHash = hash( "3" ); | ||
assert(! friendlyEqual( intHash , stringHash ), "3 and \"3\" should hash to different things"); | ||
|
||
// A regular expression must not collide with a string of the same text. | ||
var regExpHash = hash( RegExp("3") ); | ||
assert(! friendlyEqual( stringHash , regExpHash) , "\"3\" and RegExp(3) should hash to different things"); | ||
|
||
// Different numeric values must hash differently. | ||
var intHash4 = hash( 4 ); | ||
assert(! friendlyEqual( intHash , intHash4 ), "3 and 4 should hash to different things"); | ||
|
||
// The same value hashed with an explicit seed (3) must differ from the unseeded hash. | ||
var intHashSeeded = hash( 4 , 3 ); | ||
assert(! friendlyEqual(intHash4 , intHashSeeded ), "different seeds should make different hashes"); | ||
|
||
// MinKey and MaxKey must be distinguishable. | ||
var minkeyHash = hash( MinKey ); | ||
var maxkeyHash = hash( MaxKey ); | ||
assert(! friendlyEqual(minkeyHash , maxkeyHash ), "minkey and maxkey should hash to different things"); | ||
|
||
// Arrays: numeric types of the elements are squashed just like top-level numbers. | ||
var arrayHash = hash( [0,1.0,NumberLong(2)] ); | ||
var arrayHash2 = hash( [0,NumberInt(1),2] ); | ||
assert.eq( arrayHash , arrayHash2 , "didn't squash numeric types in array"); | ||
|
||
// An object with keys "0", "1", "2" must not collide with the equivalent array. | ||
var objectHash = hash( {"0":0, "1" : NumberInt(1), "2" : 2} ); | ||
assert(! friendlyEqual(objectHash , arrayHash2) , "arrays and sub-objects should hash to different things"); | ||
|
||
// Nesting: a field inside a sub-document must differ from the same field placed after it. | ||
var c = hash( {a : {}, b : 1} ); | ||
var d = hash( {a : {b : 1}} ); | ||
assert(! friendlyEqual( c , d ) , "hashing doesn't group sub-docs and fields correctly"); | ||
|
||
// Numeric squashing must also apply inside nested arrays and sub-documents. | ||
var e = hash( {a : 3 , b : [NumberLong(3), {c : NumberInt(3)}]} ); | ||
var f = hash( {a : NumberLong(3) , b : [NumberInt(3), {c : 3.0}]} ); | ||
assert.eq( e , f , "recursive number squashing doesn't work"); | ||
|
||
// NaN (0/0) is expected to hash like zero. | ||
var nanHash = hash( 0/0 ); | ||
var zeroHash = hash( 0 ); | ||
assert.eq( nanHash , zeroHash , "NaN and Zero should hash to the same thing"); | ||
|
||
|
||
//should also test that CodeWScope hashes correctly | ||
//but waiting for SERVER-3391 (CodeWScope support in shell) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/* hashcmd.cpp | ||
* | ||
* Defines a shell command for hashing a BSONElement value | ||
*/ | ||
|
||
|
||
/** | ||
* Copyright (C) 2012 10gen Inc. | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License, version 3, | ||
* as published by the Free Software Foundation. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include "mongo/db/commands.h" | ||
#include "mongo/db/hasher.h" | ||
|
||
namespace mongo { | ||
|
||
class CmdHashElt : public Command { | ||
public: | ||
CmdHashElt() : Command("_hashBSONElement") {}; | ||
virtual LockType locktype() const { return NONE; } | ||
virtual bool slaveOk() const { return true; } | ||
virtual void help( stringstream& help ) const { | ||
help << "returns the hash of the first BSONElement val in a BSONObj"; | ||
} | ||
|
||
/* CmdObj has the form {"hash" : <thingToHash>} | ||
* or {"hash" : <thingToHash>, "seed" : <number> } | ||
* Result has the form | ||
* {"key" : <thingTohash>, "seed" : <int>, "out": NumberLong(<hash>)} | ||
* | ||
* Example use in the shell: | ||
*> db.runCommand({hash: "hashthis", seed: 1}) | ||
*> {"key" : "hashthis", | ||
*> "seed" : 1, | ||
*> "out" : NumberLong(6271151123721111923), | ||
*> "ok" : 1 } | ||
**/ | ||
bool run( const string& db, | ||
BSONObj& cmdObj, | ||
int options, string& errmsg, | ||
BSONObjBuilder& result, | ||
bool fromRepl = false ){ | ||
result.appendAs(cmdObj.firstElement(),"key"); | ||
|
||
int seed = 0; | ||
if (cmdObj.hasField("seed")){ | ||
if (! cmdObj["seed"].isNumber()) { | ||
errmsg += "seed must be a number"; | ||
return false; | ||
} | ||
seed = cmdObj["seed"].numberInt(); | ||
} | ||
result.append( "seed" , seed ); | ||
|
||
result.append( "out" , BSONElementHasher::hash64( cmdObj.firstElement() , seed ) ); | ||
return true; | ||
} | ||
} cmdHashElt; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/* hasher.cpp | ||
* | ||
* Defines a simple hash function class | ||
*/ | ||
|
||
|
||
/** | ||
* Copyright (C) 2012 10gen Inc. | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License, version 3, | ||
* as published by the Free Software Foundation. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include "mongo/db/hasher.h" | ||
#include "mongo/db/jsobj.h" | ||
#include <cstring> | ||
|
||
namespace mongo { | ||
|
||
// Starts a fresh MD5 computation and feeds the bytes of the seed in before any key data. | ||
Hasher::Hasher( HashSeed seed ) : _seed( seed ) { | ||
md5_init( &_md5State ); | ||
addData( &_seed , sizeof( _seed ) ); | ||
} | ||
|
||
void Hasher::addData( const void * keyData , size_t numBytes ) { | ||
md5_append( &_md5State , static_cast< const md5_byte_t * >( keyData ), numBytes ); | ||
} | ||
|
||
// Hands the accumulated MD5 state to md5_finish, which writes the digest into out. | ||
void Hasher::finish( HashDigest out ) { | ||
md5_finish( &_md5State , out ); | ||
} | ||
|
||
long long int BSONElementHasher::hash64( const BSONElement& e , HashSeed seed ){ | ||
scoped_ptr<Hasher> h( HasherFactory::createHasher( seed ) ); | ||
recursiveHash( h.get() , e , false ); | ||
HashDigest d; | ||
h->finish(d); | ||
//HashDigest is actually 16 bytes, but we just get 8 via truncation | ||
// NOTE: assumes little-endian | ||
return *reinterpret_cast< long long int * >( d ); | ||
} | ||
|
||
// Feeds one element into hasher h: first its canonical type, then (optionally) its | ||
// field name, then its value. Numbers are reduced to a 64-bit integer so that every | ||
// numeric type contributes the same bytes for the same value. Elements that may hold | ||
// sub-elements are walked recursively, with field names included for each child. | ||
void BSONElementHasher::recursiveHash( Hasher* h , | ||
const BSONElement& e , | ||
bool includeFieldName ) { | ||
|
||
const int typeTag = e.canonicalType(); | ||
h->addData( &typeTag , sizeof( typeTag ) ); | ||
|
||
if ( includeFieldName ){ | ||
h->addData( e.fieldName() , e.fieldNameSize() ); | ||
} | ||
|
||
//leaf values (no embedded objects or arrays): hash the value and stop | ||
if ( !e.mayEncapsulate() ){ | ||
if ( !e.isNumber() ){ | ||
h->addData( e.value() , e.valuesize() ); | ||
return; | ||
} | ||
//squash numeric types to 64-bit ints; well-defined for troublesome doubles | ||
const long long int squashed = e.safeNumberLong(); | ||
h->addData( &squashed , sizeof( squashed ) ); | ||
return; | ||
} | ||
|
||
//container values: hash any preceding stuff (the code, in the case of codeWscope), | ||
//then each sub-element, finishing with the EOO element. | ||
const bool isCodeWScope = ( e.type() == CodeWScope ); | ||
if ( isCodeWScope ) { | ||
h->addData( e.codeWScopeCode() , e.codeWScopeCodeLen() ); | ||
} | ||
const BSONObj sub = isCodeWScope ? e.codeWScopeObject() : e.embeddedObject(); | ||
for ( BSONObjIterator it( sub ) ; it.moreWithEOO() ; ) { | ||
recursiveHash( h , it.next() , true ); | ||
} | ||
} | ||
|
||
} |
Oops, something went wrong.