Skip to content

Commit

Permalink
Replace STL unordered_map/unordered_set with robin_hood unordered_map/unordered_set (and use 'auto' for some iterator declarations).
Browse files Browse the repository at this point in the history
  • Loading branch information
brianwalenz committed Jan 25, 2021
1 parent c081d85 commit 167215e
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 43 deletions.
4 changes: 4 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ Open Bloom Filter
https://code.google.com/p/bloom/source/browse/trunk/bloom_filter.hpp
Common Public License

Robin_Hood Unordered Map and Set
https://github.com/martinus/robin-hood-hashing
MIT License

COPYRIGHT LICENSE

Copyright © 2015, Battelle National Biodefense Institute (BNBI);
Expand Down
14 changes: 6 additions & 8 deletions src/mash/CommandFind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "kseq.h"
#include <iostream>
#include <set>
#include <unordered_set>
#include "robin_hood.h"
#include "ThreadPool.h"
#include "sketchParameterSetup.h"

Expand Down Expand Up @@ -229,11 +229,9 @@ CommandFind::FindOutput * find(CommandFind::FindInput * data)

void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput * output, bool minusStrand)
{
typedef std::unordered_map < uint32_t, std::set<uint32_t> > PositionsBySequence_umap;

bool verbose = false;

Sketch::Hash_set minHashes;
robin_hood::unordered_set<Sketch::hash_t> minHashes;

const Sketch & sketch = input->sketch;
int kmerSize = sketch.getKmerSize();
Expand Down Expand Up @@ -302,9 +300,9 @@ void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput
// get sorted lists of positions, per reference sequence, that have
// mutual min-hashes with the query
//
PositionsBySequence_umap hits;
//
for ( Sketch::Hash_set::const_iterator i = minHashes.begin(); i != minHashes.end(); i++ )
robin_hood::unordered_map < uint32_t, std::set<uint32_t> > hits;

for ( auto i = minHashes.begin(); i != minHashes.end(); i++ )
{
Sketch::hash_t hash = *i;

Expand All @@ -326,7 +324,7 @@ void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput
}
}

for ( PositionsBySequence_umap::iterator i = hits.begin(); i != hits.end(); i++ )
for ( auto i = hits.begin(); i != hits.end(); i++ )
{
using std::set;

Expand Down
25 changes: 12 additions & 13 deletions src/mash/CommandScreen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <zlib.h>
#include "ThreadPool.h"
#include <math.h>
#include <set>
#include "robin_hood.h"

#ifdef USE_BOOST
#include <boost/math/distributions/binomial.hpp>
Expand All @@ -29,8 +29,6 @@ using std::cout;
using std::endl;
using std::list;
using std::string;
using std::unordered_map;
using std::unordered_set;
using std::vector;

namespace mash {
Expand Down Expand Up @@ -93,8 +91,8 @@ int CommandScreen::run() const
parameters.minHashesPerWindow = sketch.getMinHashesPerWindow();

HashTable hashTable;
unordered_map<uint64_t, std::atomic<uint32_t>> hashCounts;
unordered_map<uint64_t, list<uint32_t> > saturationByIndex;
robin_hood::unordered_map<uint64_t, std::atomic<uint32_t>> hashCounts;
robin_hood::unordered_map<uint64_t, list<uint32_t> > saturationByIndex;

cerr << "Loading " << arguments[0] << "..." << endl;

Expand All @@ -117,7 +115,7 @@ int CommandScreen::run() const

cerr << " " << hashTable.size() << " distinct hashes." << endl;

unordered_set<MinHashHeap *> minHashHeaps;
robin_hood::unordered_set<MinHashHeap *> minHashHeaps;

bool trans = (alphabet == alphabetProtein);

Expand Down Expand Up @@ -289,7 +287,7 @@ int CommandScreen::run() const

MinHashHeap minHashHeap(sketch.getUse64(), sketch.getMinHashesPerWindow());

for ( unordered_set<MinHashHeap *>::const_iterator i = minHashHeaps.begin(); i != minHashHeaps.end(); i++ )
for ( auto i = minHashHeaps.begin(); i != minHashHeaps.end(); i++ )
{
HashList hashList(parameters.use64);

Expand Down Expand Up @@ -337,13 +335,13 @@ int CommandScreen::run() const

memset(shared, 0, sizeof(uint64_t) * sketch.getReferenceCount());

for ( unordered_map<uint64_t, std::atomic<uint32_t> >::const_iterator i = hashCounts.begin(); i != hashCounts.end(); i++ )
for ( auto i = hashCounts.begin(); i != hashCounts.end(); i++ )
{
if ( i->second >= minCov )
{
const unordered_set<uint64_t> & indeces = hashTable.at(i->first);
const auto & indeces = hashTable.at(i->first);

for ( unordered_set<uint64_t>::const_iterator k = indeces.begin(); k != indeces.end(); k++ )
for ( auto k = indeces.begin(); k != indeces.end(); k++ )
{
shared[*k]++;
depths[*k].push_back(i->second);
Expand Down Expand Up @@ -381,12 +379,12 @@ int CommandScreen::run() const
continue;
}

const unordered_set<uint64_t> & indeces = i->second;
const auto & indeces = i->second;
double maxScore = 0;
uint64_t maxLength = 0;
uint64_t maxIndex;

for ( unordered_set<uint64_t>::const_iterator k = indeces.begin(); k != indeces.end(); k++ )
for ( auto k = indeces.begin(); k != indeces.end(); k++ )
{
if ( scores[*k] > maxScore )
{
Expand Down Expand Up @@ -456,6 +454,7 @@ int CommandScreen::run() const
}
}

delete [] depths;
delete [] shared;

return 0;
Expand Down Expand Up @@ -809,7 +808,7 @@ char aaFromCodon(const char * codon)
return aa;//(aa == '*') ? 0 : aa;
}

void useThreadOutput(CommandScreen::HashOutput * output, unordered_set<MinHashHeap *> & minHashHeaps)
void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps)
{
minHashHeaps.emplace(output->minHashHeap);
delete output;
Expand Down
17 changes: 8 additions & 9 deletions src/mash/CommandScreen.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
#include <string>
#include <vector>
#include <atomic>
#include <unordered_set>
#include <unordered_map>
#include "robin_hood.h"
#include "MinHashHeap.h"

namespace mash {
Expand All @@ -24,13 +23,13 @@ struct HashTableEntry
HashTableEntry() : count(0) {}

uint32_t count;
std::unordered_set<uint64_t> indices;
robin_hood::unordered_set<uint64_t> indices;
};

//typedef std::unordered_map< uint64_t, HashTableEntry > HashTable;
typedef std::unordered_map< uint64_t, std::unordered_set<uint64_t> > HashTable;
//typedef robin_hood::unordered_map< uint64_t, HashTableEntry > HashTable;
typedef robin_hood::unordered_map< uint64_t, robin_hood::unordered_set<uint64_t> > HashTable;

static const std::unordered_map< std::string, char > codons =
static const robin_hood::unordered_map< std::string, char > codons =
{
{"AAA", 'K'},
{"AAC", 'N'},
Expand Down Expand Up @@ -104,7 +103,7 @@ class CommandScreen : public Command

struct HashInput
{
HashInput(std::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew)
HashInput(robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew)
:
hashCounts(hashCountsNew),
minHashHeap(minHashHeapNew),
Expand All @@ -129,7 +128,7 @@ class CommandScreen : public Command
bool trans;

Sketch::Parameters parameters;
std::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts;
robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts;
MinHashHeap * minHashHeap;
};

Expand Down Expand Up @@ -165,7 +164,7 @@ double estimateIdentity(uint64_t common, uint64_t denom, int kmerSize, double km
CommandScreen::HashOutput * hashSequence(CommandScreen::HashInput * input);
double pValueWithin(uint64_t x, uint64_t setSize, double kmerSpace, uint64_t sketchSize);
void translate(const char * src, char * dst, uint64_t len);
void useThreadOutput(CommandScreen::HashOutput * output, std::unordered_set<MinHashHeap *> & minHashHeaps);
void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps);

} // namespace mash

Expand Down
8 changes: 4 additions & 4 deletions src/mash/HashSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@ void HashSet::toCounts(std::vector<uint32_t> & counts) const
{
if ( use64 )
{
for ( std::unordered_map<hash64_t, uint32_t>::const_iterator i = hashes64.begin(); i != hashes64.end(); i++ )
for ( auto i = hashes64.begin(); i != hashes64.end(); i++ )
{
counts.push_back(i->second);
}
}
else
{
for ( std::unordered_map<hash32_t, uint32_t>::const_iterator i = hashes32.begin(); i != hashes32.end(); i++ )
for ( auto i = hashes32.begin(); i != hashes32.end(); i++ )
{
counts.push_back(i->second);
}
Expand All @@ -96,14 +96,14 @@ void HashSet::toHashList(HashList & hashList) const
{
if ( use64 )
{
for ( std::unordered_map<hash64_t, uint32_t>::const_iterator i = hashes64.begin(); i != hashes64.end(); i++ )
for ( auto i = hashes64.begin(); i != hashes64.end(); i++ )
{
hashList.push_back64(i->first);
}
}
else
{
for ( std::unordered_map<hash32_t, uint32_t>::const_iterator i = hashes32.begin(); i != hashes32.end(); i++ )
for ( auto i = hashes32.begin(); i != hashes32.end(); i++ )
{
hashList.push_back32(i->first);
}
Expand Down
6 changes: 3 additions & 3 deletions src/mash/HashSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#define HashSet_h

#include "HashList.h"
#include <unordered_map>
#include "robin_hood.h"
#include <vector>

class HashSet
Expand All @@ -28,8 +28,8 @@ class HashSet
private:

bool use64;
std::unordered_map<hash32_t, uint32_t> hashes32;
std::unordered_map<hash64_t, uint32_t> hashes64;
robin_hood::unordered_map<hash32_t, uint32_t> hashes32;
robin_hood::unordered_map<hash64_t, uint32_t> hashes64;
};

#endif
9 changes: 3 additions & 6 deletions src/mash/Sketch.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
#define Sketch_h

#include "mash/capnp/MinHash.capnp.h"
#include <unordered_map>
#include <unordered_set>
#include "robin_hood.h"
#include <map>
#include <vector>
#include <string>
Expand Down Expand Up @@ -126,8 +125,6 @@ class Sketch
uint32_t position;
};

typedef std::unordered_set<hash_t> Hash_set;

struct Reference
{
// no sequence for now
Expand Down Expand Up @@ -215,9 +212,9 @@ class Sketch
void createIndex();

std::vector<Reference> references;
std::unordered_map<std::string, int> referenceIndecesById;
robin_hood::unordered_map<std::string, int> referenceIndecesById;
std::vector<std::vector<PositionHash>> positionHashesByReference;
std::unordered_map<hash_t, std::vector<Locus>> lociByHash;
robin_hood::unordered_map<hash_t, std::vector<Locus>> lociByHash;

Parameters parameters;
double kmerSpace;
Expand Down
1 change: 1 addition & 0 deletions src/mash/robin_hood.h

0 comments on commit 167215e

Please sign in to comment.