Skip to content

Commit

Permalink
Merge branch 'f-977-hash-table-size-m'. Closes krlmlr#17. Closes #977.
Browse files Browse the repository at this point in the history
- Using larger hash tables gives slightly better performance for `n_distinct()` and ordering of character vectors (#977).
  • Loading branch information
krlmlr committed Sep 29, 2016
2 parents 62ff44d + 3506a8f commit b0fb5b0
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 7 deletions.
6 changes: 4 additions & 2 deletions inst/include/dplyr/Result/Count_Distinct.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ namespace dplyr {
typedef dplyr_hash_set<int, Hash, Pred > Set;

Count_Distinct(Visitor v_):
-v(v_), set(1024, Hash(v), Pred(v))
+v(v_), set(0, Hash(v), Pred(v))
{}

inline int process_chunk(const SlicingIndex& indices) {
set.clear();
+set.rehash(indices.size());
int n = indices.size();
for (int i=0; i<n; i++) {
set.insert(indices[i]);
Expand All @@ -42,11 +43,12 @@ namespace dplyr {
typedef dplyr_hash_set<int, Hash, Pred > Set;

Count_Distinct_Narm(Visitor v_):
-v(v_), set(1024, Hash(v), Pred(v))
+v(v_), set(0, Hash(v), Pred(v))
{}

inline int process_chunk(const SlicingIndex& indices) {
set.clear();
+set.rehash(indices.size());
int n = indices.size();
for (int i=0; i<n; i++) {
int index=indices[i];
Expand Down
7 changes: 3 additions & 4 deletions inst/include/dplyr/Result/In.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@ namespace dplyr {
typedef typename Rcpp::Vector<RTYPE> Vec;
typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE;

-In(Vec data_, Vec table_) :
+In(Vec data_, const Vec& table_) :
data(data_),
-table(table_),
-set(table.begin(), table.end())
+set(table_.begin(), table_.end())
{}

void process_slice(LogicalVector& out, const SlicingIndex& index, const SlicingIndex& out_index) {
Expand All @@ -32,7 +31,7 @@ namespace dplyr {
}

private:
-Vec data, table;
+Vec data;
dplyr_hash_set<STORAGE> set;

};
Expand Down
2 changes: 1 addition & 1 deletion src/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ namespace dplyr {

CharacterVectorOrderer::CharacterVectorOrderer(const CharacterVector& data_) :
data(data_),
-set(),
+set(data.size()),
orders(no_init(data.size()))
{
int n = data.size();
Expand Down

0 comments on commit b0fb5b0

Please sign in to comment.