
Commit

Merge pull request #28 from DominikHorn/feature/update-dependencies-and-fix-build

Update dependencies and improve local development experience
andreaskipf committed Jan 19, 2022
2 parents 9011f3c + e504722 commit d0a9e1b
Showing 7 changed files with 54 additions and 38 deletions.
13 changes: 8 additions & 5 deletions CMakeLists.txt
@@ -5,11 +5,14 @@ if(UNIX AND NOT APPLE)
set(LINUX TRUE)
endif()

-if (${LINUX})
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -Wall -Wfatal-errors -march=native -fopenmp")
-else ()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -Wall -Wfatal-errors -march=native")
-endif ()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -Wall -Wfatal-errors -march=native")
+
+# Enable OpenMP if available
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag(-fopenmp HAS_OPENMP)
+if (HAS_OPENMP)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
+endif()

set(CMAKE_CXX_STANDARD 17)

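
The guarded flag matters on platforms where the compiler rejects -fopenmp out of the box (for example Apple's Clang, which does not ship OpenMP support by default). Code that should build either way usually keys on the _OPENMP macro, which the compiler defines only when OpenMP is enabled. The following C++ sketch is illustrative only and not taken from this repository:

// Illustrative only: _OPENMP is defined by the compiler only when -fopenmp
// (added above when HAS_OPENMP is true) is in effect, so the same source
// builds with or without OpenMP support.
#include <cstdio>
#ifdef _OPENMP
#include <omp.h>
#endif

int main() {
  long long sum = 0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sum)
#endif
  for (int i = 0; i < 1000000; ++i) sum += i;
  std::printf("sum = %lld\n", sum);
  return 0;
}
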
10 changes: 5 additions & 5 deletions README.md
@@ -33,11 +33,11 @@ We provide a number of scripts to automate things. Each is located in the `scrip

## Running the benchmark

-* `./scripts/download.sh` downloads and stores required data from the Internet
-* `./scripts/build_rmis.sh` compiles and builds the RMIs for each dataset
-* `./scripts/download_rmis.sh` will download pre-built RMIs instead, which may be faster. You'll need to run `build_rmis.sh` if you want to measure build times on your platform.
-* `./scripts/prepare.sh` constructs query workloads and compiles the benchmark
-* `./scripts/execute.sh` executes the benchmark on each workload, storing the results in `results`. You can use the `-c` flag to output a .csv file of results rather than a .txt.
+- `./scripts/download.sh` downloads and stores required data from the Internet
+- `./scripts/build_rmis.sh` compiles and builds the RMIs for each dataset. If you run into the error message `error: no override and no default toolchain set`, try running `rustup install stable`.
+- `./scripts/download_rmis.sh` will download pre-built RMIs instead, which may be faster. You'll need to run `build_rmis.sh` if you want to measure build times on your platform.
+- `./scripts/prepare.sh` constructs query workloads and compiles the benchmark
+- `./scripts/execute.sh` executes the benchmark on each workload, storing the results in `results`. You can use the `-c` flag to output a .csv file of results rather than a .txt.

Build times can be long, as we make aggressive use of templates to ensure we do not accidentally measure vtable lookup time. For development, this can be annoying: you can set `USE_FAST_MODE` in `config.h` to disable some features and get a faster build time.
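
For illustration, here is a hypothetical sketch of the kind of compile-time switch `USE_FAST_MODE` describes; the actual contents of `config.h` and the benchmark wiring are not shown in this diff and may differ:

// Hypothetical sketch only: a config.h-style switch that drops most template
// instantiations from development builds. Names below are illustrative.
#include <cstdio>

// #define USE_FAST_MODE  // set in config.h for faster development builds

template <int SizeScale>
void run_benchmark_variant() { std::printf("running variant %d\n", SizeScale); }

int main() {
#ifdef USE_FAST_MODE
  run_benchmark_variant<1>();   // a single instantiation keeps compile times low
#else
  run_benchmark_variant<1>();   // the full sweep instantiates many variants,
  run_benchmark_variant<5>();   // which is what makes full builds slow
  run_benchmark_variant<10>();
#endif
  return 0;
}
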

2 changes: 1 addition & 1 deletion competitors/CHT
Submodule CHT updated 1 file
+1 −1 include/cht/builder.h
30 changes: 30 additions & 0 deletions competitors/stanford_hash.h
@@ -80,6 +80,7 @@ class CuckooHashMap {
uint32_t i1 = alt_mod(hash, num_buckets_);
Bucket* b1 = &buckets_[i1];

+#ifdef __AVX__
__m256i vkey = _mm256_set1_epi32(key);
__m256i vbucket = _mm256_load_si256((const __m256i*)&b1->keys);
__m256i cmp = _mm256_cmpeq_epi32(vkey, vbucket);
@@ -88,20 +89,31 @@ CuckooHashMap {
int index = __builtin_ctz(mask) / 4;
return {true, b1->values[index]};
}
+#else
+for (size_t i = 0; i < BUCKET_SIZE; i++)
+if (b1->keys[i] == key)
+return {true, b1->values[i]};
+#endif

uint32_t i2 = alt_mod(hash32(key ^ hash), num_buckets_);
if (i2 == i1) {
i2 = (i1 == num_buckets_ - 1) ? 0 : i1 + 1;
}
Bucket* b2 = &buckets_[i2];

+#ifdef __AVX__
vbucket = _mm256_load_si256((const __m256i*)&b2->keys);
cmp = _mm256_cmpeq_epi32(vkey, vbucket);
mask = _mm256_movemask_epi8(cmp);
if (mask != 0) {
int index = __builtin_ctz(mask) / 4;
return {true, b2->values[index]};
}
+#else
+for (size_t i = 0; i < BUCKET_SIZE; i++)
+if (b2->keys[i] == key)
+return {true, b2->values[i]};
+#endif

return {false, uninitialized_value_};
}
@@ -127,6 +139,7 @@ class CuckooHashMap {
Bucket* b2 = &buckets_[i2];

// Update old value if the key is already in the table
+#ifdef __AVX__
__m256i vkey = _mm256_set1_epi32(key);
__m256i vbucket = _mm256_load_si256((const __m256i*)&b1->keys);
__m256i cmp = _mm256_cmpeq_epi32(vkey, vbucket);
@@ -136,7 +149,16 @@
b1->values[index] = value;
return;
}
+#else
+for (size_t i = 0; i < BUCKET_SIZE; i++) {
+if (b1->keys[i] == key) {
+b1->values[i] = value;
+return;
+}
+}
+#endif

+#ifdef __AVX__
vbucket = _mm256_load_si256((const __m256i*)&b2->keys);
cmp = _mm256_cmpeq_epi32(vkey, vbucket);
mask = _mm256_movemask_epi8(cmp);
@@ -145,6 +167,14 @@ class CuckooHashMap {
b2->values[index] = value;
return;
}
+#else
+for (size_t i = 0; i < BUCKET_SIZE; i++) {
+if (b2->keys[i] == key) {
+b2->values[i] = value;
+return;
+}
+}
+#endif

if (!is_reinsert) {
size_++;
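
For reference, here is a standalone sketch of the bucket-probe pattern added above in stanford_hash.h: broadcast the key, compare it against all eight 32-bit slots at once, and turn the resulting byte mask into a slot index with `__builtin_ctz(mask) / 4`. Note that the 256-bit integer compare and movemask intrinsics are AVX2 instructions, so this sketch guards on `__AVX2__`; the bucket contents and `main()` are illustrative only.

#include <cstdint>
#include <cstdio>
#ifdef __AVX2__
#include <immintrin.h>
#endif

// Returns the index of `key` in an 8-slot bucket, or -1 if absent.
static int probe_bucket(const uint32_t* keys /* 32-byte aligned */, uint32_t key) {
#ifdef __AVX2__
  __m256i vkey = _mm256_set1_epi32(key);                      // broadcast probe key
  __m256i vbucket = _mm256_load_si256((const __m256i*)keys);  // load all 8 slots
  __m256i cmp = _mm256_cmpeq_epi32(vkey, vbucket);            // 0xFFFFFFFF per matching lane
  int mask = _mm256_movemask_epi8(cmp);                       // 4 set bits per matching lane
  if (mask != 0) return __builtin_ctz(mask) / 4;              // byte position / 4 = slot index
  return -1;
#else
  for (int i = 0; i < 8; ++i)                                 // scalar fallback, as in the diff
    if (keys[i] == key) return i;
  return -1;
#endif
}

int main() {
  alignas(32) uint32_t bucket[8] = {7, 13, 42, 99, 0, 0, 0, 0};
  std::printf("slot of 42: %d\n", probe_bucket(bucket, 42));  // prints 2
  return 0;
}
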
2 changes: 1 addition & 1 deletion competitors/ts
Submodule ts updated from 2e814a to 862e3c
33 changes: 8 additions & 25 deletions competitors/ts.h
@@ -18,9 +18,7 @@ class TS : public Competitor {
min = data.front().key;
max = data.back().key;
}
-ts::Builder<KeyType> tsb(min, max, config_.spline_max_error,
-config_.num_bins, config_.tree_max_error,
-/*single_pass=*/false, /*use_cache=*/false);
+ts::Builder<KeyType> tsb(min, max, spline_max_error_);
for (const auto& key_and_value : data) tsb.AddKey(key_and_value.key);
ts_ = tsb.Finalize();
});
@@ -49,44 +47,29 @@ class TS : public Competitor {
int variant() const { return size_scale; }

private:
-struct TSConfig {
-size_t spline_max_error;
-size_t num_bins;
-size_t tree_max_error;
-};

bool SetParameters(const std::string& dataset) {
assert(size_scale >= 1 && size_scale <= 10);
-std::vector<TSConfig> configs;
+std::vector<size_t> configs;

if (dataset == "books_200M_uint64") {
-configs = {{512, 128, 2}, {256, 256, 2}, {128, 256, 16}, {64, 1024, 4},
-{32, 1024, 16}, {16, 1024, 16}, {16, 1024, 8}, {4, 256, 8},
-{2, 512, 8}, {2, 1024, 8}};
+configs = {500, 200, 150, 60, 50, 25, 25, 4, 2, 1};
} else if (dataset == "fb_200M_uint64") {
-configs = {{1024, 1024, 16}, {1024, 1024, 16}, {1024, 512, 8},
-{256, 512, 8}, {128, 512, 8}, {16, 128, 16},
-{16, 1024, 16}, {8, 1024, 16}, {4, 256, 16},
-{2, 256, 16}};
+configs = {225, 225, 225, 225, 100, 32, 16, 8, 8, 2};
} else if (dataset == "osm_cellids_200M_uint64") {
-configs = {{1024, 32, 16}, {1024, 32, 16}, {512, 32, 16}, {128, 128, 16},
-{64, 128, 16}, {16, 64, 16}, {8, 32, 16}, {8, 256, 16},
-{2, 256, 16}, {2, 512, 16}};
+configs = {150, 150, 150, 150, 80, 25, 8, 8, 4, 1};
} else if (dataset == "wiki_ts_200M_uint64") {
-configs = {{1024, 128, 4}, {128, 128, 8}, {64, 256, 8}, {32, 1024, 8},
-{16, 1024, 8}, {16, 1024, 4}, {4, 128, 16}, {8, 128, 2},
-{2, 512, 8}, {2, 128, 2}};
+configs = {175, 175, 90, 32, 25, 16, 16, 4, 2, 1};
} else {
// No config.
return false;
}

-config_ = configs[size_scale - 1];
+spline_max_error_ = configs[size_scale - 1];
parameters_set_ = true;
return true;
}

ts::TrieSpline<KeyType> ts_;
-TSConfig config_;
+size_t spline_max_error_;
bool parameters_set_ = false;
};
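
The change above collapses the per-dataset tuning from {spline_max_error, num_bins, tree_max_error} triples to a single spline error bound per size_scale. Below is a minimal build sketch that uses only the builder calls visible in this diff; the include path and the key set are assumptions for illustration:

#include <cstdint>
#include <vector>
#include "ts/builder.h"  // assumed include path for the TrieSpline builder

int main() {
  // Per-size_scale error bounds, as in the books_200M_uint64 branch above.
  std::vector<size_t> configs = {500, 200, 150, 60, 50, 25, 25, 4, 2, 1};
  size_t size_scale = 3;                              // valid range: 1..10
  size_t spline_max_error = configs[size_scale - 1];  // -> 150

  std::vector<uint64_t> keys = {1, 5, 9, 42, 77, 100};  // must be sorted
  ts::Builder<uint64_t> tsb(keys.front(), keys.back(), spline_max_error);
  for (uint64_t key : keys) tsb.AddKey(key);
  auto ts = tsb.Finalize();  // yields the ts::TrieSpline index
  (void)ts;
  return 0;
}
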
