Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Prerequisites
*.d

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod
*.smod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app

**/.vscode

2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ bench: $(BENCHDIR)/main.cpp $(SRCS)
$(CXX) $(BENCHDIR)/main.cpp $(CXXFLAGS) -march=icelake-client -O3 -o benchexe

clean:
rm -f $(TESTDIR)/*.o testexe benchexe
rm -f $(TESTDIR)/*.o testexe benchexe
2 changes: 1 addition & 1 deletion benchmarks/bench-tgl.out
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@
| uniform random | int16_t | 10000 | 84703 | 1547726 | 18.3 |
| uniform random | int16_t | 100000 | 1442726 | 19705242 | 13.7 |
| uniform random | int16_t | 1000000 | 20210224 | 212137465 | 10.5 |
|-----------------+-------------+------------+-----------------+-----------+----------|
|-----------------+-------------+------------+-----------------+-----------+----------|
54 changes: 54 additions & 0 deletions benchmarks/bench.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,19 @@

#include "avx512-16bit-qsort.hpp"
#include "avx512-32bit-qsort.hpp"
#include "avx512-64bit-keyvaluesort.hpp"
#include "avx512-64bit-qsort.hpp"
#include <iostream>
#include <numeric>
#include <tuple>
#include <vector>

/*
 * Plain key/value record: one key of type K stored next to its value of
 * type V. The members are public and carry no initializers, so the type
 * can be filled member-by-member or brace-initialized as {key, value}.
 */
template <class K, class V>
struct sorted_t {
    K key; // field that elements are ordered by
    V value; // payload that travels with the key
};

static inline uint64_t cycles_start(void)
{
unsigned a, d;
Expand Down Expand Up @@ -72,3 +79,50 @@ std::tuple<uint64_t, uint64_t> bench_sort(const std::vector<T> arr,
/ lastfew;
return std::make_tuple(avx_sort, std_sort);
}

template <typename K, typename V = uint64_t>
std::tuple<uint64_t, uint64_t>
bench_sort_kv(const std::vector<K> keys,
const std::vector<V> values,
const std::vector<sorted_t<K, V>> sortedaar,
const uint64_t iters,
const uint64_t lastfew)
{

std::vector<K> keys_bckup = keys;
std::vector<V> values_bckup = values;
std::vector<sorted_t<K, V>> sortedaar_bckup = sortedaar;

std::vector<uint64_t> runtimes1, runtimes2;
uint64_t start(0), end(0);
for (uint64_t ii = 0; ii < iters; ++ii) {
start = cycles_start();
avx512_qsort_kv<K>(
keys_bckup.data(), values_bckup.data(), keys_bckup.size());
end = cycles_end();
runtimes1.emplace_back(end - start);
keys_bckup = keys;
values_bckup = values;
}
uint64_t avx_sort = std::accumulate(runtimes1.end() - lastfew,
runtimes1.end(),
(uint64_t)0)
/ lastfew;

for (uint64_t ii = 0; ii < iters; ++ii) {
start = cycles_start();
std::sort(sortedaar_bckup.begin(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if std::sort is a fair comparison. The way the key-values are arranged in memory for std::vector<sorted_t<K, V>> is very different compared to two distinct vectors, std::vector<K> and std::vector<V>. I would think this can alter performance significantly, because you can no longer load a contiguous chunk of memory into ZMM registers. Does Oceanbase have two separate vectors for keys and values? (I assume they do, because that's the way you implemented the sort.) How does Oceanbase currently sort them?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ObStoreRow is the base unit that is sorted in Oceanbase. The key and the value are both of the ObObject class type inside the ObStoreRow. There is no separate vector for keys and values. The ObStoreRow is passed to std::sort in Oceanbase; std::sort first gets the key type from the ObObject and then does the sort work.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To enable the AVX512 sort, we need to extract the key from the ObStoreRow and then pass the key, with the ObStoreRow as the value, to an avx512_sort(key, value)-like API. That's why we need the key-value interface.

sortedaar_bckup.end(),
[](sorted_t<K, V> a, sorted_t<K, V> b) {
return a.key < b.key;
});
end = cycles_end();
runtimes2.emplace_back(end - start);
sortedaar_bckup = sortedaar;
}
uint64_t std_sort = std::accumulate(runtimes2.end() - lastfew,
runtimes2.end(),
(uint64_t)0)
/ lastfew;
return std::make_tuple(avx_sort, std_sort);
}
69 changes: 68 additions & 1 deletion benchmarks/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ template <typename T1,
void printLine(const char fill, T1 t1, T2 t2, T3 t3, T4 t4, T5 t5, T6 t6, T7 t7)
{
std::cout << std::left << std::setw(3) << std::setfill(fill) << " | ";
std::cout << std::left << std::setw(15) << std::setfill(fill) << t1
std::cout << std::left << std::setw(18) << std::setfill(fill) << t1
<< " | ";
std::cout << std::left << std::setw(13) << std::setfill(fill) << t2
<< " | ";
Expand Down Expand Up @@ -82,6 +82,60 @@ void run_bench(const std::string datatype)
std::cout << std::setprecision(ss);
}

/*
 * Runs the key-value sort benchmark for key type K and value type V and
 * prints one table row per array size (10000, 100000 and 1000000).
 *
 * datatype selects how the keys are generated; it is matched by substring:
 *   "kv_uniform" - keys from get_uniform_rand_array<K>(size)
 *   "kv_reverse" - keys size, size-1, ..., 1
 *   "kv_ordered" - keys 0, 1, ..., size-1
 *   "kv_limited" - keys from get_uniform_rand_array<K>(size, 10, 0)
 * Any other string prints a "Skipping" message and returns.
 *
 * Values always come from get_uniform_rand_array<V>(size). Each row reports
 * the two timings returned by bench_sort_kv and their ratio (second / first).
 *
 * The output precision of std::cout is set to 1 while rows are printed and
 * is put back to its previous value on every exit path.
 */
template <typename K, typename V = uint64_t>
void run_bench_kv(const std::string datatype)
{
    const std::streamsize ss = std::cout.precision();
    std::cout << std::fixed;
    std::cout << std::setprecision(1);
    std::vector<int> array_sizes = {10000, 100000, 1000000};
    for (auto size : array_sizes) {
        std::vector<K> keys;
        std::vector<V> values;
        std::vector<sorted_t<K, V>> sortedarr;

        if (datatype.find("kv_uniform") != std::string::npos) {
            keys = get_uniform_rand_array<K>(size);
        }
        else if (datatype.find("kv_reverse") != std::string::npos) {
            keys.reserve(size);
            for (int ii = 0; ii < size; ++ii) {
                keys.emplace_back((K)(size - ii));
            }
        }
        else if (datatype.find("kv_ordered") != std::string::npos) {
            keys.reserve(size);
            for (int ii = 0; ii < size; ++ii) {
                keys.emplace_back((ii));
            }
        }
        else if (datatype.find("kv_limited") != std::string::npos) {
            keys = get_uniform_rand_array<K>(size, (K)10, (K)0);
        }
        else {
            std::cout << "Skipping unrecognized array type: " << datatype
                      << std::endl;
            // Restore the caller's precision here too; returning without it
            // would leave std::cout at precision 1.
            std::cout << std::setprecision(ss);
            return;
        }
        values = get_uniform_rand_array<V>(size);

        // Build the array-of-structs copy of the same data; it is the input
        // handed to bench_sort_kv alongside the separate key/value vectors.
        sortedarr.reserve(keys.size());
        for (size_t i = 0; i < keys.size(); i++) {
            sorted_t<K, V> tmp_s;
            tmp_s.key = keys[i];
            tmp_s.value = values[i];
            sortedarr.emplace_back(tmp_s);
        }

        // 20 iterations, averaging over the last 10.
        auto out = bench_sort_kv(keys, values, sortedarr, 20, 10);
        printLine(' ',
                  datatype,
                  typeid(K).name(),
                  sizeof(K),
                  size,
                  std::get<0>(out),
                  std::get<1>(out),
                  (float)std::get<1>(out) / std::get<0>(out));
    }
    std::cout << std::setprecision(ss);
}
void bench_all(const std::string datatype)
{
if (cpu_has_avx512bw()) {
Expand All @@ -97,7 +151,15 @@ void bench_all(const std::string datatype)
}
}
}
/*
 * Runs the key-value benchmark for the given datatype string with each of
 * the key types uint64_t, int64_t and double, in that order. Does nothing
 * when cpu_has_avx512bw() reports false.
 */
void bench_all_kv(const std::string datatype)
{
    if (!cpu_has_avx512bw()) { return; }

    run_bench_kv<uint64_t>(datatype);
    run_bench_kv<int64_t>(datatype);
    run_bench_kv<double>(datatype);
}
int main(/*int argc, char *argv[]*/)
{
printLine(' ',
Expand All @@ -113,6 +175,11 @@ int main(/*int argc, char *argv[]*/)
bench_all("reverse");
bench_all("ordered");
bench_all("limitedrange");

bench_all_kv("kv_uniform random");
bench_all_kv("kv_reverse");
bench_all_kv("kv_ordered");
bench_all_kv("kv_limitedrange");
printLine('-', "", "", "", "", "", "", "");
return 0;
}
Loading