Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split test=Speed into SpeedBulk and SpeedSmall and report weighted average for Small key speed test #293

Merged
merged 4 commits into from
Sep 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ elseif ((CMAKE_SYSTEM_PROCESSOR STREQUAL "arm")
add_definitions(-DHAVE_NEON)
endif()

if ((CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") # GNU and others
OR (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")) # macOS
set(SYSTEM_PROCESSOR_IS_AARCH64 TRUE)
else()
set(SYSTEM_PROCESSOR_IS_AARCH64 FALSE)
endif()

# TODO: rather parse `$CC -march=native -dM -E - <<< ''` [GH #10]
if(NOT CMAKE_CROSSCOMPILING)
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
Expand Down Expand Up @@ -324,7 +331,7 @@ else(AVX512VL_TRUE)
endif(AVX512VL_TRUE)

if(CMAKE_CROSSCOMPILING)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
if(SYSTEM_PROCESSOR_IS_AARCH64)
set(SSE2_FOUND true)
set(SSE42_FOUND true)
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
Expand Down Expand Up @@ -470,13 +477,13 @@ if(SSE42_FOUND)
list(APPEND X86_64ONLY_SRC metrohash/metrohash64crc.cpp
metrohash/metrohash128crc.cpp)
endif()
if(NOT (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") AND NOT MSVC)
if(NOT SYSTEM_PROCESSOR_IS_AARCH64 AND NOT MSVC)
list(APPEND X86_64ONLY_SRC crc32_hw1.c)
endif()
else()
message(STATUS "32bit only: CMAKE_SIZEOF_VOID_P=${CMAKE_SIZEOF_VOID_P}")
endif()
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
if(SYSTEM_PROCESSOR_IS_AARCH64)
set(SIPHASH_SRC siphash.c)
else()
set(SIPHASH_SRC siphash_ssse3.c)
Expand Down Expand Up @@ -574,7 +581,7 @@ endif()
set(BLAKE3_SRC blake3/blake3.c blake3/blake3_dispatch.c
blake3/blake3_portable.c)
if(SSE42_FOUND)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
if(SYSTEM_PROCESSOR_IS_AARCH64)
set(BLAKE3_SRC ${BLAKE3_SRC} blake3/blake3_neon.c)
else()
set(BLAKE3_SRC ${BLAKE3_SRC} blake3/blake3_sse41.c)
Expand Down Expand Up @@ -618,7 +625,7 @@ if(CMAKE_CROSSCOMPILING)
unset(PMPML_SRC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCROSSCOMPILING")
endif()
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
if(SYSTEM_PROCESSOR_IS_AARCH64)
#HAVE_SSE42
unset(PMPML_SRC)
endif()
Expand Down
16 changes: 13 additions & 3 deletions Platform.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
#include "Platform.h"

#include <stdio.h>
#include <assert.h>

void testRDTSC ( void )
// Read an integer setting from the environment variable `name`.
//
// The value is parsed by strtol() with base 0, so decimal, 0x-prefixed hex and
// 0-prefixed octal are accepted.  Returns:
//   - defval when the variable is unset, empty, not a number, or followed by
//     trailing garbage;
//   - otherwise the parsed value clamped into [minval, maxval].
// Requires minval <= defval <= maxval (checked by assert).
long getenvlong(const char *name, long minval, long defval, long maxval)
{
  assert(minval <= defval && defval <= maxval);
  const char *s = getenv(name);
  if (!s)
    return defval;
  char *tail;
  long l = strtol(s, &tail, 0);
  // tail == s means strtol() converted nothing (empty or blank string); without
  // this check such input would be read as 0 and then clamped to minval.
  if (tail == s || *tail)
    return defval;
  if (l < minval) l = minval;
  if (l > maxval) l = maxval;
  return l;
}

#if defined(_WIN32)
Expand Down
9 changes: 8 additions & 1 deletion Platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ void SetThreadAffinity ( std::thread &t, int cpu );
# endif
#endif
void SetAffinity ( int cpu );
long getenvlong(const char *name, long minval, long defval, long maxval);

// That's not UINT64_MAX as it's converted to int64_t sometimes.
constexpr uint64_t timer_inf = INT64_MAX;
Expand Down Expand Up @@ -156,6 +157,12 @@ inline uint64_t rotr64 ( uint64_t x, int8_t r )
# undef CLOCK_MONOTONIC_FASTEST
#endif

// Clock id for readers that can tolerate coarse resolution: CLOCK_MONOTONIC_COARSE
// when the system headers define it, otherwise the same id as CLOCK_MONOTONIC_FASTEST.
#ifdef CLOCK_MONOTONIC_COARSE
# define CLOCK_MNTCOARSE_FASTEST CLOCK_MONOTONIC_COARSE
#else
# define CLOCK_MNTCOARSE_FASTEST CLOCK_MONOTONIC_FASTEST
#endif

__inline__ uint64_t timeofday()
{
#ifdef CLOCK_MONOTONIC_FASTEST
Expand Down Expand Up @@ -230,7 +237,7 @@ __inline__ uint64_t timer_mips()
// Unfortunately, 32-bit counter overflows in a few seconds, so wall clock timestamp
// has to be embedded into the timer value. Hopefully, clock_gettime call is VDSO...
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC_FASTEST, &ts);
clock_gettime(CLOCK_MNTCOARSE_FASTEST, &ts);
const uint32_t s28 = ts.tv_sec & (UINT32_MAX >> 4);
return uint64_t(scale) << 60 | (uint64_t(s28) << 32) | cntr;
}
Expand Down
108 changes: 106 additions & 2 deletions SpeedTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@
#include <math.h> // for sqrt
#include <algorithm> // for sort, min
#include <string>
#include <sstream>

#include <unordered_map>
#include <parallel_hashmap/phmap.h>
#include <functional>

// COUNT_OF(x): number of elements in the built-in array x.
// The quotient sizeof(x)/sizeof(0[x]) is the element count; it is then divided by
// !(sizeof(x) % sizeof(0[x])), which is 1 when sizeof(x) is an exact multiple of the
// element size and 0 (division by zero) otherwise.  `0[x]` is the same as `x[0]` for
// arrays and pointers, but is ill-formed for class types with an overloaded operator[].
// NOTE(review): both tricks look intended to reject non-array arguments; a pointer whose
// size is a multiple of its pointee size still slips through.
#define COUNT_OF(x) ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
// ARRAY_END(x): pointer one past the last element of the built-in array x.
#define ARRAY_END(x) (&(x)[COUNT_OF(x)])

typedef std::unordered_map<std::string, int,
std::function<size_t (const std::string &key)>> std_hashmap;
typedef phmap::flat_hash_map<std::string, int,
Expand Down Expand Up @@ -240,7 +244,7 @@ double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int block

double t;

if(blocksize < 100)
if(blocksize <= TIMEHASH_SMALL_LEN_MAX)
{
t = (double)timehash_small(hash,block,blocksize,itrial);
}
Expand Down Expand Up @@ -305,6 +309,107 @@ double TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bo
return cycles;
}

// Print the unweighted mean of cph[minkey..maxkey] (both ends inclusive), i.e. every
// key length in the range counts equally.  cph is indexed by key length.
static void ReportAverage ( const std::vector<double>& cph, int minkey, int maxkey )
{
  const int nkeys = maxkey - minkey + 1;
  double total = 0;
  int key = minkey;
  while (key <= maxkey) {
    total += cph[key];
    key++;
  }
  printf("Average %9.3f cycles/hash\n", total / nkeys);
}

// Print the mean of cph[minkey..maxkey] (inclusive) weighted by weights[], together with
// the share of the whole dataset's weight that falls inside that key range.
//
//   cph      cycles/hash indexed by key length; only [minkey, maxkey] is read
//   weights  relative frequency of each key length, indexed the same way as cph
//   name     dataset label used in the printed line
//
// The dataset is skipped (with a message) when it has fewer weights than cph has
// entries, or when no weight at all falls inside [minkey, maxkey] -- in the latter case
// the weighted mean would be 0/0.
static void ReportWeighted ( const std::vector<double>& cph, const std::vector<double>& weights, int minkey, int maxkey, const char *name )
{
  // maxkey is used as an index into cph, so it has to be strictly below cph.size().
  assert(0 <= minkey && minkey <= maxkey && static_cast<size_t>(maxkey) < cph.size());
  if (weights.size() < cph.size()) {
    // size_t is not unsigned long on every platform, so convert before handing it to %lu.
    printf("Average, weighted by key length, SKIP %s dataset, need %lu more weights\n",
           name, static_cast<unsigned long>(cph.size() - weights.size()));
    return;
  }
  const size_t lo = static_cast<size_t>(minkey);
  const size_t hi = static_cast<size_t>(maxkey);
  double tot = 0.0, use = 0.0, sum = 0.0;
  for (size_t i = 0; i < weights.size(); i++) {
    tot += weights[i];                 // weight of the whole dataset
    if (lo <= i && i <= hi) {
      sum += weights[i] * cph[i];      // weighted cycles inside the tested range
      use += weights[i];               // weight covered by the tested range
    }
  }
  if (!(use > 0.0)) {
    printf("Average, weighted by key length, SKIP %s dataset, no weight within key lengths %d..%d\n",
           name, minkey, maxkey);
    return;
  }
  printf("Average, weighted by key length freq. %9.3f cycles/hash (using %.1f%% of %s dataset)\n",
         sum / use, 100. * use / tot, name);
}

// Histogram of the lengths of the top 7,073,200 domain names from Tranco; the list represents
// "popular" domain names.  The table is indexed by name length and is used as the per-key-length
// weights of the "top-7m Tranco DNS names" weighted average.
// The dataset was downloaded from https://tranco-list.eu/list/LJ5W4/1000000 on 2024-Sep-05
// SHA256(tranco_LJ5W4.csv) = 4593f2a162697946f36ef7bbe7c8b434eec42e0e93c4298517c4a3966b08c054
//
// Victor Le Pochat, Tom Van Goethem, Samaneh Tajalizadehkhoob, Maciej Korczyński, and Wouter
// Joosen. 2019. "Tranco: A Research-Oriented Top Sites Ranking Hardened Against Manipulation",
// Proceedings of the 26th Annual Network and Distributed System Security Symposium (NDSS 2019).
// https://doi.org/10.14722/ndss.2019.23386
//
static const unsigned TrancoDNSNameLengths[] = { 0, 0, 5, 0, 326, 5568, 41632, 88175, 151138, 253649,
386024, 416786, 458718, 482490, 491891, 473417, 450606, 413517, 371676, 327361, 288868, 251641,
213514, 178542, 159986, 132611, 113222, 101498, 82455, 67296, 67906, 56843, 53731, 49744, 36404,
32346, 30329, 26978, 24359, 24345, 19161, 16914, 16370, 13708, 13714, 10832, 13548, 9635, 8125,
15536, 6273, 8207, 7490, 5196, 7330, 6202, 3801, 4455, 3756, 3709, 4142, 3989, 3593, 4783, 5052,
1403, 1580, 2072, 1998, 1420, 1836, 1872, 1135, 2664, 1172, 837, 998, 1063, 685, 566, 2020, 627,
2146, 1144, 635, 618, 569, 756, 411, 361, 362, 1138, 218, 278, 182, 185, 175, 220, 3205, 143, 353,
131, 132, 199, 134, 139, 130, 168, 135, 169, 630, 155, 137, 129, 229, 154, 166, 205, 204, 203, 208,
201, 211, 141, 157, 147, 172, 183, 134, 155, 123, 159, 148, 165, 145, 143, 112, 111, 112, 115, 128,
120, 116, 119, 137, 123, 106, 118, 105, 125, 126, 106, 99, 124, 102, 94, 95, 113, 105, 103, 118, 81,
103, 86, 78, 80, 82, 70, 72, 74, 52, 58, 71, 46, 67, 65, 70, 74, 75, 66, 59, 81, 110, 97, 107, 116,
109, 72, 67, 89, 82, 79, 73, 82, 83, 73, 71, 89, 98, 103, 90, 118, 120, 67, 63, 50, 71, 57, 67, 64,
54, 55, 65, 53, 73, 65, 63, 60, 83, 80, 61, 87, 82, 55, 74, 66, 38, 41, 22, 47, 27, 36, 30, 38, 33,
46, 33, 36, 58, 50, 61, 71, 99, 46, 50, 54, 38, 17, 15, 4, 3, 0, 0, 116, 0, 0 };

// Histogram of the input lengths of 1,000,000 calls to umash_full() during the batch hash table
// phase.  The table is indexed by input length and is used as the per-key-length weights of the
// "startup-1M UMASH trace" weighted average.
// Arguably the lengths are off by one, since NUL terminators are included in the hashed data.
//
// All the lengths are clamped to 256 bytes per TIMEHASH_SMALL_LEN_MAX.
// The last bin, UmashStartupLengths[256], is essentially the long tail and is never used.
//
// startup-1M.2020-08-28.trace.bz2 @ https://github.com/backtrace-labs/umash/wiki/Execution-traces
// SHA256(trace.bz2) = 02bae7f0e07880bf24fdd67b6d5fc2a675c6ca05b534081925a16f06c11659c0
//
static const unsigned UmashStartupLengths[] = { 0, 7, 51, 396, 1312, 3110, 5616, 7887, 11145, 68172,
14618, 16670, 9502, 8275, 7444, 8088, 105451, 246, 100, 117, 116, 487, 367, 179, 293, 58, 56, 124,
191, 340, 323, 333, 303, 274, 238, 202, 246, 409961, 235, 10119, 239, 171, 128, 100, 5217, 51, 62,
53, 42, 69, 63, 89, 38, 52, 102, 84, 90, 75, 61, 90, 55, 57, 60, 71, 106, 92520, 54, 57, 101, 316,
961, 1873, 1714, 290, 88, 185, 600, 1038, 1762, 3228, 3174, 284, 266, 292, 752, 1381, 1331, 145,
161, 177, 1517, 304, 176, 9464, 342, 1809, 286, 962, 116, 390, 383, 244, 50, 54, 46, 88, 191, 74,
54, 91, 110, 11347, 4310, 5021, 51, 189, 902, 60, 3476, 44543, 275, 5960, 58, 1705, 84, 15, 34, 68,
1113, 43, 55, 27, 126, 15, 33, 1512, 14, 359, 13, 43, 7604, 78108, 43, 27, 7, 23, 140, 5, 3, 0, 13,
6, 8, 33, 54, 3, 0, 0, 13, 10, 13, 0, 6, 5, 11, 0, 11, 25, 11, 9, 0, 12, 13, 0, 0, 41, 3, 4, 8, 49,
29, 25, 17, 10, 3, 29, 7, 9, 2, 20, 17, 17, 5, 35, 3, 5, 0, 13, 0, 149, 17, 6, 8, 3, 11, 17, 0, 1,
780, 0, 0, 14, 29, 10, 3, 14, 20, 9, 12, 29, 11, 6, 10, 6, 12, 0, 10, 7, 22, 13, 6, 10, 14, 167, 0,
3, 0, 11, 7, 5, 9, 35, 4, 5, 7, 2, 14, 6, 7, 2, 16, 5, 6, 8, 0, 4, 1022 };

// Print the small-key summary for cycles_per_hash[minkey..maxkey]: first the plain average, then
// one weighted average per key-length distribution (Tranco DNS names, UMASH startup trace, and,
// when the SMHASHER_SMALLKEY_WEIGHTS environment variable is set, the whitespace-separated
// weights it contains).
//
// A weighted average is only meaningful under the assumption that hash speed does not depend on
// the input, which is not strictly true because the multiplication instruction has a certain
// amount of variance.
void ReportTinySpeedTest ( const std::vector<double>& cycles_per_hash, int minkey, int maxkey )
{
  ReportAverage(cycles_per_hash, minkey, maxkey);

  std::vector<double> weights;

  weights.assign(TrancoDNSNameLengths, ARRAY_END(TrancoDNSNameLengths));
  ReportWeighted(cycles_per_hash, weights, minkey, maxkey, "top-7m Tranco DNS names");

  weights.assign(UmashStartupLengths, ARRAY_END(UmashStartupLengths));
  ReportWeighted(cycles_per_hash, weights, minkey, maxkey, "startup-1M UMASH trace");

  const char *env_weights = getenv("SMHASHER_SMALLKEY_WEIGHTS");
  if (!env_weights)
    return;

  // Parsing stops at the first token that is not a number.
  weights.clear();
  std::istringstream parser(env_weights);
  double value;
  while (parser >> value)
    weights.push_back(value);
  ReportWeighted(cycles_per_hash, weights, minkey, maxkey, "${SMHASHER_SMALLKEY_WEIGHTS}");
}

double HashMapSpeedTest ( pfHash pfhash, const int hashbits,
std::vector<std::string> words,
const uint32_t seed, const int trials, bool verbose )
Expand Down Expand Up @@ -453,4 +558,3 @@ double HashMapSpeedTest ( pfHash pfhash, const int hashbits,
return mean;
}

//-----------------------------------------------------------------------------
4 changes: 3 additions & 1 deletion SpeedTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

#include "Types.h"

constexpr int TIMEHASH_SMALL_LEN_MAX = 255;

void BulkSpeedTest ( pfHash hash, uint32_t seed );
double TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose );
double HashMapSpeedTest ( pfHash pfhash, int hashbits, std::vector<std::string> words,
const uint32_t seed, const int trials, bool verbose );
//-----------------------------------------------------------------------------
void ReportTinySpeedTest ( const std::vector<double>& cycles_per_hash, int minkey, int maxkey );
47 changes: 31 additions & 16 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ bool g_testExtra = false; // excessive torture tests: Sparse, Avalanche, D
bool g_testVerifyAll = false;

bool g_testSanity = false;
bool g_testSpeed = false;
bool g_testSpeedAll = false;
bool g_testSpeedBulk = false;
bool g_testSpeedSmall = false;
bool g_testHashmap = false;
bool g_testAvalanche = false;
bool g_testSparse = false;
Expand Down Expand Up @@ -56,7 +58,9 @@ TestOpts g_testopts[] =
{ g_testAll, "All" },
{ g_testVerifyAll, "VerifyAll" },
{ g_testSanity, "Sanity" },
{ g_testSpeed, "Speed" },
{ g_testSpeedAll, "Speed" },
{ g_testSpeedBulk, "SpeedBulk" },
{ g_testSpeedSmall, "SpeedSmall" },
{ g_testHashmap, "Hashmap" },
{ g_testAvalanche, "Avalanche" },
{ g_testSparse, "Sparse" },
Expand Down Expand Up @@ -997,7 +1001,7 @@ void test ( hashfunc<hashtype> hash, HashInfo* info )
printf("PASS\n\n"); fflush(NULL); // if not it does exit(1)
}

if (g_testAll || g_testSpeed || g_testHashmap) {
if (g_testAll || g_testSpeedBulk || g_testSpeedSmall || g_testHashmap) {
printf("--- Testing %s \"%s\" %s\n\n", info->name, info->desc, quality_str[info->quality]);
} else {
fprintf(stderr, "--- Testing %s \"%s\" %s\n\n", info->name, info->desc, quality_str[info->quality]);
Expand All @@ -1021,9 +1025,8 @@ void test ( hashfunc<hashtype> hash, HashInfo* info )
//-----------------------------------------------------------------------------
// Speed tests

if(g_testSpeed || g_testAll)
if(g_testSpeedBulk || g_testSpeedSmall || g_testAll)
{
double sum = 0.0;
printf("[[[ Speed Tests ]]]\n\n");
if (timer_counts_ns())
printf("WARNING: no cycle counter, cycle == 1ns\n");
Expand All @@ -1038,19 +1041,29 @@ void test ( hashfunc<hashtype> hash, HashInfo* info )
fflush(NULL);

Seed_init (info, info->verification);
BulkSpeedTest(info->hash,info->verification);
printf("\n");
fflush(NULL);

for(int i = 1; i < 32; i++)
{
volatile int j = i;
sum += TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),j,info->verification,true);
if (g_testSpeedBulk || g_testAll) {
BulkSpeedTest(info->hash,info->verification);
printf("\n");
fflush(NULL);
}

// Small-key speed test: time every key length in [minkey, maxkey], report the averages
// and store the plain average in g_speed.
// The range comes from SMHASHER_SMALLKEY_MIN / SMHASHER_SMALLKEY_MAX; the default upper
// bound is 32, or 64 when g_testExtra is set.
if (g_testSpeedSmall || g_testAll) {
  const int dflmax = g_testExtra ? 64 : 32;
  const int minkey = getenvlong("SMHASHER_SMALLKEY_MIN", 1, 1, TIMEHASH_SMALL_LEN_MAX);
  // getenvlong() asserts minval <= defval, so raise the default when the requested
  // minimum is already above it.
  const int defmax = dflmax < minkey ? minkey : dflmax;
  const int maxkey = getenvlong("SMHASHER_SMALLKEY_MAX", minkey, defmax, TIMEHASH_SMALL_LEN_MAX);
  // Indexed by key length; entries below minkey stay NAN.
  std::vector<double> cph(maxkey+1, NAN);
  // Accumulate in a local double.  Declaring g_speed in the for-init would create a
  // loop-local int that shadows the global and truncates every sample.
  double total = 0.0;
  for(int i = minkey; i <= maxkey; i++)
  {
    // NOTE(review): the volatile copy presumably keeps the compiler from specializing
    // the call on a known key size -- confirm.
    volatile int j = i;
    cph[j] = TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),j,info->verification,true);
    total += cph[j];
  }
  g_speed = total / (maxkey - minkey + 1);
  ReportTinySpeedTest(cph, minkey, maxkey);
  printf("\n");
  fflush(NULL);
}
g_speed = sum = sum / 31.0;
printf("Average %6.3f cycles/hash\n",sum);
printf("\n");
fflush(NULL);
} else {
// known slow hashes (> 500), cycle/hash
const struct { pfHash h; double cycles; } speeds[] = {
Expand Down Expand Up @@ -2786,6 +2799,8 @@ int main ( int argc, const char ** argv )
// Not a command ? => interpreted as hash name
hashToTest = arg;
}
if (g_testSpeedAll)
g_testSpeedBulk = g_testSpeedSmall = true;

// Code runs on the 3rd CPU by default? only for speed tests
//SetAffinity((1 << 2));
Expand Down
Loading