Skip to content

Commit

Permalink
Add support for running constant-time tests on aarch64, Apple Silicon…
Browse files Browse the repository at this point in the history
… and wider spectrum of non-x86_64 targets

Signed-off-by: Anjan Roy <hello@itzmeanjan.in>
  • Loading branch information
itzmeanjan committed Jan 31, 2024
1 parent a18fdee commit 3dc6585
Showing 1 changed file with 52 additions and 1 deletion.
53 changes: 52 additions & 1 deletion src/dudect.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,16 @@ extern "C" {

#include <stddef.h>
#include <stdint.h>

#if defined __x86_64__
#include <emmintrin.h>
#include <x86intrin.h>
#elif defined __APPLE__
#include <mach/mach_time.h>
#include <stdatomic.h>
#else
#include <time.h>
#endif

#ifdef DUDECT_VISIBLITY_STATIC
#define DUDECT_VISIBILITY static
Expand Down Expand Up @@ -262,8 +270,10 @@ uint8_t randombit(void) {
return (ret & 1);
}

#if defined(__x86_64__)

/*
Returns current CPU tick count from *T*ime *S*tamp *C*ounter.
Returns current CPU tick count from x86_64 *T*ime *S*tamp *C*ounter.
To enforce CPU to issue RDTSC instruction where we want it to, we put a `mfence` instruction before
issuing `rdtsc`, which should make all memory load/ store operations, prior to RDTSC, globally visible.
Expand All @@ -279,6 +289,47 @@ static inline int64_t cpucycles(void) {
return (int64_t)__rdtsc();
}

#elif defined(__aarch64__) && defined(__linux__)

/*
Returns current CPU cycle count from aarch64 *P*erformance *M*onitors *C*ycle Counter (PMCCNTR_EL0).

To make the CPU complete all pending memory access operations that appear before the read of PMCCNTR_EL0,
we issue a *D*ata *S*ynchronization *B*arrier instruction right before reading the CPU cycle counter.
The asm statement also declares a "memory" clobber, so that the compiler itself does not move or cache
memory accesses across the barrier + counter read.

Note, reading PMCCNTR_EL0 from userspace will probably result in the process being killed with
a message "illegal instruction executed". So we've to install a Linux Kernel Module. I've tested the
LKM @ https://github.com/jerinjacobk/armv8_pmu_cycle_counter_el0 and it works fine.

See PMCCNTR_EL0 documentation @ https://developer.arm.com/documentation/ddi0595/2021-09/External-Registers/PMCCNTR-EL0--Performance-Monitors-Cycle-Counter?lang=en
See DSB documentation @ https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/DSB--Data-Synchronization-Barrier-
Also see https://github.com/itzmeanjan/criterion-cycles-per-byte/blob/a270a496/src/lib.rs#L61-L74
*/
static inline int64_t cpucycles(void) {
  uint64_t val = 0;
  // `volatile` keeps the statement from being deleted or hoisted as a pure computation;
  // the "memory" clobber additionally makes it a compiler-level barrier for loads and stores.
  __asm__ volatile("dsb sy; mrs %0, pmccntr_el0" : "=r"(val) : : "memory");
  return (int64_t)val;
}

#elif defined(__APPLE__)

/*
Timestamp source for Apple targets that are not x86_64 (x86_64 builds are handled by the RDTSC branch).

Reads the Mach absolute-time counter, i.e. the number of "mach time units" elapsed since system startup.
A sequentially-consistent thread fence is issued immediately before the counter is read.

See https://github.com/google/benchmark/blob/4682db08/src/cycleclock.h#L63-L73
*/
static inline int64_t cpucycles(void) {
  atomic_thread_fence(memory_order_seq_cst);

  const uint64_t mach_ticks = mach_absolute_time();
  return (int64_t)mach_ticks;
}

#else

// Reached only when none of the branches above matched: x86_64, aarch64 on Linux, or an Apple target.
#error "`dudect` doesn't yet support your OS/ CPU."

#endif

// threshold values for Welch's t-test
#define t_threshold_bananas 500 // test failed, with overwhelming probability
#define t_threshold_moderate \
Expand Down

0 comments on commit 3dc6585

Please sign in to comment.