In [1]:
import matplotlib.pyplot as plt
import seaborn as sns

# Reset seaborn's styling to its defaults first, then apply the 'whitegrid'
# theme, so re-running this cell always starts from the same plot style.
sns.reset_defaults()
sns.set_theme(style='whitegrid')

In [40]:
def benchmark(source, n=2**20, k=2**32, cc='clang++'):
    """Compile ``{source}.cc``, run the resulting binary, and return its timing.

    The program is built with ``cc`` and the flags
    ``-std=c++17 -O3 -funroll-loops -march=native``, then invoked as
    ``./{source} n k``. Compiler output (e.g. warnings) and the program's
    output lines are echoed to the cell.

    Args:
        source: Base name of the C++ file (without ``.cc``); the executable is
            written next to it under the same name.
        n: First command-line argument forwarded to the program.
        k: Second command-line argument forwarded to the program.
            NOTE(review): the recorded runs suggest ``n`` is the input size and
            that the reported throughput equals ``k / seconds`` -- confirm
            against the .cc sources.
        cc: Compiler command. It is split shell-style, so it may carry extra
            words (e.g. a wrapper or additional flags).

    Returns:
        float: the number printed in front of the word ``seconds``.

    Raises:
        RuntimeError: if the compiler exits with a non-zero status, or the
            program prints no ``<number> seconds`` line.
    """
    # Local imports keep this cell runnable on its own in a fresh kernel.
    import shlex
    import subprocess

    print(f"compiling {source}")
    build = subprocess.run(
        shlex.split(cc) + ['-std=c++17', '-O3', '-funroll-loops', '-march=native',
                           f'{source}.cc', '-o', f'{source}'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    if build.stdout:
        # Keep warnings visible in the cell output, as the shell magic did.
        print(build.stdout, end='')
    if build.returncode != 0:
        # Without this check a stale binary from an earlier build would be timed.
        raise RuntimeError(
            f"compiling {source}.cc with {cc!r} failed (exit code {build.returncode})"
        )

    run = subprocess.run(
        [f'./{source}', str(n), str(k)],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    result = run.stdout.splitlines()
    print(source, n, result)

    # Find the timing line by its unit instead of assuming it is the second
    # line, so extra output lines cannot shift the parse onto the wrong value.
    for line in result:
        fields = line.split()
        if len(fields) >= 2 and fields[1] == 'seconds':
            return float(fields[0])
    raise RuntimeError(
        f"./{source} printed no '<number> seconds' line "
        f"(exit code {run.returncode}): {result!r}"
    )

# Run the reverse-* programs with the default n, k and compiler.
# Only the last call's return value is shown as the cell result; the other
# timings appear in the printed lines. reverse-naive is currently disabled.
benchmark('reverse-std')
#benchmark('reverse-naive')
benchmark('reverse-simd')
benchmark('reverse-simd-aligned')

compiling reverse-std
reverse-std 1048576 ['77125909', '4.6262 seconds', '0.9284 GFLOPS']
compiling reverse-simd
reverse-simd 1048576 ['77125909', '0.1518 seconds', '28.3011 GFLOPS']
compiling reverse-simd-aligned
reverse-simd-aligned 1048576 ['77125909', '0.1491 seconds', '28.8141 GFLOPS']


0.1491

In [None]:
# Baseline shuffle: shuffle-std calls std::random_shuffle (see the deprecation
# warning in the recorded compiler output).
benchmark('shuffle-std', n=2**20, k=2**24)

compiling shuffle-std
    std::random_shuffle(a, a + n);
[0;1;32m         ^
[0m[1m/bin/../lib64/gcc/x86_64-pc-linux-gnu/12.1.0/../../../../include/c++/12.1.0/bits/stl_algo.h:4539:5: [0m[0;1;30mnote: [0m'random_shuffle<char *>' has been explicitly marked deprecated here[0m
    _GLIBCXX14_DEPRECATED_SUGGEST("std::shuffle")
[0;1;32m    ^
[0m[1m/bin/../lib64/gcc/x86_64-pc-linux-gnu/12.1.0/../../../../include/c++/12.1.0/x86_64-pc-linux-gnu/bits/c++config.h:112:45: [0m[0;1;30mnote: [0mexpanded from macro '_GLIBCXX14_DEPRECATED_SUGGEST'[0m
# define _GLIBCXX14_DEPRECATED_SUGGEST(ALT) _GLIBCXX_DEPRECATED_SUGGEST(ALT)
[0;1;32m                                            ^
[0m[1m/bin/../lib64/gcc/x86_64-pc-linux-gnu/12.1.0/../../../../include/c++/12.1.0/x86_64-pc-linux-gnu/bits/c++config.h:96:19: [0m[0;1;30mnote: [0mexpanded from macro '_GLIBCXX_DEPRECATED_SUGGEST'[0m
  __attribute__ ((__deprecated__ ("use '" ALT "' instead")))
[0;1;32m                  ^
shuffle-std 1048576 

0.3998

In [66]:
# Fisher-Yates variant; the '-rand' suffix presumably means indices are drawn
# at shuffle time -- confirm against shuffle-fisher-yates-rand.cc.
benchmark('shuffle-fisher-yates-rand', n=2**20, k=2**26)

compiling shuffle-fisher-yates-rand
shuffle-fisher-yates-rand 1048576 ['85129352', '1.6524 seconds', '0.0406 B/s', '49.2448 c/B']


1.6524

In [76]:
# Fisher-Yates variant run with a smaller n (2**12) than the cells above;
# '-precalc' presumably means the random indices are precomputed -- confirm
# against the .cc source.
benchmark('shuffle-fisher-yates-precalc', n=2**12, k=2**26)

compiling shuffle-fisher-yates-precalc
shuffle-fisher-yates-precalc 4096 ['-146652', '0.1523 seconds', '0.4407 B/s', '4.5384 c/B']


0.1523

In [70]:
# Permutation-based shuffle with n=2**20; a later cell reruns the same program
# with n=2**12.
benchmark('shuffle-permutation-precalc', n=2**20, k=2**26)

compiling shuffle-permutation-precalc
shuffle-permutation-precalc 1048576 ['64340491', '0.1921 seconds', '0.3494 B/s', '5.7239 c/B']


0.1921

In [128]:
# Same program as the n=2**20 run, now with n=2**12 and a larger k (2**28).
benchmark('shuffle-permutation-precalc', n=2**12, k=2**28)

compiling shuffle-permutation-precalc
shuffle-permutation-precalc 4096 ['362725', '0.3380 seconds', '0.7943 B/s', '2.5180 c/B']


0.338

In [127]:
# Blocked permutation shuffle, built with g++ instead of the default clang++.
benchmark('shuffle-permutation-blocked', n=2**12, k=2**28, cc='g++')
# Author's notes:
# 1.5  (NOTE(review): unexplained figure -- possibly a cycles/byte estimate;
#       the recorded run reports 1.4275 c/B. Confirm.)
# read, read, shift, or
# write every 8, maybe update base

compiling shuffle-permutation-blocked
shuffle-permutation-blocked 4096 ['362725', '0.1916 seconds', '1.4011 B/s', '1.4275 c/B']


0.1916

In [126]:
# '-ilp' variant of the blocked permutation shuffle, same n, k and compiler
# (g++) as the plain blocked run for a like-for-like comparison.
benchmark('shuffle-permutation-blocked-ilp', n=2**12, k=2**28, cc='g++')

compiling shuffle-permutation-blocked-ilp
shuffle-permutation-blocked-ilp 4096 ['362725', '0.1818 seconds', '1.4764 B/s', '1.3547 c/B']


0.1818

In [65]:
# Print the sequence of the linear congruential generator
#     k -> (k * a + c) mod n
# for n steps. With n = 32, a = 5, c = 1 the recorded output visits every
# value 0..31 exactly once, i.e. the sequence is a permutation of range(n).
n = 2**5  # modulus, and number of steps printed

k = 1  # starting seed
a = 5  # multiplier
c = 1  # increment

for i in range(n):
    print(i, k)  # step index and current generator state
    k = (k * a + c) % n

0 1
1 6
2 31
3 28
4 13
5 2
6 11
7 24
8 25
9 30
10 23
11 20
12 5
13 26
14 3
15 16
16 17
17 22
18 15
19 12
20 29
21 18
22 27
23 8
24 9
25 14
26 7
27 4
28 21
29 10
30 19
31 0
