In [1]:
import matplotlib.pyplot as plt
import seaborn as sns

# Order matters: first restore the default rc parameters, then apply the
# 'whitegrid' theme on top, so every figure in this notebook starts from the
# same known styling regardless of what the kernel had configured before.
sns.reset_defaults()
sns.set_theme(style='whitegrid')

In [79]:
def bench(source, n=2**16, cc='g++', dec=False):
    """Compile ``<source>.cc`` with ``N`` fixed to ``n``, run it, and return its score.

    The program is built into ``./run`` in the current working directory and
    executed once. All captured output lines are printed together with
    ``source`` and ``n``; the returned value is the first whitespace-separated
    token of the last output line (e.g. ``'1.58 GFLOPS'`` -> ``1.58``).

    Parameters
    ----------
    source : str
        Base name of the C++ file (without the ``.cc`` extension).
    n : int
        Value passed to the compiler as ``-D N=<n>``.
    cc : str
        Compiler command; may contain extra arguments (split shell-style).
    dec : bool
        When true, additionally pass ``-D DEC`` to the compiler.

    Returns
    -------
    float
        The leading number of the program's last output line.

    Raises
    ------
    RuntimeError
        If compilation fails, or if the program's last output line does not
        start with a number. The captured output is included in the message.
    """
    # Local imports keep this cell self-contained on a fresh kernel.
    import shlex
    import subprocess

    # An argument list (no shell) avoids quoting problems in `source` or `cc`.
    compile_cmd = [
        *shlex.split(cc),
        '-std=c++17', '-O3', '-funroll-loops', '-march=native',
        '-D', f'N={n}',
        *(['-D', 'DEC'] if dec else []),
        f'{source}.cc',
        '-o', 'run',
    ]
    build = subprocess.run(
        compile_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    if build.returncode != 0:
        # Never fall through to a stale ./run from an earlier successful build.
        raise RuntimeError(
            f'compiling {source}.cc failed (exit code {build.returncode}):\n'
            f'{build.stdout}'
        )

    run = subprocess.run(
        ['./run'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )

    # Compiler diagnostics (if any) come first, then the program's own output,
    # mirroring what a single `cc ... && ./run` capture would contain.
    res = build.stdout.splitlines() + run.stdout.splitlines()
    print(source, n, res)

    try:
        return float(res[-1].split()[0])
    except (IndexError, ValueError):
        raise RuntimeError(
            f'could not parse a score from the output of {source} '
            f'(exit code {run.returncode}): {res!r}'
        ) from None

# Array sizes to sweep: every power of two from 2**10 up to 2**20 inclusive.
ns = [1 << exponent for exponent in range(10, 21)]

In [24]:
bench('simple')

simple 65536 ['35545 3722', '0.00004148 x 32768', '1.58 GFLOPS']


1.58

In [25]:
# Sweep every implementation over all sizes in `ns`, once with the default
# build and once with the DEC macro defined (presumably the decreasing-input
# variant; confirm against the .cc sources). Each dict maps a source name to
# its list of scores, in the same order as `ns`.
results, results_dec = {}, {}
for source in ('std', 'simple', 'cmov', 'hint', 'index', 'simdmin-single', 'simdmin', 'simdmin-testz'):
    for table, dec_flag in ((results, False), (results_dec, True)):
        table[source] = [bench(source, n=size, dec=dec_flag) for size in ns]

std 1024 ['35545 3722', '0.00023086 x 32768', '0.28 GFLOPS']
std 2048 ['35545 3722', '0.00023119 x 32768', '0.28 GFLOPS']
std 4096 ['35545 3722', '0.00023115 x 32768', '0.28 GFLOPS']
std 8192 ['35545 3722', '0.00023077 x 32768', '0.28 GFLOPS']
std 16384 ['35545 3722', '0.00023088 x 32768', '0.28 GFLOPS']
std 32768 ['35545 3722', '0.00023105 x 32768', '0.28 GFLOPS']
std 65536 ['35545 3722', '0.00023095 x 32768', '0.28 GFLOPS']
std 131072 ['35545 3722', '0.00023054 x 32768', '0.28 GFLOPS']
std 262144 ['35545 3722', '0.00023059 x 32768', '0.28 GFLOPS']
std 524288 ['35545 3722', '0.00023129 x 32768', '0.28 GFLOPS']
std 1048576 ['35545 3722', '0.00023096 x 32768', '0.28 GFLOPS']
std 1024 ['65535 1', '0.00023060 x 32768', '0.28 GFLOPS']
std 2048 ['65535 1', '0.00023070 x 32768', '0.28 GFLOPS']
std 4096 ['65535 1', '0.00023063 x 32768', '0.28 GFLOPS']
std 8192 ['65535 1', '0.00023118 x 32768', '0.28 GFLOPS']
std 16384 ['65535 1', '0.00023055 x 32768', '0.28 GFLOPS']
std 32768 ['65535 1', '0.0

In [26]:
import pickle

# Persist both result tables as one two-element list so a later session can
# reload them without re-running the benchmarks.
payload = pickle.dumps([results, results_dec])
with open('results.pkl', 'wb') as out:
    out.write(payload)

In [38]:
bench('simdmin-intrinsics')

simdmin-intrinsics 65536 ['35545 3722', '0.00000679 x 32768', '9.65 GFLOPS']


9.65

In [40]:
# Best score per implementation: default build vs. the -D DEC build.
for source, scores in results.items():
    print(source, max(scores), max(results_dec[source]))

std 0.28 0.28
simple 1.58 1.94
cmov 1.44 1.94
hint 2.26 1.5
index 4.38 4.38
simdmin-single 9.36 0.54
simdmin 14.65 1.41
simdmin-testz 13.59 1.41


In [81]:
bench('std', n=2**16)

std 65536 ['35545 3722', '0.28 GFLOPS']


0.28

In [86]:
bench('simple', n=2**12)

simple 4096 ['1270 100669', '1.54 GFLOPS']


1.54

In [83]:
bench('hint', n=2**16)

hint 65536 ['35545 3722', '2.61 GFLOPS']


2.61

In [87]:
bench('cmov', n=2**12)

cmov 4096 ['1270 100669', '1.51 GFLOPS']


1.51

In [92]:
bench('simdmin-intrinsics-ilp', n=2**13)

simdmin-intrinsics-ilp 8192 ['1270 100669', '14.89 GFLOPS']


14.89

In [11]:
[bench('simdmin', n=n) for n in ns]

simdmin 1024 ['35545 3722', '0.00000473 x 262144', '13.84 GFLOPS']
simdmin 2048 ['35545 3722', '0.00000450 x 262144', '14.55 GFLOPS']
simdmin 4096 ['35545 3722', '0.00000454 x 262144', '14.45 GFLOPS']
simdmin 8192 ['35545 3722', '0.00000447 x 262144', '14.65 GFLOPS']
simdmin 16384 ['35545 3722', '0.00000486 x 262144', '13.49 GFLOPS']
simdmin 32768 ['35545 3722', '0.00000451 x 262144', '14.53 GFLOPS']
simdmin 65536 ['35545 3722', '0.00000480 x 262144', '13.65 GFLOPS']
simdmin 131072 ['35545 3722', '0.00000447 x 262144', '14.66 GFLOPS']
simdmin 262144 ['35545 3722', '0.00000450 x 262144', '14.56 GFLOPS']
simdmin 524288 ['35545 3722', '0.00000478 x 262144', '13.71 GFLOPS']
simdmin 1048576 ['35545 3722', '0.00000480 x 262144', '13.66 GFLOPS']


[13.84, 14.55, 14.45, 14.65, 13.49, 14.53, 13.65, 14.66, 14.56, 13.71, 13.66]

In [7]:
bench('cmov')

cmov 65536 ['35545 3722', '0.00004547 x 262144', '1.44 GFLOPS']


1.44

In [8]:
bench('simple')

simple 65536 ['35545 3722', '0.00004134 x 262144', '1.59 GFLOPS']


1.59

In [9]:
bench('simdmin-testz')

simdmin-testz 65536 ['35545 3722', '0.00000514 x 262144', '12.75 GFLOPS']


12.75

In [None]:
def plot(title=None, ylabel=None, path=None, ch=None, legend=False, ylim=None, xlabel='Array size', loc='best'):
    """Finish, optionally save, and show the current pyplot figure.

    Call this after the data series have been drawn with ``plt.plot(...)``;
    it only decorates the current axes.

    Parameters
    ----------
    title : str, optional
        Figure title; omitted when falsy.
    ylabel : str, optional
        Y-axis label; omitted when falsy.
    path : str, optional
        When given, the figure is saved to this path before being shown.
    ch : float, optional
        When truthy, dashed vertical markers are drawn at 2**13, 2**17 and
        2**20 and labelled "32K", "512K" and "4M" at height ``ch``.
        (Presumably cache-size boundaries for 4-byte elements; confirm.)
    legend : bool
        Whether to draw a legend.
    ylim : float, optional
        Upper y limit; ``None`` leaves the top to Matplotlib.
    xlabel : str
        X-axis label.
    loc : str
        Legend location, forwarded to ``plt.legend``.
    """
    if ch:
        lines = [
            (2**13, "32K"),
            (2**17, "512K"),
            (2**20, "4M"),
        ]

        for x, t in lines:
            # Nudge the label to the right of its marker line.
            plt.text(x * 1.2, ch, t)
            plt.axvline(x=x, color='black', linestyle='--')

    # `basex` was deprecated in Matplotlib 3.3 and removed in 3.5; `base` is
    # the supported spelling for the log-scale base.
    plt.xscale('log', base=2)
    plt.xlabel(xlabel)

    if ylabel:
        plt.ylabel(ylabel)

    if title:
        plt.title(title, pad=12)

    if legend:
        plt.legend(loc=loc)

    # Anchor the y axis at zero and drop the automatic padding around the data.
    plt.ylim(bottom=0, top=ylim)
    plt.margins(0)

    if path:
        # Save before show(): the figure may no longer be current afterwards.
        fig = plt.gcf()
        fig.savefig(path)
    plt.show()


#plt.plot(ns, inc_res, color='darkred')
#plot('for (int i = 0; i < n; i++) a[i]++', 'Increments per second ($10^9$)', 'inc.svg', 4.7)