In [1]:
import matplotlib.pyplot as plt
import seaborn as sns

# Start from seaborn's default settings, then apply the 'whitegrid' theme
# so every figure in this notebook shares the same look.
sns.reset_defaults()
sns.set_theme(style='whitegrid')

In [33]:
def bench(source, n=2**20, cc='g++'):
    res = !{cc} -std=c++17 -O3 -funroll-loops -march=native -D N={n} {source}.cc -o run && ./run
    print(source, n, res)
    return float(res[0].split()[0])

# Geometric grid of 50 sizes: 1.17**k truncated to an integer, for k = 50..99.
ns = [int(1.17 ** exponent) for exponent in range(50, 100)]

In [42]:
# Build and run blend-autovec.cc with N = 2**12; the cell value is the
# leading number of the program's first output line.
bench('blend-autovec', n=2**12)

blend-autovec 4096 ['12.98 GFLOPS', '1508481368']


12.98

In [23]:
# Build and run blend-simd.cc with N = 2**12; the cell value is the
# leading number of the program's first output line.
bench('blend-simd', n=2**12)

blend-simd 4096 ['8.95 GFLOPS', '1508481368']


8.95

In [41]:
# Build and run blend-simd-and.cc with N = 2**12; the cell value is the
# leading number of the program's first output line.
bench('blend-simd-and', n=2**12)

blend-simd-and 4096 ['12.35 GFLOPS', '1404258952']


12.35

In [32]:
# Build and run blend-extensions.cc with N = 2**12; the cell value is the
# leading number of the program's first output line.
bench('blend-extensions', n=2**12)

blend-extensions 4096 ['13.21 GFLOPS', '1508481368']


13.21

No idea why it doesn't vectorize optimally.

In [None]:
def plot(title=None, ylabel=None, path=None, ch=None, legend=False, ylim=None, xlabel='Array size', loc='best'):
    """Decorate, optionally save, and show the current matplotlib figure.

    Call this after the data has been drawn with ``plt.plot(...)``; it
    operates on pyplot's current axes/figure.

    Parameters
    ----------
    title : str, optional
        Figure title; omitted when falsy.
    ylabel : str, optional
        Y-axis label; omitted when falsy.
    path : str, optional
        If given, the figure is saved to this path before being shown.
    ch : float, optional
        Y coordinate at which the marker labels are written. When truthy,
        dashed vertical lines are drawn at x = 2**13, 2**17 and 2**20 and
        labelled "32K", "512K" and "4M".
    legend : bool
        Whether to draw a legend (placed according to ``loc``).
    ylim : float, optional
        Upper y limit; ``None`` lets matplotlib choose it. The lower limit is
        always 0.
    xlabel : str
        X-axis label.
    loc : str
        Legend location, forwarded to ``plt.legend``.
    """
    if ch:
        # (x position, label) pairs for the vertical markers.
        # NOTE(review): the labels look like cache capacities for 4-byte
        # elements (2**13 * 4 B = 32K, ...) -- confirm against the benchmark.
        lines = [
            (2**13, "32K"),
            (2**17, "512K"),
            (2**20, "4M"),
        ]

        for x, t in lines:
            # Shift the label slightly to the right of its line so they don't overlap.
            plt.text(x * 1.2, ch, t)
            plt.axvline(x=x, color='black', linestyle='--')

    # `basex` was renamed to `base` in matplotlib 3.3 and removed in 3.5;
    # passing the old keyword raises on current releases.
    plt.xscale('log', base=2)
    plt.xlabel(xlabel)

    if ylabel:
        plt.ylabel(ylabel)

    if title:
        plt.title(title, pad=12)

    if legend:
        plt.legend(loc=loc)

    plt.ylim(bottom=0, top=ylim)
    plt.margins(0)

    if path:
        # Save before plt.show(), while the figure is still the current one.
        fig = plt.gcf()
        fig.savefig(path)
    plt.show()


# NOTE(review): `inc_res` is not defined in any visible cell; it must be
# computed (one value per entry of `ns`) before this cell can run.
plt.plot(ns, inc_res, color='darkred')
plot(
    title='for (int i = 0; i < n; i++) a[i]++',
    ylabel='Increments per second ($10^9$)',
    path='inc.svg',
    ch=4.7,
)

In [None]:
# NOTE(review): `inc_res` and `inc_res_boost` are not defined in any visible
# cell; both must be computed (one value per entry of `ns`) before this runs.
plt.plot(ns, inc_res, label='2GHz', color='darkred')
plt.plot(ns, inc_res_boost, label='4.1GHz', color='darkblue')
plot(
    title='for (int i = 0; i < n; i++) a[i]++',
    ylabel='Increments per second ($10^9$)',
    path='boost.svg',
    ch=6.2,
    legend=True,
)