In [3]:
import re
import pandas as pd
import matplotlib.pyplot as plt

In [46]:
def parse(log, simd_width=1):
    benchmarks = []
    for line in log.split('\n'):
        match = re.search( r'\[(.+)\]\s+mean =\s+(.+)\(ns\)\s+sd =\s+(.+)\(ns\)(?:.+ SIMD (\d+))?', line)
        if match:
            benchmarks.append({
                'mso': match.group(1),
                'mean_ns': float(match.group(2)),
                'std_ns': float(match.group(3)),
                'simd_width': int(match.group(4) or simd_width)
            })
    return pd.DataFrame(benchmarks)

In [49]:
expected = parse("""
[   Union Scalar]       mean =   1.32e+05(ns)   sd =   3.18e+02(ns)     nsample =     5 // Scalar baseline (from any SIMD length)
[Intersect Scalar]      mean =   1.31e+05(ns)   sd =   1.01e+03(ns)     nsample =     5 // Scalar baseline (from any SIMD length)
[     XOR Scalar]       mean =   1.32e+05(ns)   sd =   2.60e+02(ns)     nsample =     5 // Scalar baseline (from any SIMD length)
[    Diff Scalar]       mean =   1.29e+05(ns)   sd =   2.54e+02(ns)     nsample =     5 // Scalar baseline (from any SIMD length)
[Join-Full-Outer Scalar]        mean =   1.58e+05(ns)   sd =   2.93e+02(ns)     nsample =     5 // Scalar baseline (from any SIMD length)
[Join-Inner Scalar]     mean =   1.44e+05(ns)   sd =   1.71e+02(ns)     nsample =     5 // Scalar baseline (from any SIMD length)
[Join-Outer-Ex Scalar]  mean =   1.52e+05(ns)   sd =   2.81e+02(ns)     nsample =     5 // Scalar baseline (from any SIMD length)
[Join-Left-Ex Scalar]   mean =   1.35e+05(ns)   sd =   2.44e+02(ns)     nsample =     5 // Scalar baseline (from any SIMD length)
[Join-Left Scalar]      mean =   1.51e+05(ns)   sd =   1.39e+03(ns)     nsample =     5 // Scalar baseline (from any SIMD length)

[     Union SIMD]       mean =   6.73e+04(ns)   sd =   4.30e+02(ns)     nsample =     5 // when setting SIMD 4
[ Intersect SIMD]       mean =   5.48e+04(ns)   sd =   7.65e+02(ns)     nsample =     5 // when setting SIMD 4
[       XOR SIMD]       mean =   5.84e+04(ns)   sd =   6.68e+02(ns)     nsample =     5 // when setting SIMD 4
[      Diff SIMD]       mean =   5.58e+04(ns)   sd =   7.83e+01(ns)     nsample =     5 // when setting SIMD 4
[Join-Full-Outer SIMD]  mean =   9.39e+04(ns)   sd =   6.88e+02(ns)     nsample =     5 // when setting SIMD 4
[Join-Inner SIMD]       mean =   6.34e+04(ns)   sd =   1.36e+02(ns)     nsample =     5 // when setting SIMD 4
[Join-Outer-Ex SIMD]    mean =   7.51e+04(ns)   sd =   7.69e+02(ns)     nsample =     5 // when setting SIMD 4
[Join-Left-Ex SIMD]     mean =   6.71e+04(ns)   sd =   1.17e+02(ns)     nsample =     5 // when setting SIMD 4
[ Join-Left SIMD]       mean =   6.70e+04(ns)   sd =   1.12e+02(ns)     nsample =     5 // when setting SIMD 4

[     Union SIMD]       mean =   3.25e+04(ns)   sd =   2.50e+02(ns)     nsample =     5 // when setting SIMD 8
[ Intersect SIMD]       mean =   2.77e+04(ns)   sd =   7.73e+01(ns)     nsample =     5 // when setting SIMD 8
[       XOR SIMD]       mean =   3.08e+04(ns)   sd =   4.87e+02(ns)     nsample =     5 // when setting SIMD 8
[      Diff SIMD]       mean =   2.84e+04(ns)   sd =   7.28e+01(ns)     nsample =     5 // when setting SIMD 8
[Join-Full-Outer SIMD]  mean =   4.74e+04(ns)   sd =   3.33e+02(ns)     nsample =     5 // when setting SIMD 8
[Join-Inner SIMD]       mean =   3.32e+04(ns)   sd =   1.34e+02(ns)     nsample =     5 // when setting SIMD 8
[Join-Outer-Ex SIMD]    mean =   4.04e+04(ns)   sd =   6.72e+02(ns)     nsample =     5 // when setting SIMD 8
[Join-Left-Ex SIMD]     mean =   3.53e+04(ns)   sd =   1.16e+02(ns)     nsample =     5 // when setting SIMD 8
[ Join-Left SIMD]       mean =   3.53e+04(ns)   sd =   1.12e+02(ns)     nsample =     5 // when setting SIMD 8

[     Union SIMD]       mean =   1.67e+04(ns)   sd =   8.78e+01(ns)     nsample =     5 // when setting SIMD 16 // <- example
[ Intersect SIMD]       mean =   1.46e+04(ns)   sd =   7.66e+01(ns)     nsample =     5 // when setting SIMD 16
[       XOR SIMD]       mean =   1.66e+04(ns)   sd =   4.03e+02(ns)     nsample =     5 // when setting SIMD 16
[      Diff SIMD]       mean =   1.49e+04(ns)   sd =   7.69e+01(ns)     nsample =     5 // when setting SIMD 16
[Join-Full-Outer SIMD]  mean =   2.46e+04(ns)   sd =   1.30e+02(ns)     nsample =     5 // when setting SIMD 16
[Join-Inner SIMD]       mean =   1.75e+04(ns)   sd =   1.31e+02(ns)     nsample =     5 // when setting SIMD 16
[Join-Outer-Ex SIMD]    mean =   2.15e+04(ns)   sd =   5.28e+02(ns)     nsample =     5 // when setting SIMD 16
[Join-Left-Ex SIMD]     mean =   1.84e+04(ns)   sd =   1.14e+02(ns)     nsample =     5 // when setting SIMD 16
[ Join-Left SIMD]       mean =   1.84e+04(ns)   sd =   1.08e+02(ns)     nsample =     5 // when setting SIMD 16

[     Union SIMD]       mean =   8.99e+03(ns)   sd =   6.37e+01(ns)     nsample =     5 // when setting SIMD 32
[ Intersect SIMD]       mean =   7.82e+03(ns)   sd =   7.50e+01(ns)     nsample =     5 // when setting SIMD 32
[       XOR SIMD]       mean =   8.88e+03(ns)   sd =   2.81e+02(ns)     nsample =     5 // when setting SIMD 32
[      Diff SIMD]       mean =   7.99e+03(ns)   sd =   6.72e+01(ns)     nsample =     5 // when setting SIMD 32
[Join-Full-Outer SIMD]  mean =   1.29e+04(ns)   sd =   7.09e+01(ns)     nsample =     5 // when setting SIMD 32
[Join-Inner SIMD]       mean =   9.46e+03(ns)   sd =   1.27e+02(ns)     nsample =     5 // when setting SIMD 32
[Join-Outer-Ex SIMD]    mean =   1.16e+04(ns)   sd =   4.25e+02(ns)     nsample =     5 // when setting SIMD 32
[Join-Left-Ex SIMD]     mean =   9.85e+03(ns)   sd =   1.10e+02(ns)     nsample =     5 // when setting SIMD 32
[ Join-Left SIMD]       mean =   9.85e+03(ns)   sd =   1.03e+02(ns)     nsample =     5 // when setting SIMD 32

[     Union SIMD]       mean =   4.91e+03(ns)   sd =   6.00e+01(ns)     nsample =     5 // when setting SIMD 64
[ Intersect SIMD]       mean =   4.23e+03(ns)   sd =   7.27e+01(ns)     nsample =     5 // when setting SIMD 64
[       XOR SIMD]       mean =   4.91e+03(ns)   sd =   2.45e+02(ns)     nsample =     5 // when setting SIMD 64
[      Diff SIMD]       mean =   4.32e+03(ns)   sd =   6.66e+01(ns)     nsample =     5 // when setting SIMD 64
[Join-Full-Outer SIMD]  mean =   1.16e+04(ns)   sd =   5.10e+01(ns)     nsample =     5 // when setting SIMD 64
[Join-Inner SIMD]       mean =   7.28e+03(ns)   sd =   8.04e+01(ns)     nsample =     5 // when setting SIMD 64
[Join-Outer-Ex SIMD]    mean =   9.47e+03(ns)   sd =   3.46e+01(ns)     nsample =     5 // when setting SIMD 64
[Join-Left-Ex SIMD]     mean =   7.26e+03(ns)   sd =   5.73e+01(ns)     nsample =     5 // when setting SIMD 64
[ Join-Left SIMD]       mean =   9.55e+03(ns)   sd =   6.26e+01(ns)     nsample =     5 // when setting SIMD 64
""")
expected

Unnamed: 0,mso,mean_ns,std_ns,simd_width
0,Union Scalar,132000.0,318.0,1
1,Intersect Scalar,131000.0,1010.0,1
2,XOR Scalar,132000.0,260.0,1
3,Diff Scalar,129000.0,254.0,1
4,Join-Full-Outer Scalar,158000.0,293.0,1
5,Join-Inner Scalar,144000.0,171.0,1
6,Join-Outer-Ex Scalar,152000.0,281.0,1
7,Join-Left-Ex Scalar,135000.0,244.0,1
8,Join-Left Scalar,151000.0,1390.0,1
9,Union SIMD,67300.0,430.0,4


In [93]:
# MacBook Pro M1
measured = pd.concat([
    parse("""
        [   Union Scalar]	mean =   1.32e+05(ns)	sd =   3.14e+02(ns)	nsample =     5
        [     Union SIMD]	mean =   6.79e+04(ns)	sd =   5.46e+02(ns)	nsample =     5
        [Intersect Scalar]	mean =   1.31e+05(ns)	sd =   9.26e+02(ns)	nsample =     5
        [ Intersect SIMD]	mean =   5.44e+04(ns)	sd =   7.95e+01(ns)	nsample =     5
        [     XOR Scalar]	mean =   1.30e+05(ns)	sd =   3.56e+02(ns)	nsample =     5
        [       XOR SIMD]	mean =   5.90e+04(ns)	sd =   6.02e+02(ns)	nsample =     5
        [    Diff Scalar]	mean =   1.29e+05(ns)	sd =   2.81e+02(ns)	nsample =     5
        [      Diff SIMD]	mean =   5.60e+04(ns)	sd =   4.54e+01(ns)	nsample =     5
    """, simd_width=4),
    parse("""
        [   Union Scalar]	mean =   1.32e+05(ns)	sd =   3.14e+02(ns)	nsample =     5
        [     Union SIMD]	mean =   3.19e+04(ns)	sd =   3.24e+02(ns)	nsample =     5
        [Intersect Scalar]	mean =   1.31e+05(ns)	sd =   9.89e+02(ns)	nsample =     5
        [ Intersect SIMD]	mean =   2.77e+04(ns)	sd =   6.60e+01(ns)	nsample =     5
        [     XOR Scalar]	mean =   1.30e+05(ns)	sd =   2.51e+02(ns)	nsample =     5
        [       XOR SIMD]	mean =   3.16e+04(ns)	sd =   5.37e+02(ns)	nsample =     5
        [    Diff Scalar]	mean =   1.29e+05(ns)	sd =   2.57e+02(ns)	nsample =     5
        [      Diff SIMD]	mean =   2.85e+04(ns)	sd =   4.48e+01(ns)	nsample =     5
    """, simd_width=8),
    parse("""
        [   Union Scalar]	mean =   1.32e+05(ns)	sd =   3.14e+02(ns)	nsample =     5
        [     Union SIMD]	mean =   1.64e+04(ns)	sd =   1.10e+02(ns)	nsample =     5
        [Intersect Scalar]	mean =   1.31e+05(ns)	sd =   9.95e+02(ns)	nsample =     5
        [ Intersect SIMD]	mean =   1.46e+04(ns)	sd =   6.45e+01(ns)	nsample =     5
        [     XOR Scalar]	mean =   1.30e+05(ns)	sd =   2.51e+02(ns)	nsample =     5
        [       XOR SIMD]	mean =   1.68e+04(ns)	sd =   4.28e+02(ns)	nsample =     5
        [    Diff Scalar]	mean =   1.29e+05(ns)	sd =   1.96e+02(ns)	nsample =     5
        [      Diff SIMD]	mean =   1.49e+04(ns)	sd =   4.44e+01(ns)	nsample =     5
    """, simd_width=16),
    parse("""
        [   Union Scalar]	mean =   1.32e+05(ns)	sd =   3.14e+02(ns)	nsample =     5
        [     Union SIMD]	mean =   8.80e+03(ns)	sd =   7.58e+01(ns)	nsample =     5
        [Intersect Scalar]	mean =   1.31e+05(ns)	sd =   9.86e+02(ns)	nsample =     5
        [ Intersect SIMD]	mean =   7.79e+03(ns)	sd =   6.31e+01(ns)	nsample =     5
        [     XOR Scalar]	mean =   1.30e+05(ns)	sd =   2.51e+02(ns)	nsample =     5
        [       XOR SIMD]	mean =   9.04e+03(ns)	sd =   3.86e+02(ns)	nsample =     5
        [    Diff Scalar]	mean =   1.30e+05(ns)	sd =   3.84e+02(ns)	nsample =     5
        [      Diff SIMD]	mean =   7.96e+03(ns)	sd =   4.10e+01(ns)	nsample =     5
    """, simd_width=32),
    parse("""
        [   Union Scalar]	mean =   1.32e+05(ns)	sd =   3.14e+02(ns)	nsample =     5
        [     Union SIMD]	mean =   4.90e+03(ns)	sd =   7.29e+01(ns)	nsample =     5
        [Intersect Scalar]	mean =   1.31e+05(ns)	sd =   9.86e+02(ns)	nsample =     5
        [ Intersect SIMD]	mean =   4.24e+03(ns)	sd =   6.10e+01(ns)	nsample =     5
        [     XOR Scalar]	mean =   1.30e+05(ns)	sd =   2.51e+02(ns)	nsample =     5
        [       XOR SIMD]	mean =   4.90e+03(ns)	sd =   2.77e+02(ns)	nsample =     5
        [    Diff Scalar]	mean =   1.29e+05(ns)	sd =   4.77e+02(ns)	nsample =     5
        [      Diff SIMD]	mean =   4.32e+03(ns)	sd =   3.86e+01(ns)	nsample =     5
    """, simd_width=64),
    parse("""
        [Join-Full-Outer Scalar]	mean =   1.57e+05(ns)	sd =   2.91e+02(ns)	nsample =     5
        [Join-Full-Outer SIMD]	mean =   9.40e+04(ns)	sd =   6.57e+02(ns)	nsample =     5
        [Join-Inner Scalar]	mean =   1.44e+05(ns)	sd =   1.87e+02(ns)	nsample =     5
        [Join-Inner SIMD]	mean =   6.33e+04(ns)	sd =   9.71e+01(ns)	nsample =     5
        [Join-Outer-Ex Scalar]	mean =   1.52e+05(ns)	sd =   2.60e+02(ns)	nsample =     5
        [Join-Outer-Ex SIMD]	mean =   7.49e+04(ns)	sd =   7.56e+02(ns)	nsample =     5
        [Join-Left-Ex Scalar]	mean =   1.35e+05(ns)	sd =   3.37e+02(ns)	nsample =     5
        [Join-Left-Ex SIMD]	mean =   6.71e+04(ns)	sd =   9.68e+01(ns)	nsample =     5
        [Join-Left Scalar]	mean =   1.46e+05(ns)	sd =   8.14e+01(ns)	nsample =     5
        [ Join-Left SIMD]	mean =   6.70e+04(ns)	sd =   8.97e+01(ns)	nsample =     5
    """, simd_width=4),
    parse("""

    """, simd_width=8),
    parse("""

    """, simd_width=16),
    parse("""

    """, simd_width=32),
    parse("""

    """, simd_width=64),
])
measured

Unnamed: 0,mso,mean_ns,std_ns,simd_width
0,Union Scalar,132000.0,314.0,4
1,Union SIMD,67900.0,546.0,4
2,Intersect Scalar,131000.0,926.0,4
3,Intersect SIMD,54400.0,79.5,4
4,XOR Scalar,130000.0,356.0,4
5,XOR SIMD,59000.0,602.0,4
6,Diff Scalar,129000.0,281.0,4
7,Diff SIMD,56000.0,45.4,4
0,Union Scalar,132000.0,314.0,8
1,Union SIMD,31900.0,324.0,8


In [94]:
comparison = pd.merge(
    left=measured[['mso', 'simd_width', 'mean_ns', 'std_ns']].rename(
        columns={'mean_ns': 'mean_measured', 'std_ns': 'std_measured'}),
    right=expected[['mso', 'simd_width', 'mean_ns', 'std_ns']].rename(
        columns={'mean_ns': 'mean_expected', 'std_ns': 'std_expected'}),
    on=['mso', 'simd_width']
)
comparison['diff'] = comparison['mean_measured'] - comparison['mean_expected']
comparison['diff_ratio'] = comparison['diff'] / comparison['mean_expected']
comparison['abs_diff_percent'] = abs(comparison['diff_ratio']) * 100
comparison = comparison.sort_values(by='abs_diff_percent', ascending=False)
comparison

Unnamed: 0,mso,simd_width,mean_measured,std_measured,mean_expected,std_expected,diff,diff_ratio,abs_diff_percent
6,XOR SIMD,8,31600.0,537.0,30800.0,487.0,800.0,0.025974,2.597403
12,Union SIMD,32,8800.0,75.8,8990.0,63.7,-190.0,-0.021135,2.113459
4,Union SIMD,8,31900.0,324.0,32500.0,250.0,-600.0,-0.018462,1.846154
14,XOR SIMD,32,9040.0,386.0,8880.0,281.0,160.0,0.018018,1.801802
8,Union SIMD,16,16400.0,110.0,16700.0,87.8,-300.0,-0.017964,1.796407
10,XOR SIMD,16,16800.0,428.0,16600.0,403.0,200.0,0.012048,1.204819
2,XOR SIMD,4,59000.0,602.0,58400.0,668.0,600.0,0.010274,1.027397
0,Union SIMD,4,67900.0,546.0,67300.0,430.0,600.0,0.008915,0.89153
1,Intersect SIMD,4,54400.0,79.5,54800.0,765.0,-400.0,-0.007299,0.729927
13,Intersect SIMD,32,7790.0,63.1,7820.0,75.0,-30.0,-0.003836,0.383632


In [95]:
comparison['abs_diff_percent'].mean()

0.6661124263892003

In [96]:
(comparison['std_expected'] <= comparison['diff'].abs()).mean()

0.28