## Prelude

Imports & configs & utilities

In [2]:
#/usr/bin/python3
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from functools import reduce
from scipy.stats import gmean
# Make sure the directory that every fig.savefig() call writes into exists.
# exist_ok=True makes this a no-op when the directory is already there, while
# still raising if 'images' exists but is not a directory.
import os
os.makedirs('images', exist_ok=True)

# Notebook-wide matplotlib defaults. Text is rendered through LaTeX
# ('text.usetex'), so labels in the cells below may carry LaTeX markup.
# Disabled manual subplot margins, kept for reference:
#   'figure.subplot.left': 0.09,   'figure.subplot.right': 0.99,
#   'figure.subplot.bottom': 0.17, 'figure.subplot.top': 0.99
_fontSizes = {
  'font.size': 14,         # default text
  'axes.titlesize': 15,    # axes title
  'axes.labelsize': 15,    # x and y axis labels
  'xtick.labelsize': 14,   # x tick labels
  'ytick.labelsize': 14,   # y tick labels
  'legend.fontsize': 14,   # legend entries
  'figure.titlesize': 15,  # figure title
}
plt.rcParams.update({'text.usetex': True, **_fontSizes})
# Marker glyphs the line plots cycle through, one character per marker.
MARKERS = 'hosDxp*^'


def PLUS(x, y):
  """Return ``x + y``; a two-argument adder usable with ``functools.reduce``."""
  return x + y
def floatCsvToLst(csvPath):
  """Load the numeric body of a CSV file as a list of float rows.

  Leading rows with at most one field (titles, blank lines, ...) are
  skipped, and the first row with more than one field is consumed as the
  header. Every row after it is converted field by field with ``float``.

  Args:
    csvPath: Path of the CSV file to read.

  Returns:
    A list holding one list of floats per row that follows the header.

  Raises:
    StopIteration: If the file has no row with more than one field.
    ValueError: If a field after the header cannot be parsed as a float.
  """
  # The context manager closes the handle even when parsing fails;
  # newline='' is the mode the csv module asks for when reading files.
  with open(csvPath, 'r', newline='') as csvFile:
    reader = csv.reader(csvFile)
    # Discard everything up to and including the header row.
    while len(next(reader)) <= 1:
      pass
    return [[float(x) for x in row] for row in reader]

In [None]:
# Breakdown pies saved to images/zipfBreakdown.pdf: a single-ring pie on the
# left axes (AND) and a two-ring pie on the right axes (OR).
cmap = plt.colormaps["tab20c"]
fig, (andAx, orAx) = plt.subplots(1, 2, figsize=(8, 3))

# AND
# The 50.9 share is split between the first two slices in the ratio
# 12400 : 2520 (out of 14920).
innerLbl = [
  '\\textbf{1st} ($\\times12400$)',
  '\\textbf{2nd}\n\\textbf{Direct}\n($\\times2520$)',
  '\\textbf{2nd Semi}\n($\\times9880$)',
  '\\textbf{Partition}',
]
innerPct = [50.9*12400/14920, 50.9*2520/14920, 18.0, 7.3]
innerClr = cmap([1,4,8,12])
# Element [1] of pie()'s return value holds the slice-label Text objects.
andTexts = andAx.pie(innerPct, labels=innerLbl, colors=innerClr,
                     autopct='%1.1f%%', radius=1.1)[1]
for sliceText in andTexts:
  sliceText.set_color('indigo')

# OR
# Inner ring: fine-grained slices; only the first five carry a label.
innerLbl = ['Direct', 'Semi', "FinalTarget\nUpsweep", "Downsweep", 'resAssoc', '', '', '']
innerPct = [12.8,13.2, 10.3,5.1,26.2, 16.5/2,9.5/2, 5.2]
innerClr = cmap([3,1, 6,7,5, 10,11, 14])
# Outer ring: the same numbers summed into four groups.
outerLbl = ['\\textbf{AND}', '\\textbf{OR}', '\\textbf{Scan}\n\\textbf{Intermediate}', '\\textbf{Partition}']
outerPct = [12.8+13.2, 26.2+10.3+5.1, 16.5/2+9.5/2, 5.2]
outerClr = cmap(np.arange(4) * 4)

# The outer ring is drawn first so the smaller inner ring ends up on top.
orTexts = orAx.pie(outerPct, labels=outerLbl, colors=outerClr, radius=1.2)[1]
for sliceText in orTexts:
  sliceText.set_color('purple')
orAx.pie(innerPct, labels=innerLbl, colors=innerClr,
         autopct='%1.1f%%', radius=0.9)

# Place both axes by hand, in figure coordinates, then write the PDF.
for pieAxes, leftEdge in ((andAx, 0.0), (orAx, 0.5)):
  pieAxes.set_position([leftEdge, 0.08, 0.415, 0.87])
fig.savefig('images/zipfBreakdown.pdf')

In [None]:
# Two-ring breakdown pie saved to images/s34Breakdown.pdf.
cmap = plt.colormaps["tab20c"]

# Inner ring: individual slices. The values do not add up to 100.
innerLbl = [
  'resAssoc', "Fused Final\nTarget\nUpsweep", "Downsweep", "Partition",
  'Downsweep', 'Upsweep',
  'Partition', 'Final\nTarget',
]
innerPct = [28.1, 9.5, 6.2, 5.45, 10.3, 6.4, 3.05, 16.1]
innerClr = cmap([3,1,3,2, 6,7, 10,11])

# Outer ring: the inner values summed into three groups (4 + 2 + 2 slices).
outerLbl = ['\\textbf{OR}', '\\textbf{Scanning}\n\\textbf{Intermediates}', '\\textbf{AND}']
outerPct = [28.1+9.5+6.2+5.45, 10.3+6.4, 3.05+16.1]
outerClr = cmap(np.arange(3) * 4)

fig, pieAx = plt.subplots(figsize=(4, 3.2))

# Outer ring first; element [1] of pie()'s return value holds the label texts.
outerTexts = pieAx.pie(outerPct, labels=outerLbl, colors=outerClr, radius=1.2)[1]
for groupText in outerTexts:
  groupText.set_color('purple')

# Inner ring on top, with whole-number percentage annotations.
pieAx.pie(innerPct, labels=innerLbl, colors=innerClr,
          autopct='%1.0f%%', radius=0.9)

pieAx.set_position([0.14, 0.08, 0.83, 0.87])
fig.savefig('images/s34Breakdown.pdf')

## Draw SSB and TPC-H results
Create the databases with `ssb-dbgen` and the TPC-H `dbgen`, follow `stPrep.ipynb`,
then run `wahProfileGPU` with no arguments to reproduce the results.

In [None]:
# Grouped bar chart saved to images/stRes.pdf, one group per entry in 'case'
# and one timing list per series. The y axis is labelled 'Time (msecs)'.
# NOTE(review): the 0.0 entries presumably mark cases with no measurement --
# confirm. They are excluded from the geometric mean below.
bmDict = {
    'case': ['S12', 'S13', 'S23', 'S34', 'S41', 'T 3', 'T 6', 'T12', 'T17'],
    'merle': [0.068362, 0.045798, 0.040204, 0.241404, 0.094042, 0.256202, 0.147531, 0.131812, 0.146818],
    'mxfer': [0.080871, 0.037394, 0.043645, 0.079251, 0.634684, 0.631165, 0.453208, 0.103725, 0.502463],
    'dnq': [0.158576, 0.159712, 0.101960, 0.316917, 0.457562, 0.317097, 0.316275, 0.159244, 0.325601],
    'dxfer': [0.619867, 0.630213, 0.628804, 0.628414, 0.634684, 0.630186, 0.628895, 0.629510, 0.630182],
    'roaring': [0.619950, 0.356720, 0.111745, 0.764271, 1.458686, 2.212054, 1.172320, 0.841689, 0.861328],
    'RTScan': [0.537842, 0.468018, 0.190918, 0.0, 0.0, 0.55, 0.4, 0.0, 0.25],
    'Crystal': [1.1, 1.1, 3.0, 2.0, 6.1, 0.0, 0.0, 0.0, 0.0]
}

# Append a trailing 'geomean' group: for every timing series, the geometric
# mean of its strictly positive entries.
bmDict['case'].append('geomean')
for series in bmDict:
    if series == 'case':
        continue
    bmDict[series].append(gmean([x for x in bmDict[series] if x > 0]))

ind = np.arange(len(bmDict['case']))
BAR_WIDTH = 0.15
fig, ax = plt.subplots(figsize=(12, 3.2))

# Each series is drawn twice: a coloured hatched bar that carries the legend
# label, then a black outline on top of it. 'mxfer' / 'dxfer' are stacked on
# top of 'merle' / 'dnq' via bottom=.
ax.bar(ind, bmDict['merle'], BAR_WIDTH, label='MeRLE', edgecolor='orange',
       color='none', hatch='////')
ax.bar(ind, bmDict['merle'], BAR_WIDTH, edgecolor='black', color='none')
ax.bar(ind, bmDict['mxfer'], BAR_WIDTH, bottom=bmDict['merle'], label='Transfer',
       edgecolor='black', color='white')

ax.bar(ind + BAR_WIDTH, bmDict['dnq'], BAR_WIDTH, label='Dec-n-Query',
       edgecolor='green', color='none', hatch='++++')
ax.bar(ind + BAR_WIDTH, bmDict['dnq'], BAR_WIDTH, edgecolor='black', color='none')
ax.bar(ind + BAR_WIDTH, bmDict['dxfer'], BAR_WIDTH, bottom=bmDict['dnq'],
       edgecolor='black', color='none')

ax.bar(ind + 2 * BAR_WIDTH, bmDict['roaring'], BAR_WIDTH, label='Roaring$\\times16$',
       edgecolor='olive', color='none', hatch='\\\\\\\\')
ax.bar(ind + 2 * BAR_WIDTH, bmDict['roaring'], BAR_WIDTH, edgecolor='black', color='none')
ax.bar(ind + 3 * BAR_WIDTH, bmDict['RTScan'], BAR_WIDTH, label='RTScan',
       edgecolor='cornflowerblue', color='none', hatch='----')
ax.bar(ind + 3 * BAR_WIDTH, bmDict['RTScan'], BAR_WIDTH, edgecolor='black', color='none')
ax.bar(ind + 4 * BAR_WIDTH, bmDict['Crystal'], BAR_WIDTH,
       label='Crystal', edgecolor='orchid', color='none', hatch='xxxx')
ax.bar(ind + 4 * BAR_WIDTH, bmDict['Crystal'], BAR_WIDTH, edgecolor='black', color='none')

# Centre the tick under the middle (third) bar of each five-bar group.
ax.set_xticks(ind + 2 * BAR_WIDTH)
ax.set_xticklabels(bmDict['case'])
ax.set_ylabel('Time (msecs)')
ax.set_yscale('log')
ax.set_ylim(0.03, 7)
fig.legend(loc='outside upper center', ncol=6)
# Leave headroom above the axes for the figure-level legend.
fig.subplots_adjust(top=0.85)
fig.savefig('images/stRes.pdf')
# Cell output: the geomean (last) entry of four of the series.
bmDict['merle'][-1], bmDict['mxfer'][-1], bmDict['roaring'][-1], bmDict['RTScan'][-1]

In [None]:
# Grouped bar chart saved to images/zipf.pdf. The first five cases are
# labelled with a wedge and the last five with a vee; RTScan / RTxfer hold
# 0.0 for every vee case.
bmDictZ = {
    'case': ['$1.2\\wedge$', '$1.4\\wedge$', '$1.6\\wedge$', '$1.8\\wedge$', '$2.0\\wedge$',
             '$1.2\\vee$', '$1.4\\vee$', '$1.6\\vee$', '$1.8\\vee$', '$2.0\\vee$'],
    'merle': [0.110819, 0.094782, 0.080280, 0.067822, 0.057382, 0.287951, 0.263583, 0.238128, 0.214205, 0.191883],
    'mxfer': [0.251399, 0.200295, 0.154765, 0.117037, 0.087416, 0.469783, 0.441673, 0.406164, 0.367615, 0.328785],
    'roaring': [0.756046, 0.645176, 0.511496, 0.384339, 0.300323, 3.354645, 2.862961, 2.176883, 1.886276, 1.502227],
    'RTScan': [0.747126, 0.701149, 0.586207, 0.586207, 0.586207, 0.0, 0.0, 0.0, 0.0, 0.0],
    'RTxfer': [1.059692, 1.071578, 1.067202, 1.067202, 1.067202, 0.0, 0.0, 0.0, 0.0, 0.0]
}

BAR_WIDTH = 0.25
ind = np.arange(len(bmDictZ['case']))
fig, ax = plt.subplots(figsize=(6, 3.2))

# Every series gets a coloured hatched bar (which owns the legend entry)
# followed by a black outline; the transfer series are stacked via bottom=.

# First bar of each group: MeRLE with its transfer time stacked on top.
ax.bar(ind, bmDictZ['merle'], BAR_WIDTH, hatch='////', color='none',
       edgecolor='orange', label='MeRLE')
ax.bar(ind, bmDictZ['merle'], BAR_WIDTH, color='none', edgecolor='black')
ax.bar(ind, bmDictZ['mxfer'], BAR_WIDTH, bottom=bmDictZ['merle'],
       color='white', edgecolor='black', label='Transfer')

# Second bar: Roaring.
ax.bar(ind + BAR_WIDTH, bmDictZ['roaring'], BAR_WIDTH, hatch='\\\\\\\\',
       color='none', edgecolor='olive', label='Roaring$\\times16$')
ax.bar(ind + BAR_WIDTH, bmDictZ['roaring'], BAR_WIDTH, color='none', edgecolor='black')

# Third bar: RTScan with its transfer time stacked on top.
ax.bar(ind + 2 * BAR_WIDTH, bmDictZ['RTScan'], BAR_WIDTH, hatch='----',
       color='none', edgecolor='cornflowerblue', label='RTScan')
ax.bar(ind + 2 * BAR_WIDTH, bmDictZ['RTScan'], BAR_WIDTH, color='none', edgecolor='black')
ax.bar(ind + 2 * BAR_WIDTH, bmDictZ['RTxfer'], BAR_WIDTH, bottom=bmDictZ['RTScan'],
       color='none', edgecolor='black')

# Ticks sit under the middle bar of each three-bar group.
ax.set_xticks(ind + BAR_WIDTH)
ax.set_xticklabels(bmDictZ['case'])
ax.set_yscale('log')
ax.set_ylim(0.04, 4)
ax.set_ylabel('Time (msecs)')
fig.legend(loc='outside upper center', ncol=2)
# Leave headroom above the axes for the two-row figure legend.
fig.subplots_adjust(top=0.77)
fig.savefig('images/zipf.pdf')

In [None]:
# Creation-overhead comparison, saved to images/creation.pdf. Each case gets
# two stacked bars: 'cJoin' + 'cCmprs' (legend: Roaring Populate / Compress)
# and 'gJoin' + 'gCmprs' (legend: MeRLE Populate / Compress).
# NOTE(review): no y-axis label is set, so the unit of these numbers is not
# shown on the figure -- confirm whether that is intended.
bmDictCret = {
  'case': ['S12', 'S23', 'S34', 'S41', 'T3', 'T6', 'T12', 'T17'],
  'cJoin': [48, 58, 61, 86, 90, 49, 33, 265],
  'cCmprs': [32, 14, 4, 53, 74, 34, 21, 13],
  'gJoin': [9, 25, 17, 32, 23, 10, 6, 98],
  'gCmprs': [2, 1, 4, 3, 3, 2, 2, 2]
}

BAR_WIDTH = 0.35
ind = np.arange(len(bmDictCret['case']))
fig, ax = plt.subplots(figsize=(5, 3.2))

# Bottom segments: a hatched coloured bar that owns the legend entry, then a
# black outline drawn over it.
ax.bar(ind, bmDictCret['cJoin'], BAR_WIDTH, hatch='////', color='none',
       edgecolor='olive', label='Roaring Populate')
ax.bar(ind, bmDictCret['cJoin'], BAR_WIDTH, color='none', edgecolor='black')
ax.bar(ind + BAR_WIDTH, bmDictCret['gJoin'], BAR_WIDTH, hatch='\\\\', color='none',
       edgecolor='orange', label='MeRLE Populate')
ax.bar(ind + BAR_WIDTH, bmDictCret['gJoin'], BAR_WIDTH, color='none', edgecolor='black')

# Top segments, stacked on the bottom ones through bottom=.
ax.bar(ind, bmDictCret['cCmprs'], BAR_WIDTH, bottom=bmDictCret['cJoin'],
       color='lightgray', edgecolor='black', label='Roaring Compress')
ax.bar(ind + BAR_WIDTH, bmDictCret['gCmprs'], BAR_WIDTH, bottom=bmDictCret['gJoin'],
       color='red', edgecolor='darkred', label='MeRLE Compress')

# Ticks go midway between the two bars of a pair.
ax.set_xticks(ind + BAR_WIDTH / 2)
ax.set_xticklabels(bmDictCret['case'])
fig.legend(loc='outside upper center', ncol=2)
# Leave headroom above the axes for the two-row figure legend.
fig.subplots_adjust(top=0.77)
fig.savefig('images/creation.pdf')

### Output of `wc -c tbl/wah/*.wah`

```plaintext
 15483220 ./-12l0.wah
 15483204 ./-12l1.wah
  2938276 ./-12l2.wah
   925296 ./-23l0.wah
 15483208 ./-23l1.wah
   990728 ./-34l0.wah
   953332 ./-34l1.wah
  3162084 ./-34l2.wah
  3424840 ./-34l3.wah
  2936344 ./-34l4.wah
 15483216 ./-41l0.wah
 15483204 ./-41l1.wah
 15483200 ./-41l2.wah
 15019200 ./-41l3.wah
 15482812 ./3l0.wah
  1683452 ./3l1.wah
 15482036 ./3l2.wah
  3973420 ./3l3.wah
 15035712 ./3l4.wah
 11058368 ./6l0.wah
 15483156 ./6l1.wah
 15452596 ./6l2.wah
 15482116 ./12l0.wah
 15482104 ./12l1.wah
  2336684 ./12l2.wah
 12293044 ./17l0.wah
 14252332 ./17l1.wah
 15460632 ./17l2.wah
292207816 total
```

### Output of `wc -c tbl/wah/*.ra`

```plaintext
 15499277 ./-12l0.ra
 15498947 ./-12l1.ra
  1695744 ./-12l2.ra
   253431 ./-23l0.ra
 15498979 ./-23l1.ra
   552196 ./-34l0.ra
   531140 ./-34l1.ra
   897065 ./-34l2.ra
   981043 ./-34l3.ra
  1693476 ./-34l4.ra
 15498937 ./-41l0.ra
 15498981 ./-41l1.ra
 15498989 ./-41l2.ra
 15498680 ./-41l3.ra
 15498968 ./3l0.ra
   943468 ./3l1.ra
 15498980 ./3l2.ra
  1413283 ./3l3.ra
 15498688 ./3l4.ra
  4816433 ./6l0.ra
 15498779 ./6l1.ra
 15498900 ./6l2.ra
 15498663 ./12l0.ra
 15498615 ./12l1.ra
   711075 ./12l2.ra
  6051693 ./17l0.ra
  9632837 ./17l1.ra
 15498487 ./17l2.ra
262655754 total
```

In [None]:
# Memory-footprint comparison, saved to images/stMem.pdf. The byte counts are
# copied from the two `wc -c` listings in this notebook: 'gMem' sums *.wah
# sizes (legend: MeRLE) and 'cMem' sums *.ra sizes (legend: Roaring).
# Some *.wah sizes are counted twice; the rule for which ones is not stated
# here.
# NOTE(review): the S34 'gMem' sum has no term for -34l1.wah (953332 bytes),
# although 'cMem' includes -34l1.ra and every other case uses all of its
# files -- confirm whether the omission is intended.
bmDictMem = {
    'case': ['S12', 'S23', 'S34', 'S41', 'T3', 'T6', 'T12', 'T17'],
    'gMem': [
        15483220 + 15483204 + 2 * 2938276,  # S12
        2 * 925296 + 15483208,                  # S23
        2 * (990728 + 3162084 + 3424840 + 2936344),  # S34
        15483216 + 15483204 + 15483200 + 15019200,  # S41
        15482812 + 15035712 + 15482036 + 2 * (1683452 + 3973420),  # T3
        2 * (11058368 + 15483156) + 15452596,  # T6
        15482116 + 15482104 + 2 * 2336684,  # T12
        2 * (12293044 + 14252332) + 15460632  # T17
    ],
    'cMem': [
        15499277 + 15498947 + 1695744,  # S12
        253431 + 15498979,              # S23
        552196 + 531140 + 897065 + 981043 + 1693476,  # S34
        15498937 + 15498981 + 15498989 + 15498680,  # S41
        15498968 + 943468 + 15498980 + 1413283 + 15498688,  # T3
        4816433 + 15498779 + 15498900,  # T6
        15498663 + 15498615 + 711075,   # T12
        6051693 + 9632837 + 15498487    # T17
    ]
}
# Convert bytes to mebibytes (2**20 bytes).
for key in ['gMem', 'cMem']:
    bmDictMem[key] = [x / (1 << 20) for x in bmDictMem[key]]

ind = np.arange(len(bmDictMem['case']))
BAR_WIDTH = 0.35
fig, ax = plt.subplots(figsize=(5, 3.2))

# Each series: a hatched coloured bar that owns the legend entry, then a
# black outline on top of it.
ax.bar(ind, bmDictMem['cMem'], BAR_WIDTH, label='Roaring', edgecolor='olive', color='none', hatch='////')
ax.bar(ind, bmDictMem['cMem'], BAR_WIDTH, edgecolor='black', color='none')
ax.bar(ind + BAR_WIDTH, bmDictMem['gMem'], BAR_WIDTH, label='MeRLE', edgecolor='orange', color='none', hatch='\\\\')
ax.bar(ind + BAR_WIDTH, bmDictMem['gMem'], BAR_WIDTH, edgecolor='black', color='none')

ax.set_xticks(ind + BAR_WIDTH / 2)
ax.set_xticklabels(bmDictMem['case'])
# The values were divided by 2**20 above, so the unit is MiB rather than MB.
ax.set_ylabel('Memory Usage (MiB)')
fig.legend(loc='outside upper center', ncol=2)
# Leave headroom above the axes for the figure-level legend.
fig.subplots_adjust(top=0.85)
fig.savefig('images/stMem.pdf')

## Plotting Results on Bitmap Collections

The `benchCollections.sh` script generates the results needed for these plots. It may take a very
long time (>1 hour, and even longer if result verification is enabled).

The code below is admittedly horrific because it was written when I was first learning `matplotlib`.


In [9]:
def hybridTimeAggr(stashLst, opLst, inputNrByte, decPctThr, gpuThr, withXfer):
  """Sum a per-operation time over ``opLst`` and return ``inputNrByte`` over it.

  Each row of ``opLst`` starts with two indices into ``stashLst`` (its two
  operands). Which column of the row is charged depends on columns 3 and 4 of
  the operands' stash rows:

  * both operands have column 3 below ``gpuThr``: the row's last column;
  * otherwise each operand is flagged when its column 3 exceeds its column 4
    times a factor, and the charged column is 2 (both flagged), 5 (neither)
    or 8 (exactly one).

  NOTE(review): the flag presumably means "decompress this operand" and the
  last column presumably is a CPU-side time -- confirm against the benchmark
  that writes these CSV files.

  Args:
    stashLst: Per-bitmap rows; columns 3 and 4 are read.
    opLst: Per-operation rows; must be non-empty.
    inputNrByte: Numerator of the returned ratio.
    decPctThr: Threshold, divided by 50 to obtain the factor. Ignored (the
      factor is fixed at 99) when the first row has fewer than 12 columns.
    gpuThr: Bound on stash column 3 below which the last column is charged.
    withXfer: When true, add ``min(next column, 1.2 * the one after it)`` to
      the charged column.

  Returns:
    ``inputNrByte`` divided by the accumulated time.
  """
  factor = decPctThr / 50 if len(opLst[0]) >= 12 else 99

  def chargedTime(row, col):
    # Time in `col`, plus the cheaper of the two following transfer columns
    # (the second one weighted by 1.2) when transfers are accounted for.
    if withXfer:
      return row[col] + min(row[col + 1], row[col + 2] * 1.2)
    return row[col] + 0

  total = 0.0
  for row in opLst:
    left = stashLst[int(row[0])]
    right = stashLst[int(row[1])]
    if left[3] < gpuThr and right[3] < gpuThr:
      total += row[-1]
      continue
    leftFlag = left[3] > left[4] * factor
    rightFlag = right[3] > right[4] * factor
    if leftFlag and rightFlag:
      col = 2
    elif not leftFlag and not rightFlag:
      col = 5
    else:
      col = 8
    total += chargedTime(row, col)
  return inputNrByte / total

def hybridTimeAggrPlt(ax, stashLsts, opLsts, datNames, inputNrBytes, decPctThr, gpuThr, Xs, withXfer):
  """Plot one ``hybridTimeAggr`` curve per dataset on ``ax``.

  Args:
    ax: Matplotlib axes to draw on.
    stashLsts, opLsts, datNames, inputNrBytes: Parallel sequences with one
      entry per dataset: stash rows, operation rows, legend label and the
      numerator passed to ``hybridTimeAggr``.
    decPctThr, gpuThr: Parallel sequences of threshold pairs; each pair
      produces one point of every curve.
    Xs: X coordinates of the points, one per threshold pair.
    withXfer: Forwarded unchanged to ``hybridTimeAggr``.
  """
  datasets = zip(stashLsts, opLsts, datNames, inputNrBytes)
  for idx, (st, op, nm, sz) in enumerate(datasets):
    thputs = [hybridTimeAggr(st, op, sz, th1, th2, withXfer)
              for th1, th2 in zip(decPctThr, gpuThr)]
    # Cycle through MARKERS by its actual length so the wrap-around stays
    # correct if the marker string is ever edited.
    ax.plot(Xs, thputs, label=nm, marker=MARKERS[idx % len(MARKERS)], linewidth=2)

In [None]:
# Throughput versus threshold, without transfer time (withXfer=False), for the
# AND (left axes) and OR (right axes) operation logs of each dataset. Saved to
# images/semi.pdf.
figNoX, (axAndNoX, axOrNoX) = plt.subplots(1, 2, figsize=(8, 3.2))

# rateThr is used both as the swept decPctThr values and as the x coordinates.
rateThr = range(0, 110, 5)
datNames = ['wea', 'weaSrt', 'inc']
stashData = [floatCsvToLst(x) for x in map('{}_stash.csv'.format, datNames)]
# Column 1 of every *_decode.csv row is summed, scaled by 800 and divided by
# 2**30 to give the throughput numerator (the y axis is labelled in GiB).
inputNrBytes = [800 * reduce(PLUS, (x[1] for x in floatCsvToLst(dl))) / 2**30
                for dl in map('{}_decode.csv'.format, datNames)]

# AND is evaluated with gpuThr = 0 at every point.
# NOTE(review): OR below uses 20000 instead -- confirm the two panels are
# meant to use different thresholds.
encSzThr = (0,) * len(rateThr)
opData = [floatCsvToLst(y) for y in map('{}_and.csv'.format, datNames)]
hybridTimeAggrPlt(axAndNoX, stashData, opData, datNames, inputNrBytes, rateThr, encSzThr, rateThr, False)

# OR is evaluated with gpuThr = 20000 at every point.
encSzThr = (20000,) * len(rateThr)
opData = [floatCsvToLst(y) for y in map('{}_or.csv'.format, datNames)]
hybridTimeAggrPlt(axOrNoX, stashData, opData, datNames, inputNrBytes, rateThr, encSzThr, rateThr, False)

# axAndNoX.set_xlabel('(a) and', ha='left', x=0.3)
# axOrNoX.set_xlabel('(b) or', ha='right', x=0.6)
axAndNoX.set_ylabel('Throughput (GiB Inputs/s)')
# Both axes plot the same datasets in the same order, so naming only the
# first len(datNames) handles gives one legend entry per dataset.
figNoX.legend(datNames, loc='outside upper center', ncol=3)
# Leave headroom above the axes for the figure-level legend.
figNoX.subplots_adjust(top=0.86)

figNoX.savefig('images/semi.pdf')