## Prelude

Imports & configs & utilities

In [None]:
#/usr/bin/python3
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from functools import reduce
import os

# Create the output directory that every fig.savefig('images/...') call in this
# notebook writes into. exist_ok=True makes re-running the cell a no-op when the
# directory is already there, while a non-directory entry named 'images' still
# raises FileExistsError here instead of being silently ignored.
os.makedirs('images', exist_ok=True)

# Matplotlib defaults applied to every figure created after this cell.

# Render all text through LaTeX.
plt.rcParams['text.usetex'] = True

# Default subplot margins, as fractions of the figure size.
plt.rcParams.update({
  'figure.subplot.' + side: fraction
  for side, fraction in (('left', 0.09), ('right', 0.99),
                         ('bottom', 0.17), ('top', 0.99))
})

# Font sizes: 14 for body text, tick labels and legends ...
plt.rcParams.update(dict.fromkeys(
  ('font.size', 'xtick.labelsize', 'ytick.labelsize', 'legend.fontsize'), 14))
# ... and 15 for axes titles, axis labels and figure titles.
plt.rcParams.update(dict.fromkeys(
  ('axes.titlesize', 'axes.labelsize', 'figure.titlesize'), 15))

# One matplotlib marker code per character; the line-plot helpers index into
# this string to give each data set its own marker.
MARKERS = 'hosDxp*^'

def PLUS(x, y):
  """Return x + y; used as the folding function for functools.reduce."""
  return x + y

def floatCsvToLst(csvPath):
  """Load the numeric body of a CSV file as a list of float rows.

  Leading rows with at most one field (blank lines, single-field lines) are
  skipped. The first row with two or more fields is treated as the column
  header and dropped. Every remaining row is converted field by field with
  float().

  Args:
    csvPath: path of the CSV file to read.

  Returns:
    A list of rows in file order, each row being a list of floats.

  Raises:
    ValueError: if the file contains no row with more than one field, or if a
      data field cannot be parsed as a float.
  """
  # The with-block guarantees the handle is closed (the file used to be opened
  # and never closed); newline='' is the mode the csv module documents.
  with open(csvPath, 'r', newline='') as csvFile:
    reader = csv.reader(csvFile)
    for row in reader:
      if len(row) > 1:
        break  # header row found: consumed and discarded
    else:
      # Previously this surfaced as a bare StopIteration from next().
      raise ValueError(f'{csvPath}: no header row with more than one column')
    return [[float(field) for field in row] for row in reader]

## Draw SSB and TPC-H results
Create databases with `ssb-dbgen`, TPC-H `dbgen`, follow `stPrep.ipynb`,
then run `wahProfileGPU` with no argument to reproduce.

In [None]:
# Hard-coded per-query numbers for the SSB figure (saved as images/ssbRes.pdf).
# Every list is index-aligned with 'case'; the y-axis label below gives the
# unit as msecs.
# NOTE(review): presumably copied from the `wahProfileGPU` output mentioned in
# the section text — confirm the provenance when regenerating.
df = {
    'case': ['S12', 'S13', 'S23', 'S34', 'S41'],
    'merle': [0.193156, 0.092955, 0.077375, 0.696571, 0.422069],
    'mxfer': [0.145770, 0.064786, 0.081248, 0.145641, 1.208079],
    'dnq': [0.671066, 0.682726, 0.430283, 1.362166, 1.717493],
    'dxfer': [1.187220, 1.205836, 1.208296, 1.205427, 1.208079],
    'roaring': [2.220208, 1.131190, 0.288249, 3.281850, 4.567914],
    # NOTE(review): the 0.0 entries cannot be shown on the log-scaled axis;
    # presumably they mean "no result" for S34/S41 — confirm.
    'RTScan(SF=1)': [0.537842, 0.468018, 0.190918, 0.0, 0.0],
    'Crystal(V100)': [1.1, 1.1, 3.0, 2.0, 6.1]
}

# One group of bars per case; each series is shifted by a multiple of BAR_WIDTH.
ind = np.arange(len(df['case']))
# BAR_WIDTH is also read by the following TPC-H cell, which does not redefine it.
BAR_WIDTH = 0.15
fig, ax = plt.subplots(figsize=(8, 4))

# Each series is drawn twice at the same position: first with a coloured hatch
# (this call carries the legend label), then again with a plain black outline.
ax.bar(ind, df['merle'], BAR_WIDTH, label='MeRLE', edgecolor='orange',
       color='none', hatch='////')
ax.bar(ind, df['merle'], BAR_WIDTH, edgecolor='black', color='none')
# 'mxfer' is stacked on top of the 'merle' bar via bottom=.
ax.bar(ind, df['mxfer'], BAR_WIDTH, bottom=df['merle'], label='Transfer',
       edgecolor='black', color='white')

ax.bar(ind + BAR_WIDTH, df['dnq'], BAR_WIDTH, label='Dec-n-Query',
       edgecolor='green', color='none', hatch='++++')
ax.bar(ind + BAR_WIDTH, df['dnq'], BAR_WIDTH, edgecolor='black', color='none')
# 'dxfer' is stacked on top of 'dnq'; it has no label, so the single
# 'Transfer' legend entry above stands for both stacked segments.
ax.bar(ind + BAR_WIDTH, df['dxfer'], BAR_WIDTH, bottom=df['dnq'],
       edgecolor='black', color='none')

ax.bar(ind + 2 * BAR_WIDTH, df['roaring'], BAR_WIDTH, label='Roaring',
       edgecolor='olive', color='none', hatch='\\\\\\\\')
ax.bar(ind + 2 * BAR_WIDTH, df['roaring'], BAR_WIDTH, edgecolor='black', color='none')
ax.bar(ind + 3 * BAR_WIDTH, df['RTScan(SF=1)'], BAR_WIDTH, label='RTScan(SF=1)',
       edgecolor='cornflowerblue', color='none', hatch='----')
ax.bar(ind + 3 * BAR_WIDTH, df['RTScan(SF=1)'], BAR_WIDTH, edgecolor='black', color='none')
ax.bar(ind + 4 * BAR_WIDTH, df['Crystal(V100)'], BAR_WIDTH,
       label='Crystal(V100)', edgecolor='orchid', color='none', hatch='xxxx')
ax.bar(ind + 4 * BAR_WIDTH, df['Crystal(V100)'], BAR_WIDTH, edgecolor='black', color='none')

# Centre the tick under the middle (third) of the five bar positions.
ax.set_xticks(ind + 2 * BAR_WIDTH)
ax.set_xticklabels(df['case'])
ax.set_ylabel('Time (msecs)')
ax.set_yscale('log')
ax.set_ylim(0.05, 7)
# Figure-level legend above the axes; top=0.8 leaves room for it.
fig.legend(loc='outside upper center', ncol=3)
fig.subplots_adjust(top=0.8)
fig.savefig('images/ssbRes.pdf')

In [None]:
# Hard-coded numbers for the TPC-H figure (saved as images/tpcRes.pdf), laid out
# like the SSB cell: every list is index-aligned with 'case'. The last column
# is labelled 'geomean'.
# NOTE(review): it is not visible here which queries the geomean column
# aggregates — confirm before editing individual values.
df = {
    'case': ['T3', 'T6', 'T12', 'T17', 'geomean'],
    'merle': [0.971811, 0.629631, 0.537062, 0.617467, 0.351959],
    'mxfer': [1.206702, 0.939114, 0.190986, 0.859589, 0.308236],
    'dnq': [1.361083, 1.274706, 0.681731, 1.273838, 0.960362],
    'dxfer': [1.207735, 1.206871, 1.210284, 1.203316, 1.204767],
    'roaring': [6.364347, 2.775487, 2.468616, 3.163035, 2.252974],
    # NOTE(review): the 0.0 for T12 cannot be shown on the log-scaled axis;
    # presumably it means "no result" — confirm.
    'RTScan(SF=1)': [0.55, 0.4, 0.0, 0.25, 0.371839]
}

ind = np.arange(len(df['case']))
fig, ax = plt.subplots(figsize=(8, 3.2))

# BAR_WIDTH is not defined in this cell: it is the value assigned by the SSB
# cell above, so that cell must have been run first.
# No labels or legend here; the hatch/colour coding repeats the SSB figure's.
# Each series is drawn as a coloured hatch plus a black outline on top.
ax.bar(ind, df['merle'], BAR_WIDTH, edgecolor='orange', color='none', hatch='////')
ax.bar(ind, df['merle'], BAR_WIDTH, edgecolor='black', color='none')
# 'mxfer' is stacked on top of 'merle' via bottom=.
ax.bar(ind, df['mxfer'], BAR_WIDTH, bottom=df['merle'], edgecolor='black', color='white')

ax.bar(ind + BAR_WIDTH, df['dnq'], BAR_WIDTH, edgecolor='green', color='none', hatch='++++')
ax.bar(ind + BAR_WIDTH, df['dnq'], BAR_WIDTH, edgecolor='black', color='none')
# 'dxfer' is stacked on top of 'dnq' via bottom=.
ax.bar(ind + BAR_WIDTH, df['dxfer'], BAR_WIDTH, bottom=df['dnq'], edgecolor='black', color='none')

ax.bar(ind + 2 * BAR_WIDTH, df['roaring'], BAR_WIDTH, edgecolor='olive', color='none', hatch='\\\\\\\\')
ax.bar(ind + 2 * BAR_WIDTH, df['roaring'], BAR_WIDTH, edgecolor='black', color='none')
ax.bar(ind + 3 * BAR_WIDTH, df['RTScan(SF=1)'], BAR_WIDTH, edgecolor='cornflowerblue', color='none', hatch='----')
ax.bar(ind + 3 * BAR_WIDTH, df['RTScan(SF=1)'], BAR_WIDTH, edgecolor='black', color='none')

# Same tick offset as the SSB figure, although only four bar positions are
# used here (there is no Crystal series).
ax.set_xticks(ind + 2 * BAR_WIDTH)
ax.set_xticklabels(df['case'])
ax.set_yscale('log')
ax.set_ylabel('Time (msecs)')
ax.set_ylim(0.1, 7)
fig.savefig('images/tpcRes.pdf')

In [None]:
#!/bin/bash
# Each `!` line runs ../build/wahSynthGPU once through the shell, writes its
# result table to the file given by --out, and discards stdout (>/dev/null).
# The CSV files land in the current directory, where the plotting cells read
# them back with floatCsvToLst.

# Decode, r={10,40}%
! ../build/wahSynthGPU --maxTail 1 --maxFill 37 --op dec --out r10_dec.csv >/dev/null
! ../build/wahSynthGPU --maxTail 5 --maxFill 13 --op dec --out r40_dec.csv >/dev/null

# AND ANDno1 OR XOR, r=20%
# The ANDno1 variant is the same AND run with --fillDens 0.
! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --op and --out r20_and.csv >/dev/null
! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --fillDens 0 --op and \
  --out r20_andNo1.csv >/dev/null
! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --op or  --out r20_or.csv  >/dev/null
! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --op xor --out r20_xor.csv >/dev/null

# AND, r={10,20,30,40}%
# The r=20% AND result (r20_and.csv) is already produced in the group above.
! ../build/wahSynthGPU --maxTail 1 --maxFill 37 --op and --out r10_and.csv >/dev/null
# ! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --op or  --out r20_or.csv  >/dev/null
! ../build/wahSynthGPU --maxTail 2 --maxFill 13 --op and --out r30_and.csv >/dev/null
! ../build/wahSynthGPU --maxTail 5 --maxFill 13 --op and --out r40_and.csv >/dev/null

## \subsection{Decode Efficiency} \label{sec:expDec}

The decoding procedure is used in various situations, such as queries with explicit
decoding, and is a building block of semi-explicit operations, so we evaluate its
performance first. We compare our tile-based approach to the one
proposed by Andrzejewski \textit{et al.} \cite{dexa10} and optimized by Trans
\textit{et al.} \cite{dasfaa20}, which is not tile-based and requires the creation of several
large temporary global arrays.

\textbf{Synthetic data}.

## Running Synthetic Data

The running process is fairly quick, since it only involves a few made-up sequences
(about 2 seconds for each command line below).  
Therefore, all the commands that generate results on synthetic data are integrated into this notebook.  

Benchmarking on real data is long and extensive and is done in a separate script.

### Setup: Synthetic Data Generation

We use a generation method made specifically for WAH, in which multiple aspects of the
generated bit vector can be toggled. It is dictated by four independent parameters:

- Average fill length of fill words, $f_v$
- Average length of consecutive tail words, $t_v$
- Bit density of fill words, $f_d$
- Bit density of tail words, $t_d$

Notably, the compression rate of the generated WAH bit vector can be predetermined: on average,
a subsequence of length $t_v + 1$ has $t_v$ tail words and 1 fill word with fill
length $f_v$. These $t_v + 1$ encoded words enclose $t_v + f_v$ decoded words. Hence,  
$$ r = \frac{t_v + 1}{t_v + f_v} \times 100\% $$

The generation can be seen as a two-state Markov process, as shown in Figure
\ref{fig:syncGen}. Each time the current state falls in \texttt{Tail}, a tail word of
bit density $t_d$ is generated. When \textit{transitioning} from \texttt{Tail} state
to \texttt{Fill}, a fill word is produced with a $f_d$ chance of being a 1-fill.
Staying in \texttt{Fill} state would add 1 to the fill word's fill length.

Later sections explore how the compression rate affects performance. Four $(t_v, f_v)$
configurations are chosen, yielding compression rates of approximately $10\%, 20\%, 30\%,
40\%$ respectively. These configurations are used throughout.

- $t_v = 1, f_v = 19, r = 10\%$
- $t_v = 1, f_v = 9, r = 20\%$
- $t_v = 1.5, f_v = 7, r \approx 30\%$
- $t_v = 3, f_v = 7, r = 40\%$

### Trust me bro

The efficiency of AND without 1-fills does not change with the compression rate.

In [None]:
#!/bin/bash
# ANDno1, r={10,20,30,40}%
! ../build/wahSynthGPU --maxTail 1 --maxFill 37 --fillDens 0 --op and \
  --out r10_andNo1.csv >/dev/null
# ! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --fillDens 0 --op and \
  # --out r20_andNo1.csv >/dev/null
! ../build/wahSynthGPU --maxTail 3 --maxFill 13 --fillDens 0 --op and \
  --out r30_andNo1.csv >/dev/null
! ../build/wahSynthGPU --maxTail 5 --maxFill 13 --fillDens 0 --op and \
  --out r40_andNo1.csv >/dev/null

### Logical Operation Efficiency vs. Other Parameters

- AND gets worse when fill word bit density increases. OR/XOR not affected
- Tail word bit density does not matter.
- Only the compression rate matters. The exact values of the average tail length and average fill
  length do not affect efficiency as long as the compression rate remains unchanged.

No figures :)

In [None]:
#!/bin/bash
# Spot checks backing the bullet list above. Every step prints only the last
# line of a result table (`tail -1`): either from a CSV written by an earlier
# cell (the baseline) or from a fresh wahSynthGPU run with one parameter
# changed. Nothing is written to disk here.
# The commented-out commands repeat the baseline with the parameter spelled
# out, using the value named in the trailing comment on the baseline line.

! echo Changing Bit Density in Fill words: AND
! tail -1 r20_and.csv # --fillDens 1 --tailDens 4
# ! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --fillDens 1 --op and | tail -1
! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --fillDens 2 --op and | tail -1
! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --fillDens 16 --op and | tail -1
! echo Changing Bit Density in Fill words: OR
! tail -1 r20_or.csv # --fillDens 1 --tailDens 4
# ! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --fillDens 1 --op or | tail -1
! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --fillDens 16 --op or | tail -1

! echo Changing Bit Density in Tail words
! tail -1 r20_and.csv # --fillDens 1 --tailDens 4
# ! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --tailDens 1 --op and | tail -1
! ../build/wahSynthGPU --maxTail 1 --maxFill 17 --tailDens 16 --op and | tail -1

# Different tail/fill lengths from the baseline, to compare against the
# r20_and.csv result.
! echo Changing Tail/Fill Length
! tail -1 r20_and.csv # --maxTail 1 --maxFill 17
! ../build/wahSynthGPU --maxTail 3 --maxFill 25 --op and | tail -1

## Plotting Synthetic Data

No helper functions are defined for the CPU throughput and synthetic data plots.

In [None]:
# Figure images/synthOp.pdf: two panels (AND and OR) built from the r = 20%
# synthetic-data CSVs.
fig, (axAnd, axOr) = plt.subplots(1, 2, figsize=(8, 3.2))

# For each CSV row the first column is used as x and the last two columns,
# divided by 2**30, are drawn as two curves.
bruh = floatCsvToLst('r20_and.csv')
xs = [x[0] for x in bruh]
axAnd.plot(xs, [(x[-2] / 2**30, x[-1] / 2**30) for x in bruh],
           label=('Dec-n-Query', 'MeRLE-Direct'))
axAnd.set_ylabel('Throughput (GiB/s)')
# The x values taken from r20_and.csv are reused for the other two files —
# assumes all three CSVs have the same rows in the same order; TODO confirm.
bruh = floatCsvToLst('r20_or.csv')
axOr.plot(xs, [(x[-2] / 2**30, x[-1] / 2**30) for x in bruh],
           label=('Decode-n-Query', 'MeRLE-Direct'))

# Third curve on the AND panel: last column of the --fillDens 0 run.
bruh = floatCsvToLst('r20_andNo1.csv')
axAnd.plot(xs, [x[-1] / 2**30 for x in bruh], label='No1Fill')

axAnd.set_xscale('log'); axOr.set_xscale('log')#; axXor.set_xscale('log')
axAnd.set_xlabel('(a) AND, $r = 20\\%$')
axOr.set_xlabel('(b) OR, $r = 20\\%$')

# The legend texts are given explicitly here rather than taken from the label=
# arguments above. NOTE(review): they are presumably matched to the lines in
# creation order (the three AND-panel lines first) — verify when reordering
# any plot call.
fig.legend(['Decode and Query', 'MeRLE-Direct', 'No 1 Run Specialization'],
           loc='outside upper center', ncol=3)
fig.subplots_adjust(top=0.85)
fig.savefig('./images/synthOp.pdf')

In [None]:
# fig, (ax10, ax30, ax40) = plt.subplots(1, 3, figsize=(12, 3))
# Figure images/synthRate.pdf: AND throughput at r = 10% and r = 30%. The
# panel captions continue the (a)/(b) numbering of the synthOp figure.
# The commented-out three-panel variant on the line above this block and the
# ax40 remnant further down belong to a dropped r = 40% panel.
fig, (ax10, ax30) = plt.subplots(1, 2, figsize=(8, 3))
# First column is x; the last two columns, divided by 2**30, are the two curves.
bruh = floatCsvToLst('r10_and.csv'); xs = [x[0] for x in bruh]
ax10.plot(xs, [(x[-2] / 2**30, x[-1] / 2**30) for x in bruh],
           label=('Decode-n-Query', 'MeRLE-Direct'))
ax10.set_ylabel('Throughput (GiB Inputs/s)')
# The x values from r10_and.csv are reused for r30_and.csv — assumes both
# files have the same rows in the same order; TODO confirm.
bruh = floatCsvToLst('r30_and.csv')
ax30.plot(xs, [(x[-2] / 2**30, x[-1] / 2**30) for x in bruh],
           label=('Decode-n-Query', 'MeRLE-Direct'))
ax10.set_xlabel('(c) AND, $r = 10\\%$')
ax30.set_xlabel('(d) AND, $r = 30\\%$')

# No legend is drawn on this figure; the label= values above are unused.
ax10.set_xscale('log'); ax30.set_xscale('log')#; ax40.set_xscale('log')
fig.savefig('./images/synthRate.pdf')
# Remove the scratch names from the kernel namespace.
del bruh; del xs

## Plotting Results on Real Data

The `benchReal.sh` script generates the results needed for these plots. The script may take a very
long time (>40 min on my crappy laptop GPU, although most of that time is spent running slow CPU
procedures for comparison and verification).

A function is defined for each type of plot because there are quite a lot of plots.

### Comparing direct operation with explicit & plain

In [None]:
def simpleTimeAggr(stashLst, opLst, inputNrByte, withXfer):
  """Aggregate one data set's operation timings into three throughput figures.

  Each row of opLst names two stash entries in columns 0 and 1 and carries the
  timings in the following columns. Three running totals are kept:
    * plain:  column 2 only;
    * decode: column 2 plus column 2 of both referenced stash rows;
    * direct: column 5.
  With withXfer set, min(col 3, 1.2 * col 4) is added to the first two totals
  and min(col 6, 1.2 * col 7) to the third.
  NOTE(review): the meaning of the individual columns and of the 1.2 factor is
  not visible here — confirm against the benchmark's CSV writer.

  Args:
    stashLst: rows of the data set's *_stash.csv.
    opLst: rows of the data set's operation CSV.
    inputNrByte: numerator of the throughput (the input size).
    withXfer: whether to include the transfer terms.

  Returns:
    Tuple (plain, decode, direct), each being inputNrByte / total time.
  """
  plainTime, decodeTime, directTime = 0.0, 0.0, 0.0
  for row in opLst:
    lhs, rhs = int(row[0]), int(row[1])
    plainTime += row[2]
    decodeTime += row[2] + stashLst[lhs][2] + stashLst[rhs][2]
    directTime += row[5]
    if not withXfer:
      continue
    # Both explicit variants pay the same transfer term.
    explXfer = min(row[3], row[4] * 1.2)
    plainTime += explXfer
    decodeTime += explXfer
    directTime += min(row[6], row[7] * 1.2)
  plainThput = inputNrByte / plainTime
  decodeThput = inputNrByte / decodeTime
  directThput = inputNrByte / directTime
  return plainThput, decodeThput, directThput

def simpleTimeAggrPlt(ax, stashLsts, opLsts, datNames, inputNrBytes, withXfer):
  """Draw a grouped bar chart of the simpleTimeAggr throughputs per data set.

  For every data set the three values returned by simpleTimeAggr are rounded
  to one decimal and drawn as adjacent bars labelled 'Plain', 'Dec-n-Query'
  and 'MeRLE-Direct' (in that order). Only the 'MeRLE-Direct' bars get a value
  label.

  Args:
    ax: matplotlib Axes to draw on.
    stashLsts, opLsts, inputNrBytes: per-data-set arguments for simpleTimeAggr,
      index-aligned with each other.
    datNames: tick labels, one per data set.
    withXfer: forwarded to simpleTimeAggr.
  """
  # fig, ax = plt.subplots(figsize=(6.2, 3))
  BAR_WIDTH = 0.25
  nrDat = len(stashLsts)

  # Row = data set, column = series (same order as simpleTimeAggr's result).
  results = np.zeros((nrDat, 3))
  for row, (stash, ops, nrByte) in enumerate(zip(stashLsts, opLsts, inputNrBytes)):
    thputs = simpleTimeAggr(stash, ops, nrByte, withXfer)
    for col in range(3):
      results[row, col] = round(thputs[col], 1)

  groupStart = np.arange(nrDat)
  for col, seriesName in enumerate(('Plain', 'Dec-n-Query', 'MeRLE-Direct')):
    rects = ax.bar(groupStart + col * BAR_WIDTH, results[:, col],
                   width=BAR_WIDTH, label=seriesName)
  # `rects` now refers to the last series drawn, i.e. 'MeRLE-Direct'.
  ax.bar_label(rects, padding=-5)
  # Ticks sit under the middle bar of each group.
  ax.set_xticks(groupStart + BAR_WIDTH, datNames)

In [None]:
# Two figures from the real-data results: images/direct.pdf (and/or/xor without
# the transfer terms) and images/xferDirect.pdf (and/or with them).
figNoX, (axAndNoX, axOrNoX, axXorNoX) = plt.subplots(1, 3, figsize=(12, 3.2))
figHasX, (axAndHasX, axOrHasX) = plt.subplots(1, 2, figsize=(8, 3))
# Per data set, three CSVs are read from the current directory:
# <name>_stash.csv, <name>_<op>.csv and <name>_decode.csv.
datNames = ['leak', 'leakSrt', 'wea', 'weaSrt', 'inc', 'incSrt']
stashData = [floatCsvToLst(x) for x in map('{}_stash.csv'.format, datNames)]
opData = [floatCsvToLst(y) for y in map('{}_and.csv'.format, datNames)]
# Throughput numerator per data set: 800 * (sum of column 1 of *_decode.csv),
# scaled by 2**30.
# NOTE(review): the factor 800 is not explained here (decodeThputPlt uses
# 4 * the same column) — confirm what it accounts for.
inputNrBytes = [800 * reduce(PLUS, (x[1] for x in floatCsvToLst(dl))) / 2**30
                for dl in map('{}_decode.csv'.format, datNames)]

simpleTimeAggrPlt(axAndNoX, stashData, opData, datNames, inputNrBytes, False)
simpleTimeAggrPlt(axAndHasX, stashData, opData, datNames, inputNrBytes, True)

# opData is rebound for each operation; stashData and inputNrBytes are shared.
opData = [floatCsvToLst(y) for y in map('{}_or.csv'.format, datNames)]
simpleTimeAggrPlt(axOrNoX, stashData, opData, datNames, inputNrBytes, False)
simpleTimeAggrPlt(axOrHasX, stashData, opData, datNames, inputNrBytes, True)

# xor is plotted only in the no-transfer figure.
opData = [floatCsvToLst(y) for y in map('{}_xor.csv'.format, datNames)]
simpleTimeAggrPlt(axXorNoX, stashData, opData, datNames, inputNrBytes, False)

figNoX.subplots_adjust(top=0.85, left=0.05, bottom=0.17)
# Explicit legend texts for the three bar series; only figNoX gets a legend.
# NOTE(review): presumably matched to the bar series in drawing order
# (Plain, Dec-n-Query, MeRLE-Direct) — verify if the series order changes.
figNoX.legend(['No Compression', 'Decode-and-Query', 'MeRLE-Direct'],
              loc='outside upper center', ncol=3)
axAndHasX.set_xlabel('(a) and')
axAndNoX.set_xlabel('(a) and')
axOrHasX.set_xlabel('(b) or')
axOrNoX.set_xlabel('(b) or')
axXorNoX.set_xlabel('(c) xor')
axAndNoX.set_ylabel('Throughput (GiB Inputs/s)')
axAndHasX.set_ylabel('Throughput (GiB Inputs/s)')

figNoX.savefig('images/direct.pdf')
figHasX.savefig('images/xferDirect.pdf')

In [None]:
def hybridTimeAggr(stashLst, opLst, inputNrByte, decPctThr, gpuThr, withXfer):
  """Throughput of one data set under the hybrid (threshold-driven) policy.

  For every operation row the time added to the total is chosen as follows:
    * if column 3 of both referenced stash rows is below gpuThr, the row's
      last column is used and nothing else is added;
    * otherwise each operand counts as "decoded" when its stash column 3
      exceeds stash column 4 times the scaled threshold. Both decoded selects
      column 2, neither selects column 5, exactly one selects column 8. With
      withXfer, min(next column, 1.2 * the one after) is added on top.
  The scaled threshold is decPctThr / 50, except that operation tables with
  fewer than 12 columns force it to 99.
  NOTE(review): column meanings (presumably encoded vs. decoded sizes and
  per-strategy timings) are not visible here — confirm against the CSV writer.

  Args:
    stashLst: rows of the data set's *_stash.csv.
    opLst: rows of the data set's operation CSV (must be non-empty).
    inputNrByte: numerator of the throughput.
    decPctThr: decode threshold before scaling.
    gpuThr: size threshold for the first rule above.
    withXfer: whether to include the transfer terms.

  Returns:
    inputNrByte divided by the accumulated time.
  """
  ratio = decPctThr / 50 if len(opLst[0]) >= 12 else 99
  # First timing column for 0, 1 or 2 decoded operands respectively.
  firstColByNrDecoded = (5, 8, 2)

  total = 0.0
  for row in opLst:
    lhs, rhs = int(row[0]), int(row[1])
    left, right = stashLst[lhs], stashLst[rhs]
    if left[3] < gpuThr and right[3] < gpuThr:
      total += row[-1]
      continue
    # lDec = lStash[3] / 2 > lStash[4] * decPctThr / 100
    nrDecoded = int(left[3] > left[4] * ratio) + int(right[3] > right[4] * ratio)
    col = firstColByNrDecoded[nrDecoded]
    if withXfer:
      cost = row[col] + min(row[col + 1], row[col + 2] * 1.2)
    else:
      cost = row[col] + 0
    total += cost
  return inputNrByte / total

def hybridTimeAggrPlt(ax, stashLsts, opLsts, datNames, inputNrBytes, decPctThr, gpuThr, Xs, withXfer):
  """Plot hybridTimeAggr throughput curves, one line per data set.

  Args:
    ax: matplotlib Axes to draw on.
    stashLsts, opLsts, inputNrBytes: per-data-set arguments for hybridTimeAggr,
      index-aligned with datNames.
    datNames: line labels, one per data set.
    decPctThr, gpuThr: index-aligned sequences of threshold pairs; each pair
      yields one point of every curve.
    Xs: x coordinate of each point (one per threshold pair).
    withXfer: forwarded to hybridTimeAggr.
  """
  for idx, (st, op, nm, sz) in enumerate(zip(stashLsts, opLsts, datNames, inputNrBytes)):
    thputs = [hybridTimeAggr(st, op, sz, th1, th2, withXfer)
              for th1, th2 in zip(decPctThr, gpuThr)]
    # Cycle through MARKERS by its actual length rather than a hard-coded 8, so
    # changing the marker string cannot cause an IndexError or skipped markers.
    ax.plot(Xs, thputs, label=nm, marker=MARKERS[idx % len(MARKERS)], linewidth=2)

In [None]:
# Two figures of hybrid-policy throughput against the compression-rate
# threshold: images/semi.pdf (no transfer terms) and images/xferSemi.pdf
# (with transfer terms).
figNoX, (axAndNoX, axOrNoX) = plt.subplots(1, 2, figsize=(8, 3.4))
figHasX, (axAndHasX, axOrHasX) = plt.subplots(1, 2, figsize=(8, 3))

# Threshold sweep 0, 5, ..., 105; used both as the decode threshold and as
# the x coordinates.
rateThr = range(0, 110, 5)
# One size threshold (gpuThr) per sweep point; constant along each curve.
encSzThr = (0,) * len(rateThr)
datNames = ['leak', 'wea', 'weaSrt', 'inc']
stashData = [floatCsvToLst(x) for x in map('{}_stash.csv'.format, datNames)]
opData = [floatCsvToLst(y) for y in map('{}_and.csv'.format, datNames)]
# Same throughput numerator as in the direct-operation figures.
inputNrBytes = [800 * reduce(PLUS, (x[1] for x in floatCsvToLst(dl))) / 2**30
                for dl in map('{}_decode.csv'.format, datNames)]

# AND, no transfer: size threshold 0.
hybridTimeAggrPlt(axAndNoX, stashData, opData, datNames, inputNrBytes, rateThr, encSzThr, rateThr, False)
encSzThr = (20000,) * len(rateThr)
# AND, with transfer: size threshold 20000.
hybridTimeAggrPlt(axAndHasX, stashData, opData, datNames, inputNrBytes, rateThr, encSzThr, rateThr, True)

# NOTE(review): the OR no-transfer panel below is drawn with size threshold
# 20000, whereas the AND no-transfer panel of the same figure uses 0, and the
# figNoX caption does not mention s. Confirm whether 0 was intended here.
# This assignment and the next one also repeat the value already set above.
encSzThr = (20000,) * len(rateThr)
opData = [floatCsvToLst(y) for y in map('{}_or.csv'.format, datNames)]
hybridTimeAggrPlt(axOrNoX, stashData, opData, datNames, inputNrBytes, rateThr, encSzThr, rateThr, False)
encSzThr = (20000,) * len(rateThr)
hybridTimeAggrPlt(axOrHasX, stashData, opData, datNames, inputNrBytes, rateThr, encSzThr, rateThr, True)

# Panel captions are pushed towards the outer edges; the shared x-axis title
# is written as figure text in the middle instead.
axAndNoX.set_xlabel('(a) and', ha='left', x=0.2)
axOrNoX.set_xlabel('(b) or', ha='right', x=0.7)
axAndHasX.set_xlabel('(a) and', ha='left', x=0.1)
axOrHasX.set_xlabel('(b) or', ha='right', x=0.8)
axAndNoX.set_ylabel('Throughput (GiB Inputs/s)')
axAndHasX.set_ylabel('Throughput (GiB Inputs/s)')
figNoX.text(0.5, 0.03, 'Compression Rate Threshold $r_t$', ha="center", va = 'center')
# encSzThr[0] is the last value assigned above (20000).
figHasX.text(0.5, 0.02, f'Compression Rate Threshold $r_t$, with $s = {encSzThr[0]}$',
             ha="center", va = 'center')
# Only figNoX gets a legend; the texts repeat datNames.
figNoX.legend(['leak', 'wea', 'weaSrt', 'inc'], loc='outside upper center', ncol=4)
figNoX.subplots_adjust(top=0.86, left=0.08)

figNoX.savefig('images/semi.pdf')
figHasX.savefig('images/xferSemi.pdf')

In [None]:
def hybridMemAggr(stashLst, decPctThr):
  """Total size of a data set's stash under the hybrid decode policy.

  For each stash row, column 4 is counted when column 3 exceeds column 4 times
  (decPctThr / 50); otherwise column 3 is counted. The sum is multiplied by 4.
  NOTE(review): presumably columns 3 and 4 are encoded and decoded sizes in
  32-bit words, making the result a byte count — confirm against the CSV writer.

  Args:
    stashLst: rows of the data set's *_stash.csv; may be empty.
    decPctThr: decode threshold before scaling.

  Returns:
    The summed size times 4; 0 for an empty stashLst (the previous
    reduce-based version raised TypeError in that case).
  """
  ratio = decPctThr / 50
  # Built-in sum() replaces reduce() with the module-level PLUS helper.
  return sum(st[4] if st[3] > st[4] * ratio else st[3] for st in stashLst) * 4

def hybridMemAggrPlt(ax, stashLsts, datNames, decPctThr):
  """Plot hybridMemAggr against the threshold, one line per data set.

  Args:
    ax: matplotlib Axes to draw on; axis labels and a two-column legend are
      set here as well.
    stashLsts: per-data-set stash rows, index-aligned with datNames.
    datNames: line labels, one per data set.
    decPctThr: sequence of thresholds; used as x values and passed one by one
      to hybridMemAggr.
  """
  for idx, (st, nm) in enumerate(zip(stashLsts, datNames)):
    # hybridMemAggr's result is divided by 2**20 to match the MiB axis label.
    # The marker index wraps at len(MARKERS) instead of a hard-coded 8, so it
    # stays valid if the marker string changes.
    ax.plot(decPctThr, [hybridMemAggr(st, th) / 2**20 for th in decPctThr],
            label=nm, marker=MARKERS[idx % len(MARKERS)], linewidth=2)
  ax.set_ylabel('GPU Memory (MiB)')
  ax.set_xlabel('Compression Rate Threshold $r_t$')
  ax.legend(ncol=2)

In [None]:
# Figure images/mem.pdf: stash size against the compression-rate threshold.

# Inputs: threshold sweep 0, 5, ..., 105 and one <name>_stash.csv per data set.
datNames = ['leak', 'wea', 'weaSrt', 'inc']
rateThr = range(0, 110, 5)
stashData = [floatCsvToLst('{}_stash.csv'.format(name)) for name in datNames]

# Draw and save.
fig, ax = plt.subplots(figsize=(8, 3))
fig.subplots_adjust(left=0.09)
hybridMemAggrPlt(ax, stashData, datNames, rateThr)
fig.savefig('images/mem.pdf')

In [None]:
def decodeThputPlt(ax, decodeLsts, datNames):
  """Draw paired bars comparing two decode throughputs per data set.

  For each data set the numerator is 4 * (sum of column 1) and the two
  denominators are 2**30 * (sum of column 3) and 2**30 * (sum of column 4).
  Both quotients are rounded to two decimals and drawn side by side, labelled
  'Original' and 'MeRLE-Decode'.
  NOTE(review): presumably column 1 is a word count and columns 3/4 are the
  two decoders' times — confirm against the *_decode.csv writer.

  Args:
    ax: matplotlib Axes to draw on; y label, ticks and legend are set here.
    decodeLsts: per-data-set rows of *_decode.csv, index-aligned with datNames.
    datNames: tick labels, one per data set.
  """
  BAR_WIDTH = 0.35
  nrDat = len(datNames)

  def columnTotal(rows, col):
    # Sum of one column over all rows of a data set.
    return reduce(PLUS, (row[col] for row in rows))

  inputNrBytes = [4 * columnTotal(dl, 1) for dl in decodeLsts]
  thputs = []
  for timeCol in (3, 4):
    thput = np.divide(inputNrBytes,
                      [(2**30) * columnTotal(dl, timeCol) for dl in decodeLsts])
    np.round(thput, 2, out=thput)  # round in place
    thputs.append(thput)

  groupStart = np.arange(nrDat)
  series = (
    (thputs[0], 'Original', {}),
    (thputs[1], 'MeRLE-Decode', {'padding': -3}),
  )
  for slot, (heights, seriesName, labelKw) in enumerate(series):
    rects = ax.bar(groupStart + slot * BAR_WIDTH, heights,
                   width=BAR_WIDTH, label=seriesName)
    ax.bar_label(rects, **labelKw)
  ax.set_ylabel('Throughput (GiB Inputs/s)')
  ax.set_xticks(groupStart + BAR_WIDTH, datNames)
  ax.legend()

In [None]:
# Figure images/decodeCompare.pdf: decode throughput on the real data sets.

# One <name>_decode.csv per data set, read from the current directory.
datNames = ['leak', 'leakSrt', 'wea', 'weaSrt', 'inc', 'incSrt']
decodeData = [floatCsvToLst('{}_decode.csv'.format(name)) for name in datNames]

fig, ax = plt.subplots(figsize=(8, 3))
decodeThputPlt(ax, decodeData, datNames)
# No x-axis title on this figure, so the bottom margin is reduced.
fig.subplots_adjust(bottom=0.09, top=0.98)
fig.savefig('images/decodeCompare.pdf')

In [None]:
# Figure images/synthDecode.pdf: decode throughput on synthetic data at
# r = 10% and r = 40%.
fig, (ax10, ax40) = plt.subplots(1, 2, figsize=(8, 3))
# First column is x; the last two columns, divided by 2**30, are drawn as the
# 'Original' and 'MeRLE-Decode' curves.
bruh = floatCsvToLst('r10_dec.csv')
xs = [x[0] for x in bruh]
ax10.plot(xs, [(x[-2] / (2**30), x[-1] / (2**30)) for x in bruh],
          linewidth=2, label=('Original', 'MeRLE-Decode'))
# ax10.legend()
ax10.set_xscale('log')
ax10.set_xlabel('(a) $r = 10\\%$')
ax10.set_ylabel('Throughput (GiB/s)')

# The x values from r10_dec.csv are reused for r40_dec.csv — assumes both
# files have the same rows in the same order; TODO confirm.
bruh = floatCsvToLst('r40_dec.csv')
ax40.plot(xs, [(x[-2] / (2**30), x[-1] / (2**30)) for x in bruh],
          linewidth=2, label=('Original', 'MeRLE-Decode'))
ax40.set_xlabel('(b) $r = 40\\%$')
# Only the right panel carries a legend.
ax40.legend(); ax40.set_xscale('log')

fig.savefig('./images/synthDecode.pdf')