# Pystencils Multi Phase LBM Kernel - Performance and Volumes

In [None]:

import sys 
sys.path.append('../pystencils')
sys.path.append('../genpredict')

%load_ext autoreload
%autoreload 1




In [None]:
import math
import time

from griditeration import *
from predict_metrics import *
from volumes_isl import *
from plot_utils import *

from measured_metrics import MeasuredMetrics, ResultComparer

import random




from meas_db import MeasDB

# Measurement database handles; the cells below query them via getEntry /
# getBasicMetricsRange.
lbmdb = MeasDB("multiphaselbm.db")
stencildb = MeasDB("3dstencils.db")


In [None]:


# Device description handed to every database query and DerivedMetrics call below.
device = DeviceAmpere()
print(device.name)

def nextStencilBlockSize():
    """Yield every (x, y, z) block size with exactly 1024 threads.

    x and y run over the powers of two 1..1024, z over the powers of two 1..64.
    Tuples are produced in nested-loop order (x outermost, z innermost).
    """
    xyExtents = [1 << e for e in range(11)]  # 1, 2, ..., 1024
    zExtents = [1 << e for e in range(7)]    # 1, 2, ..., 64
    yield from (
        (bx, by, bz)
        for bx in xyExtents
        for by in xyExtents
        for bz in zExtents
        if bx * by * bz == 1024
    )
                
def nextLBMBlockSize():
    """Yield every (x, y, z) block size with 256 or 512 threads.

    x and y run over the powers of two 1..512, z over the powers of two 1..64.
    Tuples are produced in nested-loop order (x outermost, z innermost).
    """
    xyExtents = [1 << e for e in range(10)]  # 1, 2, ..., 512
    zExtents = [1 << e for e in range(7)]    # 1, 2, ..., 64
    yield from (
        (bx, by, bz)
        for bx in xyExtents
        for by in xyExtents
        for bz in zExtents
        if bx * by * bz in (256, 512)
    )

                
def getResults(meas_db, r, nextBlockSize, threadFoldingSizes, domainSize):
    """Collect measured and derived metrics for all block/thread-folding combinations.

    For every block size yielded by ``nextBlockSize()`` and every entry of
    ``threadFoldingSizes`` the database is queried with
    ``meas_db.getEntry(r, block, threadFolding, domainSize, device)``, where
    ``device`` is the notebook-level global. Combinations for which the
    measurement or the basic metrics come back as ``None`` are reported on
    stdout and skipped; found measurements are printed.

    Returns:
        (measValues, predValues): two dicts keyed by
        ``(r, bx, by, bz, threadFolding)``. The first holds the measurement
        objects, the second the ``DerivedMetrics`` built from them.
    """
    measured = {}
    predicted = {}

    for blockSize in nextBlockSize():
        for folding in threadFoldingSizes:
            configKey = (r, *blockSize, folding)
            lc, basic, meas = meas_db.getEntry(r, blockSize, folding, domainSize, device)

            if meas is not None and basic is not None:
                derived = DerivedMetrics(lc, basic, device, meas)
                measured[configKey] = meas
                predicted[configKey] = derived
                print(meas, end="")
            else:
                print(str(configKey) + " not found")

    return measured, predicted
            
            
# Stencil results: first key component 4, 1024-thread blocks, three thread-folding
# variants, fixed domain size.
stencilMeas, stencilPred = getResults(stencildb, 4, nextStencilBlockSize, [(1,1,1), (1,2,1), (1,1,2)], (632, 504, 992))
#getResults(lbmdb, 0)
# LBM results: first key component 1, 256/512-thread blocks, no thread folding;
# None is passed as domainSize.
lbmMeas, lbmPred = getResults(lbmdb, 1, nextLBMBlockSize, [(1,1,1)], None)
#lbmMeas = {}
#lbmPred = {}

# Merged views over both kernels; the key sets are disjoint because the first
# key component differs (4 vs. 1).
combinedMeas = {**stencilMeas, **lbmMeas}
combinedPred = {**stencilPred, **lbmPred}

In [None]:
# Every stencil entry the database returns for (4, device). Each entry `a` is
# indexed as a[0] (object carrying block / blocking_factors / domain),
# a[1] (basic metrics) and a[2] (measurement).
allStencils = stencildb.getBasicMetricsRange(4, device)

allMeas = dict()
allPred = dict()

for a in allStencils:
    # Key layout: (4, bx, by, bz, blocking_factors, domain); a later entry with
    # the same key replaces the earlier one.
    key = (4,) + tuple(a[0].block) + (tuple(a[0].blocking_factors), tuple(a[0].domain))
    allMeas[key] = a[2]
    allPred[key] = DerivedMetrics(a[0], a[1], device, a[2])


In [None]:
import matplotlib.transforms as transforms
from matplotlib import collections

# Figure for the L1 throughput comparison (predicted vs. measured cycles).
fig, ax = plt.subplots()
fig.set_figwidth(4)
fig.set_figheight(4)
fig.set_dpi(150)

# Candidate pool for the random fill-up: stencil configurations with 1024
# threads per block and LBM configurations with 512 threads per block.
ckeys = [*[k for k in stencilPred.keys() if k[1]*k[2]*k[3] == 1024],
        *[k for k in lbmPred.keys() if k[1]*k[2]*k[3] == 512]]

import random


# Hand-picked configurations, keyed as (r, bx, by, bz, threadFolding).
keys = [(1, 1, 16,32, (1,1,1)),
        (1, 2, 16,16, (1,1,1)),
        (1, 4, 8,16, (1,1,1)),
        (1, 16, 4,8, (1,1,1)),
        (1, 32, 4,4, (1,1,1)),
        (4, 1, 32,32, (1,1,1)),
        (4, 1, 32,32, (1,2,1)),
        (4, 1, 32,32, (1,1,2)),
        (4, 2, 32,16, (1,1,1)),
        (4, 4, 16,16, (1,1,1)),
        (4, 16, 8,8, (1,1,1)),
        (4, 32, 4,8, (1,1,1))
]
# Keep only configurations for which results were actually loaded.
keys = [k for k in keys if k in stencilPred or k in lbmPred]

# Fill up to 13 bars with random candidates. Hand-picked keys are removed from
# the pool first; otherwise a configuration could be drawn a second time and
# show up as a duplicate bar.
random.shuffle(ckeys)
ckeys = [k for k in ckeys if k not in keys]
keys.extend( ckeys[:max(0, 13-len(keys))] )



#keys.extend([k for k in combinedMeas.keys() if hasattr(combinedMeas[k], "L1Volume")])


# Sorted order is what makes equal labels adjacent for getLabelGroups.
keys.sort()
print(keys)

def getLabelGroups(keys, labelFuncs):
    """Group consecutive keys with equal labels into runs, one run list per label level.

    Args:
        keys: ordered sequence of configuration keys.
        labelFuncs: label functions ordered from the finest level (index 0)
            to the coarsest level (last index).

    Returns:
        One list per label function. Each list holds ``[label, count]`` runs
        in key order; the counts of every level sum to ``len(keys)``. A run on
        a finer level never extends across a boundary of a coarser level: when
        a coarser label changes, finer runs that would have continued are
        split at that position. For empty ``keys`` every level is an empty
        list.
    """
    if len(keys) == 0:
        # No keys, no runs (previously keys[0] raised IndexError here).
        return [[] for _ in labelFuncs]

    # Seed each level with a zero-length run carrying the first key's label.
    labelGroups = [[[labelOf(keys[0]), 0]] for labelOf in labelFuncs]

    for k in keys:
        for level, labelOf in enumerate(labelFuncs):
            label = labelOf(k)
            currentRun = labelGroups[level][-1]
            if label == currentRun[0]:
                currentRun[1] += 1
                continue

            labelGroups[level].append([label, 1])
            # A coarser label changed: finer levels that just extended their
            # run with k (count > 1) give k back and open a fresh run for it.
            for finer in range(level):
                if labelGroups[finer][-1][1] > 1:
                    labelGroups[finer][-1][1] -= 1
                    labelGroups[finer].append([labelFuncs[finer](k), 1])

    return labelGroups
    
    
# Label rows from finest to coarsest: thread folding, bz, by, bx, kernel.
labelGroups = getLabelGroups(keys, [ lambda k: "2y" if k[4] == (1,2,1) else "2z" if k[4] == (1,1,2) else "-", lambda k: k[3], lambda k : k[2], lambda k : k[1], lambda k: "LBM" if k[0] == 1 else "3D25pt"])

# Predicted values per selected configuration.
yv1 = np.array( [ combinedPred[k].L1Cycles for k in keys  ] )
yl1 = np.array( [ combinedPred[k].L1Load for k in keys  ] )

# Measured values scaled by 32; configurations whose measurement lacks the
# attribute are stored as 0.
cycles = np.array( [combinedMeas[k].L1Wavefronts *32 if hasattr(combinedMeas[k], "L1Wavefronts") else 0 for k in keys ])
volume = np.array( [(combinedMeas[k].L1Volume * 32) if hasattr(combinedMeas[k], "L1Volume") else 0 for k in keys])


x = np.arange(0,len(yv1))

# Predicted cycles as bars, measured cycles as "+" markers on top.
ax.bar(x  ,yv1, width=0.6, label="pred", color="#DDDDDD", hatch="///", edgecolor="black", zorder=-1)
#ax.bar(x + 0.22,yl1, width=0.4, label="L1 Load Volume", color="#DDDDDD", hatch="...", edgecolor="black", zorder=-1)

ax.scatter(x, cycles, zorder=1, marker="+", s=[82]*len(x), linewidth=3, label="meas")
#ax.scatter(x-0.22, volume, zorder=1, marker="*" )

# Mean absolute percentage error of predicted (yv1) vs. measured (cycles) L1 cycles.
# Configurations without a measurement are stored as 0 in `cycles`; they are
# excluded, since dividing by them would turn the whole average into inf/nan.
# With no measured configuration at all the error is reported as 0.
measuredMask = cycles != 0
if measuredMask.any():
    mape = float(np.mean(np.abs(cycles[measuredMask] - yv1[measuredMask]) / cycles[measuredMask]))
else:
    mape = 0


#ax.set_xticks(x)
#ax.set_xticklabels(labels)

# x in data coordinates (bar positions), y in axes coordinates (so negative y
# places the label table below the plot area).
trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)


# One table row per label level, stacked downwards in steps of 0.06.
yoff = -0.06
for labelGroup in labelGroups:
    xoff = 0
    # Left-most separator line of the row.
    ax.annotate("",xy=(xoff-0.5, yoff+0.07), xycoords=trans, xytext=(xoff-0.5,  yoff-0.01), textcoords=trans, arrowprops=dict(arrowstyle="-",
                      connectionstyle="arc3, rad=0"),)
    for t in labelGroup:
        # t is [label, count]: centre the label over its `count` bars and draw
        # the separator at the right edge of the run.
        ax.text( xoff + (t[1]-1) / 2, yoff, t[0], ha='center', transform=trans)
        ax.annotate("",
            xy=(xoff + t[1] - 0.5, yoff+0.07), xycoords=trans,
            xytext=(xoff + t[1] - 0.5, yoff-0.01), textcoords=trans,
            arrowprops=dict(arrowstyle="-",
                      connectionstyle="arc3, rad=0"),
            )

        xoff += t[1]

    yoff -= 0.06

ax.annotate( "MAPE:{:4.1f}%".format(mape*100), (0.4, 0.95), xycoords="axes fraction",va="center", ha="left",
             fontfamily="monospace", fontsize="small",
              bbox=dict(boxstyle="round", fc="#FFFFFFAA", ec="#CCCCCC00"))
    
    
# Row captions; the order matches the label functions passed to getLabelGroups.
ax.text( -0.7, -0.06,  "tf", ha='right', transform=trans)    
ax.text( -0.7, -0.12, "bz", ha='right', transform=trans)    
ax.text( -0.7, -0.18, "by", ha='right', transform=trans)    
ax.text( -0.7, -0.24, "bx", ha='right', transform=trans)    
ax.text( -0.7, -0.30, "kernel", ha='right', transform=trans)    

ax.set_xticks([])
ax.set_xlim((-0.8,len(keys)-0.2))
ax.legend()

#ax.set_xlabel("configuration")
ax.set_ylabel("cycles/(warp*Lup)")
fig.tight_layout()
fig.savefig("paperplots/l1_throughput.pdf")

In [None]:
# Scatter plot of the L1 load hit rate over the L1 oversubscription factor.
fig, ax = plt.subplots()
fig.set_figwidth(4)
fig.set_figheight(4)
fig.set_dpi(150)

# Points of both kernels, accumulated for the curve fit further down.
xdata = []
ydata = []

for measValues, predValues, label in [(allMeas, allPred, "3D25pt"), (lbmMeas, lbmPred, "LBM")]:
    # Keep configurations with more than 10 KiB predicted L1 allocation and a
    # predicted-L1 minus measured-L2 load difference above 12.
    validKeys = [k for k in measValues if predValues[k].smL1Alloc > 10*1024 and
                 (predValues[k].L1Load - measValues[k].L2Load_tex) > 12]
    # x: allocation relative to 192 KiB (presumably the L1 capacity - confirm).
    x = [predValues[k].smL1Alloc  / (192*1024) for k in validKeys]
    # y: measured L1-to-L2 reduction relative to the reduction predicted by L2LoadV1.
    y = [ ( predValues[k].L1Load - measValues[k].L2Load_tex) / (predValues[k].L1Load - predValues[k].L2LoadV1) for k in validKeys]

    # Each point is drawn twice: a large, almost transparent halo and a small opaque dot.
    plt.scatter(x, y, alpha=0.01, s=[150]*len(x), c=[getColor(k[1:4]) for k in validKeys],  edgecolors="None")
    plt.scatter(x, y, alpha=0.8, s=[15]*len(x), c=[getColor(k[1:4]) for k in validKeys],  edgecolors="None")
    xdata.extend(x)
    ydata.extend(y)


# NOTE(review): `values` is not used anywhere in this cell.
values = np.arange(1.0, 16.0, 0.1)

ax.set_xlim([0, 13])

from scipy.optimize import curve_fit


def func(x, a, b, c):
    """Double-exponential fit model: a * exp(b * exp(c * x))."""
    innerExp = np.exp(c*x)
    return a * np.exp(b*innerExp)


# Fit func(x) = a * exp(b * exp(c * x)) to all scattered points; `a` is
# constrained to [1.0, 1.1], `b` to [-20, 20] and `c` to [-10, 10].
popt, pcov = curve_fit(func, xdata, ydata, bounds=([1.0, -20, -10], [1.1, 20, 10]), maxfev=20000)


print(popt)
print(pcov)

# Evaluate the fit on the data positions plus a regular grid (0.2 .. 19.8) so
# the curve is also drawn where no data points exist.
xdata = np.array([*list(xdata), *[  i / 5 for i in range(1,100) ]])
xdata.sort()
# Raw string: "\;" and "\:" are mathtext spacing commands, not Python escapes.
# The resulting label text is unchanged; this only avoids the invalid-escape
# warning of newer Python versions.
plt.plot( xdata , func(xdata, *popt), '-.', color="gray",
         label=r'fit: $%5.2f \; e^ {{%5.2f \: e^ {{%5.2f}\:O^{{{L1}}}}}}$' % tuple(popt))

# Marks oversubscription factor 1.
ax.axvline(1.0, color="gray")
ax.legend()
ax.set_xlabel("oversubscription factor $O^{L1}$")
ax.set_ylabel("hit rate $R_{hit}^{L1,load}$")

fig.tight_layout()
fig.savefig("paperplots/rcap_L1.pdf")

In [None]:
# Legend text per thread-folding tuple.
tfs = { (1,1,1): "no thread folding", (1,2,1) : "$2y$ thread folding", (1,1,2) : "$2z$ thread folding", (1,2,2) : "$2z2y$ thread folding" }

# One tuple per stencil configuration: block size, measured L2 load volume,
# L2LoadV2 prediction, thread-folding label, L2LoadV1 prediction.
fig, ax = volumeScatterPlot([(k[1:4], stencilMeas[k].L2Load_tex, stencilPred[k].L2LoadV2, tfs[k[4]], stencilPred[k].L2LoadV1) for k in stencilMeas], "Stencil L2 Volume")
ax.set_title(None)
#ax.get_legend().remove()
fig.tight_layout()
plt.savefig( "./paperplots/stencil_L2_volumes.pdf")

In [None]:
# Same plot for the LBM kernel; the fourth tuple element is the first key
# component (k[0]) instead of a thread-folding label, and the legend is removed.
fig, ax = volumeScatterPlot([(k[1:4], lbmMeas[k].L2Load_tex, lbmPred[k].L2LoadV2, k[0], lbmPred[k].L2LoadV1) for k in lbmMeas], "LBM L2 Volume")
ax.set_title(None)
ax.get_legend().remove()
fig.tight_layout()
plt.savefig( "./paperplots/lbm_L2_volumes.pdf")

In [None]:
import matplotlib.transforms as transforms
from matplotlib import collections

# Figure for the L2 load volume composition plot.
fig, ax = plt.subplots()
fig.set_figwidth(8)
fig.set_figheight(4)
fig.set_dpi(150)

# Start from all stencil configurations with 1024 threads and all LBM
# configurations with 512 threads ...
keys = [*[k for k in stencilPred.keys() if k[1]*k[2]*k[3] == 1024],
        *[k for k in lbmPred.keys() if k[1]*k[2]*k[3] == 512]]

import random

# ... and keep four random ones.
random.shuffle(keys)
keys = keys[:4]

# Hand-picked configurations, keyed as (r, bx, by, bz, threadFolding).
keys.extend([(1, 2, 16,16, (1,1,1)),
             (1, 2, 256, 1, (1,1,1)),
             (1, 4, 8, 16, (1,1,1)),
             (1, 32, 4, 4, (1,1,1)),
             (1, 128, 2, 2, (1,1,1)),
             (1, 256, 2, 1, (1,1,1)),
             (1, 512, 1, 1, (1,1,1)),
             (4, 1, 16, 64, (1,1,1)),
             (4, 1, 16, 64, (1,1,2)),
             (4, 1, 16, 64, (1,2,1)),
             (4, 1, 16, 64, (1,2,2)),
             (4, 2, 8, 64, (1,1,1)),
             (4, 2, 8, 64, (1,2,1)),
             (4, 2, 8, 64, (1,1,2)),
             (4, 2, 8, 64, (1,2,2)),
             (4, 16, 8, 8, (1,1,1)),
             (4, 64, 4, 4, (1,1,1)),             
             (4, 256, 2, 2, (1,1,1)),             
             (4, 512, 2, 1, (1,1,1)),             
             (4, 1024, 1, 1, (1,1,1)),            
             

            ])
# Drop configurations without loaded results, then remove duplicates (a random
# pick may coincide with a hand-picked key).
keys = [k for k in keys if k in lbmPred or k in stencilPred]
keys = list(set(keys))

# Sorted order is what makes equal labels adjacent for getLabelGroups.
keys.sort()
print(keys)

def getLabelGroups(keys, labelFuncs):
    """Split the key sequence into runs of equal labels, separately for each label level.

    Args:
        keys: ordered sequence of configuration keys.
        labelFuncs: label functions, finest level first, coarsest level last.

    Returns:
        A list with one entry per label function; each entry is a list of
        ``[label, count]`` runs in key order whose counts add up to
        ``len(keys)``. Runs of a finer level are cut wherever a coarser level
        starts a new run. An empty ``keys`` sequence yields an empty run list
        for every level.
    """
    if len(keys) == 0:
        # Nothing to group (previously keys[0] raised IndexError here).
        return [[] for _ in labelFuncs]

    # Every level starts with an empty run labelled after the first key.
    labelGroups = [[[labelOf(keys[0]), 0]] for labelOf in labelFuncs]

    for k in keys:
        for level, labelOf in enumerate(labelFuncs):
            label = labelOf(k)
            lastRun = labelGroups[level][-1]
            if label == lastRun[0]:
                lastRun[1] += 1
                continue

            labelGroups[level].append([label, 1])
            # New run on this level: finer levels that already absorbed k into
            # their running group (count > 1) hand it over to a new run.
            for finer in range(level):
                if labelGroups[finer][-1][1] > 1:
                    labelGroups[finer][-1][1] -= 1
                    labelGroups[finer].append([labelFuncs[finer](k), 1])

    return labelGroups
    
    
# Label rows from finest to coarsest: thread folding, bz, by, bx, kernel.
labelGroups = getLabelGroups(keys, [ lambda k: "2zy" if k[4] == (1,2,2) else "2y" if k[4] == (1,2,1) else "2z" if k[4] == (1,1,2) else "-", lambda k: k[3], lambda k : k[2], lambda k : k[1], lambda k: "LBM" if k[0] == 1 else "3D25pt"])

# Predicted L2 load volumes (two model variants), predicted L1 load volume and
# the measured L2 load volume per configuration.
yv1 = [ combinedPred[k].L2LoadV1 for k in keys  ]
yv2 = [ combinedPred[k].L2LoadV2 for k in keys  ]
yl1 = [ combinedPred[k].L1Load for k in keys  ]
ymeas = [ combinedMeas[k].L2Load_tex for k in keys  ]
x = np.arange(0,len(yv1))





# The "compulsory" bars (yv1) are drawn after the "capacity" bars (yv2) at the
# same positions, so they paint over the lower part of them.
plt.bar(x,yv2, width=0.8, label="capacity", zorder=-1, color="black", hatch="//", edgecolor="white")
plt.bar(x,yv1, width=0.8, label="compulsory", zorder=-1, color="black", hatch="..", edgecolor="white")
# Unfilled outline showing the L1 load volume.
plt.bar(x,yl1, width=0.8, fill=False, linewidth=1, label="L1 Load Volume")
# Horizontal tick at the yv2 prediction, and measured values as tick plus dot.
plt.scatter(x, yv2, marker="_", color="black", s=[600]*len(x), linewidth=2, zorder=1)
plt.scatter(x, ymeas, c=[getColor(k[1:4]) for k in keys], s=[400]*len(x), linewidth=2, zorder=1, marker="_")
plt.scatter(x, ymeas, c=[getColor(k[1:4]) for k in keys], s=[40]*len(x), label="measured", zorder=1, marker="o")

#ax.set_xticks(x)
#ax.set_xticklabels(labels)

# x in data coordinates (bar positions), y in axes coordinates (so negative y
# places the label table below the plot area).
trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)


# One table row per label level, stacked downwards in steps of 0.06.
yoff = -0.06
for labelGroup in labelGroups:
    xoff = 0
    # Left-most separator line of the row.
    ax.annotate("",xy=(xoff-0.5, yoff+0.07), xycoords=trans, xytext=(xoff-0.5,  yoff-0.01), textcoords=trans, arrowprops=dict(arrowstyle="-",
                      connectionstyle="arc3, rad=0"),)
    for t in labelGroup:
        # t is [label, count]: centre the label over its `count` bars and draw
        # the separator at the right edge of the run.
        ax.text( xoff + (t[1]-1) / 2, yoff, t[0], ha='center', transform=trans)
        ax.annotate("",
            xy=(xoff + t[1] - 0.5, yoff+0.07), xycoords=trans,
            xytext=(xoff + t[1] - 0.5, yoff-0.01), textcoords=trans,
            arrowprops=dict(arrowstyle="-",
                      connectionstyle="arc3, rad=0"),
            )

        xoff += t[1]

    yoff -= 0.06

# Row captions; the order matches the label functions passed to getLabelGroups.
ax.text( -0.7, -0.06,  "tf", ha='right', transform=trans)    
ax.text( -0.7, -0.12, "bz", ha='right', transform=trans)    
ax.text( -0.7, -0.18, "by", ha='right', transform=trans)    
ax.text( -0.7, -0.24, "bx", ha='right', transform=trans)    
ax.text( -0.7, -0.30, "kernel", ha='right', transform=trans)    

#ax.text( 0, -0.2, "testst", ha='center', transform=ax.transAxes)
#ax.text( 0, -0.3, "testst", ha='center', transform=ax.transAxes)
#ax.text( 0, -0.4, "testst", ha='center', transform=ax.transAxes)

ax.set_xticks([])
ax.set_xlim((-0.6,len(keys)-0.4))
# Start the y axis at 0 and cap it at 800.
ax.set_ylim((0, min(800, ax.get_ylim()[1])))
ax.legend()
#ax.set_xlabel("configuration")
ax.set_ylabel("Volume, $B/Lup$")
fig.tight_layout()
fig.savefig("paperplots/comp_l1.pdf")

In [None]:
# Scatter plot of the L2 load hit rate over the L2 oversubscription factor.
fig, ax = plt.subplots()
fig.set_figwidth(4)
fig.set_figheight(4)
fig.set_dpi(150)

# Points of both kernels, accumulated for the bucket averaging below.
xdata = []
ydata = []

for measValues, predValues, label in [(allMeas, allPred, "3D25pt"), (lbmMeas, lbmPred, "LBM")]:
    

    
    # Keep configurations where the predicted L2 load exceeds the memLoadV2
    # prediction by more than 1.2 and the measured memory load lies above memLoadV2.
    keys = [k for k in measValues if predValues[k].L2LoadV2 - predValues[k].memLoadV2 > 1.2 and
            measValues[k].memLoad - predValues[k].memLoadV2 > 0 ]   
     
    # x: load + store wave volume relative to 20 MiB (presumably the L2
    # capacity - confirm).
    x = [ (predValues[k].basic.waveMemLoadNew +
           predValues[k].basic.waveMemStoreNew) / (20*1024*1024) for k in keys]
    # y: measured L2-to-memory reduction relative to the reduction predicted by memLoadV3.
    y = [ ( measValues[k].L2Load_tex - measValues[k].memLoad) / (measValues[k].L2Load_tex - predValues[k].memLoadV3) for k in keys] 
    # Each point is drawn twice: a large faint halo and a small opaque dot.
    plt.scatter(x, y, alpha=0.1, s=[150]*len(x), c=[getColor(k[1:4]) for k in keys],  edgecolors="None")
    plt.scatter(x, y, alpha=0.8, s=[15]*len(x), c=[getColor(k[1:4]) for k in keys],  edgecolors="None")
    xdata.extend(x)
    ydata.extend(y)
    
  

# Average y per bucket. Bucket b collects the points whose x rounds to
# b*bucketWidth; 1.9 // 0.25 gives 7 buckets, points beyond are ignored.
bucketWidth = 0.25
buckets = [0] * int(1.9 // bucketWidth)
bucketCounts = [0]* int(1.9// bucketWidth)

for x,y in zip(xdata, ydata):
    bucket = round(x/bucketWidth)
    if bucket >= 0 and bucket < len(buckets):
        buckets[bucket ] += y
        bucketCounts[bucket] += 1

# Turn sums into means; max(1, ...) avoids dividing by zero for empty buckets.
for b in range(len(buckets)):
    buckets[b] /= max(1, bucketCounts[b])
    
# Empty buckets take the mean of their neighbours. For b == 0 the index b-1
# wraps around to the last bucket; that value is overwritten by the loop below.
for b in range(len(buckets)):
    if bucketCounts[b] == 0:
        buckets[b] = (buckets[b-1] + buckets[min(len(buckets)-1, b+1)])/2

# Force the buckets below x = 0.6 to a hit rate of 1.0.
b = 0
while b*bucketWidth < 0.6:
    buckets[b] = 1.0
    b += 1

# NOTE(review): `values` is not used anywhere in this cell.
values = np.arange(0.0, 10.0, 0.1)


from scipy.optimize import curve_fit

def func(x, a, b, c):
    """Fit model a * exp(b * exp(c * x)) used by curve_fit below."""
    exponent = b*np.exp(c*x)
    return a * np.exp(exponent)


# Fit the model to the bucket means (x = bucket index * bucketWidth); `a` is
# pinned to [0.9999, 1.0], `b` limited to [-20, 23], `c` unbounded.
popt, pcov = curve_fit(func, np.arange(len(buckets))*bucketWidth , buckets, bounds=([0.9999, -20, -np.inf], [1.0, 23, np.inf]), maxfev=20000)
print(popt)
print(pcov)

# Evaluate the fit on the data positions plus a regular grid (0.04 .. 2.76).
xdata = np.array([*list(xdata), *[  i / 25 for i in range(1,70) ]])
xdata.sort()

# Bucket means as a gray polyline, fitted curve in black.
plt.plot(np.arange(len(buckets))*bucketWidth, buckets, color="#999999")

plt.plot(xdata, func(xdata, *popt), '-', color="black",
         label='fit: a=%5.3f, \n     b=%5.3f,\n     c=%5.3f' % tuple(popt))    




# Marks oversubscription factor 1.
ax.axvline(1, color="gray")
#ax.axvline(0, color="gray")
ax.legend()
ax.set_xlabel("oversubscription factor $O^{L2}$")
# Label typo fixed ("hitare rate" -> "hit rate").
ax.set_ylabel("hit rate $R_{cap}^{L2,load}$")
fig.tight_layout()
# NOTE(review): a later cell in this notebook saves to the same path and
# overwrites this figure when the notebook is run top to bottom.
fig.savefig("paperplots/rmiss_mem.pdf")

In [None]:
# Scatter plot of the L2 store hit rate (y is built from L2Store / memStore
# values) over the L2 oversubscription factor, with bucket averaging.
fig, ax = plt.subplots()
fig.set_figwidth(4)
fig.set_figheight(4)
fig.set_dpi(150)

# Points of both kernels, accumulated for the bucket averaging below.
xdata = []
ydata = []

for measValues, predValues, label in [(allMeas, allPred, "3D25pt"), (lbmMeas, lbmPred, "LBM")]:
    
    
    # Keep configurations where the predicted L2 store volume exceeds the
    # memStoreV1 prediction by more than 1.2 and the measured memory store
    # volume lies above memStoreV1.
    keys = [k for k in measValues if predValues[k].L2Store - predValues[k].memStoreV1 > 1.2 and
            measValues[k].memStore - predValues[k].memStoreV1 > 0 ]   
    # x: load + store wave volume relative to 20 MiB (presumably the L2
    # capacity - confirm).
    x = [ (predValues[k].basic.waveMemLoadNew +
           predValues[k].basic.waveMemStoreNew) / (20*1024*1024) for k in keys]
    # y: (predicted L2 store - measured memory store) relative to the reduction
    # predicted by memStoreV1.
    y = [ ( predValues[k].L2Store - measValues[k].memStore) / (predValues[k].L2Store - predValues[k].memStoreV1) for k in keys]        
    # Each point is drawn twice: a large faint halo and a small opaque dot.
    plt.scatter(x, y, alpha=0.1, s=[150]*len(x), c=[getColor(k[1:4]) for k in keys],  edgecolors="None")
    plt.scatter(x, y, alpha=0.8, s=[15]*len(x), c=[getColor(k[1:4]) for k in keys],  edgecolors="None")
    xdata.extend(x)
    ydata.extend(y)


    
# Average y per bucket. Bucket b collects the points whose x rounds to
# b*bucketWidth; 2.25 // 0.25 gives 9 buckets, points beyond are ignored.
bucketWidth = 0.25
buckets = [0] * int(2.25 // bucketWidth)
bucketCounts = [0]* int(2.25 // bucketWidth)

for x,y in zip(xdata, ydata):
    bucket = round(x/bucketWidth)
    if bucket >= 0 and bucket < len(buckets):
        buckets[bucket ] += y
        bucketCounts[bucket] += 1

# Turn sums into means; empty buckets stay at 0 (no neighbour fill in this cell).
for b in range(len(buckets)):
    buckets[b] /= max(1, bucketCounts[b])

# Force the buckets below x = 0.6 to a hit rate of 1.0.
b = 0
while b*bucketWidth < 0.6:
    buckets[b] = 1.0
    b += 1

# NOTE(review): `values` is not used anywhere in this cell.
values = np.arange(0.0, 10.0, 0.1)


from scipy.optimize import curve_fit

def func(x, a, b, c):
    """Return a * exp(b * exp(c * x)), the model fitted to the bucket means."""
    scaledX = c*x
    return a * np.exp(b*np.exp(scaledX))


# Fit the model to the bucket means (x = bucket index * bucketWidth); `a` is
# pinned to [0.9999, 1.0], `b` limited to [-20, 23], `c` unbounded.
popt, pcov = curve_fit(func, np.arange(len(buckets))*bucketWidth , buckets, bounds=([0.9999, -20, -np.inf], [1.0, 23, np.inf]), maxfev=20000)
print(popt)
print(pcov)

# Evaluate the fit on the data positions plus a regular grid (0.04 .. 2.76).
xdata = np.array([*list(xdata), *[  i / 25 for i in range(1,70) ]])
xdata.sort()

# Bucket means as a gray polyline, fitted curve in black.
plt.plot(np.arange(len(buckets))*bucketWidth, buckets, color="#999999")

# Raw string: "\;" and "\:" are mathtext spacing commands, not Python escapes.
# The resulting label text is unchanged; this only avoids the invalid-escape
# warning of newer Python versions.
# NOTE(review): the label says O^{L1} although the x axis is labelled O^{L2}.
plt.plot(xdata, func(xdata, *popt), '-', color="black",
         label=r'fit: $%5.2f \; e^ {{%5.2f \: e^ {{%5.2f}O^{{{L1}}}}}}$' % tuple(popt))

    

ax.axvline(1, color="gray")
#ax.axvline(0, color="gray")
ax.legend()
ax.set_xlabel("oversubscription factor $O^{L2}$")
# NOTE(review): the y data of this cell is computed from L2Store/memStore, yet
# the axis label says "load" and the file name below is the one already used
# by the load hit-rate cell, so running the notebook top to bottom overwrites
# that figure. Confirm the intended label and output path.
ax.set_ylabel("hit rate $R_{cap}^{L2,load}$")
fig.tight_layout()
fig.savefig("paperplots/rmiss_mem.pdf")

In [None]:
# Scatter plot of the L2 store hit rate over the predicted L2 oversubscription.
fig, ax = plt.subplots()
fig.set_figwidth(4)
fig.set_figheight(4)
fig.set_dpi(150)

# Points of both kernels, accumulated for the curve fit further down.
xdata = []
ydata = []


for measValues, predValues, label in [(allMeas, allPred, "3D25pt"), (lbmMeas, lbmPred, "LBM")]:
    
    # Keep configurations where both the predicted L2 store volume and the
    # measured memory store volume exceed the memStoreV1 prediction by more than 0.1.
    keys = [k for k in measValues if predValues[k].L2Store - predValues[k].memStoreV1 > 0.1 and
            measValues[k].memStore - predValues[k].memStoreV1 > 0.1 ] 
    
    x = [ predValues[k].L2Oversubscription for k in keys]
    # y: measured L2-to-memory store reduction relative to the reduction
    # expected from memStoreV1.
    y = [ ( measValues[k].L2Store - measValues[k].memStore) / (measValues[k].L2Store - predValues[k].memStoreV1) for k in keys]        
    # Each point is drawn twice: a large faint halo and a small opaque dot.
    plt.scatter(x, y, alpha=0.04, s=[150]*len(x), c=[getColor(k[1:4]) for k in keys],  edgecolors="None")
    plt.scatter(x, y, alpha=0.8, s=[15]*len(x), c=[getColor(k[1:4]) for k in keys],  edgecolors="None")
    xdata.extend(x)
    ydata.extend(y)

#ax.set_xlim([-1.2, 1.2])

from scipy.optimize import curve_fit

def func(x, a, b, c):
    """Evaluate the double-exponential model a * exp(b * exp(c * x))."""
    growth = np.exp(c*x)
    return a * np.exp(b*growth)

# Fit the model to all scattered points; `a` is constrained to [0.99, 1.00],
# `b` and `c` are unbounded.
popt, pcov = curve_fit(func, xdata, ydata, bounds=([0.99, -np.inf, -np.inf], [1.00, np.inf, np.inf]), maxfev=20000)
print(popt)
print(pcov)

# Evaluate the fit on the data positions plus a regular grid (0.02 .. 1.98).
xdata = np.array([*list(xdata), *[  i / 50  for i in range(1,100) ]])
xdata.sort()
# Raw string: "\;" and "\:" are mathtext spacing commands, not Python escapes.
# The resulting label text is unchanged; this only avoids the invalid-escape
# warning of newer Python versions.
plt.plot(xdata, func(xdata, *popt), '-.', color="gray",
         label=r'fit: $%5.1f \; e^ {{%5.2f \: e^ {{%5.2f}O^{{L2}}}}}$' % tuple(popt))

# Marks oversubscription factor 1.
ax.axvline(1, color="gray")
#ax.axvline(0, color="gray")
ax.legend()
ax.set_xlabel("oversubscription factor $O^{L2}$")
ax.set_ylabel("hit rate $R_{cap}^{L2,store}$")

fig.tight_layout()
fig.savefig("paperplots/rstoremiss_mem.pdf")

In [None]:
# Legend text per thread-folding tuple.
tfs = { (1,1,1): "no thread folding", (1,2,1) : "$2y$ thread folding", (1,1,2) : "$2z$ thread folding", (1,2,2) : "$2zy$ thread folding" }
# One tuple per stencil configuration: block size, measured memory load volume,
# memLoadV3 prediction, thread-folding label, memLoadV1 prediction.
fig, ax = volumeScatterPlot([(k[1:4], stencilMeas[k].memLoad, stencilPred[k].memLoadV3, tfs[k[4]], stencilPred[k].memLoadV1) for k in stencilMeas], "Stencil Memory Load Volume")
ax.set_title(None)
#ax.get_legend().remove()

fig.tight_layout()
plt.savefig( "./paperplots/stencil_mem_volumes.pdf")

In [None]:
# Same plot for the LBM kernel; the fourth tuple element is the first key
# component (k[0]) instead of a thread-folding label.
fig, ax = volumeScatterPlot([(k[1:4], lbmMeas[k].memLoad, lbmPred[k].memLoadV3, k[0], lbmPred[k].memLoadV1) for k in lbmMeas], "LBM Memory Load Volume")
ax.set_title(None)
# Fixed ticks and identical limits on both axes; legend removed.
ax.set_yticks([160, 192, 256] )
ax.set_xticks([160, 192, 256] )
ax.set_xlim((145, 260))
ax.set_ylim((145, 260))
ax.get_legend().remove()

fig.tight_layout()
plt.savefig( "./paperplots/lbm_mem_volumes.pdf")

In [None]:
import matplotlib.transforms as transforms
from matplotlib import collections

def compositionPlot(keys, ceiling):
    """Draw the predicted memory load volume composition for the given configurations.

    Reads the notebook-level dicts ``combinedPred`` and ``combinedMeas``. Per
    key a stacked bar (store miss, compulsory, overlap miss, overlap hit) is
    drawn, together with an outline of the predicted L2 load volume, a tick at
    the predicted memory volume and markers for the measured memory load.

    Args:
        keys: ordered, non-empty list of keys ``(r, bx, by, bz, threadFolding)``
            present in ``combinedPred`` / ``combinedMeas``.
        ceiling: upper cap for the y axis; the axis ends at
            ``min(ceiling, automatic upper limit)``.

    Returns:
        ``(fig, ax)`` of the newly created figure.
    """
    fig, ax = plt.subplots()
    fig.set_figwidth(8)
    fig.set_figheight(4)
    fig.set_dpi(150)

    def getLabelGroups(keys, labelFuncs):
        """Return, per label function, the runs ``[label, count]`` of consecutive equal labels.

        Label functions are ordered finest first; a run on a finer level is
        split wherever a coarser level starts a new run.
        """
        # Seed each level with a zero-length run carrying the first key's label.
        labelGroups = [ [[l(keys[0]), 0]]  for l in labelFuncs ]

        for k in keys:
            for l in range(len(labelFuncs)):
                if labelFuncs[l](k) != labelGroups[l][-1][0]:
                    labelGroups[l].append([labelFuncs[l](k), 1])
                    # Finer levels that already counted k into their current
                    # run (count > 1) give it back and start a new run.
                    for lo in range(0, l):
                        if labelGroups[lo][-1][1] > 1:
                            labelGroups[lo][-1][1] -= 1
                            labelGroups[lo].append( [labelFuncs[lo](k), 1] )
                else:
                    labelGroups[l][-1][1] += 1

        return labelGroups


    # Label rows from finest to coarsest: thread folding, bz, by, bx, kernel.
    labelGroups = getLabelGroups(keys, [ lambda k: "2y" if k[4] == (1,2,1) else "2z" if k[4] == (1,1,2) else "-", lambda k: k[3], lambda k : k[2], lambda k : k[1], lambda k: "LBM" if k[0] == 1 else "3D25pt"])

    

    # "Compulsory" part: memLoadV1 minus both overlap components.
    ycomp = np.array( [combinedPred[k].memLoadV1 - combinedPred[k].memLoadOverlap[0] - combinedPred[k].memLoadOverlap[1] for k in keys] )   
    yl2 = np.array( [combinedPred[k].L2LoadV2 for k in keys]    )
    # Overlap volume that is not covered by the predicted overlap hits, per
    # overlap component (index 0 and 1; named y and z here).
    ymissy = np.array( [combinedPred[k].memLoadOverlap[0] - combinedPred[k].memLoadOverlapHit[0] for k in keys]  )  
    ymissz = np.array( [combinedPred[k].memLoadOverlap[1] - combinedPred[k].memLoadOverlapHit[1] for k in keys]  )  
    ymissover = ymissy + ymissz
    # NOTE(review): ymisscap and ymemvolumev1 are computed but not used below.
    ymisscap = np.array( [combinedPred[k].memLoadEvicts  for k in keys]  )  
    ymissstore = np.array( [combinedPred[k].memStoreEvicts  for k in keys]  )  
    
    
    # Predicted total: memLoadV2 plus the store evicts.
    ymemvolume = np.array( [combinedPred[k].memLoadV2   for k in keys]  ) + ymissstore
    ymemvolumev1 = np.array( [combinedPred[k].memLoadV1  for k in keys]  )  
    
    yhity = np.array( [combinedPred[k].memLoadOverlapHit[0]  for k in keys]  ) 
    yhitz = np.array( [combinedPred[k].memLoadOverlapHit[1]  for k in keys]  )  
    yhitover = yhity + yhitz
    ymeasLoad = np.array( [combinedMeas[k].memLoad for k in keys] )   
    
    x = np.arange(0,len(keys))



    # Stack from bottom to top: store miss, compulsory, overlap miss, overlap
    # hit. The L2 load volume is an unfilled outline starting above the store
    # miss part.
    plt.bar(x, ymissstore, width=0.71, label="store miss", zorder=-1, color="#000000", hatch="---", edgecolor="white")
    plt.bar(x, yl2, bottom=ymissstore, width=0.7, fill=False, linewidth=1, label="L2 Load Volume", edgecolor="#AAAAAA", zorder=-2)
    plt.bar(x, ycomp, bottom=ymissstore, width=0.7, label="compulsory", zorder=-1, color="black", hatch="...", edgecolor="white")
    plt.bar(x, ymissover, bottom=ycomp+ymissstore, width=0.7, label="overlap miss", zorder=-1, color="black", hatch="///", edgecolor="white")   
    
    plt.bar(x, yhitover, bottom=ycomp + ymissover + ymissstore , width=0.7, label="overlap hit", zorder=-1, color="#FFFFFF", hatch="///", edgecolor="black")

    # Tick at the predicted memory volume; measured load as dot plus tick.
    plt.scatter(x, ymemvolume, marker="_", color="black", s=[600]*len(x), linewidth=2, zorder=1)
    plt.scatter(x, ymeasLoad, marker="o", c=[getColor(k[1:4]) for k in keys], s=[40]*len(x), label="measured", zorder=1)
    plt.scatter(x, ymeasLoad, marker="_", c=[getColor(k[1:4]) for k in keys],  s=[400]*len(x), linewidth=2, zorder=1)
    
    
    ylim = ax.get_ylim();
    
        
    # Both terms read the same current upper limit, so the weighted mean is
    # (up to rounding) that limit again; in effect only the lower limit is set to 0.
    ax.set_ylim( (0, (ylim[1] * 40 + ax.get_ylim()[1]) / 41))

    # x in data coordinates (bar positions), y in axes coordinates (so negative
    # y places the label table below the plot area).
    trans = transforms.blended_transform_factory(ax.transData, ax.transAxes)


    # One table row per label level, stacked downwards in steps of 0.06.
    yoff = -0.06
    for labelGroup in labelGroups:
        xoff = 0
        # Left-most separator line of the row.
        ax.annotate("",xy=(xoff-0.5, yoff+0.07), xycoords=trans, xytext=(xoff-0.5,  yoff-0.01), textcoords=trans, arrowprops=dict(arrowstyle="-",
                          connectionstyle="arc3, rad=0"),)
        for t in labelGroup:
            # t is [label, count]: centre the label over its `count` bars and
            # draw the separator at the right edge of the run.
            ax.text( xoff + (t[1]-1) / 2, yoff, t[0], ha='center', transform=trans)
            ax.annotate("",
                xy=(xoff + t[1] - 0.5, yoff+0.07), xycoords=trans,
                xytext=(xoff + t[1] - 0.5, yoff-0.01), textcoords=trans,
                arrowprops=dict(arrowstyle="-",
                          connectionstyle="arc3, rad=0"),
                )

            xoff += t[1]

        yoff -= 0.06

    # Row captions; the order matches the label functions passed to getLabelGroups.
    ax.text( -0.7, -0.06, "tf", ha='right', transform=trans)    
    ax.text( -0.7, -0.12, "bz", ha='right', transform=trans)    
    ax.text( -0.7, -0.18, "by", ha='right', transform=trans)    
    ax.text( -0.7, -0.24, "bx", ha='right', transform=trans)    
    ax.text( -0.7, -0.30, "kernel", ha='right', transform=trans)    



    ax.set_xticks([])
    ax.set_xlim((-0.8,len(keys)-0.2))
    # Cap the y axis at `ceiling`.
    ax.set_ylim((0, min(ceiling, ax.get_ylim()[1])))
    ax.legend()
    #ax.set_xlabel("configuration")
    ax.set_ylabel("Volume, $B/Lup$")
    fig.tight_layout()
    return fig,ax


# Random-pick pools: stencil configurations with bx >= 32, and all LBM
# configurations (bx > 0).
stencilKeys = [k for k in stencilPred.keys() if k[1] >= 32]
lbmKeys =     [k for k in lbmPred.keys() if k[1] > 0]

import random

# Four random stencil configurations plus the hand-picked ones below.
random.shuffle(stencilKeys)
keys = stencilKeys[:4]

keys.extend([(4, 512, 1, 2, (1,1,1)),
             (4, 512, 2, 1, (1,1,1)),
             (4, 256, 4, 1, (1,1,1)),
             (4, 256, 1, 4, (1,1,1)),
             (4, 256, 1, 4, (1,2,1)),
             (4, 256, 1, 4, (1,1,2)),
             (4, 32, 1, 32, (1,1,1)),
             (4, 32, 2, 16, (1,1,1)),
             (4, 32, 2, 16, (1,2,1)),
             (4, 32, 2, 16, (1,1,2)),
             (4, 32, 8, 4, (1,1,1)),
             (4, 32, 32, 1, (1,1,1)),
             (4, 32, 32, 1, (1,2,1)),
             (4, 32, 32, 1, (1,1,2)),
             (4, 128, 8, 1, (1,1,1)),
             (4, 32, 4, 8, (1,1,1)),
             (4, 16, 1, 64, (1,1,1)),
             (4, 8, 2, 64, (1,1,1)),
             (4, 8, 128, 1, (1,1,1)),
             (4, 8, 16, 8, (1,1,1)),
             
             (4, 1, 256, 4, (1,1,1)),
             (4, 4, 256, 1, (1,1,1)),
             (4, 1, 512, 2, (1,1,1)),
             (4, 2, 512, 1, (1,1,1)),
             (4, 1, 16, 64, (1,1,1)),
             (4, 4, 4, 64, (1,1,1)),          
            ])


# Remove duplicates (a random pick may coincide with a hand-picked key), sort
# so equal labels are adjacent, and drop configurations without loaded results.
keys = list(set(keys))
keys.sort()
keys = [k for k in keys if k in lbmPred or k in stencilPred]

# Stencil composition plot with the y axis capped at 120.
fig, ax = compositionPlot(keys, 120)
fig.tight_layout()
fig.savefig("paperplots/comp_mem_stencil.pdf")

# Four random LBM configurations plus the hand-picked ones below.
random.shuffle(lbmKeys)
keys = lbmKeys[:4]

keys.extend([ (1, 512, 1,1, (1,1,1)),
              (1, 256, 2,1, (1,1,1)),
              (1, 256, 1,2, (1,1,1)),
              (1, 128, 4,1, (1,1,1)),
              (1, 128, 1,4, (1,1,1)),
              (1, 64, 8,1, (1,1,1)),
              (1, 64, 1,8, (1,1,1)),
              (1, 32, 8,2, (1,1,1)),
              (1, 32, 2,8, (1,1,1)),
              (1, 16, 2, 16, (1,1,1)),
              (1, 16, 16, 2, (1,1,1)),
              (1, 8, 8, 8, (1,1,1)),
              (1, 8, 64, 1, (1,1,1)),
              (1, 8, 1, 64, (1,1,1)),
              (1, 4, 4, 32, (1,1,1)),
              (1, 4, 32, 4, (1,1,1)),            
              (1, 2, 8, 16, (1,1,1)),
              (1, 2, 2, 64, (1,1,1)),
              (1, 2, 128, 1, (1,1,1)),
              (1, 1, 512, 1, (1,1,1)),
             
              #(1, 1, 4, 64, (1,1,1))
            ])
# Remove duplicates, as the stencil selection in this cell does: a random pick
# that is also hand-picked would otherwise be plotted as two identical bars.
keys = list(set(keys))
# Sort so equal labels are adjacent, then drop configurations without loaded results.
keys.sort()
keys = [k for k in keys if k in lbmPred or k in stencilPred]

# LBM composition plot with the y axis capped at 300.
fig, ax = compositionPlot(keys, 300)
fig.tight_layout()
fig.savefig("paperplots/comp_mem_lbm.pdf")

In [None]:
# Names indexed by the predicted limiter value limV3.
categories = ["L1", "L2", "RAM"]
   
keys = [k for k in stencilMeas]

# Measured vs. predicted performance for all stencil configurations.
# NOTE(review): this plot uses perfV4 while the title string and the output
# file name say "v3" - confirm which model version is intended.
fig, ax = volumeScatterPlot([(k[1:4], combinedMeas[k].lups, combinedPred[k].perfV4, categories[combinedPred[k].limV3], combinedPred[k].perfPheno) for k in keys], "stencil perf v3")
fig.set_size_inches(4,4)

#ax.set_xticks([0.25, 0.5, 1, 2, 4])
#ax.set_yticks([0.25, 0.5, 1, 2, 4])
ax.set_ylim([4, 90])
ax.set_xlim([3, 85])

ax.set_title(None)
ax.set_xlabel("measured performance, GLup/s")
ax.set_ylabel("predicted performance, GLup/s")
fig.tight_layout()
fig.savefig("paperplots/stencil_perf_v3.pdf")

In [None]:
# Names indexed by the predicted limiter value limV3.
categories = ["L1", "L2", "RAM"]
   
keys = [k for k in lbmMeas]

print(len(keys))

# Measured vs. predicted (perfV3) performance for all LBM configurations. The
# title string still says "stencil"; it is removed again by set_title(None).
fig, ax = volumeScatterPlot([(k[1:4], combinedMeas[k].lups, combinedPred[k].perfV3, categories[combinedPred[k].limV3], combinedPred[k].perfPheno) for k in keys], "stencil perf v3")
fig.set_size_inches(4,4)
ax.set_title(None)
ax.set_xlabel("measured performance, GLup/s")
ax.set_ylabel("predicted performance, GLup/s")
# Only the ticks that fall inside the limits set below are visible.
ax.set_xticks([0.25, 0.4, 0.5, 0.75, 1, 2, 4])
ax.set_yticks([0.25, 0.4, 0.5, 0.75, 1, 2, 4])
ax.set_ylim([3.0, 6])
ax.set_xlim([1, 6])

fig.tight_layout()
fig.savefig("paperplots/lbm_perf_v3.pdf")

In [None]:
# (performance, block size, thread folding) per stencil configuration, once
# with the predicted (perfV4) and once with the measured performance.
predTop = [(combinedPred[k].perfV4, k[1:4], k[4]) for k in stencilMeas.keys()]
measTop = [(combinedMeas[k].lups,  k[1:4], k[4]) for k in stencilMeas.keys()]

print(len(predTop))

# Keep the 25 best of each ranking, in ascending order (best entry last).
predTop = sorted(predTop)[-25:]
measTop = sorted(measTop)[-25:]



# Print the predicted ranking; "**" marks the configuration that is fastest in
# the measurements.
for p in (predTop):

    if p[1:3] == measTop[-1][1:3]:
        print ("**", end="")
    print(p)
print()

# Print the measured ranking; "**" marks the configuration predicted to be fastest.
for p in measTop:    
    if p[1:3] == predTop[-1][1:3]:
        print ("**", end="")
    print(p)

# Presentation Plots

In [None]:

# Presentation variant: stencil configurations without thread folding only.
keys = [k for k in stencilMeas if k[4] == (1,1,1)]


# Measured L2 load volume vs. the L2LoadV2 prediction; saved as PNG at 400 dpi.
fig, ax = volumeScatterPlot([(k[1:4], stencilMeas[k].L2Load_tex, stencilPred[k].L2LoadV2) for k in keys], "Stencil L2 Volume")
ax.set_title(None)
ax.get_legend().remove()
fig.tight_layout()
plt.savefig( "./paperplots/pres_L2_volumes_stencil.png", dpi=400)

In [None]:

# Presentation variant: LBM configurations without thread folding only.
keys = [k for k in lbmMeas if k[4] == (1,1,1)]


# Measured L2 load volume vs. the L2LoadV2 prediction; saved as PNG at 400 dpi.
fig, ax = volumeScatterPlot([(k[1:4], lbmMeas[k].L2Load_tex, lbmPred[k].L2LoadV2) for k in keys], "LBM L2 Volume")
ax.set_title(None)
ax.get_legend().remove()
fig.tight_layout()
plt.savefig( "./paperplots/pres_L2_volumes_lbm.png", dpi=400)

In [None]:

# Presentation variant: stencil configurations without thread folding only.
keys = [k for k in stencilMeas if k[4] == (1,1,1)]

# Measured memory load volume vs. the memLoadV3 prediction; saved as PNG at 400 dpi.
fig, ax = volumeScatterPlot([(k[1:4], stencilMeas[k].memLoad, stencilPred[k].memLoadV3) for k in keys], "Stencil DRAM Volume")
ax.set_title(None)
ax.get_legend().remove()
fig.tight_layout()
plt.savefig( "./paperplots/pres_mem_volumes_stencil.png", dpi=400)

In [None]:

# Presentation variant: LBM configurations without thread folding only.
keys = [k for k in lbmMeas if k[4] == (1,1,1)]

# Measured memory load volume vs. the memLoadV3 prediction; saved as PNG at 400 dpi.
fig, ax = volumeScatterPlot([(k[1:4], lbmMeas[k].memLoad, lbmPred[k].memLoadV3) for k in keys], "LBM DRAM Volume")
ax.set_title(None)
ax.get_legend().remove()
fig.tight_layout()
plt.savefig( "./paperplots/pres_mem_volumes_lbm.png", dpi=400)

In [None]:
# Names indexed by the predicted limiter value limV3.
categories = ["L1", "L2", "RAM"]
   
# Presentation variant: stencil configurations without thread folding only.
keys = [k for k in stencilMeas if k[4] == (1,1,1)]

# Measured vs. predicted (perfV3) performance; saved as PNG at 400 dpi.
fig, ax = volumeScatterPlot([(k[1:4], combinedMeas[k].lups, combinedPred[k].perfV3, categories[combinedPred[k].limV3], combinedPred[k].perfPheno) for k in keys], "stencil perf v3")
#fig.set_size_inches(5,4)

#ax.set_xticks([0.25, 0.5, 1, 2, 4])
#ax.set_yticks([0.25, 0.5, 1, 2, 4])
ax.set_ylim([3, 90])
ax.set_xlim([1.5, 85])

ax.set_title(None)
ax.set_xlabel("measured performance, GLup/s")
ax.set_ylabel("predicted performance, GLup/s")
fig.tight_layout()
fig.savefig("paperplots/pres_perf_stencil_v3.png", dpi=400)

In [None]:
# Names indexed by the predicted limiter value limV3.
categories = ["L1", "L2", "RAM"]
   
# Presentation variant: LBM configurations without thread folding only.
keys = [k for k in lbmMeas if k[4] == (1,1,1)]

# Measured vs. predicted (perfV3) performance; saved as PNG at 400 dpi.
fig, ax = volumeScatterPlot([(k[1:4], combinedMeas[k].lups, combinedPred[k].perfV3, categories[combinedPred[k].limV3], combinedPred[k].perfPheno) for k in keys], "lbm perf v3")
#fig.set_size_inches(5,4)

#ax.set_xticks([0.25, 0.5, 1, 2, 4])
#ax.set_yticks([0.25, 0.5, 1, 2, 4])
ax.set_ylim([1, 6])
ax.set_xlim([1, 6])

ax.set_title(None)
ax.set_xlabel("measured performance, GLup/s")
ax.set_ylabel("predicted performance, GLup/s")
fig.tight_layout()
fig.savefig("paperplots/pres_perf_lbm_v3.png", dpi=400)