# Pystencils Multi Phase LBM Kernel - Performance and Volumes

In [None]:

# Extend the module search path with the two relative directories below.
import sys 
sys.path.append('../pystencils')
sys.path.append('../genpredict')

# NOTE(review): per the IPython docs, `%autoreload 1` only reloads modules that
# were registered with `%aimport`; no `%aimport` is visible in this notebook,
# so edits to the imported modules may not be picked up. Confirm whether
# `%autoreload 2` (reload all modules) was intended.
%load_ext autoreload
%autoreload 1




In [None]:
import math
import time

from griditeration import *
from predict_metrics import *
from volumes_isl import *
from plot_utils import *

import random

from plot_utils import *
from meas_utils import *

from measured_metrics import *
from meas_db import MeasDB

# MeasDB handles for the two database files; both are later handed to
# getResults() as its meas_db argument.
lbmdb = MeasDB("multiphaselbm.db")
stencildb = MeasDB("3dstencils.db")


In [None]:
# Global result tables filled by getResults(). Both are keyed by
# (r, xblock, yblock, zblock, threadFolding).
measValues = {}
predValues = {}

# Device description handed to the database lookup and to DerivedMetrics.
device = DeviceAmpere()
print(device.name)

def nextBlockSize():
    """Yield every (xblock, yblock, zblock) whose product is 256, 512 or 1024.

    The x and y extents run over the powers of two from 1 to 512, the z
    extent over the powers of two from 1 to 64. Triples are produced in
    nested-loop order: x varies slowest, z fastest.
    """
    xyExtents = [2 ** e for e in range(10)]   # 1 .. 512
    zExtents = [2 ** e for e in range(7)]     # 1 .. 64
    acceptedProducts = (256, 512, 1024)

    for bx in xyExtents:
        for by in xyExtents:
            yield from (
                (bx, by, bz)
                for bz in zExtents
                if bx * by * bz in acceptedProducts
            )
                
def getResults(meas_db, r):
    """Collect measured and derived metrics for ``r`` into the global tables.

    For every block size from nextBlockSize() and each of the three thread
    folding variants, ``meas_db.getEntry`` is queried for the global
    ``device``. Entries whose measurement or basic metrics are ``None`` are
    skipped. For the others the measurement is stored in ``measValues`` and
    the DerivedMetrics built from it in ``predValues``, both under the key
    ``(r, xblock, yblock, zblock, threadFolding)``, and a textual comparison
    is printed.
    """
    foldingVariants = ((1, 1, 1), (1, 2, 1), (1, 1, 2))

    for block in nextBlockSize():
        for threadFolding in foldingVariants:
            lc, basic, meas = meas_db.getEntry(r, block, threadFolding, device)
            if basic is None or meas is None:
                continue

            derived = DerivedMetrics(lc, basic, device, meas)

            key = (r, *block, threadFolding)
            measValues[key] = meas
            predValues[key] = derived

            print(str(lc), end="")
            print(str(basic), end="--\n")
            print(str(ResultComparer(meas, derived)))

            print()

# Fill measValues / predValues from both databases.
# The (lbmdb, 0) query stays disabled: #getResults(lbmdb, 0)
for _measDb, _rangeId in ((stencildb, 4), (lbmdb, 1)):
    getResults(_measDb, _rangeId)

In [None]:
# Measured memLoad against the memLoadV1..V4 predictions. From V2 on, the
# previous version's prediction is passed along as a fifth tuple element.
memLoadV1Points = [(key[1:4], meas.memLoad, predValues[key].memLoadV1, key[0])
                   for key, meas in measValues.items()]
volumeScatterPlot(memLoadV1Points, "Multi Phase LBM Memory Load Volumes V1")

memLoadV2Points = [(key[1:4], meas.memLoad, predValues[key].memLoadV2, key[0], predValues[key].memLoadV1)
                   for key, meas in measValues.items()]
volumeScatterPlot(memLoadV2Points, "Multi Phase LBM Memory Load Volumes V2")

memLoadV3Points = [(key[1:4], meas.memLoad, predValues[key].memLoadV3, key[0], predValues[key].memLoadV2)
                   for key, meas in measValues.items()]
volumeScatterPlot(memLoadV3Points, "Multi Phase LBM Memory Load Volumes V3")

memLoadV4Points = [(key[1:4], meas.memLoad, predValues[key].memLoadV4, key[0], predValues[key].memLoadV3)
                   for key, meas in measValues.items()]
volumeScatterPlot(memLoadV4Points, "Multi Phase LBM Memory Load Volumes V4")

In [None]:
# Measured L2Load_tex against the L2LoadV1 / L2LoadV2 predictions; the V2 plot
# additionally carries the V1 prediction as a fifth tuple element.
l2LoadV1Points = [(key[1:4], meas.L2Load_tex, predValues[key].L2LoadV1, key[0])
                  for key, meas in measValues.items()]
volumeScatterPlot(l2LoadV1Points, "Multi Phase LBM L2 Load Volumes V1")

l2LoadV2Points = [(key[1:4], meas.L2Load_tex, predValues[key].L2LoadV2, key[0], predValues[key].L2LoadV1)
                  for key, meas in measValues.items()]
volumeScatterPlot(l2LoadV2Points, "Multi Phase LBM L2 Load Volumes V2")

In [None]:
# Measured store volumes (L2Store, memStore) against their predictions; the
# memStoreV2 plot carries the V1 prediction as a fifth tuple element.
l2StorePoints = [(key[1:4], meas.L2Store, predValues[key].L2Store, key[0])
                 for key, meas in measValues.items()]
volumeScatterPlot(l2StorePoints, "Multi Phase LBM L2 Store Volumes V1")

memStoreV1Points = [(key[1:4], meas.memStore, predValues[key].memStoreV1, key[0])
                    for key, meas in measValues.items()]
volumeScatterPlot(memStoreV1Points, "Multi Phase LBM Memory Store Volumes V1")

memStoreV2Points = [(key[1:4], meas.memStore, predValues[key].memStoreV2, key[0], predValues[key].memStoreV1)
                    for key, meas in measValues.items()]
volumeScatterPlot(memStoreV2Points, "Multi Phase LBM Memory Store Volumes V2")

In [None]:
def _tlbCappedPerf(key):
    """Return perfV4 for ``key``, capped by the TLB-page based bound.

    The bound is (400 + 400 / max(1, TLBpages / 32 * 0.4)) divided by the
    measured memLoad + memStore.
    NOTE(review): the constants 400, 32 and 0.4 are taken over unchanged;
    their origin is not documented in this notebook.
    """
    meas = measValues[key]
    pred = predValues[key]
    return min(pred.perfV4,
               (400 + 400 / max(1, (pred.basic.TLBpages / 32) * 0.4)) / (meas.memLoad + meas.memStore))


# Measured lups against the TLB-capped perfV4 prediction.
tlbPoints = [(key[1:4], measValues[key].lups, _tlbCappedPerf(key), key[0]) for key in measValues]
volumeScatterPlot(tlbPoints, "Multi Phase LBM TLB Pages V1")

In [None]:
# Predicted L1Cycles plotted against the x block extent (key[1]), which takes
# the slot the other plots use for the measured value.
l1CyclePoints = [(key[1:4], key[1], pred.L1Cycles, key[0])
                 for key, pred in ((key, predValues[key]) for key in measValues)]
volumeScatterPlot(l1CyclePoints, "Multi Phase LBM L1 Cycles V1")

In [None]:
# L1 capacity miss rate: scatter of the measured rate per configuration plus a
# fitted curve a * exp(-b * exp(-c * x)), saved to rcap_L1.pdf.
fig, ax = plt.subplots()
fig.set_figwidth(4)
fig.set_figheight(4)
fig.set_dpi(150)


def _l1Oversubscription(k):
    """Return the value plotted on the "oversubscription factor" axis.

    ``k`` is a (r, xblock, yblock, zblock, threadFolding) key. The predicted
    smL1Alloc is divided by 1024 / (xblock*yblock*zblock) and by 128*1024.
    """
    return predValues[k].smL1Alloc / (1024 / (k[1]*k[2]*k[3])) / (128*1024)


def _l1CapacityMissRate(k):
    """Return the value plotted as R_cap.

    Computed as (measured L2Load - L2LoadV1) / (L1Load - L2LoadV1).
    NOTE(review): the L2 load volume plots compare against L2Load_tex while
    this uses L2Load - confirm that the difference is intended.
    """
    return (measValues[k].L2Load - predValues[k].L2LoadV1) / (predValues[k].L1Load - predValues[k].L2LoadV1)


for r in [4, 0, 1]:
    keys = [k for k in measValues if k[0] == r]
    ax.plot([_l1Oversubscription(k) for k in keys],
            [_l1CapacityMissRate(k) for k in keys], ".", alpha=0.2)

# Only configurations with more than 8 KiB of predicted L1 allocation enter
# the fit. The threshold used to read 8*1028, which looks like a typo for
# 8*1024 given the 128*1024 used for the L1 size above.
validKeys = [k for k in measValues if predValues[k].smL1Alloc > 8*1024 and k[0] < 23]


values = np.arange(1.0, 16.0, 0.1)

ax.set_xlim([0, 10])

# Earlier hand-tuned curves, kept for reference:
#ax.plot (values, 0.45*np.exp(-9.0*np.exp(-0.7*values))) 
#ax.plot (values, 1.0*(1 - 1 / ((values+2)*0.33 )))
#ax.plot (values, np.maximum(0, 0.8*np.exp(-6.0*np.exp(-0.45*values)))) 
#ax.plot (values, 0.43*np.exp(-9.0*np.exp(-0.65*values))) 
#ax.plot (values, 0.25*np.exp(-9.0*np.exp(-0.5*values))) 


from scipy.optimize import curve_fit

def func(x, a, b, c):
    """Model fitted to the data: a * exp(-b * exp(-c * x))."""
    return a * np.exp(-b*np.exp(-c*x))

xdata = np.array([_l1Oversubscription(k) for k in validKeys])
ydata = np.array([_l1CapacityMissRate(k) for k in validKeys])
popt, pcov = curve_fit(func, xdata, ydata)
print(popt)
print(pcov)

# Evaluate the fit on the data abscissae plus a regular 0.1 .. 9.9 grid so the
# curve spans the whole x range even where data points are missing.
xdata = np.array(list(xdata) + [i / 10 for i in range(1, 100)])
xdata.sort()
# Draw on ax explicitly, like the scatter above, rather than on pyplot's
# implicit current axes.
ax.plot(xdata, func(xdata, *popt), 'r-',
        label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))



ax.axvline(1.0)
ax.set_xlabel("oversubscription factor")
ax.set_ylabel("$R_{cap}$")
ax.set_title("L1 capacity miss rate")
fig.tight_layout()
fig.savefig("rcap_L1.pdf")

In [None]:
# Memory load capacity miss rate against the predicted L2 oversubscription:
# per-range scatter plus a fitted curve a * exp(-b * exp(-c * x)).
fig, ax = plt.subplots()
fig.set_figwidth(4)
fig.set_figheight(4)
fig.set_dpi(150)


def _l2CapacityMissRate(k):
    """Return (memLoad - memLoadV2) / (L2LoadV2 - memLoadV2), clamped.

    Numerator and denominator are each clamped at 0 and the quotient is
    capped at 1.0. The same expression feeds the scatter and the fit so the
    two cannot drift apart.
    """
    excess = np.maximum(0, measValues[k].memLoad - predValues[k].memLoadV2)
    span = np.maximum(0, predValues[k].L2LoadV2 - predValues[k].memLoadV2)
    return np.minimum(1.0, excess / span)


for r in [0,1,4]:
    keys = [k for k in measValues if k[0] == r]
    ax.plot([predValues[k].L2Oversubscription for k in keys],
            [_l2CapacityMissRate(k) for k in keys],
            ".", alpha=0.1)


values = np.array(range(1024, 1024*1024, 1024))

#ax.set_ylim([-0.1, 1.1])

from scipy.optimize import curve_fit

def func(x, a, b, c):
    """Model fitted to the data: a * exp(-b * exp(-c * x))."""
    return a * np.exp(-b*np.exp(-c*x))

xdata = np.array([predValues[k].L2Oversubscription for k in measValues])
# The fit used to subtract memLoadV1 in the numerator while the scatter above
# and the denominator use memLoadV2; it now fits the plotted quantity.
ydata = np.array([_l2CapacityMissRate(k) for k in measValues])
# a is constrained to [0, 1]; b and c are unbounded.
popt, pcov = curve_fit(func, xdata, ydata, bounds=([0.0, -np.inf, -np.inf], [1, np.inf, np.inf]), maxfev=20000)
print(popt)
print(pcov)

# Evaluate the fit on the data abscissae plus a regular 0.04 .. 3.96 grid.
xdata = np.array(list(xdata) + [i / 25 for i in range(1, 100)])
xdata.sort()
# Draw on ax explicitly, like the scatter above.
ax.plot(xdata, func(xdata, *popt), 'r-',
        label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))


# Earlier hand-tuned curves, kept for reference:
#ax.plot (values, 0.43*np.exp(-9.0*np.exp(-0.0000065*values))) 
#ax.plot (values, 0.25*np.exp(-9.0*np.exp(-0.000005*values))) 

ax.axvline(1)

In [None]:



# Create a dedicated figure like the other plotting cells. Without it this
# cell draws on whatever `ax` an earlier cell left behind (or fails with a
# NameError when run first).
fig, ax = plt.subplots()
fig.set_figwidth(4)
fig.set_figheight(4)
fig.set_dpi(150)


def _waveOverlapPerCell(k):
    """Return waveMemLoadOverlap / waveValidCells of the basic metrics for k."""
    return predValues[k].basic.waveMemLoadOverlap / predValues[k].basic.waveValidCells


# Scatter: memory load below the V1 prediction, relative to the per-cell wave
# overlap. Only keys with more than 0.1 saved load and a per-cell overlap
# above 6 are shown.
for r in [0,1,4]:
    keys = [k for k in measValues if k[0] == r and (predValues[k].memLoadV1 - measValues[k].memLoad) > 0.1 and
           _waveOverlapPerCell(k) > 6]
    ax.plot([ (20*1024*1024) / (predValues[k].basic.waveMemStoreOld + predValues[k].basic.waveMemLoadOld) for k in keys],
            [  (predValues[k].memLoadV1 - measValues[k].memLoad) / _waveOverlapPerCell(k)  for k in keys], ".", alpha=0.2)

    # NOTE(review): the fit below uses the store-side quantity of this disabled
    # scatter, not the load-overlap quantity plotted above - confirm which
    # pairing is intended.
    #keys = [k for k in measValues if k[0] == r]
    #ax.plot([predValues[k].waveL2Alloc / 6 / 1024 / 1024 for k in keys if (measValues[k].L2Store / measValues[k].memStore) > 1.1],
    #        [ min(10.0, (measValues[k].memStore - predValues[k].memStoreV1) / (predValues[k].L2Store - predValues[k].memStoreV1)) for k in keys if (measValues[k].L2Store / measValues[k].memStore) > 1.1],
    #        ".", alpha=0.1)


values = np.array(range(1024, 1024*1024, 1024))

#ax.set_ylim([-0.1, 1.1])

from scipy.optimize import curve_fit

def func(x, a, b, c):
    """Model fitted to the data: a * exp(-b * exp(-c * x))."""
    return a * np.exp(-b*np.exp(-c*x))

# Keys entering the fit: measured L2Store exceeds memStore by more than 10 %.
# Selected once so xdata and ydata always cover the same keys.
storeKeys = [k for k in measValues if (measValues[k].L2Store / measValues[k].memStore) > 1.1]

xdata = np.array([predValues[k].waveL2Alloc / 6 / 1024 / 1024 for k in storeKeys])
ydata = np.array([min(1.0, (measValues[k].memStore - predValues[k].memStoreV1) / (predValues[k].L2Store - predValues[k].memStoreV1)) for k in storeKeys])
# a is constrained to [0, 1]; b and c are unbounded.
popt, pcov = curve_fit(func, xdata, ydata, bounds=([0.0, -np.inf, -np.inf], [1, np.inf, np.inf]), maxfev=20000)
print(popt)
print(pcov)

# Evaluate the fit on the data abscissae plus a regular 0.04 .. 3.96 grid.
xdata = np.array(list(xdata) + [i / 25 for i in range(1, 100)])
xdata.sort()

# Draw on ax explicitly so scatter, fit and marker line share one figure.
ax.plot(xdata, func(xdata, *popt), 'r-',
        label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))


# Earlier hand-tuned curves, kept for reference:
#ax.plot (values, 0.43*np.exp(-9.0*np.exp(-0.0000065*values))) 
#ax.plot (values, 0.25*np.exp(-9.0*np.exp(-0.000005*values))) 

ax.axvline(1)

In [None]:
# Fraction of the predicted overlap volume that is actually saved, against the
# predicted memLoadCoverage: per-range scatter plus a fitted curve.
fig, ax = plt.subplots()
fig.set_figwidth(4)
fig.set_figheight(4)
fig.set_dpi(150)


def _savedLoad(k):
    """Return how far the measured memLoad lies below the memLoadV1 prediction."""
    return predValues[k].memLoadV1 - measValues[k].memLoad


def _savedFraction(k):
    """Return the saved load relative to memLoadOverlapAbsolute."""
    return _savedLoad(k) / (predValues[k].memLoadOverlapAbsolute)


# Scatter, one series per r; only keys with a positive saving are shown.
for r in (0, 1, 4):
    keys = [k for k in measValues if k[0] == r and _savedLoad(k) > 0]
    coverage = [predValues[k].memLoadCoverage for k in keys]
    fraction = [_savedFraction(k) for k in keys]
    ax.plot(coverage, fraction, ".", alpha=0.2)


ax.set_ylim([-0.1, 1.1])

from scipy.optimize import curve_fit

def func(x, a, b, c):
    """Model fitted to the data: a * exp(-b * exp(-c * x))."""
    inner = np.exp(-c*x)
    return a * np.exp(-b*inner)

# The fit uses every key with a positive saving, across all r.
keys = [k for k in measValues if _savedLoad(k) > 0]

xdata = np.array([predValues[k].memLoadCoverage for k in keys])
ydata = np.array([_savedFraction(k) for k in keys])

# a is constrained to [0.99, 1]; b and c are unbounded.
lowerBounds = [0.99, -np.inf, -np.inf]
upperBounds = [1, np.inf, np.inf]
popt, pcov = curve_fit(func, xdata, ydata, bounds=(lowerBounds, upperBounds), maxfev=20000)
print(popt)
print(pcov)

# Evaluate the fit on the data abscissae plus a regular 0.02 .. 1.98 grid.
xdata = np.array(list(xdata) + [i / 50 for i in range(1, 100)])
xdata.sort()
fitLabel = 'fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt)
plt.plot(xdata, func(xdata, *popt), 'r-', label=fitLabel)


# Earlier hand-tuned curves, kept for reference:
#ax.plot (values, 0.43*np.exp(-9.0*np.exp(-0.0000065*values))) 
#ax.plot (values, 0.25*np.exp(-9.0*np.exp(-0.000005*values))) 

ax.axvline(1)

In [None]:
# Roofline comparison: measured lups against the perfV1..perfV4 and perfPheno
# predictions. Each point is labelled with the category selected by the
# prediction's lim* index; from V2 on, the previous version's prediction is
# passed along as a fifth tuple element.
categories = ["L1", "L2", "RAM"]


keys = list(measValues)

# The titles used to read `r`, a loop variable leaked from an earlier cell,
# although the plots cover every key. Derive the label from the keys instead.
rangeLabel = ",".join(str(rangeId) for rangeId in sorted({k[0] for k in keys}))
titlePrefix = "Multi Phase LBM Roofline range " + rangeLabel

volumeScatterPlot([(k[1:4], measValues[k].lups, predValues[k].perfV1, categories[predValues[k].limV1]) for k in keys], titlePrefix + " V1")
volumeScatterPlot([(k[1:4], measValues[k].lups, predValues[k].perfV2, categories[predValues[k].limV2], predValues[k].perfV1) for k in keys], titlePrefix + " V2")
# The V3 plot used to pass perfV3 as its own reference; it is perfV2, in line
# with the V2, V4 and Pheno plots.
volumeScatterPlot([(k[1:4], measValues[k].lups, predValues[k].perfV3, categories[predValues[k].limV3], predValues[k].perfV2) for k in keys], titlePrefix + " V3")
volumeScatterPlot([(k[1:4], measValues[k].lups, predValues[k].perfV4, categories[predValues[k].limV4], predValues[k].perfV3) for k in keys], titlePrefix + " V4")
volumeScatterPlot([(k[1:4], measValues[k].lups, predValues[k].perfPheno, categories[predValues[k].limPheno], predValues[k].perfV4) for k in keys], titlePrefix + " Pheno")



In [None]:
# Print the eight configurations with the highest measured lups, then the
# eight with the highest predicted perfV4 (ascending, best entry last).
top = sorted(((m, meas.lups) for m, meas in measValues.items()),
             key=lambda entry: entry[1])
print(top[-8:])

print()

top = sorted(((m, pred.perfV4) for m, pred in predValues.items()),
             key=lambda entry: entry[1])
print(top[-8:])
