# Pystencils Multi Phase LBM Kernel - Performance and Volumes

In [None]:

import sys 
# Extend the module search path with sibling directories, given relative to
# the notebook's working directory, so the imports in the next cell resolve.
# Both '../../pystencils' and '../pystencils' are added; presumably this
# covers two different checkout layouts (TODO confirm).
sys.path.append('../../pystencils')
sys.path.append('../pystencils')
sys.path.append('../measutils')
sys.path.append('../warpspeed')

%load_ext autoreload
%autoreload 1




In [None]:
import cProfile
import re


import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import sys
from subprocess import run, PIPE


import pystencils as ps
from pystencils.display_utils import show_code, get_code_str
from griditeration import *
from predict_metrics import *
from volumes_isl import *
from plot_utils import *
from measured_metrics import MeasuredMetrics, ResultComparer



import timeit

from meas_db import MeasDB

In [None]:
# Measurement database handle that is passed to getResults() further down.
stencildb = MeasDB("3dstencils2.db")
# Left disabled. NOTE(review): no `meas_db` variable exists at notebook level;
# re-enabling this line would presumably need `stencildb` instead.
#meas_db.clearDB()

In [None]:


# Module-level device description; getResults() reads this global both for
# the database lookup and for building the derived (predicted) metrics.
device = DeviceAmpereA100_80GB()
print(device.name)


def _doublingSteps(start, stop):
    """Yield start, 2*start, 4*start, ... while the value is <= stop."""
    if start > stop:
        return
    if start <= 0:
        # Doubling a non-positive value never grows past `stop`, so a plain
        # while-loop over this range would never terminate.
        raise ValueError(
            "block size lower bound must be positive, got {!r}".format(start))
    value = start
    while value <= stop:
        yield value
        value *= 2


def getBlockSizes(x1, x2, y1, y2, z1, z2, threadCounts):
    """Enumerate (x, y, z) block shapes whose thread count is accepted.

    Each dimension starts at its lower bound (x1, y1, z1) and is doubled
    until it exceeds its upper bound (x2, y2, z2). A shape is kept when the
    product x*y*z is contained in `threadCounts`.

    Args:
        x1, x2: inclusive lower/upper bound of the x block size.
        y1, y2: inclusive lower/upper bound of the y block size.
        z1, z2: inclusive lower/upper bound of the z block size.
        threadCounts: iterable of accepted x*y*z products.

    Returns:
        List of (x, y, z) tuples, ordered with x as the slowest and z as the
        fastest varying dimension.

    Raises:
        ValueError: if a dimension that is actually iterated has a
            non-positive lower bound (the doubling would never terminate).
    """
    accepted = set(threadCounts)
    return [
        (xblock, yblock, zblock)
        for xblock in _doublingSteps(x1, x2)
        for yblock in _doublingSteps(y1, y2)
        for zblock in _doublingSteps(z1, z2)
        if xblock * yblock * zblock in accepted
    ]

                
def getResults(meas_db, r, blockSizes, threadFoldingSizes, domainSize):
    """Look up measurements and build predictions for each configuration.

    Every combination of a block size and a thread-folding size is queried
    via `meas_db.getEntry(...)`, using the module-level `device`. A
    combination whose measurement or basic-metrics entry is missing is
    reported on stdout and skipped.

    Args:
        meas_db: object providing `getEntry(r, block, threadFolding,
            domainSize, device)` that returns a 3-tuple (lc, basic, meas).
        r: first element of each result key; forwarded to `getEntry`.
        blockSizes: iterable of block-size tuples.
        threadFoldingSizes: iterable of thread-folding values.
        domainSize: forwarded unchanged to `getEntry`.

    Returns:
        Tuple `(measValues, predValues)` of dicts keyed by
        `(r, *block, threadFolding)`. The first maps to the measurement
        entry, the second to the `DerivedMetrics` built from it.
    """
    measured = {}
    predicted = {}
    for blockSize in blockSizes:
        for folding in threadFoldingSizes:
            configKey = (r, *blockSize, folding)
            lc, basic, meas = meas_db.getEntry(
                r, blockSize, folding, domainSize, device)

            if meas is not None and basic is not None:
                derived = DerivedMetrics(lc, basic, device, meas)
                measured[configKey] = meas
                predicted[configKey] = derived
            else:
                print(str(configKey) + " not found")
    return measured, predicted
            

In [None]:
# Initial query: r = 4, every power-of-two block shape with x, y <= 1024 and
# z <= 64 whose x*y*z product is exactly 1024, thread folding (1, 1, 1), and
# domain size (1024, 512, 200).
stencilMeas, stencilPred = getResults(stencildb, 4, getBlockSizes(1,1024, 1, 1024, 1, 64, [1024]), [(1,1,1)], (1024, 512, 200))
# Disabled variant; `lbmdb` is not defined anywhere in the visible notebook.
#lbmMeas, lbmPred = getResults(lbmdb, 1, getBlockSizes(1, 1024, 1, 1024, 1, 64, [512]), [(1,1,1)], None)

In [None]:

def computeMAPE(meas, pred, measName, predName):
    """Return the mean absolute percentage error of predictions.

    For every key of `meas`, the attribute `measName` of the measured entry
    is compared with the attribute `predName` of `pred[key]`. The result is
    a fraction (0.1 means 10 %), not a percentage.

    Args:
        meas: dict mapping keys to measured entries.
        pred: dict mapping the same keys to predicted entries.
        measName: attribute name read from each measured entry.
        predName: attribute name read from each predicted entry.

    Returns:
        Mean of |measured - predicted| / |measured| over all keys of `meas`,
        or 0 when `meas` is empty.

    Raises:
        KeyError: if `pred` lacks a key present in `meas`.
        ZeroDivisionError: if a measured value is zero.
    """
    totalError = 0
    for key, measEntry in meas.items():
        measValue = getattr(measEntry, measName)
        predValue = getattr(pred[key], predName)

        # Normalise by the magnitude of the measurement: dividing by the
        # signed value would let negative measurements cancel other errors.
        totalError += abs(measValue - predValue) / abs(measValue)
    # max(1, ...) keeps the empty case at 0 instead of dividing by zero.
    return totalError / max(1, len(meas))

def computeKTau(meas, pred, measName, predName):
    """Return Kendall's tau between measured and predicted values.

    The values are paired by key: for every key of `meas`, attribute
    `measName` of `meas[key]` is paired with attribute `predName` of
    `pred[key]`.

    Args:
        meas: dict mapping keys to measured entries.
        pred: dict mapping the same keys to predicted entries.
        measName: attribute name read from each measured entry.
        predName: attribute name read from each predicted entry.

    Returns:
        The correlation coefficient, i.e. element 0 of the result of
        `scipy.stats.kendalltau`.

    Raises:
        KeyError: if `pred` lacks a key present in `meas`.
    """
    # Imported locally so the function does not depend on a `stats` name
    # leaking in through one of the notebook's star imports.
    from scipy import stats

    # Use the arguments, not the notebook globals, so the function also
    # works for result sets other than stencilMeas/stencilPred.
    keys = list(meas)
    measValues = [getattr(meas[k], measName) for k in keys]
    predValues = [getattr(pred[k], predName) for k in keys]
    return stats.kendalltau(measValues, predValues)[0]

def sortedDict(d, attrName="lups"):
    """Return a copy of `d` whose insertion order is ascending by attribute.

    Args:
        d: dict whose values expose the attribute named `attrName`.
        attrName: name of the value attribute to sort by; defaults to
            "lups", the attribute this helper always sorted by before.

    Returns:
        A new dict with the same items as `d`, ordered by
        `getattr(value, attrName)` ascending. Ties keep their original
        relative order because `sorted` is stable.
    """
    orderedKeys = sorted(d, key=lambda k: getattr(d[k], attrName))
    return {k: d[k] for k in orderedKeys}
                    
# (measured attribute, predicted attribute) pairs, in the order of the
# columns of the summary line printed for each thread count.
metricPairs = [
    ("memLoad", "memLoadV3"),
    ("L2Load_tex", "L2LoadV2"),
    ("memStore", "memStoreV2"),
    ("L2Store", "L2Store"),
    ("lups", "perfV3"),
    ("lups", "perfPheno"),
]

# NOTE(review): this loop used to iterate over `[w4]`, a name that is not
# defined anywhere in the visible notebook. The earlier query against the
# same database uses r = 4, so `w4` is treated as a typo for 4 - confirm.
for r in [4]:
    for threadCount in [512, 1024]:
        stencilMeas, stencilPred = getResults(
            stencildb,
            r,
            getBlockSizes(1, 1024, 1, 1024, 1, 64, [threadCount]),
            [(1, 1, 1), (1, 2, 2)],
            (1024, 512, 200),
        )

        # Re-order both dicts ascending by performance so that the last key
        # of each is its best configuration (measured resp. predicted).
        stencilMeas = {
            k: stencilMeas[k]
            for k in sorted(stencilMeas, key=lambda k: stencilMeas[k].lups)
        }
        stencilPred = {
            k: stencilPred[k]
            for k in sorted(stencilPred, key=lambda k: stencilPred[k].perfV3)
        }

        print()

        # Nothing was found for this thread count; skip the report.
        if not stencilMeas:
            continue

        bestMeasKey = list(stencilMeas)[-1]
        bestPredKey = list(stencilPred)[-1]

        print(stencilMeas[bestMeasKey].lups)

        print("meas of best pred: {:6.2f}".format(stencilMeas[bestPredKey].lups))
        print("pred of best meas: {:6.2f}".format(stencilPred[bestMeasKey].perfV3))
        print()

        # One (MAPE in percent, Kendall tau) column pair per metric.
        columns = []
        for measName, predName in metricPairs:
            columns.append(
                computeMAPE(stencilMeas, stencilPred, measName, predName) * 100)
            columns.append(
                computeKTau(stencilMeas, stencilPred, measName, predName))

        print("{} {:4.1f} {:4.2f} | {:4.1f} {:4.2f} | {:4.1f} {:4.2f} | {:4.1f}  {:4.2f}| {:4.1f} {:4.2f} |  {:4.1f} {:4.2f}".format(
            len(stencilMeas), *columns))
        print()
    print()