# stencilgen prediction verification

In [None]:
import sys
sys.path.append('../warpspeed')
sys.path.append('../applications')
sys.path.append('../measutils')

import pycuda.autoinit
import pycuda.driver as drv
import numpy as np

import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
%matplotlib inline
set_matplotlib_formats('svg')

import measure_metric.measureMetric as measureMetric

from stencilgen.stencil import *
from tsmgen.kernel import *
import stencilgen.bench as stencilbench
import tsmgen.benchmark as tsmbench
from predict import *

#% matplotlib notebook

In [None]:
%load_ext autoreload
%autoreload 1
%aimport stencilgen.stencil
%aimport stencilgen.bench
%aimport predict
%aimport griditeration

In [None]:
# Compare measured and predicted data volumes of the 2D box stencil for stencil
# ranges 0..9. Each kernel is run once between the measureMetric start/stop
# calls; the measured values (normalized per grid point) are printed next to
# the predicted per-block volumes (normalized per thread of the block).
storeVolumes = []
loadVolumes = []

# Launch configuration shared by all stencil ranges.
block = (32, 8, 1)
threadsPerBlock = block[0] * block[1] * block[2]
# Passed twice to kernel.getGrid; presumably domain width and height -- TODO confirm.
domainSize = 15000
numPoints = domainSize**2

for r in range(10):
    kernel = Kernel2DBoxStencil(stencil_range=r, l1only=False)

    grid = kernel.getGrid(1, block, domainSize, domainSize)
    # NOTE(review): 80 is presumably the SM count of the target GPU -- confirm.
    concurrentGrid = getConcurrentGrid(getBlocksPerSM(block, 32)*80, grid)
    # The L1/L2 estimates below use at most 4 blocks per dimension.
    truncatedConcurrentGrid = tuple(min(4, c) for c in concurrentGrid)

    print(concurrentGrid)
    print(truncatedConcurrentGrid)

    # Reuse the grid computed above instead of calling kernel.getGrid again.
    measureMetric.measureBandwidthStart()
    stencilbench.runKernel(kernel, grid, block)
    result = measureMetric.measureMetricStop()

    # Per the print labels below: result[0] = mem load, result[1] = mem store,
    # result[2] = L2 load, result[3] = L2 store.
    storeVolumes.append(result[1])
    loadVolumes.append(result[0])

    L2LoadBlockVolume = getL2LoadBlockVolume(block, truncatedConcurrentGrid, kernel.genLoads(), 32)
    L2StoreBlockVolume = getL2StoreBlockVolume(block, truncatedConcurrentGrid, kernel.genStores())

    memLoadBlockVolume = getMemLoadBlockVolume(block, concurrentGrid, kernel.genLoads())
    memStoreBlockVolume = getMemStoreBlockVolume(block, concurrentGrid, kernel.genStores())

    L1Cycles = getL1Cycles(block, truncatedConcurrentGrid, {**kernel.genLoads(), **kernel.genStores()})

    # Left column: measured value per grid point; right column: prediction per thread.
    # NOTE(review): the factor 32 presumably converts L2 transaction counts to bytes -- confirm.
    print("mem load:  {:7.3f} {:7.3f}".format(result[0] / numPoints, memLoadBlockVolume / threadsPerBlock))
    print("mem store: {:7.3f} {:7.3f}".format(result[1] / numPoints, memStoreBlockVolume / threadsPerBlock))
    print("L2 load:   {:7.3f} {:7.3f}".format(result[2]*32 / numPoints, L2LoadBlockVolume / threadsPerBlock))
    print("L2 store:  {:7.3f} {:7.3f}".format(result[3]*32 / numPoints, L2StoreBlockVolume / threadsPerBlock))
    print("L1 cycles: {:7.3f} ".format(L1Cycles))
    print()

In [None]:
def compareSeries(xblocks, yblocks, ranges, version=2, overlap=randomOverlap, **stencilArgs):
    """Benchmark and predict a 2D box stencil over a sweep of block shapes.

    For every stencil range in ``ranges`` a ``Kernel2DBoxStencil`` is built
    (extra keyword arguments are forwarded to its constructor). For every
    combination of ``xblocks`` x ``yblocks`` whose thread count lies in
    [32, 1024], the kernel is benchmarked and a prediction is computed.

    Parameters:
        xblocks: iterable of block sizes in x.
        yblocks: iterable of block sizes in y.
        ranges: iterable of stencil ranges.
        version: ``1`` selects ``predictPerformanceV1``; any other value
            selects ``predictPerformance``, whose result is scaled by
            ``kernel.flops``.
        overlap: forwarded as ``overlap=`` to ``predictPerformance`` (unused
            for ``version == 1``).
        **stencilArgs: forwarded to ``Kernel2DBoxStencil``.

    Returns:
        ``(measuredValues, predictedValues, xticks, xtickLabels)``. The two
        value lists have one entry per benchmarked block shape. ``xticks``
        holds, for each (range, xblock) group with at least one valid shape,
        the index of the group's first entry, and ``xtickLabels`` the matching
        ``"r=<range>;<x>x<y>"`` label of that first entry.
    """
    measuredValues = []
    predictedValues = []

    xticks = []
    xtickLabels = []

    for r in ranges:
        kernel = Kernel2DBoxStencil(stencil_range=r, **stencilArgs)

        for xblock in xblocks:
            # Index of the first data point of this (r, xblock) group; the
            # label is set by the first yblock that passes the size filter.
            groupStart = len(measuredValues)
            groupLabel = None

            for yblock in yblocks:
                threads = xblock * yblock
                # NOTE(review): presumably the CUDA limit of 1024 threads per
                # block and a minimum of one 32-thread warp -- confirm.
                if threads > 1024 or threads < 32:
                    continue

                block = (xblock, yblock, 1)

                print("r={}, block={}x{}".format(r, xblock, yblock))
                if groupLabel is None:
                    groupLabel = "r={};{}x{}".format(r, xblock, yblock)

                # Element [2] of the benchKernel result is reported as GFlop/s below.
                measuredValues.append(stencilbench.benchKernel(kernel, 11, block)[2])
                if version == 1:
                    predictedValues.append(predictPerformanceV1(kernel, block, (16, 16, 1), 32))
                else:
                    predictedValues.append(predictPerformance(kernel, block, (32, 16, 1), overlap=overlap) * kernel.flops)
                print("Meas / Pred (GFlop/s): {:.0f} / {:.0f}".format(measuredValues[-1], predictedValues[-1]))
                print()

            # Only groups that produced at least one data point get a tick.
            if groupLabel is not None:
                xticks.append(groupStart)
                xtickLabels.append(groupLabel)

        print()
    return measuredValues, predictedValues, xticks, xtickLabels

def plotComparison(measuredValues, predictedValues, xticks, xtickLabels):
    """Plot the predicted and the measured series in one figure and show it.

    Both series are drawn against their list index. ``xticks`` lists the index
    positions that receive a tick, ``xtickLabels`` the text for each of them.
    """
    fig, ax = plt.subplots()
    fig.set_figwidth(9)
    fig.set_figheight(4.5)
    fig.set_dpi(200)

    # Line properties common to both curves.
    sharedStyle = dict(linewidth=3, markeredgewidth=2)
    ax.plot(
        predictedValues, "-+", label="predicted",
        color="#CCE699", markersize=9, markeredgecolor="#B5CC88", **sharedStyle
    )
    ax.plot(
        measuredValues, "-x", label="measured",
        color="#99B3E6", markersize=7, markeredgecolor="#889FCC", **sharedStyle
    )

    # For a logarithmic y axis, enable: ax.set_yscale("log")

    ax.set_xticks(xticks)
    ax.set_xticklabels(
        xtickLabels, rotation=60, rotation_mode="anchor", horizontalalignment="right"
    )

    # Pin the lower y limit to zero and keep the current upper limit.
    _, yUpper = ax.get_ylim()
    ax.set_ylim(0, yUpper)
    ax.set_xlim(-1, len(predictedValues))

    ax.legend()
    ax.grid()
    plt.show()


In [None]:
# Sweep for stencil range 1 with l1only=True: x block sizes 1..128, y block sizes 8..512 (powers of two).
l1data = compareSeries([2**e for e in range(8)], [2**e for e in range(3, 10)], [1], l1only=True)

In [None]:
# Plot measured vs. predicted values of the l1only=True sweep stored in l1data.
plotComparison(*l1data)

In [None]:
# Sweep for stencil ranges 1-3: x block sizes 1..1024, y block sizes 1..256 (powers of two).
memdata = compareSeries([2**e for e in range(11)], [2**e for e in range(9)], [1, 2, 3])

In [None]:
# Plot measured vs. predicted values of the stencil range 1-3 sweep stored in memdata.
plotComparison(*memdata)

In [None]:
# Stencil range 1 sweep (x block sizes 1..1024, y block sizes 1..256) predicted with overlap=maxOverlap.
maxOverlapData = compareSeries([2**e for e in range(11)], [2**e for e in range(9)], [1], overlap=maxOverlap)

In [None]:
# Stencil range 1 sweep (x block sizes 1..1024, y block sizes 1..256) predicted with overlap=randomOverlap.
randomOverlapData = compareSeries([2**e for e in range(11)], [2**e for e in range(9)], [1], overlap=randomOverlap)

In [None]:
# NOTE(review): the version-1 prediction run is disabled; v1Data is only an
# alias of randomOverlapData, so the "version 1" curve in the comparison plot
# coincides with the "randomOverlap" curve. To run the real version-1 sweep use:
#   compareSeries([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024], [1, 2, 4, 8, 16, 32, 64, 128, 256], [1], version=1)
v1Data = randomOverlapData

In [None]:
# Overlay the predictions of the different model variants on the measured
# values of the randomOverlap sweep.
fig, ax = plt.subplots()
fig.set_figwidth(9)
fig.set_figheight(4.5)
fig.set_dpi(200)

# (values, format string, legend label, line color, marker edge color)
predictionCurves = [
    (randomOverlapData[1], "-o", "randomOverlap", "#CCE699", "#B5CC88"),
    (maxOverlapData[1], "-+", "maxOverlap", "#CC06AA", "#B50CAA"),
    (v1Data[1], "-x", "version 1", "#22E6AA", "#22CCAA"),
]
for curveValues, curveFmt, curveLabel, lineColor, edgeColor in predictionCurves:
    ax.plot(
        curveValues, curveFmt, label=curveLabel, color=lineColor,
        linewidth=1, markersize=5, markeredgewidth=1, markeredgecolor=edgeColor
    )

# The measured curve uses a slightly smaller marker than the predictions.
ax.plot(
    randomOverlapData[0], "-x", label="measured", color="#99B3E6",
    linewidth=1, markersize=4, markeredgewidth=1, markeredgecolor="#889FCC"
)

# For a logarithmic y axis, enable: ax.set_yscale("log")

# Ticks and labels come from the randomOverlap sweep.
ax.set_xticks(randomOverlapData[2])
ax.set_xticklabels(
    randomOverlapData[3], rotation=60, rotation_mode="anchor", horizontalalignment="right"
)

# Pin the lower y limit to zero and keep the current upper limit.
_, yUpper = ax.get_ylim()
ax.set_ylim(0, yUpper)
ax.set_xlim(-1, len(randomOverlapData[0]))

fig.tight_layout()
ax.legend()
ax.grid()
plt.show()