In [None]:
# give us interactive plots in jupyter lab:
# %matplotlib widget
%load_ext autoreload

import matplotlib
import numpy as np
import json
import matplotlib.pyplot as plt
import numpy



In [None]:
import importlib
# import helper_libs as util
# importlib.reload(util)
# %autoreload 1
# %aimport helper_libs

In [None]:
def geomean(a):
    """Return the geometric mean of ``a``, computed as ``exp(mean(log(a)))``.

    Parameters
    ----------
    a : array_like
        Values to average. All elements are reduced to a single scalar,
        whatever the shape of ``a``.

    Returns
    -------
    numpy.float64
        The geometric mean, or ``nan`` when ``a`` contains no elements.
    """
    if np.size(a) == 0:
        # np.mean of an empty array also produces nan, but only after
        # emitting RuntimeWarnings; answer directly so that an empty
        # selection stays quiet.
        return np.float64(np.nan)
    return np.exp(np.mean(np.log(a)))
# Smoke check: the geometric mean of 2 and 8 should come out as 4 (up to rounding).
print("Geomean of [2 8] {}".format(geomean([2, 8])))
class GEMMResults:
    """GEMM benchmark results laid out on a dense (m, n, k) grid.

    Built from a decoded JSON blob that must provide the keys ``fine_axis``,
    ``OMP_NUM_THREADS``, ``test_name``, ``blas`` and ``results``.  Every entry
    of ``results`` is a mapping with ``m``, ``n``, ``k``, ``gflops``,
    ``latency_usec`` and ``count``.

    The three grids (``gflops``, ``latency_usec``, ``count``) are indexed by
    the positions of m, n and k within ``fine_axis``.
    """

    def __init__(self, blob):
        """Populate the grids from ``blob``.

        Raises
        ------
        KeyError
            If a required key is missing, or if a result's ``m``/``n``/``k``
            value does not appear in ``blob['fine_axis']``.
        """
        self.blob = blob
        self.fine_axis = blob['fine_axis']
        # Stored as found in the blob; other cells divide by it, so it is
        # expected to be numeric.
        self.ncores = blob['OMP_NUM_THREADS']
        self.test_name = blob['test_name']
        self.blas = blob['blas']
        self.generate_name()

        n = len(self.fine_axis)
        grid = (n, n, n)
        # Grid points that never show up in blob['results'] keep these zeros;
        # count == 0 therefore marks a point as "not measured".
        self.gflops = np.zeros(grid)
        self.latency_usec = np.zeros(grid)
        self.count = np.zeros(grid, dtype=np.int32)

        # Map each axis value to its position along the grid.
        index_of = {size: i for i, size in enumerate(self.fine_axis)}
        for row in blob['results']:
            at = (index_of[row['m']], index_of[row['n']], index_of[row['k']])
            self.gflops[at] = row['gflops']
            self.latency_usec[at] = row['latency_usec']
            self.count[at] = row['count']

        # nelems[i, j, k] == fine_axis[i] * fine_axis[j] * fine_axis[k]
        self.nelems = np.multiply.outer(
            np.multiply.outer(self.fine_axis, self.fine_axis), self.fine_axis
        )

    def generate_name(self):
        """Rebuild ``self.name`` as ``<test_name>-<blas>-<ncores>``."""
        self.name = f"{self.test_name}-{self.blas}-{self.ncores}"

    @classmethod
    def from_file(cls, fname):
        """Load the JSON file ``fname`` and build an instance from its content."""
        with open(fname) as fp:
            return cls(json.load(fp))


# Result files to compare.  Later cells pass dats[0] (the first file listed
# here) as the reference; uncomment an entry to add that run.
fnames = [
    'results-openblas.json',
    'results-armpl_mp.json',
    # 'results-armpl.json',
    # 'results-openblas-grav4-24xl-b2.json',
    # 'results-armpl_mp-grav4-24xl.json',
    # 'results-armpl_mp-grav4-48xl.json',
    # 'results-nvpl_blas_lp64_gomp-grav4-24xl.json',
    # 'results-nvpl_blas_lp64_gomp-grav4-48xl.json',
    # 'results-openblas-grav4-24xl.json',
    # 'results-openblas-grav4-48xl.json',
    # 'results-nvpl_blas_lp64_gomp-grav3.json',
    # 'results-mkl_intel_lp64_192.json',
    # 'results-mkl_intel_lp64_64.json',
]

dats = list(map(GEMMResults.from_file, fnames))

# 'results-armpl.json' gets its core count forced to 1 and its display name
# rebuilt to match.
# NOTE(review): presumably that run was single-threaded regardless of the
# OMP_NUM_THREADS value stored in the file -- confirm.
for name, dat in zip(fnames, dats):
    if name != 'results-armpl.json':
        continue
    dat.ncores = 1
    dat.generate_name()

In [None]:
def plot_elems_vs_flops(dats, datref, logy=False):
    """Scatter throughput against problem size for every dataset, in two panels.

    The left panel shows total GFLOPS; the right panel shows GFLOPS divided
    by each dataset's ``ncores``.  Only grid points with ``count > 0`` are
    drawn.  The x axis is the raw ``nelems`` value (M*N*K) on a log scale.

    Parameters
    ----------
    dats : sequence of GEMMResults
        Datasets to draw; iterated once per panel.
    datref : GEMMResults
        Not used by this plot; accepted so existing call sites keep working.
    logy : bool, optional
        When true, the y axis of both panels is logarithmic as well.
    """
    fig = plt.figure(figsize=(10, 6))
    panels = (
        # (subplot position, divide by ncores?, y label, y limits)
        (1, False, "Throughput - GFLOPS\n(Higher is better)", [10, 6000]),
        (2, True, "Throughput - GFLOPS per core\n(Higher is better)", [.1, 40]),
    )
    for position, per_core, ylabel, ylim in panels:
        ax = fig.add_subplot(1, 2, position)
        ax.set_xscale('log')
        if logy:
            ax.set_yscale('log')
        for dat in dats:
            measured = dat.count > 0
            throughput = dat.gflops[measured]
            if per_core:
                throughput = throughput / dat.ncores
            ax.plot(dat.nelems[measured], throughput, '.', label=dat.name,
                    alpha=.8, linestyle='none', markersize=4, mec='none')
        # The data is plotted as raw M*N*K on a log axis, not in dB.
        ax.set_xlabel("Number of Elements [M*N*K]")
        ax.set_ylabel(ylabel)
        ax.set_ylim(ylim)
        ax.legend()
    fig.tight_layout()

# Draw the size-vs-throughput figure twice: linear y axis first, then log y axis.
plot_elems_vs_flops(dats, datref=dats[0], logy=False)
plot_elems_vs_flops(dats, datref=dats[0], logy=True)

In [None]:
def plot_improvement_vs_latency(dats, datref):
    """Scatter each dataset's GFLOPS ratio to ``datref`` against its latency.

    A point is drawn only where both the dataset and the reference have
    ``count > 0``.  A dashed line at 1 marks parity with the reference.

    Parameters
    ----------
    dats : iterable of GEMMResults
        Datasets to compare.  Their grids must have the same shape as the
        reference's, since the reference is indexed with the same mask.
    datref : GEMMResults
        Dataset whose ``gflops`` is the denominator of the ratio.
    """
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ref_measured = datref.count > 0
    for dat in dats:
        # Unmeasured reference points hold gflops == 0; including them would
        # divide by zero and produce inf/nan points plus runtime warnings.
        both = (dat.count > 0) & ref_measured
        ax.plot(dat.latency_usec[both], dat.gflops[both] / datref.gflops[both], '.',
                label=dat.name, alpha=1, linestyle='none', markersize=6, mec='none')
    ref_latency = datref.latency_usec[ref_measured]
    if ref_latency.size:
        # Parity line spanning the reference's own latency range.
        ax.plot([ref_latency.min(), ref_latency.max()], [1, 1], "--",
                color='black', label="Reference")
    ax.set_xscale('log')
    ax.set_ylim([0, 5])
    ax.set_xlabel("Latency - usec")
    ax.set_ylabel("Speedup - factor\n(Higher is better)")
    ax.set_title(f"Speedup vs Latency relative to {datref.name}")
    ax.legend()
# Speedup of every dataset relative to the first one loaded.
plot_improvement_vs_latency(dats, datref=dats[0])

In [None]:
def print_summary(dats, dat_filt_fun, filter_name):
    """Print geomean GFLOPS and mean latency per dataset for a filtered subset.

    Parameters
    ----------
    dats : iterable of GEMMResults
        Datasets to summarise; one line is printed for each.
    dat_filt_fun : callable
        Called with one dataset; must return a boolean array that can be
        AND-ed with ``dat.count > 0`` to select grid points.
    filter_name : str
        Label printed in the header line.
    """
    print(f"--------- For {filter_name}")
    for dat in dats:
        # Only measured points that also pass the caller's filter.
        selected = (dat.count > 0) & dat_filt_fun(dat)
        n_matched = int(selected.sum())
        if n_matched:
            mean_lat = np.mean(dat.latency_usec[selected])
            geomean_gflops = geomean(dat.gflops[selected])
        else:
            # Nothing matched: report nan directly rather than letting numpy
            # warn about the mean of an empty slice.
            mean_lat = geomean_gflops = float('nan')
        print(f"Geomean of GFLOPS: {geomean_gflops:8.2f} ({geomean_gflops/dat.ncores:8.3f}/core). Mean Latency {1e-3*mean_lat:7.5f} msec for {dat.name:25s} ({n_matched:4d} matched)")
# Three slices of the sweep: small problems (by M*N*K), fast calls, slow calls.
print_summary(
    dats,
    dat_filt_fun=lambda dat: dat.nelems < 100000,
    filter_name="GEMM with nelem < 100000",
)
print_summary(
    dats,
    dat_filt_fun=lambda dat: dat.latency_usec < 1000,
    filter_name="GEMM with latency < 1 msec",
)
print_summary(
    dats,
    dat_filt_fun=lambda dat: dat.latency_usec > 20000,
    filter_name="GEMM with latency > 20 msec",
)

In [None]:
# Heatmap of per-core throughput for the first dataset, with m fixed at the
# first entry of its fine_axis.
dat = dats[0]
flops1 = dat.gflops[0, :, :]  # rows follow n, columns follow k

fig = plt.figure()
ax = plt.subplot(1, 1, 1)
im = ax.imshow(flops1 / dat.ncores)
# Tick values are positions along fine_axis, not matrix sizes.
ax.set_xlabel("k (index into fine_axis)")
ax.set_ylabel("n (index into fine_axis)")
ax.set_title(f"{dat.name}, m = {dat.fine_axis[0]}")

t = fig.colorbar(im, label='GFLOP/s/core')

In [None]:
# Display the entry at index 10 of the first dataset's fine_axis
# (raises IndexError if that axis has fewer than 11 entries).
dats[0].fine_axis[10]