In [2]:
import numpy as np
from collections import OrderedDict as odict
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm

In [3]:
# Registry of benchmark runs: file-name stem -> (hardware name, number of nodes).
# Cluster architectures were measured on 1, 2 and 4 nodes and follow the
# '<hardware>_mpi<nodes>' naming pattern; the desktop parts ('i5', 'gtx1060')
# only exist as single-node runs. Insertion order is kept as listed here.
filesD = dict(
    [('%s_mpi%d' % (hw, count), (hw, count))
     for hw in ('knl', 'skl') for count in (1, 2, 4)]
    + [('i5', ('i5', 1))]
    + [('%s_mpi%d' % (hw, count), (hw, count))
       for hw in ('p100', 'v100') for count in (1, 2, 4)]
    + [('gtx1060', ('gtx1060', 1))]
)

# Same entries ordered by node count; sorted() is stable, so runs with equal
# node count keep their registry order.
files = odict(sorted(filesD.items(), key=lambda entry: entry[1][1]))

In [4]:
# Render floats in displayed DataFrames with one decimal place. The fully
# qualified option name is used on purpose: the abbreviated 'precision'
# pattern matches more than one option in recent pandas releases and is then
# rejected by set_option.
pd.set_option('display.precision',1)

# kernel column -> factor applied to 'size' when converting a runtime into a
# bandwidth (presumably the number of memory operations per element -- TODO confirm)
names={'axpby':3,'dot':2,'dx':3, 'dy':3}
# values of the 'n' column that are kept for the dx/dy statistics
ns=[3,4]

n =3   # latency: average/minimum over the n smallest problem sizes
s =30  # axpby/dot bandwidth: statistics over the s rows with the largest sizes
mb=9   # dx/dy bandwidth: statistics over rows whose 'size' exceeds this value

values = []
for f, v in files.items() :
    df=pd.read_csv('benchmark_'+f+'.csv', delimiter=' ')
    # Problem size per node; 8*n*n*Nx*Ny/1e6 is presumably the MB taken by one
    # vector of 8-byte values -- TODO confirm the unit.
    df.insert(0,'size', 8*df['n']*df['n']*df['Nx']*df['Ny']/1e6/v[1])
    # Bandwidth per kernel: size/1000 * memops / runtime
    # (GB/s if the runtimes are stored in seconds -- TODO confirm).
    for name,memops in names.items() :
        df.insert(0,name+'_bw',df['size']/1000*memops/df[name])

    # One aggregated version (mean/std per problem size) and one raw version,
    # both ordered by increasing size.
    agg=df.groupby(['n', 'Nx','Ny','size']).agg(['mean', 'std'])
    agg=agg.reset_index(level=['n','Nx','Ny','size'])
    agg=agg.sort_values(by='size')
    df=df.sort_values(by='size')

    # Combine dx and dy: arithmetic mean of the runtimes, harmonic mean of the
    # bandwidths.
    dxdy = df.assign( dxdy=(df['dx']+df['dy'])/2)
    dxdy = dxdy.assign( dxdy_bw=2.0*df['dx_bw']*df['dy_bw']/(df['dx_bw']+df['dy_bw']))
    # Keep only the rows whose 'n' is listed in ns (raises KeyError if one of
    # them is absent from the file).
    dxdy = dxdy.set_index('n').loc[ns,:].reset_index()
    dxdy = dxdy.sort_values(by='size')
    aggdxdy=dxdy.groupby(['n', 'Nx','Ny','size']).agg(['mean', 'std'])
    aggdxdy=aggdxdy.reset_index(level=['n','Nx','Ny','size'])
    aggdxdy=aggdxdy.sort_values(by='size')

    # Row layout: arch, nodes, then (lat avg, lat min, bw avg, bw std) per kernel.
    # Explicit positional selection (.iloc / .tail) is used because the frames
    # were re-ordered by sort_values and carry shuffled integer labels.
    largest = df.tail(s)
    above_threshold = dxdy[dxdy['size']>mb]
    line = [v[0], v[1]]
    for kernel, lat_table, bw_rows in [('axpby', agg,     largest),
                                       ('dot',   agg,     largest),
                                       ('dxdy',  aggdxdy, above_threshold)]:
        latency = lat_table.iloc[:n][(kernel,'mean')]
        bandwidth = bw_rows[kernel+'_bw']
        # dividing by 1e-6 rescales the runtimes by a factor of 1e6
        line.append(latency.mean()/1e-6)
        line.append(latency.min()/1e-6)
        line.append(bandwidth.mean())
        line.append(bandwidth.std())
    values.append(line)

# Three-level column header: (kernel, 'lat'|'bw', statistic)
tuples=[('arch','',''),('nodes','','')]
for kernel in ['axpby','dot','dxdy']:
    tuples.append((kernel,'lat','avg'))
    tuples.append((kernel,'lat','min'))
    tuples.append((kernel,'bw','avg'))
    tuples.append((kernel,'bw','std'))
cols=pd.MultiIndex.from_tuples(tuples)
arr = pd.DataFrame(values,index=list(files.keys()), columns=cols)
arr = arr.sort_values(by='arch')
arr = arr.set_index(['arch','nodes'])
#arr.loc[:,[('dot','bw','avg'),('dot','lat','avg')]]
#arr

In [5]:
#arr=arr.reset_index()

In [6]:
def toString(x):
    """Return ``x`` rounded up to the next integer, formatted as a decimal string."""
    return '%d' % np.ceil(x)
# Add a printable "avg ± std" bandwidth column per kernel and remember which
# columns go into the display table.
addto = []
for n in ['axpby','dot','dxdy']:
    avg_text = arr[n]['bw']['avg'].apply(toString)
    std_text = arr[n]['bw']['std'].apply(toString)
    arr.loc[:,(n,'bw','string')] = avg_text + " ± " + std_text
    addto.extend([(n,'lat','min'), (n,'bw','string')])

# Table for display: minimum latency and bandwidth string per kernel, with the
# innermost header level (the statistic name) removed.
nicetable = arr[addto]
nicetable.columns = nicetable.columns.droplevel(2)

# Row order of the table: desktop CPU, cluster CPUs, desktop GPU, cluster GPUs.
# Index entries that are missing from the data become NaN rows on reindex.
newindex = (
    [('i5',1)]
    + [(cpu, nodes) for cpu in ['skl','knl'] for nodes in [1,2,4]]
    + [('gtx1060',1)]
    + [(gpu, nodes) for gpu in ['p100','v100'] for nodes in [1,2,4]]
)

nicetable = nicetable.reindex(newindex)
nicetable

Unnamed: 0_level_0,Unnamed: 1_level_0,axpby,axpby,dot,dot,dxdy,dxdy
Unnamed: 0_level_1,Unnamed: 1_level_1,lat,bw,lat,bw,lat,bw
arch,nodes,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
i5,1,12.4,30 ± 1,117.4,10 ± 1,69.8,28 ± 3
skl,1,6.1,207 ± 3,24.5,194 ± 9,50.6,124 ± 28
skl,2,6.9,211 ± 7,32.6,185 ± 9,60.5,116 ± 28
skl,4,6.0,226 ± 13,40.7,173 ± 13,51.2,112 ± 28
knl,1,14.4,447 ± 15,67.3,144 ± 3,84.3,135 ± 44
knl,2,14.9,454 ± 9,93.9,129 ± 9,139.2,93 ± 30
knl,4,13.8,451 ± 27,128.5,113 ± 6,171.1,88 ± 19
gtx1060,1,3.5,158 ± 1,131.6,27 ± 1,34.5,98 ± 17
p100,1,3.1,553 ± 2,56.4,346 ± 2,15.7,219 ± 19
p100,2,3.3,555 ± 1,56.1,339 ± 4,473.1,119 ± 50


#### Assumptions
- There are three basic kernel classes: trivially parallel (axpby), nearest neighbor (dxdy), and global reduction (dot).
- Each can be characterized by three numbers: the single-node bandwidth, the single-node latency, and the multi-node latency.

#### But
- This model does not capture cache effects, e.g. on SKL.

In [7]:
# Export the model parameters per architecture: single-node bandwidth,
# single-node ("shared") latency and two-node ("dist") latency for each kernel.
index = ['i5','skl','knl','gtx1060','v100','p100']
single_node_only = ('i5', 'gtx1060')   # no multi-node runs for these
kernels = ['axpby','dot','dxdy']

lines = []
for arch in index:
    line = [arch]
    for n in kernels:
        bandwidth = arr.loc[(arch,1),(n,'bw','avg')]
        lat_shared = arr.loc[(arch,1),(n,'lat','min')]
        # None is written as an empty field by to_csv
        lat_dist = None if arch in single_node_only else arr.loc[(arch,2),(n,'lat','min')]
        line.extend([bandwidth, lat_shared, lat_dist])
    lines.append(line)

# Flat column names: 'arch' followed by three columns per kernel
tuples = ['arch'] + [n + suffix
                     for n in kernels
                     for suffix in ('_bw', '_lat_shared', '_lat_dist')]
cols = tuples
toDisk = pd.DataFrame(lines, columns=cols)
toDisk.to_csv('performance.csv',sep=' ',index=False)

In [8]:
# Read the exported file back to check that it round-trips; empty fields
# (missing multi-node latencies) come back as NaN.
test = pd.read_csv('performance.csv', sep=' ')
test

Unnamed: 0,arch,axpby_bw,axpby_lat_shared,axpby_lat_dist,dot_bw,dot_lat_shared,dot_lat_dist,dxdy_bw,dxdy_lat_shared,dxdy_lat_dist
0,i5,30.0,12.4,,9.3,117.4,,27.3,69.8,
1,skl,206.8,6.1,6.9,193.6,24.5,32.6,123.2,50.6,60.5
2,knl,447.0,14.4,14.9,143.3,67.3,93.9,134.8,84.3,139.2
3,gtx1060,157.0,3.5,,26.5,131.6,,97.5,34.5,
4,v100,848.3,3.1,3.1,591.6,41.5,48.2,681.1,7.8,452.2
5,p100,552.8,3.1,3.3,345.8,56.4,56.1,218.7,15.7,473.1
