In [None]:
import os
import re
import numpy as np
import proplot as plot

In [None]:
### Parse markdown tables
# The benchmark results stay in markdown-table form so that users can still open
# the log and look at the numbers in more detail; here we only load the raw text.
benchmark, dir_ = 'fluxes', '60lev'
file = f'logs/{benchmark}_{dir_}_uriah.log'
with open(file) as f:  # default mode is read-only text
    data = f.read()
# Tables are formatted with leading and trailing column dividers '|'; ignore these. Also ignore
# trailing spaces and italic or bold asterisks in each cell, and the 2 header rows.
def sanitize(cell):
    """Convert one raw markdown table cell into a Python value.

    Surrounding spaces and asterisks (markdown italics/bold markers) are
    stripped first. Then:

    * an empty cell becomes ``None``;
    * a plain decimal number, optionally followed by a single size suffix
      (``k``, ``m`` or ``g``, case-insensitive), becomes a ``float``. The
      ``k`` suffix scales the number by 1e-3, ``g`` by 1e3, and ``m`` (or no
      suffix) leaves it unchanged;
    * anything else is returned as the stripped string.

    A numeric zero is returned as ``0.0`` (not ``None``), and text that merely
    looks number-like (e.g. ``'.'`` or ``'1.2.3'``) is returned unchanged
    instead of raising ``ValueError`` from ``float()``.
    """
    cell = cell.strip(' *')
    if not cell:
        return None  # only a genuinely empty cell maps to None
    # Require at least one digit and at most one decimal point so that every
    # match is guaranteed to be accepted by float().
    regex = re.match(r'^([0-9]+\.?[0-9]*|\.[0-9]+)([kKmMgG])?$', cell)
    if regex is None:
        return cell
    num, size = regex.groups()
    mult = {'k': 1e-3, 'g': 1e3}.get(size.lower(), 1) if size else 1
    return float(num) * mult
# Split the log into blank-line-separated chunks and parse the data rows of each
# one. Chunks that yield no data rows (e.g. the empty chunk produced by a blank
# line at the end of the log) are skipped, because an empty table would otherwise
# fail with an opaque IndexError in the checks below.
tables = []
for chunk in data.split('\n\n'):
    rows = [[sanitize(cell) for cell in row.split('|')[1:-1]]  # drop outer '|' columns
            for row in chunk.strip().split('\n')[2:]]  # drop the 2 header rows
    if rows:
        tables.append(rows)
if not tables:
    raise ValueError(f'No tables found in {file!r}.')
# Check tables
# Change this as table format changes!
# Cell 0 of each row is read as the latitude count, cell 1 as the size and
# cell 2 as the category name.
cats = [row[2] for row in tables[0]]
nlats = [int(table[0][0]) for table in tables]
sizes = [table[0][1] for table in tables]
for table in tables:  # make sure each table has same categories as first one
    if [row[2] for row in table] != cats:
        raise ValueError('Inconsistent tables.')
for i, vec in enumerate((nlats, sizes)):  # make sure each table has same nlats, size in every row
    for ivec, table in zip(vec, tables):
        if any(ivec != row[i] for row in table):
            raise ValueError('Inconsistent tables.')
# Plot data: one line per category showing the 'real' time against file size.
plot.rc.cycle = 'colorblind10'
f, ax = plot.subplots(axwidth=3, aspect=(2,3), legend='b')
hs = []  # line handles collected for the bottom-panel legend
nxarray = len([cat for cat in cats if 'xarray' in cat.lower()])
xcolors = plot.colors('greys', nxarray, left=0.3)
ocolors = [color for i,color in enumerate(plot.colors('colorblind10')) if i not in (5,6,7)]
ic, xc = 0, 0 # make xarray lines different shades of same color
idxs = np.argsort(cats) # use alphabetical order
for i in idxs:
    cat = cats[i]
    if 'xarray' in cat.lower():
        color = xcolors[xc]
        xc += 1
    else:
        # Wrap around so that having more categories than remaining colors
        # reuses colors instead of raising IndexError.
        color = ocolors[ic % len(ocolors)]
        ic += 1
    times = [table[i][3] for table in tables] # 4th cell contains 'real' time
    hs += ax.plot(sizes, times, color=color, marker='o', markersize=5, label=cat, lw=0.5)
# NOTE: to plot against latitude count instead, pass nlats as the x values above
# and use xlabel='latitude count' (the x limits would need adjusting as well).
ax.format(xlabel='file size (MB)', ylabel='time (seconds)', gridminor=True,
          ylim=(0.2, 150), yscale='log', yformatter='scalar',
          xlim=(20, 5000), xscale='log', xformatter='scalar',
          title=f'{benchmark.title()} benchmark')
f.bpanel.legend(hs, ncols=2, order='F')
# Name the figure after the benchmark so that switching `benchmark` does not
# overwrite the figure of a different benchmark.
f.save(f'{benchmark}.png')