In [None]:
'''
### Internal notebook for generating figures

It is meant to be run from the command line with `jupyter-nbconvert`:

```
cd <results_folder>
jupyter-nbconvert --execute --to html --output-dir . <path-to-this-notebook>
```

This will read pickle files from the current folder and generate various graphs and text reports.
''';

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import os
import glob
import pickle
from types import SimpleNamespace
import itertools
from benchmark_rio_s3 import reports
from benchmark_rio_s3.reports import unpack_stats, gen_stats_report, join_reports
from benchmark_rio_s3.plots import plot_stats_results, plot_comparison, plot_results

# Switch to the directory named by the PWD environment variable (the shell's
# working directory), falling back to the current directory when PWD is unset.
# NOTE(review): presumably needed because nbconvert starts the kernel in the
# notebook's own folder rather than in the results folder -- confirm.
os.chdir(os.environ.get('PWD','.'))

## Load data

In [None]:
# Everything `reports.load_dir()` finds; keyed by thread count (the keys are
# what ends up in `nthreads` below).
xx_all = reports.load_dir()

# Two views of the same data: per key, the entry picked by 'time' and the entry
# picked by 'throughput'.
xx_time, xx_throughput = (reports.pick_best(xx_all, criterion)
                          for criterion in ('time', 'throughput'))

# Thread counts in ascending order.
nthreads = np.array(sorted(xx_all.keys()))

In [None]:
# Registry of every figure created below, keyed by the base name used when the
# figures are written to disk at the end of the notebook.
figs = dict()

# One-line overview: total number of loaded entries, number of distinct thread
# counts, and the smallest/largest thread count.
_summary = '{:d} files, {:d} configurations, {:d}-{:d} threads'
print(_summary.format(sum(map(len, xx_all.values())),
                      len(nthreads),
                      nthreads.min(),
                      nthreads.max()))

## In-depth stats for the single-threaded case

In [None]:
# Entry stored under key 1 (one thread) in the 'time' selection:
# text report first, then the detailed figure.
_single = xx_time[1]
print(gen_stats_report(_single))

figs['single-thread-in-depth'] = fig = plt.figure(figsize=(12, 8))
plot_results(_single._raw.stats, fig=fig);

## Scaling with more threads

In [None]:
# Wide, short figure summarising every thread count of the 'throughput' selection.
fig = plt.figure(figsize=(12,3))
figs['threads'] = fig

# The return value is kept as `best_idx`; later cells use it as the key of the
# highest-throughput configuration in `xx_throughput`.
best_idx = plot_stats_results(xx_throughput, fig=fig)

# tight_layout() only adjusts axes that already exist, so it has to run after
# the plot has been drawn (calling it on the still-empty figure does nothing).
fig.tight_layout()

## Single Thread vs Best Throughput

In [None]:
# Side-by-side text report: the one-thread entry versus the entry stored under
# `best_idx`, both taken from the 'throughput' selection.
_pair = [xx_throughput[1], xx_throughput[best_idx]]
_titles = ['One Thread', 'Highest Throughput']
_reports = tuple(gen_stats_report(_st, _title)
                 for _st, _title in zip(_pair, _titles))
print(join_reports(*_reports))

# Same two entries, compared graphically.
figs['comparison'] = fig = plt.figure(figsize=(12, 6))

plot_comparison(
    fig,
    _pair,
    nochunk=True,
    threshs=[400, 200, 200],
    alpha=0.4,
    names=['c1', 'c{}'.format(best_idx)],
);

## Latency hiding graph

In [None]:
def add_hbar_plot(ax, st, n=None, height=1, **kwargs):
    """Draw the first ``n`` entries of ``st`` as horizontal bars on ``ax``.

    Entry ``i`` (0-based) becomes a bar at ``y = i + 1`` that starts at
    ``st.t0[i]`` and is ``st.t_total[i]`` long.

    All bars are drawn with a single ``barh`` call, so a ``label`` passed
    through ``kwargs`` produces one legend entry for the whole group instead
    of one entry per bar.

    Parameters
    ----------
    ax : object with a matplotlib-style ``barh`` method
    st : object exposing the sequences ``t0`` and ``t_total``
    n : int, optional
        Number of leading entries to draw; defaults to ``st.t0.shape[0]``.
    height : bar height passed to ``barh``
    **kwargs : forwarded to ``barh`` unchanged (color, alpha, label, ...)
    """
    if n is None:
        n = st.t0.shape[0]

    # Nothing to draw: keep the "no bars, no artists" behaviour for empty input.
    if n <= 0:
        return

    ax.barh(np.arange(1, n + 1),
            left=st.t0[:n],
            width=st.t_total[:n],
            height=height,
            **kwargs)

# Two panels showing the same bars: the left one over the full time range, the
# right one zoomed to the first 750 units of the x axis (labelled 'ms').
fig = plt.figure(figsize=(12,6))
figs['latency-hiding'] = fig
axs = [fig.add_subplot(121), fig.add_subplot(122)]

# Thread counts to overlay; the highest-throughput one is appended if it is not
# already in the list.
ii = (1, 2, 4)
if best_idx not in ii:
    ii += (best_idx,)

best = xx_throughput[best_idx]
# Show at most the first 40 entries.
N = min(40, best.t_total.shape[0])

# Colours are paired with thread counts *before* filtering, so a given thread
# count keeps its colour even when another one is missing.
for i, c in zip(ii, ['C0', 'C1', 'C3', 'C2']):
    if i not in xx_throughput:
        # This thread count was not benchmarked; skip it rather than fail.
        continue
    st = xx_throughput[i]
    for ax in axs:
        add_hbar_plot(ax, st, n=N,
                      color=c,
                      alpha=0.4,
                      linewidth=0,
                      label='c{}'.format(st.nthreads))

# Apply labels and limits once, after everything is drawn, so that the x range
# autoscaled from all configurations is not frozen by the first one.
for ax in axs:
    ax.set_xlabel('ms')
    ax.set_ylim(1, N + 1)

zoomed = axs[1]
zoomed.set_xlim(0, 750)
zoomed.yaxis.set_visible(False)

# Collapse repeated labels so each configuration appears once in the legend.
_handles, _labels = zoomed.get_legend_handles_labels()
_unique = dict(zip(_labels, _handles))
zoomed.legend(list(_unique.values()), list(_unique.keys()))
fig.tight_layout()

## Throughput

In [None]:
fig = plt.figure(figsize=(16,6))
figs['fps'] = fig
ax = fig.add_subplot(111)

# Per-configuration throughput values; not referenced again in this cell.
mf = np.r_[[st.throughput for st in xx_throughput.values()]]

# Background: every configuration as a thin, semi-transparent black line.
for st in xx_throughput.values():
    ax.plot(st.fps, 'k-', alpha=0.4, linewidth=0.7)

# Foreground: a handful of highlighted thread counts. dict.fromkeys() removes
# repeats while keeping order, so a configuration is not drawn (and listed in
# the legend) twice when best_idx or the maximum coincides with a fixed entry.
highlighted = dict.fromkeys([1, 8, 16, 24, best_idx, nthreads.max()])
for n in highlighted:
    if n not in xx_throughput:
        continue
    st = xx_throughput[n]
    ax.plot(st.fps, '-', linewidth=2, label='c{}'.format(st.nthreads))

# The x range is sized from the largest thread count's run, looked up
# explicitly instead of relying on whatever `st` was left over from the loop.
n_files = xx_throughput[nthreads.max()].t_end.shape[0]

ax.set_xlabel('Files processed')
ax.set_ylabel('Files per second')
ax.axis((-3, n_files+10) + ax.axis()[2:])
ax.legend(loc='upper left');
fig.tight_layout()

## Warmup Costs

In [None]:
# Raw record behind each selected run, in ascending thread-count order.
dd = [xx_throughput[i]._raw for i in nthreads]
if hasattr(dd[0], '_warmup'):
    # Median `t_open` over the warmup stats of each configuration. The values
    # are multiplied by 1000 below and labelled as milliseconds.
    warmup_time = np.array([np.median([st.t_open for st in d._warmup.stats]) for d in dd])
    # Upper x limit: largest value rounded up to the next 0.1, plus 0.1 margin.
    wm_max = np.ceil(warmup_time.max()*10)/10 + 0.1

    fig = plt.figure(figsize=(4,4))
    figs['warmup'] = fig
    ax = fig.add_subplot(111)
    ax.barh(nthreads, warmup_time*1000, height=0.5, alpha=0.7)
    ax.axis([0, wm_max*1000, 0.5, nthreads[-1]+1.5])

    # set_ticks() may widen the axis limits to include every tick, so ticks
    # above the largest thread count are dropped (and repeats removed) to keep
    # the limits chosen on the line above.
    max_threads = nthreads[-1]
    ticks = sorted({t for t in (1, 8, 16, 24, 32, max_threads) if t <= max_threads})
    ax.yaxis.set_ticks(ticks)

    ax.set_xlabel('Median time to open first file (ms)')
    ax.set_ylabel('Number of threads')
    fig.tight_layout()
else:
    print('No warmup costs were captured')

## Saving Figures

In [None]:
# Create the output folder for the saved figures if it does not exist yet.
# Pure Python instead of a shell escape, so it also works where `mkdir -p`
# is unavailable.
os.makedirs('report_images', exist_ok=True)

In [None]:
# savefig keyword arguments: `defaults` is used unless the figure's name has an
# entry in `overrides`, in which case that entry is used instead.
defaults = {'dpi': 100}
overrides = {'warmup': {'dpi': 200}}

# Every registered figure is written twice: once as SVG, once as PNG.
_targets = [(name, fig, fmt)
            for name, fig in figs.items()
            for fmt in ('svg', 'png')]

for name, fig, fmt in _targets:
    fname = './report_images/{name}.{fmt}'.format(name=name, fmt=fmt)
    print('Saving to: ' + fname)
    save_kwargs = overrides.get(name, defaults)
    fig.savefig(fname, **save_kwargs)