In [None]:
# Proposal number. It is zero-padded to six digits ("p003333") for the proposal
# lookup and for the name of the saved figure.
propno = 3333

In [None]:
import os
import math
import fnmatch
import numpy as np
import matplotlib.pyplot as plt

from extra_data import RunDirectory, open_run
from extra_data.read_machinery import find_proposal

In [None]:
# Data-source names for each supported setup, keyed by the prefix of the
# detector id. Exactly one set is active at a time: change INSTRUMENT to
# switch instead of relying on one group of assignments overwriting another.
INSTRUMENT_SOURCES = {
    'MID': {
        'xgm_src': 'SA1_XTD1_XGM/XGM/DOOCS:output',
        'xgm_key': 'data.intensityTD',
        'agipd_id': 'MID_DET_AGIPD1M-1',
        'lff_src': 'MID_EXP_AGIPD1M1/REDU/LITFRM:output',
    },
    'SPB': {
        'xgm_src': 'SPB_XTD9_XGM/XGM/DOOCS:output',
        'xgm_key': 'data.intensitySa1TD',
        'agipd_id': 'SPB_DET_AGIPD1M-1',
        'lff_src': 'SPB_IRU_AGIPD1M1/REDU/LITFRM:output',
    },
}
INSTRUMENT = 'SPB'

# Names read as globals by the helper functions of this notebook.
xgm_src = INSTRUMENT_SOURCES[INSTRUMENT]['xgm_src']
xgm_key = INSTRUMENT_SOURCES[INSTRUMENT]['xgm_key']
agipd_id = INSTRUMENT_SOURCES[INSTRUMENT]['agipd_id']
lff_src = INSTRUMENT_SOURCES[INSTRUMENT]['lff_src']

In [None]:
def get_run_size(basedir, file_pattern='*'):
    """Sum the sizes of the matching files in every run directory of ``basedir``.

    Parameters
    ----------
    basedir : str or path-like
        Directory whose immediate sub-directories are run folders named
        ``r`` followed by decimal digits (for example ``r0042``).
    file_pattern : str, optional
        ``fnmatch`` pattern a file name has to match to be counted.
        The default ``'*'`` counts every regular file.

    Returns
    -------
    dict
        Maps the run number (``int``) to the total size in bytes of the
        matching files directly inside that run folder. Sub-directories of a
        run folder are not descended into.
    """
    size = {}
    with os.scandir(basedir) as run_entries:
        for rundir in run_entries:
            name = rundir.name
            # Anything that is not "r<digits>" (e.g. "reports", "r0001.bak")
            # is not a run folder; skip it instead of failing in int().
            if not (rundir.is_dir() and name[:1] == 'r' and name[1:].isdecimal()):
                continue

            # Scan the directory that was actually found rather than a
            # re-formatted "r%04d" name, which may not exist.
            with os.scandir(rundir.path) as file_entries:
                total = sum(
                    f.stat().st_size for f in file_entries
                    if f.is_file() and fnmatch.fnmatch(f.name, file_pattern)
                )

            # Differently padded folders of the same run number add up.
            runno = int(name[1:])
            size[runno] = size.get(runno, 0) + total
    return size

def get_npulse(basedir):
    """Estimate the number of pulses for every run below ``<basedir>/raw``.

    For each run folder the array stored under the notebook globals
    ``(xgm_src, xgm_key)`` is loaded, the entries different from ``1.0`` are
    counted along axis 1, and the median of those counts is taken.
    NOTE(review): this presumably treats axis 0 as trains and ``1.0`` as the
    filler value of unused pulse slots -- confirm against the XGM data format.

    Parameters
    ----------
    basedir : str
        Proposal directory containing a ``raw`` sub-directory of run folders.

    Returns
    -------
    dict
        Maps run number (``int``) to the median count (``int``).
    """
    raw_root = os.path.join(basedir, 'raw')

    npulse = {}
    for entry in os.scandir(raw_root):
        if not (entry.is_dir() and fnmatch.fnmatch(entry.name, 'r*')):
            continue
        runno = int(entry.name[1:])

        run = RunDirectory(os.path.join(basedir, f"raw/r{runno:04d}"))
        # Disabled alternative kept from the original code:
        #   run['MID_RR_SYS/MDL/PULSE_PATTERN_DECODER', 'sase2.nPulses'].ndarray()
        intensity = run[xgm_src, xgm_key].ndarray()
        filled_slots = np.sum(intensity != 1., axis=1)
        npulse[runno] = int(np.median(filled_slots))

    return npulse

def get_lff_train_fraction(basedir):
    """Compare lit-frame entries with detector trains for every raw run.

    For each run below ``<basedir>/raw`` the number of ``data.nFrame`` entries
    of the notebook global ``lff_src`` is divided by the number of distinct
    index values for which at least one of the 16 ``agipd_id`` modules has
    ``image.cellId`` data. NOTE(review): the index of ``data_counts()`` is
    presumably the train ID -- confirm.

    Parameters
    ----------
    basedir : str
        Proposal directory containing a ``raw`` sub-directory of run folders.

    Returns
    -------
    dict
        Maps run number (``int``) to the fraction, capped at ``1.0``.
        Runs without ``lff_src`` get ``0.0``; runs in which no detector module
        has data get ``1.0``.
    """
    run_numbers = (
        int(entry.name[1:])
        for entry in os.scandir(os.path.join(basedir, 'raw'))
        if entry.is_dir() and fnmatch.fnmatch(entry.name, 'r*')
    )

    frac = {}
    for runno in run_numbers:
        run = RunDirectory(os.path.join(basedir, f"raw/r{runno:04d}"))

        # No lit-frame source recorded in this run.
        if lff_src not in run.all_sources:
            frac[runno] = .0
            continue

        ntrn_lff = np.sum(run[lff_src, 'data.nFrame'].data_counts().values)

        # Collect, module by module, the index values that carry data.
        per_module = []
        for modno in range(16):
            det_src = f"{agipd_id}/DET/{modno}CH0:xtdf"
            if det_src not in run.all_sources:
                continue
            counts = run[det_src, "image.cellId"].data_counts()
            per_module.append(counts.index.values[counts.values > 0])

        # Union over modules: an index value counts once if any module has it.
        if per_module:
            ntrn_det = len(np.unique(np.concatenate(per_module)))
        else:
            ntrn_det = 0

        if ntrn_det > 0:
            frac[runno] = min(1., ntrn_lff / ntrn_det)
        else:
            frac[runno] = 1.

    return frac
    
def join_attributes(**kwargs):
    """Inner-join several ``{key: value}`` tables on their common keys.

    Parameters
    ----------
    **kwargs : mapping
        One mapping per attribute; the keyword is used as the attribute name.

    Returns
    -------
    dict
        ``{key: {attribute_name: value, ...}}`` for every key present in all
        given mappings. Keys follow the order of the first mapping and the
        attributes of each record follow the keyword order. Called without
        arguments, an empty dict is returned. The inputs are not modified.
    """
    if not kwargs:
        return {}

    tables = list(kwargs.values())
    first, others = tables[0], tables[1:]

    # Walking the first mapping (instead of a set intersection) keeps the
    # output order deterministic.
    common_keys = [k for k in first if all(k in table for table in others)]

    return {
        k: {aname: table[k] for aname, table in kwargs.items()}
        for k in common_keys
    }

def filter_le(d, **kwargs):
    """Keep the records of ``d`` whose attributes all exceed the given bounds.

    Each keyword names an attribute and gives an *exclusive* lower bound: a
    record is kept only if ``record[name] > bound`` holds for every keyword
    (note that the comparison is strict ``>``, despite the function name).
    Without keywords every record is kept.

    Returns a new dict; the kept records are the same objects as in ``d``.
    """
    kept = {}
    for k, v in d.items():
        above_all_bounds = all(v[aname] > lb for aname, lb in kwargs.items())
        if above_all_bounds:
            kept[k] = v
    return kept

def filter_adj_size(d, frac, key='proc_size'):
    """Scale one size attribute of every record by the inverse of a fraction.

    Parameters
    ----------
    d : dict
        ``{run: record}`` where each record is a dict containing ``key``.
    frac : mapping
        ``{run: fraction}``; must contain every run of ``d`` (``KeyError``
        otherwise).
    key : str, optional
        Name of the attribute to rescale.

    Returns
    -------
    dict
        New ``{run: record}`` dict in which ``record[key]`` is
        ``value / fraction`` rounded half-up to an integer. Runs whose
        fraction is not positive (``0``, negative or NaN) cannot be rescaled
        and are left out. The records are copies: ``d`` and its records are
        not modified, so calling this repeatedly on the same input gives the
        same result.
    """
    r = {}
    for k, v in d.items():
        f = frac[k]
        # ``not f > 0`` also rejects NaN; dividing by such a value is
        # meaningless (and raises for an exact zero).
        if not f > 0:
            continue
        adjusted = dict(v)
        adjusted[key] = int(v[key] / f + 0.5)
        r[k] = adjusted
    return r

def frm_bytes(nb):
    """Format a byte count with a binary prefix, e.g. ``1536 -> '1.5 KiB'``.

    The prefix is chosen from ``ceil(log2(nb))``, i.e. rounded *up*, so within
    the supported prefixes the printed number stays in ``(0.5, 512]`` and fits
    the three significant digits of the format (``1000`` is shown as
    ``'0.977 KiB'``).

    Parameters
    ----------
    nb : int or float
        Number of bytes. Values that are zero or negative are printed with
        no prefix.

    Returns
    -------
    str
        ``"<number> <prefix>B"`` with the number formatted as ``.3g``.
    """
    prfx = ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi']
    nbit = int(math.ceil(math.log2(nb))) if nb > 0 else 0
    # Clamp to the available prefixes: tiny fractions would otherwise yield a
    # negative index (and a negative shift), huge values an index past 'Pi'.
    i = min(max(nbit // 10, 0), len(prfx) - 1)
    return f"{nb / (1 << (i * 10)):.3g} {prfx[i]}B"

In [None]:
# Locate the proposal on disk; raw data live in its 'raw' sub-directory.
propdir = find_proposal(f"p{propno:06d}")
basedir = os.path.join(propdir, 'raw')

# Per-run total size in bytes of the AGIPD files, before ('raw') and after
# ('proc') processing.
agipd_raw_size = get_run_size(basedir, 'RAW-*-AGIPD*-S*.h5')
agipd_proc_size = get_run_size(
    os.path.join(propdir, 'proc'),
    'CORR-*-AGIPD*-S*.h5',
)

In [None]:
# Both helpers open every run folder below <propdir>/raw, so this cell may
# take a while for proposals with many runs.
frac = get_lff_train_fraction(propdir)
npulse = get_npulse(propdir)

In [None]:
# Combine the per-run tables and keep only runs with processed data
# (proc_size > 0) and a pulse count above 0.5.
d = filter_le(
    join_attributes(
        raw_size=agipd_raw_size,
        proc_size=agipd_proc_size,
        npulse=npulse,
    ),
    proc_size=0,
    npulse=0.5,
)

In [None]:
# One row per run: the run number followed by the record's values in the order
# in which the attributes were passed to join_attributes.
r = np.array([[runno, *attrs.values()] for runno, attrs in d.items()])

In [None]:
from matplotlib.scale import LogScale

def plot_run_sizes(r, ratios=()):
    """Scatter processed size against raw size, one point per run, on log2 axes.

    Parameters
    ----------
    r : numpy.ndarray
        2-D array with one row per run. Column 1 is drawn on the x axis
        (labelled 'raw size'), column 2 on the y axis ('proc size') and
        column 3 is the pulse count used to group and label the points.
        Column 0 is not read here.
    ratios : iterable of float, optional
        proc/raw ratios to highlight. Each one gets a dotted line in colour
        'C1', a percentage label where the line leaves the plot area and a
        'photonization' caption drawn along the line.

    Returns
    -------
    (matplotlib.figure.Figure, matplotlib.axes.Axes)
        The new figure and its main axes.
    """
    # ``ratios`` is walked twice below; materialise it so one-shot iterators
    # work too (and so no mutable default is needed).
    ratios = list(ratios)

    fig, ax = plt.subplots(1, 1, figsize=(9, 6), clear=True)

    # One scatter series per distinct pulse count.
    npls_uniq, ix = np.unique(r[:,3], return_inverse=True)
    for i, npulse in enumerate(npls_uniq):
        if npulse == 0:
            continue
        ri = r[(ix == i), :]
        ax.scatter(ri[:, 1], ri[:, 2], alpha=0.6, label=f"{npulse} pulses, {len(ri)} runs")

    # Guide lines through (sz, sz * ratio) with slope 1.
    # NOTE(review): these are added while the axes are still linear and the
    # log scale is applied afterwards, which appears to turn them into lines
    # of constant proc/raw ratio. matplotlib documents ``slope`` as undefined
    # for non-linear scales, so keep this statement order.
    sz = max(r[:, 2])
    for i in [1e-4, 0.001, .01, .1, 1.]:
        ax.axline((sz, sz*i), slope=1, ls=':', c='gray')

    for i in ratios:
        ax.axline((sz, sz*i), slope=1, ls=':', c='C1')
        

    ax.set_xscale(LogScale(ax, base=2))
    ax.set_yscale(LogScale(ax, base=2))
    ax.legend()

    ax.xaxis.set_major_formatter(lambda x, pos: frm_bytes(x))
    ax.yaxis.set_major_formatter(lambda x, pos: frm_bytes(x))

    # The secondary axes express a position on the right/top edge as a
    # ratio: y / x_max on the right edge, y_max / x on the top edge. The view
    # limits are looked up on every call, so later limit changes are followed.
    def yaxis_to_ratio(v):
        return v / ax.xaxis.get_view_interval()[1]

    def yaxis_to_abs(v):
        return v * ax.xaxis.get_view_interval()[1]

    def xaxis_to_ratio(v):
        _, vmax = ax.yaxis.get_view_interval()
        v = np.asarray(v, dtype=float)
        # ``where=`` without ``out=`` would leave the skipped entries
        # uninitialised; pre-fill them with inf (the limit of vmax / v).
        out = np.full(v.shape, np.inf)
        np.divide(vmax, v, out=out, where=(v != 0))
        return out

    secay = ax.secondary_yaxis('right', functions=(
        yaxis_to_ratio, yaxis_to_abs
    ))
    secay.yaxis.set_major_formatter(lambda x, pos: f"{x*100:.0g}%")

    # v -> y_max / v is its own inverse, hence the same function twice.
    secax = ax.secondary_xaxis('top', functions=(
        xaxis_to_ratio, xaxis_to_ratio
    ))
    secax.xaxis.set_major_formatter(lambda x, pos: f"{x*100:.3g}%")

    ax.set_ylim(min(r[:, 2]) * 0.5, max(r[:, 2]) * 1.5)
    ax.set_xlim(min(r[:, 1]) * 0.75, max(r[:, 1]) * 1.25)
    
    for i in ratios:
        xl, xh = ax.get_xlim()
        y = xh * i
        
        yl, yh = ax.get_ylim()
        x = yh / i
        
        # Label the line where it leaves the plot: above the top edge if it
        # reaches y_max before x_max, otherwise right of the right edge.
        if x < xh:
            ax.text(x, yh * 1.15, f"{i:.2%}", ha='center', va='bottom')
        else:
            ax.text(xh * 1.05, y, f"{i:.2%}", va='center', ha='left')
            
            
        # Rotate the caption by the on-screen angle of the ratio line, taken
        # from the display coordinates of two points on it.
        x0 = xl + 0.01 * (xh - xl)
        l = np.array((x0, x0 * i * 1.3))
        t = ax.transData.transform([[xl, xl*i], [xh, xh*i]])
        angle = np.arctan2(t[1,1] - t[0,1], t[1,0] - t[0,0]) / np.pi * 180
        ax.text(*l, 'photonization', rotation=angle, rotation_mode='anchor',
                va='center', ha='center', alpha=0.75, color="C1")
        
    

    ax.set_xlabel('raw size')
    ax.set_ylabel('proc size')
    secay.set_ylabel('reduction ratio')

    return fig, ax
    

In [None]:
# Highlighted ratio: sum over runs of proc_size / npulse * 352, divided by the
# total raw size.
# NOTE(review): 352 is presumably the number of AGIPD memory cells -- confirm.
cr = (r[:, 2] / r[:, 3] * 352).sum() / r[:, 1].sum()

plot_run_sizes(r, ratios=[cr])
plt.show()

In [None]:
# Rescale the processed sizes by the per-run fraction. The records are copied
# and the result is bound to a new name, so re-running this cell does not
# apply the correction a second time; ``d`` keeps the unadjusted values.
d_adj = filter_adj_size({k: dict(v) for k, v in d.items()}, frac, 'proc_size')
r = np.array([[k,] + list(i.values()) for k, i in d_adj.items()])

cr = np.sum(r[:, 2] / r[:, 3] * 352) / np.sum(r[:, 1])
print(cr)

In [None]:
# Draw the size plot again with the current ``r`` and ``cr``; the figure
# handle is kept so the figure can be saved afterwards.
fig, ax = plot_run_sizes(r, ratios=[cr])
plt.show()

In [None]:
# Save ``fig`` as a PNG named after the proposal number; the relative file
# name resolves against the current working directory.
fig.savefig(f'redratio_p{propno:06d}.png', dpi=200, facecolor='white')